summaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/espn.py
blob: 8795e0ddf5e26f676a421173a0e1fd019cd112cb (plain)
    1 from __future__ import unicode_literals
    2 
    3 from .common import InfoExtractor
    4 from ..compat import compat_str
    5 from ..utils import (
    6     determine_ext,
    7     int_or_none,
    8     unified_timestamp,
    9 )
   10 
   11 
   12 class ESPNIE(InfoExtractor):
   13     _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/video/clip(?:\?.*?\bid=|/_/id/)(?P<id>\d+)'
   14     _TESTS = [{
   15         'url': 'http://espn.go.com/video/clip?id=10365079',
   16         'info_dict': {
   17             'id': '10365079',
   18             'ext': 'mp4',
   19             'title': '30 for 30 Shorts: Judging Jewell',
   20             'description': 'md5:39370c2e016cb4ecf498ffe75bef7f0f',
   21             'timestamp': 1390936111,
   22             'upload_date': '20140128',
   23         },
   24         'params': {
   25             'skip_download': True,
   26         },
   27     }, {
   28         # intl video, from http://www.espnfc.us/video/mls-highlights/150/video/2743663/must-see-moments-best-of-the-mls-season
   29         'url': 'http://espn.go.com/video/clip?id=2743663',
   30         'info_dict': {
   31             'id': '2743663',
   32             'ext': 'mp4',
   33             'title': 'Must-See Moments: Best of the MLS season',
   34             'description': 'md5:4c2d7232beaea572632bec41004f0aeb',
   35             'timestamp': 1449446454,
   36             'upload_date': '20151207',
   37         },
   38         'params': {
   39             'skip_download': True,
   40         },
   41         'expected_warnings': ['Unable to download f4m manifest'],
   42     }, {
   43         'url': 'http://www.espn.com/video/clip?id=10365079',
   44         'only_matching': True,
   45     }, {
   46         'url': 'http://www.espn.com/video/clip/_/id/17989860',
   47         'only_matching': True,
   48     }]
   49 
   50     def _real_extract(self, url):
   51         video_id = self._match_id(url)
   52 
   53         clip = self._download_json(
   54             'http://api-app.espn.com/v1/video/clips/%s' % video_id,
   55             video_id)['videos'][0]
   56 
   57         title = clip['headline']
   58 
   59         format_urls = set()
   60         formats = []
   61 
   62         def traverse_source(source, base_source_id=None):
   63             for source_id, source in source.items():
   64                 if isinstance(source, compat_str):
   65                     extract_source(source, base_source_id)
   66                 elif isinstance(source, dict):
   67                     traverse_source(
   68                         source,
   69                         '%s-%s' % (base_source_id, source_id)
   70                         if base_source_id else source_id)
   71 
   72         def extract_source(source_url, source_id=None):
   73             if source_url in format_urls:
   74                 return
   75             format_urls.add(source_url)
   76             ext = determine_ext(source_url)
   77             if ext == 'smil':
   78                 formats.extend(self._extract_smil_formats(
   79                     source_url, video_id, fatal=False))
   80             elif ext == 'f4m':
   81                 formats.extend(self._extract_f4m_formats(
   82                     source_url, video_id, f4m_id=source_id, fatal=False))
   83             elif ext == 'm3u8':
   84                 formats.extend(self._extract_m3u8_formats(
   85                     source_url, video_id, 'mp4', entry_protocol='m3u8_native',
   86                     m3u8_id=source_id, fatal=False))
   87             else:
   88                 formats.append({
   89                     'url': source_url,
   90                     'format_id': source_id,
   91                 })
   92 
   93         traverse_source(clip['links']['source'])
   94         self._sort_formats(formats)
   95 
   96         description = clip.get('caption') or clip.get('description')
   97         thumbnail = clip.get('thumbnail')
   98         duration = int_or_none(clip.get('duration'))
   99         timestamp = unified_timestamp(clip.get('originalPublishDate'))
  100 
  101         return {
  102             'id': video_id,
  103             'title': title,
  104             'description': description,
  105             'thumbnail': thumbnail,
  106             'timestamp': timestamp,
  107             'duration': duration,
  108             'formats': formats,
  109         }
  110 
  111 
  112 class ESPNArticleIE(InfoExtractor):
  113     _VALID_URL = r'https?://(?:espn\.go|(?:www\.)?espn)\.com/(?:[^/]+/)*(?P<id>[^/]+)'
  114     _TESTS = [{
  115         'url': 'https://espn.go.com/video/iframe/twitter/?cms=espn&id=10365079',
  116         'only_matching': True,
  117     }, {
  118         'url': 'http://espn.go.com/nba/recap?gameId=400793786',
  119         'only_matching': True,
  120     }, {
  121         'url': 'http://espn.go.com/blog/golden-state-warriors/post/_/id/593/how-warriors-rapidly-regained-a-winning-edge',
  122         'only_matching': True,
  123     }, {
  124         'url': 'http://espn.go.com/sports/endurance/story/_/id/12893522/dzhokhar-tsarnaev-sentenced-role-boston-marathon-bombings',
  125         'only_matching': True,
  126     }, {
  127         'url': 'http://espn.go.com/nba/playoffs/2015/story/_/id/12887571/john-wall-washington-wizards-no-swelling-left-hand-wrist-game-5-return',
  128         'only_matching': True,
  129     }]
  130 
  131     @classmethod
  132     def suitable(cls, url):
  133         return False if ESPNIE.suitable(url) else super(ESPNArticleIE, cls).suitable(url)
  134 
  135     def _real_extract(self, url):
  136         video_id = self._match_id(url)
  137 
  138         webpage = self._download_webpage(url, video_id)
  139 
  140         video_id = self._search_regex(
  141             r'class=(["\']).*?video-play-button.*?\1[^>]+data-id=["\'](?P<id>\d+)',
  142             webpage, 'video id', group='id')
  143 
  144         return self.url_result(
  145             'http://espn.go.com/video/clip?id=%s' % video_id, ESPNIE.ie_key())

Generated by cgit