youtube_dl/extractor/discoverygo.py



    1 from __future__ import unicode_literals
    2 
    3 from .common import InfoExtractor
    4 from ..compat import compat_str
    5 from ..utils import (
    6     extract_attributes,
    7     int_or_none,
    8     parse_age_limit,
    9     ExtractorError,
   10 )
   11 
   12 
   13 class DiscoveryGoIE(InfoExtractor):
   14     _VALID_URL = r'''(?x)https?://(?:www\.)?(?:
   15             discovery|
   16             investigationdiscovery|
   17             discoverylife|
   18             animalplanet|
   19             ahctv|
   20             destinationamerica|
   21             sciencechannel|
   22             tlc|
   23             velocitychannel
   24         )go\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'''
   25     _TEST = {
   26         'url': 'https://www.discoverygo.com/love-at-first-kiss/kiss-first-ask-questions-later/',
   27         'info_dict': {
   28             'id': '57a33c536b66d1cd0345eeb1',
   29             'ext': 'mp4',
   30             'title': 'Kiss First, Ask Questions Later!',
   31             'description': 'md5:fe923ba34050eae468bffae10831cb22',
   32             'duration': 2579,
   33             'series': 'Love at First Kiss',
   34             'season_number': 1,
   35             'episode_number': 1,
   36             'age_limit': 14,
   37         },
   38     }
   39 
   40     def _real_extract(self, url):
   41         display_id = self._match_id(url)
   42 
   43         webpage = self._download_webpage(url, display_id)
   44 
   45         container = extract_attributes(
   46             self._search_regex(
   47                 r'(<div[^>]+class=["\']video-player-container[^>]+>)',
   48                 webpage, 'video container'))
   49 
   50         video = self._parse_json(
   51             container.get('data-video') or container.get('data-json'),
   52             display_id)
   53 
   54         title = video['name']
   55 
   56         stream = video.get('stream')
   57         if not stream:
   58             if video.get('authenticated') is True:
   59                 raise ExtractorError(
   60                     'This video is only available via cable service provider subscription that'
   61                     ' is not currently supported. You may want to use --cookies.', expected=True)
   62             else:
   63                 raise ExtractorError('Unable to find stream')
   64         STREAM_URL_SUFFIX = 'streamUrl'
   65         formats = []
   66         for stream_kind in ('', 'hds'):
   67             suffix = STREAM_URL_SUFFIX.capitalize() if stream_kind else STREAM_URL_SUFFIX
   68             stream_url = stream.get('%s%s' % (stream_kind, suffix))
   69             if not stream_url:
   70                 continue
   71             if stream_kind == '':
   72                 formats.extend(self._extract_m3u8_formats(
   73                     stream_url, display_id, 'mp4', entry_protocol='m3u8_native',
   74                     m3u8_id='hls', fatal=False))
   75             elif stream_kind == 'hds':
   76                 formats.extend(self._extract_f4m_formats(
   77                     stream_url, display_id, f4m_id=stream_kind, fatal=False))
   78         self._sort_formats(formats)
   79 
   80         video_id = video.get('id') or display_id
   81         description = video.get('description', {}).get('detailed')
   82         duration = int_or_none(video.get('duration'))
   83 
   84         series = video.get('show', {}).get('name')
   85         season_number = int_or_none(video.get('season', {}).get('number'))
   86         episode_number = int_or_none(video.get('episodeNumber'))
   87 
   88         tags = video.get('tags')
   89         age_limit = parse_age_limit(video.get('parental', {}).get('rating'))
   90 
   91         subtitles = {}
   92         captions = stream.get('captions')
   93         if isinstance(captions, list):
   94             for caption in captions:
   95                 subtitle_url = caption.get('fileUrl')
   96                 if (not subtitle_url or not isinstance(subtitle_url, compat_str) or
   97                         not subtitle_url.startswith('http')):
   98                     continue
   99                 lang = caption.get('fileLang', 'en')
  100                 subtitles.setdefault(lang, []).append({'url': subtitle_url})
  101 
  102         return {
  103             'id': video_id,
  104             'display_id': display_id,
  105             'title': title,
  106             'description': description,
  107             'duration': duration,
  108             'series': series,
  109             'season_number': season_number,
  110             'episode_number': episode_number,
  111             'tags': tags,
  112             'age_limit': age_limit,
  113             'formats': formats,
  114             'subtitles': subtitles,
  115         }