youtube_dl/extractor/curiositystream.py



    1 # coding: utf-8
    2 from __future__ import unicode_literals
    3 
    4 import re
    5 
    6 from .common import InfoExtractor
    7 from ..utils import (
    8     int_or_none,
    9     urlencode_postdata,
   10     compat_str,
   11     ExtractorError,
   12 )
   13 
   14 
   15 class CuriosityStreamBaseIE(InfoExtractor):
   16     _NETRC_MACHINE = 'curiositystream'
   17     _auth_token = None
   18     _API_BASE_URL = 'https://api.curiositystream.com/v1/'
   19 
   20     def _handle_errors(self, result):
   21         error = result.get('error', {}).get('message')
   22         if error:
   23             if isinstance(error, dict):
   24                 error = ', '.join(error.values())
   25             raise ExtractorError(
   26                 '%s said: %s' % (self.IE_NAME, error), expected=True)
   27 
   28     def _call_api(self, path, video_id):
   29         headers = {}
   30         if self._auth_token:
   31             headers['X-Auth-Token'] = self._auth_token
   32         result = self._download_json(
   33             self._API_BASE_URL + path, video_id, headers=headers)
   34         self._handle_errors(result)
   35         return result['data']
   36 
   37     def _real_initialize(self):
   38         (email, password) = self._get_login_info()
   39         if email is None:
   40             return
   41         result = self._download_json(
   42             self._API_BASE_URL + 'login', None, data=urlencode_postdata({
   43                 'email': email,
   44                 'password': password,
   45             }))
   46         self._handle_errors(result)
   47         self._auth_token = result['message']['auth_token']
   48 
   49     def _extract_media_info(self, media):
   50         video_id = compat_str(media['id'])
   51         title = media['title']
   52 
   53         formats = []
   54         for encoding in media.get('encodings', []):
   55             m3u8_url = encoding.get('master_playlist_url')
   56             if m3u8_url:
   57                 formats.extend(self._extract_m3u8_formats(
   58                     m3u8_url, video_id, 'mp4', 'm3u8_native',
   59                     m3u8_id='hls', fatal=False))
   60             encoding_url = encoding.get('url')
   61             file_url = encoding.get('file_url')
   62             if not encoding_url and not file_url:
   63                 continue
   64             f = {
   65                 'width': int_or_none(encoding.get('width')),
   66                 'height': int_or_none(encoding.get('height')),
   67                 'vbr': int_or_none(encoding.get('video_bitrate')),
   68                 'abr': int_or_none(encoding.get('audio_bitrate')),
   69                 'filesize': int_or_none(encoding.get('size_in_bytes')),
   70                 'vcodec': encoding.get('video_codec'),
   71                 'acodec': encoding.get('audio_codec'),
   72                 'container': encoding.get('container_type'),
   73             }
   74             for f_url in (encoding_url, file_url):
   75                 if not f_url:
   76                     continue
   77                 fmt = f.copy()
   78                 rtmp = re.search(r'^(?P<url>rtmpe?://(?P<host>[^/]+)/(?P<app>.+))/(?P<playpath>mp[34]:.+)$', f_url)
   79                 if rtmp:
   80                     fmt.update({
   81                         'url': rtmp.group('url'),
   82                         'play_path': rtmp.group('playpath'),
   83                         'app': rtmp.group('app'),
   84                         'ext': 'flv',
   85                         'format_id': 'rtmp',
   86                     })
   87                 else:
   88                     fmt.update({
   89                         'url': f_url,
   90                         'format_id': 'http',
   91                     })
   92                 formats.append(fmt)
   93         self._sort_formats(formats)
   94 
   95         subtitles = {}
   96         for closed_caption in media.get('closed_captions', []):
   97             sub_url = closed_caption.get('file')
   98             if not sub_url:
   99                 continue
  100             lang = closed_caption.get('code') or closed_caption.get('language') or 'en'
  101             subtitles.setdefault(lang, []).append({
  102                 'url': sub_url,
  103             })
  104 
  105         return {
  106             'id': video_id,
  107             'formats': formats,
  108             'title': title,
  109             'description': media.get('description'),
  110             'thumbnail': media.get('image_large') or media.get('image_medium') or media.get('image_small'),
  111             'duration': int_or_none(media.get('duration')),
  112             'tags': media.get('tags'),
  113             'subtitles': subtitles,
  114         }
  115 
  116 
  117 class CuriosityStreamIE(CuriosityStreamBaseIE):
  118     IE_NAME = 'curiositystream'
  119     _VALID_URL = r'https?://app\.curiositystream\.com/video/(?P<id>\d+)'
  120     _TEST = {
  121         'url': 'https://app.curiositystream.com/video/2',
  122         'md5': '262bb2f257ff301115f1973540de8983',
  123         'info_dict': {
  124             'id': '2',
  125             'ext': 'mp4',
  126             'title': 'How Did You Develop The Internet?',
  127             'description': 'Vint Cerf, Google\'s Chief Internet Evangelist, describes how he and Bob Kahn created the internet.',
  128         }
  129     }
  130 
  131     def _real_extract(self, url):
  132         video_id = self._match_id(url)
  133         media = self._call_api('media/' + video_id, video_id)
  134         return self._extract_media_info(media)
  135 
  136 
  137 class CuriosityStreamCollectionIE(CuriosityStreamBaseIE):
  138     IE_NAME = 'curiositystream:collection'
  139     _VALID_URL = r'https?://app\.curiositystream\.com/collection/(?P<id>\d+)'
  140     _TEST = {
  141         'url': 'https://app.curiositystream.com/collection/2',
  142         'info_dict': {
  143             'id': '2',
  144             'title': 'Curious Minds: The Internet',
  145             'description': 'How is the internet shaping our lives in the 21st Century?',
  146         },
  147         'playlist_mincount': 12,
  148     }
  149 
  150     def _real_extract(self, url):
  151         collection_id = self._match_id(url)
  152         collection = self._call_api(
  153             'collections/' + collection_id, collection_id)
  154         entries = []
  155         for media in collection.get('media', []):
  156             entries.append(self._extract_media_info(media))
  157         return self.playlist_result(
  158             entries, collection_id,
  159             collection.get('title'), collection.get('description'))