summary refs log tree commit diff
path: root/youtube_dl/extractor/viewster.py
blob: d5d5b4c69ff466bbd245289d5e83fc68bc92069e (plain)
    1 # coding: utf-8
    2 from __future__ import unicode_literals
    3 
    4 import re
    5 
    6 from .common import InfoExtractor
    7 from ..compat import (
    8     compat_HTTPError,
    9     compat_urllib_parse_unquote,
   10 )
   11 from ..utils import (
   12     determine_ext,
   13     ExtractorError,
   14     int_or_none,
   15     parse_iso8601,
   16     sanitized_Request,
   17     HEADRequest,
   18     url_basename,
   19 )
   20 
   21 
   22 class ViewsterIE(InfoExtractor):
   23     _VALID_URL = r'https?://(?:www\.)?viewster\.com/(?:serie|movie)/(?P<id>\d+-\d+-\d+)'
   24     _TESTS = [{
   25         # movie, Type=Movie
   26         'url': 'http://www.viewster.com/movie/1140-11855-000/the-listening-project/',
   27         'md5': 'e642d1b27fcf3a4ffa79f194f5adde36',
   28         'info_dict': {
   29             'id': '1140-11855-000',
   30             'ext': 'mp4',
   31             'title': 'The listening Project',
   32             'description': 'md5:bac720244afd1a8ea279864e67baa071',
   33             'timestamp': 1214870400,
   34             'upload_date': '20080701',
   35             'duration': 4680,
   36         },
   37     }, {
   38         # series episode, Type=Episode
   39         'url': 'http://www.viewster.com/serie/1284-19427-001/the-world-and-a-wall/',
   40         'md5': '9243079a8531809efe1b089db102c069',
   41         'info_dict': {
   42             'id': '1284-19427-001',
   43             'ext': 'mp4',
   44             'title': 'The World and a Wall',
   45             'description': 'md5:24814cf74d3453fdf5bfef9716d073e3',
   46             'timestamp': 1428192000,
   47             'upload_date': '20150405',
   48             'duration': 1500,
   49         },
   50     }, {
   51         # serie, Type=Serie
   52         'url': 'http://www.viewster.com/serie/1303-19426-000/',
   53         'info_dict': {
   54             'id': '1303-19426-000',
   55             'title': 'Is It Wrong to Try to Pick up Girls in a Dungeon?',
   56             'description': 'md5:eeda9bef25b0d524b3a29a97804c2f11',
   57         },
   58         'playlist_count': 13,
   59     }, {
   60         # unfinished serie, no Type
   61         'url': 'http://www.viewster.com/serie/1284-19427-000/baby-steps-season-2/',
   62         'info_dict': {
   63             'id': '1284-19427-000',
   64             'title': 'Baby Steps—Season 2',
   65             'description': 'md5:e7097a8fc97151e25f085c9eb7a1cdb1',
   66         },
   67         'playlist_mincount': 16,
   68     }, {
   69         # geo restricted series
   70         'url': 'https://www.viewster.com/serie/1280-18794-002/',
   71         'only_matching': True,
   72     }, {
   73         # geo restricted video
   74         'url': 'https://www.viewster.com/serie/1280-18794-002/what-is-extraterritoriality-lawo/',
   75         'only_matching': True,
   76     }]
   77 
   78     _ACCEPT_HEADER = 'application/json, text/javascript, */*; q=0.01'
   79 
   80     def _download_json(self, url, video_id, note='Downloading JSON metadata', fatal=True, query={}):
   81         request = sanitized_Request(url)
   82         request.add_header('Accept', self._ACCEPT_HEADER)
   83         request.add_header('Auth-token', self._AUTH_TOKEN)
   84         return super(ViewsterIE, self)._download_json(request, video_id, note, fatal=fatal, query=query)
   85 
   86     def _real_extract(self, url):
   87         video_id = self._match_id(url)
   88         # Get 'api_token' cookie
   89         self._request_webpage(
   90             HEADRequest('http://www.viewster.com/'),
   91             video_id, headers=self.geo_verification_headers())
   92         cookies = self._get_cookies('http://www.viewster.com/')
   93         self._AUTH_TOKEN = compat_urllib_parse_unquote(cookies['api_token'].value)
   94 
   95         info = self._download_json(
   96             'https://public-api.viewster.com/search/%s' % video_id,
   97             video_id, 'Downloading entry JSON')
   98 
   99         entry_id = info.get('Id') or info['id']
  100 
  101         # unfinished serie has no Type
  102         if info.get('Type') in ('Serie', None):
  103             try:
  104                 episodes = self._download_json(
  105                     'https://public-api.viewster.com/series/%s/episodes' % entry_id,
  106                     video_id, 'Downloading series JSON')
  107             except ExtractorError as e:
  108                 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
  109                     self.raise_geo_restricted()
  110                 else:
  111                     raise
  112             entries = [
  113                 self.url_result(
  114                     'http://www.viewster.com/movie/%s' % episode['OriginId'], 'Viewster')
  115                 for episode in episodes]
  116             title = (info.get('Title') or info['Synopsis']['Title']).strip()
  117             description = info.get('Synopsis', {}).get('Detailed')
  118             return self.playlist_result(entries, video_id, title, description)
  119 
  120         formats = []
  121         for language_set in info.get('LanguageSets', []):
  122             manifest_url = None
  123             m3u8_formats = []
  124             audio = language_set.get('Audio') or ''
  125             subtitle = language_set.get('Subtitle') or ''
  126             base_format_id = audio
  127             if subtitle:
  128                 base_format_id += '-%s' % subtitle
  129 
  130             def concat(suffix, sep='-'):
  131                 return (base_format_id + '%s%s' % (sep, suffix)) if base_format_id else suffix
  132 
  133             for media_type in ('application/f4m+xml', 'application/x-mpegURL', 'video/mp4'):
  134                 media = self._download_json(
  135                     'https://public-api.viewster.com/movies/%s/video' % entry_id,
  136                     video_id, 'Downloading %s JSON' % concat(media_type, ' '), fatal=False, query={
  137                         'mediaType': media_type,
  138                         'language': audio,
  139                         'subtitle': subtitle,
  140                     })
  141                 if not media:
  142                     continue
  143                 video_url = media.get('Uri')
  144                 if not video_url:
  145                     continue
  146                 ext = determine_ext(video_url)
  147                 if ext == 'f4m':
  148                     manifest_url = video_url
  149                     video_url += '&' if '?' in video_url else '?'
  150                     video_url += 'hdcore=3.2.0&plugin=flowplayer-3.2.0.1'
  151                     formats.extend(self._extract_f4m_formats(
  152                         video_url, video_id, f4m_id=concat('hds')))
  153                 elif ext == 'm3u8':
  154                     manifest_url = video_url
  155                     m3u8_formats = self._extract_m3u8_formats(
  156                         video_url, video_id, 'mp4', m3u8_id=concat('hls'),
  157                         fatal=False)  # m3u8 sometimes fail
  158                     if m3u8_formats:
  159                         formats.extend(m3u8_formats)
  160                 else:
  161                     qualities_basename = self._search_regex(
  162                         r'/([^/]+)\.csmil/',
  163                         manifest_url, 'qualities basename', default=None)
  164                     if not qualities_basename:
  165                         continue
  166                     QUALITIES_RE = r'((,\d+k)+,?)'
  167                     qualities = self._search_regex(
  168                         QUALITIES_RE, qualities_basename,
  169                         'qualities', default=None)
  170                     if not qualities:
  171                         continue
  172                     qualities = list(map(lambda q: int(q[:-1]), qualities.strip(',').split(',')))
  173                     qualities.sort()
  174                     http_template = re.sub(QUALITIES_RE, r'%dk', qualities_basename)
  175                     http_url_basename = url_basename(video_url)
  176                     if m3u8_formats:
  177                         self._sort_formats(m3u8_formats)
  178                         m3u8_formats = list(filter(
  179                             lambda f: f.get('vcodec') != 'none', m3u8_formats))
  180                     if len(qualities) == len(m3u8_formats):
  181                         for q, m3u8_format in zip(qualities, m3u8_formats):
  182                             f = m3u8_format.copy()
  183                             f.update({
  184                                 'url': video_url.replace(http_url_basename, http_template % q),
  185                                 'format_id': f['format_id'].replace('hls', 'http'),
  186                                 'protocol': 'http',
  187                             })
  188                             formats.append(f)
  189                     else:
  190                         for q in qualities:
  191                             formats.append({
  192                                 'url': video_url.replace(http_url_basename, http_template % q),
  193                                 'ext': 'mp4',
  194                                 'format_id': 'http-%d' % q,
  195                                 'tbr': q,
  196                             })
  197 
  198         if not formats and not info.get('VODSettings'):
  199             self.raise_geo_restricted()
  200 
  201         self._sort_formats(formats)
  202 
  203         synopsis = info.get('Synopsis') or {}
  204         # Prefer title outside synopsis since it's less messy
  205         title = (info.get('Title') or synopsis['Title']).strip()
  206         description = synopsis.get('Detailed') or (info.get('Synopsis') or {}).get('Short')
  207         duration = int_or_none(info.get('Duration'))
  208         timestamp = parse_iso8601(info.get('ReleaseDate'))
  209 
  210         return {
  211             'id': video_id,
  212             'title': title,
  213             'description': description,
  214             'timestamp': timestamp,
  215             'duration': duration,
  216             'formats': formats,
  217         }

Generated by cgit