summary refs log tree commit diff
path: root/youtube_dl/extractor/crackle.py
blob: 49bf3a4f9ef0ecb239f9916367e2efff0af16b61 (plain)
    1 # coding: utf-8
    2 from __future__ import unicode_literals, division
    3 
    4 import hashlib
    5 import hmac
    6 import re
    7 import time
    8 
    9 from .common import InfoExtractor
   10 from ..compat import compat_HTTPError
   11 from ..utils import (
   12     determine_ext,
   13     float_or_none,
   14     int_or_none,
   15     parse_age_limit,
   16     parse_duration,
   17     url_or_none,
   18     ExtractorError
   19 )
   20 
   21 
   22 class CrackleIE(InfoExtractor):
   23     _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?(?:sony)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
   24     _TESTS = [{
   25         # geo restricted to CA
   26         'url': 'https://www.crackle.com/andromeda/2502343',
   27         'info_dict': {
   28             'id': '2502343',
   29             'ext': 'mp4',
   30             'title': 'Under The Night',
   31             'description': 'md5:d2b8ca816579ae8a7bf28bfff8cefc8a',
   32             'duration': 2583,
   33             'view_count': int,
   34             'average_rating': 0,
   35             'age_limit': 14,
   36             'genre': 'Action, Sci-Fi',
   37             'creator': 'Allan Kroeker',
   38             'artist': 'Keith Hamilton Cobb, Kevin Sorbo, Lisa Ryder, Lexa Doig, Robert Hewitt Wolfe',
   39             'release_year': 2000,
   40             'series': 'Andromeda',
   41             'episode': 'Under The Night',
   42             'season_number': 1,
   43             'episode_number': 1,
   44         },
   45         'params': {
   46             # m3u8 download
   47             'skip_download': True,
   48         }
   49     }, {
   50         'url': 'https://www.sonycrackle.com/andromeda/2502343',
   51         'only_matching': True,
   52     }]
   53 
   54     _MEDIA_FILE_SLOTS = {
   55         '360p.mp4': {
   56             'width': 640,
   57             'height': 360,
   58         },
   59         '480p.mp4': {
   60             'width': 768,
   61             'height': 432,
   62         },
   63         '480p_1mbps.mp4': {
   64             'width': 852,
   65             'height': 480,
   66         },
   67     }
   68 
   69     def _real_extract(self, url):
   70         video_id = self._match_id(url)
   71 
   72         country_code = self._downloader.params.get('geo_bypass_country', None)
   73         countries = [country_code] if country_code else (
   74             'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI')
   75 
   76         last_e = None
   77 
   78         for country in countries:
   79             try:
   80                 # Authorization generation algorithm is reverse engineered from:
   81                 # https://www.sonycrackle.com/static/js/main.ea93451f.chunk.js
   82                 media_detail_url = 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s?disableProtocols=true' % (video_id, country)
   83                 timestamp = time.strftime('%Y%m%d%H%M', time.gmtime())
   84                 h = hmac.new(b'IGSLUQCBDFHEOIFM', '|'.join([media_detail_url, timestamp]).encode(), hashlib.sha1).hexdigest().upper()
   85                 media = self._download_json(
   86                     media_detail_url, video_id, 'Downloading media JSON as %s' % country,
   87                     'Unable to download media JSON', headers={
   88                         'Accept': 'application/json',
   89                         'Authorization': '|'.join([h, timestamp, '117', '1']),
   90                     })
   91             except ExtractorError as e:
   92                 # 401 means geo restriction, trying next country
   93                 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
   94                     last_e = e
   95                     continue
   96                 raise
   97 
   98             media_urls = media.get('MediaURLs')
   99             if not media_urls or not isinstance(media_urls, list):
  100                 continue
  101 
  102             title = media['Title']
  103 
  104             formats = []
  105             for e in media['MediaURLs']:
  106                 if e.get('UseDRM') is True:
  107                     continue
  108                 format_url = url_or_none(e.get('Path'))
  109                 if not format_url:
  110                     continue
  111                 ext = determine_ext(format_url)
  112                 if ext == 'm3u8':
  113                     formats.extend(self._extract_m3u8_formats(
  114                         format_url, video_id, 'mp4', entry_protocol='m3u8_native',
  115                         m3u8_id='hls', fatal=False))
  116                 elif ext == 'mpd':
  117                     formats.extend(self._extract_mpd_formats(
  118                         format_url, video_id, mpd_id='dash', fatal=False))
  119                 elif format_url.endswith('.ism/Manifest'):
  120                     formats.extend(self._extract_ism_formats(
  121                         format_url, video_id, ism_id='mss', fatal=False))
  122                 else:
  123                     mfs_path = e.get('Type')
  124                     mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path)
  125                     if not mfs_info:
  126                         continue
  127                     formats.append({
  128                         'url': format_url,
  129                         'format_id': 'http-' + mfs_path.split('.')[0],
  130                         'width': mfs_info['width'],
  131                         'height': mfs_info['height'],
  132                     })
  133             self._sort_formats(formats)
  134 
  135             description = media.get('Description')
  136             duration = int_or_none(media.get(
  137                 'DurationInSeconds')) or parse_duration(media.get('Duration'))
  138             view_count = int_or_none(media.get('CountViews'))
  139             average_rating = float_or_none(media.get('UserRating'))
  140             age_limit = parse_age_limit(media.get('Rating'))
  141             genre = media.get('Genre')
  142             release_year = int_or_none(media.get('ReleaseYear'))
  143             creator = media.get('Directors')
  144             artist = media.get('Cast')
  145 
  146             if media.get('MediaTypeDisplayValue') == 'Full Episode':
  147                 series = media.get('ShowName')
  148                 episode = title
  149                 season_number = int_or_none(media.get('Season'))
  150                 episode_number = int_or_none(media.get('Episode'))
  151             else:
  152                 series = episode = season_number = episode_number = None
  153 
  154             subtitles = {}
  155             cc_files = media.get('ClosedCaptionFiles')
  156             if isinstance(cc_files, list):
  157                 for cc_file in cc_files:
  158                     if not isinstance(cc_file, dict):
  159                         continue
  160                     cc_url = url_or_none(cc_file.get('Path'))
  161                     if not cc_url:
  162                         continue
  163                     lang = cc_file.get('Locale') or 'en'
  164                     subtitles.setdefault(lang, []).append({'url': cc_url})
  165 
  166             thumbnails = []
  167             images = media.get('Images')
  168             if isinstance(images, list):
  169                 for image_key, image_url in images.items():
  170                     mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
  171                     if not mobj:
  172                         continue
  173                     thumbnails.append({
  174                         'url': image_url,
  175                         'width': int(mobj.group(1)),
  176                         'height': int(mobj.group(2)),
  177                     })
  178 
  179             return {
  180                 'id': video_id,
  181                 'title': title,
  182                 'description': description,
  183                 'duration': duration,
  184                 'view_count': view_count,
  185                 'average_rating': average_rating,
  186                 'age_limit': age_limit,
  187                 'genre': genre,
  188                 'creator': creator,
  189                 'artist': artist,
  190                 'release_year': release_year,
  191                 'series': series,
  192                 'episode': episode,
  193                 'season_number': season_number,
  194                 'episode_number': episode_number,
  195                 'thumbnails': thumbnails,
  196                 'subtitles': subtitles,
  197                 'formats': formats,
  198             }
  199 
  200         raise last_e

Generated by cgit