youtube_dl/extractor/videa.py



    1 # coding: utf-8
    2 from __future__ import unicode_literals
    3 
    4 import random
    5 import re
    6 import string
    7 
    8 from .common import InfoExtractor
    9 from ..utils import (
   10     ExtractorError,
   11     int_or_none,
   12     mimetype2ext,
   13     parse_codecs,
   14     update_url_query,
   15     urljoin,
   16     xpath_element,
   17     xpath_text,
   18 )
   19 from ..compat import (
   20     compat_b64decode,
   21     compat_ord,
   22     compat_struct_pack,
   23     compat_urlparse,
   24 )
   25 
   26 
   27 class VideaIE(InfoExtractor):
   28     _VALID_URL = r'''(?x)
   29                     https?://
   30                         videa(?:kid)?\.hu/
   31                         (?:
   32                             videok/(?:[^/]+/)*[^?#&]+-|
   33                             (?:videojs_)?player\?.*?\bv=|
   34                             player/v/
   35                         )
   36                         (?P<id>[^?#&]+)
   37                     '''
   38     _TESTS = [{
   39         'url': 'http://videa.hu/videok/allatok/az-orult-kigyasz-285-kigyot-kigyo-8YfIAjxwWGwT8HVQ',
   40         'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
   41         'info_dict': {
   42             'id': '8YfIAjxwWGwT8HVQ',
   43             'ext': 'mp4',
   44             'title': 'Az őrült kígyász 285 kígyót enged szabadon',
   45             'thumbnail': r're:^https?://.*',
   46             'duration': 21,
   47         },
   48     }, {
   49         'url': 'http://videa.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
   50         'md5': 'd57ccd8812c7fd491d33b1eab8c99975',
   51         'info_dict': {
   52             'id': 'jAHDWfWSJH5XuFhH',
   53             'ext': 'mp4',
   54             'title': 'Supercars előzés',
   55             'thumbnail': r're:^https?://.*',
   56             'duration': 64,
   57         },
   58     }, {
   59         'url': 'http://videa.hu/player?v=8YfIAjxwWGwT8HVQ',
   60         'md5': '97a7af41faeaffd9f1fc864a7c7e7603',
   61         'info_dict': {
   62             'id': '8YfIAjxwWGwT8HVQ',
   63             'ext': 'mp4',
   64             'title': 'Az őrült kígyász 285 kígyót enged szabadon',
   65             'thumbnail': r're:^https?://.*',
   66             'duration': 21,
   67         },
   68     }, {
   69         'url': 'http://videa.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
   70         'only_matching': True,
   71     }, {
   72         'url': 'https://videakid.hu/videok/origo/jarmuvek/supercars-elozes-jAHDWfWSJH5XuFhH',
   73         'only_matching': True,
   74     }, {
   75         'url': 'https://videakid.hu/player?v=8YfIAjxwWGwT8HVQ',
   76         'only_matching': True,
   77     }, {
   78         'url': 'https://videakid.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1',
   79         'only_matching': True,
   80     }]
   81     _STATIC_SECRET = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p'
   82 
   83     @staticmethod
   84     def _extract_urls(webpage):
   85         return [url for _, url in re.findall(
   86             r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1',
   87             webpage)]
   88 
   89     @staticmethod
   90     def rc4(cipher_text, key):
   91         res = b''
   92 
   93         key_len = len(key)
   94         S = list(range(256))
   95 
   96         j = 0
   97         for i in range(256):
   98             j = (j + S[i] + ord(key[i % key_len])) % 256
   99             S[i], S[j] = S[j], S[i]
  100 
  101         i = 0
  102         j = 0
  103         for m in range(len(cipher_text)):
  104             i = (i + 1) % 256
  105             j = (j + S[i]) % 256
  106             S[i], S[j] = S[j], S[i]
  107             k = S[(S[i] + S[j]) % 256]
  108             res += compat_struct_pack('B', k ^ compat_ord(cipher_text[m]))
  109 
  110         return res.decode('utf-8')
  111 
  112     def _real_extract(self, url):
  113         video_id = self._match_id(url)
  114         video_page = self._download_webpage(url, video_id)
  115 
  116         if 'videa.hu/player' in url:
  117             player_url = url
  118             player_page = video_page
  119         else:
  120             player_url = self._search_regex(
  121                 r'<iframe.*?src="(/player\?[^"]+)"', video_page, 'player url')
  122             player_url = urljoin(url, player_url)
  123             player_page = self._download_webpage(player_url, video_id)
  124 
  125         nonce = self._search_regex(
  126             r'_xt\s*=\s*"([^"]+)"', player_page, 'nonce')
  127         l = nonce[:32]
  128         s = nonce[32:]
  129         result = ''
  130         for i in range(0, 32):
  131             result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)]
  132 
  133         query = compat_urlparse.parse_qs(compat_urlparse.urlparse(player_url).query)
  134         random_seed = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8))
  135         query['_s'] = random_seed
  136         query['_t'] = result[:16]
  137 
  138         b64_info, handle = self._download_webpage_handle(
  139             'http://videa.hu/videaplayer_get_xml.php', video_id, query=query)
  140         if b64_info.startswith('<?xml'):
  141             info = self._parse_xml(b64_info, video_id)
  142         else:
  143             key = result[16:] + random_seed + handle.headers['x-videa-xs']
  144             info = self._parse_xml(self.rc4(
  145                 compat_b64decode(b64_info), key), video_id)
  146 
  147         video = xpath_element(info, './video', 'video')
  148         if video is None:
  149             raise ExtractorError(xpath_element(
  150                 info, './error', fatal=True), expected=True)
  151         sources = xpath_element(
  152             info, './video_sources', 'sources', fatal=True)
  153         hash_values = xpath_element(
  154             info, './hash_values', 'hash values', fatal=False)
  155 
  156         title = xpath_text(video, './title', fatal=True)
  157 
  158         formats = []
  159         for source in sources.findall('./video_source'):
  160             source_url = source.text
  161             source_name = source.get('name')
  162             source_exp = source.get('exp')
  163             if not (source_url and source_name):
  164                 continue
  165             hash_value = (
  166                 xpath_text(hash_values, 'hash_value_' + source_name)
  167                 if hash_values is not None else None)
  168             if hash_value and source_exp:
  169                 source_url = update_url_query(source_url, {
  170                     'md5': hash_value,
  171                     'expires': source_exp,
  172                 })
  173             f = parse_codecs(source.get('codecs'))
  174             f.update({
  175                 'url': self._proto_relative_url(source_url),
  176                 'ext': mimetype2ext(source.get('mimetype')) or 'mp4',
  177                 'format_id': source.get('name'),
  178                 'width': int_or_none(source.get('width')),
  179                 'height': int_or_none(source.get('height')),
  180             })
  181             formats.append(f)
  182         self._sort_formats(formats)
  183 
  184         thumbnail = self._proto_relative_url(xpath_text(video, './poster_src'))
  185 
  186         age_limit = None
  187         is_adult = xpath_text(video, './is_adult_content', default=None)
  188         if is_adult:
  189             age_limit = 18 if is_adult == '1' else 0
  190 
  191         return {
  192             'id': video_id,
  193             'title': title,
  194             'thumbnail': thumbnail,
  195             'duration': int_or_none(xpath_text(video, './duration')),
  196             'age_limit': age_limit,
  197             'formats': formats,
  198         }