youtube_dl/extractor/vshare.py



    1 # coding: utf-8
    2 from __future__ import unicode_literals
    3 
    4 import re
    5 
    6 from .common import InfoExtractor
    7 from ..compat import compat_chr
    8 from ..utils import (
    9     decode_packed_codes,
   10     ExtractorError,
   11 )
   12 
   13 
   14 class VShareIE(InfoExtractor):
   15     _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
   16     _TESTS = [{
   17         'url': 'https://vshare.io/d/0f64ce6',
   18         'md5': '17b39f55b5497ae8b59f5fbce8e35886',
   19         'info_dict': {
   20             'id': '0f64ce6',
   21             'title': 'vl14062007715967',
   22             'ext': 'mp4',
   23         }
   24     }, {
   25         'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
   26         'only_matching': True,
   27     }]
   28 
   29     @staticmethod
   30     def _extract_urls(webpage):
   31         return re.findall(
   32             r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)',
   33             webpage)
   34 
   35     def _extract_packed(self, webpage):
   36         packed = self._search_regex(
   37             r'(eval\(function.+)', webpage, 'packed code')
   38         unpacked = decode_packed_codes(packed)
   39         digits = self._search_regex(r'\[((?:\d+,?)+)\]', unpacked, 'digits')
   40         digits = [int(digit) for digit in digits.split(',')]
   41         key_digit = self._search_regex(
   42             r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit')
   43         chars = [compat_chr(d - int(key_digit)) for d in digits]
   44         return ''.join(chars)
   45 
   46     def _real_extract(self, url):
   47         video_id = self._match_id(url)
   48 
   49         webpage = self._download_webpage(
   50             'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
   51             video_id)
   52 
   53         title = self._html_search_regex(
   54             r'<title>([^<]+)</title>', webpage, 'title')
   55         title = title.split(' - ')[0]
   56 
   57         error = self._html_search_regex(
   58             r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage,
   59             'error', default=None)
   60         if error:
   61             raise ExtractorError(error, expected=True)
   62 
   63         info = self._parse_html5_media_entries(
   64             url, '<video>%s</video>' % self._extract_packed(webpage),
   65             video_id)[0]
   66 
   67         self._sort_formats(info['formats'])
   68 
   69         info.update({
   70             'id': video_id,
   71             'title': title,
   72         })
   73 
   74         return info