youtube_dl/extractor/yapfiles.py



    1 # coding: utf-8
    2 from __future__ import unicode_literals
    3 
    4 import re
    5 
    6 from .common import InfoExtractor
    7 from ..utils import (
    8     ExtractorError,
    9     int_or_none,
   10     qualities,
   11     unescapeHTML,
   12     url_or_none,
   13 )
   14 
   15 
   16 class YapFilesIE(InfoExtractor):
   17     _YAPFILES_URL = r'//(?:(?:www|api)\.)?yapfiles\.ru/get_player/*\?.*?\bv=(?P<id>\w+)'
   18     _VALID_URL = r'https?:%s' % _YAPFILES_URL
   19     _TESTS = [{
   20         # with hd
   21         'url': 'http://www.yapfiles.ru/get_player/?v=vMDE1NjcyNDUt0413',
   22         'md5': '2db19e2bfa2450568868548a1aa1956c',
   23         'info_dict': {
   24             'id': 'vMDE1NjcyNDUt0413',
   25             'ext': 'mp4',
   26             'title': 'Самый худший пароль WIFI',
   27             'thumbnail': r're:^https?://.*\.jpg$',
   28             'duration': 72,
   29         },
   30     }, {
   31         # without hd
   32         'url': 'https://api.yapfiles.ru/get_player/?uid=video_player_1872528&plroll=1&adv=1&v=vMDE4NzI1Mjgt690b',
   33         'only_matching': True,
   34     }]
   35 
   36     @staticmethod
   37     def _extract_urls(webpage):
   38         return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
   39             r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.*?)\1'
   40             % YapFilesIE._YAPFILES_URL, webpage)]
   41 
   42     def _real_extract(self, url):
   43         video_id = self._match_id(url)
   44 
   45         webpage = self._download_webpage(url, video_id, fatal=False)
   46 
   47         player_url = None
   48         query = {}
   49         if webpage:
   50             player_url = self._search_regex(
   51                 r'player\.init\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
   52                 'player url', default=None, group='url')
   53 
   54         if not player_url:
   55             player_url = 'http://api.yapfiles.ru/load/%s/' % video_id
   56             query = {
   57                 'md5': 'ded5f369be61b8ae5f88e2eeb2f3caff',
   58                 'type': 'json',
   59                 'ref': url,
   60             }
   61 
   62         player = self._download_json(
   63             player_url, video_id, query=query)['player']
   64 
   65         playlist_url = player['playlist']
   66         title = player['title']
   67         thumbnail = player.get('poster')
   68 
   69         if title == 'Ролик удален' or 'deleted.jpg' in (thumbnail or ''):
   70             raise ExtractorError(
   71                 'Video %s has been removed' % video_id, expected=True)
   72 
   73         playlist = self._download_json(
   74             playlist_url, video_id)['player']['main']
   75 
   76         hd_height = int_or_none(player.get('hd'))
   77 
   78         QUALITIES = ('sd', 'hd')
   79         quality_key = qualities(QUALITIES)
   80         formats = []
   81         for format_id in QUALITIES:
   82             is_hd = format_id == 'hd'
   83             format_url = url_or_none(playlist.get(
   84                 'file%s' % ('_hd' if is_hd else '')))
   85             if not format_url:
   86                 continue
   87             formats.append({
   88                 'url': format_url,
   89                 'format_id': format_id,
   90                 'quality': quality_key(format_id),
   91                 'height': hd_height if is_hd else None,
   92             })
   93         self._sort_formats(formats)
   94 
   95         return {
   96             'id': video_id,
   97             'title': title,
   98             'thumbnail': thumbnail,
   99             'duration': int_or_none(player.get('length')),
  100             'formats': formats,
  101         }