summaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/biqle.py
blob: 17ebbb25766bb500e6401f55b6105c37fcfd25f5 (plain)
    1 # coding: utf-8
    2 from __future__ import unicode_literals
    3 
    4 from .common import InfoExtractor
    5 from .vk import VKIE
    6 from ..compat import (
    7     compat_b64decode,
    8     compat_urllib_parse_unquote,
    9 )
   10 from ..utils import int_or_none
   11 
   12 
   13 class BIQLEIE(InfoExtractor):
   14     _VALID_URL = r'https?://(?:www\.)?biqle\.(?:com|org|ru)/watch/(?P<id>-?\d+_\d+)'
   15     _TESTS = [{
   16         # Youtube embed
   17         'url': 'https://biqle.ru/watch/-115995369_456239081',
   18         'md5': '97af5a06ee4c29bbf9c001bdb1cf5c06',
   19         'info_dict': {
   20             'id': '8v4f-avW-VI',
   21             'ext': 'mp4',
   22             'title': "PASSE-PARTOUT - L'ete c'est fait pour jouer",
   23             'description': 'Passe-Partout',
   24             'uploader_id': 'mrsimpsonstef3',
   25             'uploader': 'Phanolito',
   26             'upload_date': '20120822',
   27         },
   28     }, {
   29         'url': 'http://biqle.org/watch/-44781847_168547604',
   30         'md5': '7f24e72af1db0edf7c1aaba513174f97',
   31         'info_dict': {
   32             'id': '-44781847_168547604',
   33             'ext': 'mp4',
   34             'title': 'Ребенок в шоке от автоматической мойки',
   35             'timestamp': 1396633454,
   36             'uploader': 'Dmitry Kotov',
   37             'upload_date': '20140404',
   38             'uploader_id': '47850140',
   39         },
   40     }]
   41 
   42     def _real_extract(self, url):
   43         video_id = self._match_id(url)
   44         webpage = self._download_webpage(url, video_id)
   45         embed_url = self._proto_relative_url(self._search_regex(
   46             r'<iframe.+?src="((?:https?:)?//(?:daxab\.com|dxb\.to|[^/]+/player)/[^"]+)".*?></iframe>',
   47             webpage, 'embed url'))
   48         if VKIE.suitable(embed_url):
   49             return self.url_result(embed_url, VKIE.ie_key(), video_id)
   50 
   51         embed_page = self._download_webpage(
   52             embed_url, video_id, headers={'Referer': url})
   53         video_ext = self._get_cookies(embed_url).get('video_ext')
   54         if video_ext:
   55             video_ext = compat_urllib_parse_unquote(video_ext.value)
   56         if not video_ext:
   57             video_ext = compat_b64decode(self._search_regex(
   58                 r'video_ext\s*:\s*[\'"]([A-Za-z0-9+/=]+)',
   59                 embed_page, 'video_ext')).decode()
   60         video_id, sig, _, access_token = video_ext.split(':')
   61         item = self._download_json(
   62             'https://api.vk.com/method/video.get', video_id,
   63             headers={'User-Agent': 'okhttp/3.4.1'}, query={
   64                 'access_token': access_token,
   65                 'sig': sig,
   66                 'v': 5.44,
   67                 'videos': video_id,
   68             })['response']['items'][0]
   69         title = item['title']
   70 
   71         formats = []
   72         for f_id, f_url in item.get('files', {}).items():
   73             if f_id == 'external':
   74                 return self.url_result(f_url)
   75             ext, height = f_id.split('_')
   76             formats.append({
   77                 'format_id': height + 'p',
   78                 'url': f_url,
   79                 'height': int_or_none(height),
   80                 'ext': ext,
   81             })
   82         self._sort_formats(formats)
   83 
   84         thumbnails = []
   85         for k, v in item.items():
   86             if k.startswith('photo_') and v:
   87                 width = k.replace('photo_', '')
   88                 thumbnails.append({
   89                     'id': width,
   90                     'url': v,
   91                     'width': int_or_none(width),
   92                 })
   93 
   94         return {
   95             'id': video_id,
   96             'title': title,
   97             'formats': formats,
   98             'comment_count': int_or_none(item.get('comments')),
   99             'description': item.get('description'),
  100             'duration': int_or_none(item.get('duration')),
  101             'thumbnails': thumbnails,
  102             'timestamp': int_or_none(item.get('date')),
  103             'uploader': item.get('owner_id'),
  104             'view_count': int_or_none(item.get('views')),
  105         }

Generated by cgit