youtube_dl/extractor/gamersyde.py



    1 from __future__ import unicode_literals
    2 
    3 import re
    4 
    5 from .common import InfoExtractor
    6 from ..utils import (
    7     js_to_json,
    8     parse_duration,
    9     remove_start,
   10 )
   11 
   12 
   13 class GamersydeIE(InfoExtractor):
   14     _VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_(?P<display_id>[\da-z_]+)-(?P<id>\d+)_[a-z]{2}\.html'
   15     _TEST = {
   16         'url': 'http://www.gamersyde.com/hqstream_bloodborne_birth_of_a_hero-34371_en.html',
   17         'md5': 'f38d400d32f19724570040d5ce3a505f',
   18         'info_dict': {
   19             'id': '34371',
   20             'ext': 'mp4',
   21             'duration': 372,
   22             'title': 'Bloodborne - Birth of a hero',
   23             'thumbnail': r're:^https?://.*\.jpg$',
   24         }
   25     }
   26 
   27     def _real_extract(self, url):
   28         mobj = re.match(self._VALID_URL, url)
   29         video_id = mobj.group('id')
   30         display_id = mobj.group('display_id')
   31 
   32         webpage = self._download_webpage(url, display_id)
   33 
   34         playlist = self._parse_json(
   35             self._search_regex(
   36                 r'(?s)playlist: \[({.+?})\]\s*}\);', webpage, 'files'),
   37             display_id, transform_source=js_to_json)
   38 
   39         formats = []
   40         for source in playlist['sources']:
   41             video_url = source.get('file')
   42             if not video_url:
   43                 continue
   44             format_id = source.get('label')
   45             f = {
   46                 'url': video_url,
   47                 'format_id': format_id,
   48             }
   49             m = re.search(r'^(?P<height>\d+)[pP](?P<fps>\d+)fps', format_id)
   50             if m:
   51                 f.update({
   52                     'height': int(m.group('height')),
   53                     'fps': int(m.group('fps')),
   54                 })
   55             formats.append(f)
   56         self._sort_formats(formats)
   57 
   58         title = remove_start(playlist['title'], '%s - ' % video_id)
   59         thumbnail = playlist.get('image')
   60         duration = parse_duration(self._search_regex(
   61             r'Length:</label>([^<]+)<', webpage, 'duration', fatal=False))
   62 
   63         return {
   64             'id': video_id,
   65             'display_id': display_id,
   66             'title': title,
   67             'thumbnail': thumbnail,
   68             'duration': duration,
   69             'formats': formats,
   70         }