summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor/golem.py
blob: 47a068e742bc0b1a8ae92f55da4834ea628005ae (plain)
    1 # coding: utf-8
    2 from __future__ import unicode_literals
    3 
    4 from .common import InfoExtractor
    5 from ..compat import (
    6     compat_str,
    7     compat_urlparse,
    8 )
    9 from ..utils import (
   10     determine_ext,
   11 )
   12 
   13 
   14 class GolemIE(InfoExtractor):
   15     _VALID_URL = r'^https?://video\.golem\.de/.+?/(?P<id>.+?)/'
   16     _TEST = {
   17         'url': 'http://video.golem.de/handy/14095/iphone-6-und-6-plus-test.html',
   18         'md5': 'c1a2c0a3c863319651c7c992c5ee29bf',
   19         'info_dict': {
   20             'id': '14095',
   21             'format_id': 'high',
   22             'ext': 'mp4',
   23             'title': 'iPhone 6 und 6 Plus - Test',
   24             'duration': 300.44,
   25             'filesize': 65309548,
   26         }
   27     }
   28 
   29     _PREFIX = 'http://video.golem.de'
   30 
   31     def _real_extract(self, url):
   32         video_id = self._match_id(url)
   33 
   34         config = self._download_xml(
   35             'https://video.golem.de/xml/{0}.xml'.format(video_id), video_id)
   36 
   37         info = {
   38             'id': video_id,
   39             'title': config.findtext('./title', 'golem'),
   40             'duration': self._float(config.findtext('./playtime'), 'duration'),
   41         }
   42 
   43         formats = []
   44         for e in config:
   45             url = e.findtext('./url')
   46             if not url:
   47                 continue
   48 
   49             formats.append({
   50                 'format_id': compat_str(e.tag),
   51                 'url': compat_urlparse.urljoin(self._PREFIX, url),
   52                 'height': self._int(e.get('height'), 'height'),
   53                 'width': self._int(e.get('width'), 'width'),
   54                 'filesize': self._int(e.findtext('filesize'), 'filesize'),
   55                 'ext': determine_ext(e.findtext('./filename')),
   56             })
   57         self._sort_formats(formats)
   58         info['formats'] = formats
   59 
   60         thumbnails = []
   61         for e in config.findall('.//teaser'):
   62             url = e.findtext('./url')
   63             if not url:
   64                 continue
   65             thumbnails.append({
   66                 'url': compat_urlparse.urljoin(self._PREFIX, url),
   67                 'width': self._int(e.get('width'), 'thumbnail width'),
   68                 'height': self._int(e.get('height'), 'thumbnail height'),
   69             })
   70         info['thumbnails'] = thumbnails
   71 
   72         return info

Generated by cgit