youtube_dl/extractor/dctp.py



    1 # coding: utf-8
    2 from __future__ import unicode_literals
    3 
    4 from .common import InfoExtractor
    5 from ..compat import compat_str
    6 from ..utils import (
    7     float_or_none,
    8     unified_strdate,
    9 )
   10 
   11 
   12 class DctpTvIE(InfoExtractor):
   13     _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
   14     _TEST = {
   15         'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
   16         'info_dict': {
   17             'id': '95eaa4f33dad413aa17b4ee613cccc6c',
   18             'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
   19             'ext': 'flv',
   20             'title': 'Videoinstallation für eine Kaufhausfassade',
   21             'description': 'Kurzfilm',
   22             'upload_date': '20110407',
   23             'thumbnail': r're:^https?://.*\.jpg$',
   24             'duration': 71.24,
   25         },
   26         'params': {
   27             # rtmp download
   28             'skip_download': True,
   29         },
   30     }
   31 
   32     def _real_extract(self, url):
   33         display_id = self._match_id(url)
   34 
   35         webpage = self._download_webpage(url, display_id)
   36 
   37         video_id = self._html_search_meta(
   38             'DC.identifier', webpage, 'video id',
   39             default=None) or self._search_regex(
   40             r'id=["\']uuid[^>]+>([^<]+)<', webpage, 'video id')
   41 
   42         title = self._og_search_title(webpage)
   43 
   44         servers = self._download_json(
   45             'http://www.dctp.tv/streaming_servers/', display_id,
   46             note='Downloading server list', fatal=False)
   47 
   48         if servers:
   49             endpoint = next(
   50                 server['endpoint']
   51                 for server in servers
   52                 if isinstance(server.get('endpoint'), compat_str) and
   53                 'cloudfront' in server['endpoint'])
   54         else:
   55             endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
   56 
   57         app = self._search_regex(
   58             r'^rtmpe?://[^/]+/(?P<app>.*)$', endpoint, 'app')
   59 
   60         formats = [{
   61             'url': endpoint,
   62             'app': app,
   63             'play_path': 'mp4:%s_dctp_0500_4x3.m4v' % video_id,
   64             'page_url': url,
   65             'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-109.swf',
   66             'ext': 'flv',
   67         }]
   68 
   69         description = self._html_search_meta('DC.description', webpage)
   70         upload_date = unified_strdate(
   71             self._html_search_meta('DC.date.created', webpage))
   72         thumbnail = self._og_search_thumbnail(webpage)
   73         duration = float_or_none(self._search_regex(
   74             r'id=["\']duration_in_ms[^+]>(\d+)', webpage, 'duration',
   75             default=None), scale=1000)
   76 
   77         return {
   78             'id': video_id,
   79             'title': title,
   80             'formats': formats,
   81             'display_id': display_id,
   82             'description': description,
   83             'upload_date': upload_date,
   84             'thumbnail': thumbnail,
   85             'duration': duration,
   86         }