youtube_dl/extractor/drtv.py



    1 # coding: utf-8
    2 from __future__ import unicode_literals
    3 
    4 from .common import InfoExtractor
    5 from ..utils import (
    6     ExtractorError,
    7     int_or_none,
    8     float_or_none,
    9     mimetype2ext,
   10     parse_iso8601,
   11     remove_end,
   12     update_url_query,
   13 )
   14 
   15 
   16 class DRTVIE(InfoExtractor):
   17     _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv/se|nyheder|radio/ondemand)/(?:[^/]+/)*(?P<id>[\da-z-]+)(?:[/#?]|$)'
   18     _GEO_BYPASS = False
   19     _GEO_COUNTRIES = ['DK']
   20     IE_NAME = 'drtv'
   21     _TESTS = [{
   22         'url': 'https://www.dr.dk/tv/se/boern/ultra/klassen-ultra/klassen-darlig-taber-10',
   23         'md5': '7ae17b4e18eb5d29212f424a7511c184',
   24         'info_dict': {
   25             'id': 'klassen-darlig-taber-10',
   26             'ext': 'mp4',
   27             'title': 'Klassen - Dårlig taber (10)',
   28             'description': 'md5:815fe1b7fa656ed80580f31e8b3c79aa',
   29             'timestamp': 1471991907,
   30             'upload_date': '20160823',
   31             'duration': 606.84,
   32         },
   33     }, {
   34         # embed
   35         'url': 'https://www.dr.dk/nyheder/indland/live-christianias-rydning-af-pusher-street-er-i-gang',
   36         'info_dict': {
   37             'id': 'christiania-pusher-street-ryddes-drdkrjpo',
   38             'ext': 'mp4',
   39             'title': 'LIVE Christianias rydning af Pusher Street er i gang',
   40             'description': 'md5:2a71898b15057e9b97334f61d04e6eb5',
   41             'timestamp': 1472800279,
   42             'upload_date': '20160902',
   43             'duration': 131.4,
   44         },
   45         'params': {
   46             'skip_download': True,
   47         },
   48     }, {
   49         # with SignLanguage formats
   50         'url': 'https://www.dr.dk/tv/se/historien-om-danmark/-/historien-om-danmark-stenalder',
   51         'info_dict': {
   52             'id': 'historien-om-danmark-stenalder',
   53             'ext': 'mp4',
   54             'title': 'Historien om Danmark: Stenalder (1)',
   55             'description': 'md5:8c66dcbc1669bbc6f873879880f37f2a',
   56             'timestamp': 1490401996,
   57             'upload_date': '20170325',
   58             'duration': 3502.04,
   59             'formats': 'mincount:20',
   60         },
   61         'params': {
   62             'skip_download': True,
   63         },
   64     }]
   65 
   66     def _real_extract(self, url):
   67         video_id = self._match_id(url)
   68 
   69         webpage = self._download_webpage(url, video_id)
   70 
   71         if '>Programmet er ikke længere tilgængeligt' in webpage:
   72             raise ExtractorError(
   73                 'Video %s is not available' % video_id, expected=True)
   74 
   75         video_id = self._search_regex(
   76             (r'data-(?:material-identifier|episode-slug)="([^"]+)"',
   77                 r'data-resource="[^>"]+mu/programcard/expanded/([^"]+)"'),
   78             webpage, 'video id')
   79 
   80         programcard = self._download_json(
   81             'http://www.dr.dk/mu/programcard/expanded/%s' % video_id,
   82             video_id, 'Downloading video JSON')
   83         data = programcard['Data'][0]
   84 
   85         title = remove_end(self._og_search_title(
   86             webpage, default=None), ' | TV | DR') or data['Title']
   87         description = self._og_search_description(
   88             webpage, default=None) or data.get('Description')
   89 
   90         timestamp = parse_iso8601(data.get('CreatedTime'))
   91 
   92         thumbnail = None
   93         duration = None
   94 
   95         restricted_to_denmark = False
   96 
   97         formats = []
   98         subtitles = {}
   99 
  100         for asset in data['Assets']:
  101             kind = asset.get('Kind')
  102             if kind == 'Image':
  103                 thumbnail = asset.get('Uri')
  104             elif kind in ('VideoResource', 'AudioResource'):
  105                 duration = float_or_none(asset.get('DurationInMilliseconds'), 1000)
  106                 restricted_to_denmark = asset.get('RestrictedToDenmark')
  107                 asset_target = asset.get('Target')
  108                 for link in asset.get('Links', []):
  109                     uri = link.get('Uri')
  110                     if not uri:
  111                         continue
  112                     target = link.get('Target')
  113                     format_id = target or ''
  114                     preference = None
  115                     if asset_target in ('SpokenSubtitles', 'SignLanguage'):
  116                         preference = -1
  117                         format_id += '-%s' % asset_target
  118                     if target == 'HDS':
  119                         f4m_formats = self._extract_f4m_formats(
  120                             uri + '?hdcore=3.3.0&plugin=aasp-3.3.0.99.43',
  121                             video_id, preference, f4m_id=format_id, fatal=False)
  122                         if kind == 'AudioResource':
  123                             for f in f4m_formats:
  124                                 f['vcodec'] = 'none'
  125                         formats.extend(f4m_formats)
  126                     elif target == 'HLS':
  127                         formats.extend(self._extract_m3u8_formats(
  128                             uri, video_id, 'mp4', entry_protocol='m3u8_native',
  129                             preference=preference, m3u8_id=format_id,
  130                             fatal=False))
  131                     else:
  132                         bitrate = link.get('Bitrate')
  133                         if bitrate:
  134                             format_id += '-%s' % bitrate
  135                         formats.append({
  136                             'url': uri,
  137                             'format_id': format_id,
  138                             'tbr': int_or_none(bitrate),
  139                             'ext': link.get('FileFormat'),
  140                             'vcodec': 'none' if kind == 'AudioResource' else None,
  141                             'preference': preference,
  142                         })
  143                 subtitles_list = asset.get('SubtitlesList')
  144                 if isinstance(subtitles_list, list):
  145                     LANGS = {
  146                         'Danish': 'da',
  147                     }
  148                     for subs in subtitles_list:
  149                         if not subs.get('Uri'):
  150                             continue
  151                         lang = subs.get('Language') or 'da'
  152                         subtitles.setdefault(LANGS.get(lang, lang), []).append({
  153                             'url': subs['Uri'],
  154                             'ext': mimetype2ext(subs.get('MimeType')) or 'vtt'
  155                         })
  156 
  157         if not formats and restricted_to_denmark:
  158             self.raise_geo_restricted(
  159                 'Unfortunately, DR is not allowed to show this program outside Denmark.',
  160                 countries=self._GEO_COUNTRIES)
  161 
  162         self._sort_formats(formats)
  163 
  164         return {
  165             'id': video_id,
  166             'title': title,
  167             'description': description,
  168             'thumbnail': thumbnail,
  169             'timestamp': timestamp,
  170             'duration': duration,
  171             'formats': formats,
  172             'subtitles': subtitles,
  173         }
  174 
  175 
  176 class DRTVLiveIE(InfoExtractor):
  177     IE_NAME = 'drtv:live'
  178     _VALID_URL = r'https?://(?:www\.)?dr\.dk/(?:tv|TV)/live/(?P<id>[\da-z-]+)'
  179     _GEO_COUNTRIES = ['DK']
  180     _TEST = {
  181         'url': 'https://www.dr.dk/tv/live/dr1',
  182         'info_dict': {
  183             'id': 'dr1',
  184             'ext': 'mp4',
  185             'title': 're:^DR1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
  186         },
  187         'params': {
  188             # m3u8 download
  189             'skip_download': True,
  190         },
  191     }
  192 
  193     def _real_extract(self, url):
  194         channel_id = self._match_id(url)
  195         channel_data = self._download_json(
  196             'https://www.dr.dk/mu-online/api/1.0/channel/' + channel_id,
  197             channel_id)
  198         title = self._live_title(channel_data['Title'])
  199 
  200         formats = []
  201         for streaming_server in channel_data.get('StreamingServers', []):
  202             server = streaming_server.get('Server')
  203             if not server:
  204                 continue
  205             link_type = streaming_server.get('LinkType')
  206             for quality in streaming_server.get('Qualities', []):
  207                 for stream in quality.get('Streams', []):
  208                     stream_path = stream.get('Stream')
  209                     if not stream_path:
  210                         continue
  211                     stream_url = update_url_query(
  212                         '%s/%s' % (server, stream_path), {'b': ''})
  213                     if link_type == 'HLS':
  214                         formats.extend(self._extract_m3u8_formats(
  215                             stream_url, channel_id, 'mp4',
  216                             m3u8_id=link_type, fatal=False, live=True))
  217                     elif link_type == 'HDS':
  218                         formats.extend(self._extract_f4m_formats(update_url_query(
  219                             '%s/%s' % (server, stream_path), {'hdcore': '3.7.0'}),
  220                             channel_id, f4m_id=link_type, fatal=False))
  221         self._sort_formats(formats)
  222 
  223         return {
  224             'id': channel_id,
  225             'title': title,
  226             'thumbnail': channel_data.get('PrimaryImageUri'),
  227             'formats': formats,
  228             'is_live': True,
  229         }