summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor/telegraaf.py
blob: 5174898f2ab0da614c731ebd6a82234ca5b90f73 (plain)
    1 # coding: utf-8
    2 from __future__ import unicode_literals
    3 
    4 from .common import InfoExtractor
    5 from ..utils import (
    6     determine_ext,
    7     int_or_none,
    8     parse_iso8601,
    9     try_get,
   10 )
   11 
   12 
   13 class TelegraafIE(InfoExtractor):
   14     _VALID_URL = r'https?://(?:www\.)?telegraaf\.nl/video/(?P<id>\d+)'
   15     _TEST = {
   16         'url': 'https://www.telegraaf.nl/video/734366489/historisch-scheepswrak-slaat-na-100-jaar-los',
   17         'info_dict': {
   18             'id': 'gaMItuoSeUg2',
   19             'ext': 'mp4',
   20             'title': 'Historisch scheepswrak slaat na 100 jaar los',
   21             'description': 'md5:6f53b7c4f55596722ac24d6c0ec00cfb',
   22             'thumbnail': r're:^https?://.*\.jpg',
   23             'duration': 55,
   24             'timestamp': 1572805527,
   25             'upload_date': '20191103',
   26         },
   27         'params': {
   28             # m3u8 download
   29             'skip_download': True,
   30         },
   31     }
   32 
   33     def _real_extract(self, url):
   34         article_id = self._match_id(url)
   35 
   36         video_id = self._download_json(
   37             'https://app.telegraaf.nl/graphql', article_id,
   38             headers={'User-Agent': 'De Telegraaf/6.8.11 (Android 11; en_US)'},
   39             query={
   40                 'query': '''{
   41   article(uid: %s) {
   42     videos {
   43       videoId
   44     }
   45   }
   46 }''' % article_id,
   47             })['data']['article']['videos'][0]['videoId']
   48 
   49         item = self._download_json(
   50             'https://content.tmgvideo.nl/playlist/item=%s/playlist.json' % video_id,
   51             video_id)['items'][0]
   52         title = item['title']
   53 
   54         formats = []
   55         locations = item.get('locations') or {}
   56         for location in locations.get('adaptive', []):
   57             manifest_url = location.get('src')
   58             if not manifest_url:
   59                 continue
   60             ext = determine_ext(manifest_url)
   61             if ext == 'm3u8':
   62                 formats.extend(self._extract_m3u8_formats(
   63                     manifest_url, video_id, ext='mp4', m3u8_id='hls', fatal=False))
   64             elif ext == 'mpd':
   65                 formats.extend(self._extract_mpd_formats(
   66                     manifest_url, video_id, mpd_id='dash', fatal=False))
   67             else:
   68                 self.report_warning('Unknown adaptive format %s' % ext)
   69         for location in locations.get('progressive', []):
   70             src = try_get(location, lambda x: x['sources'][0]['src'])
   71             if not src:
   72                 continue
   73             label = location.get('label')
   74             formats.append({
   75                 'url': src,
   76                 'width': int_or_none(location.get('width')),
   77                 'height': int_or_none(location.get('height')),
   78                 'format_id': 'http' + ('-%s' % label if label else ''),
   79             })
   80 
   81         self._sort_formats(formats)
   82 
   83         return {
   84             'id': video_id,
   85             'title': title,
   86             'description': item.get('description'),
   87             'formats': formats,
   88             'duration': int_or_none(item.get('duration')),
   89             'thumbnail': item.get('poster'),
   90             'timestamp': parse_iso8601(item.get('datecreated'), ' '),
   91         }

Generated by cgit