summaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/twentymin.py
blob: b721ecb0a106a710b6d140d7d21309307196a684 (plain)
    1 # coding: utf-8
    2 from __future__ import unicode_literals
    3 
    4 import re
    5 
    6 from .common import InfoExtractor
    7 from ..utils import remove_end
    8 
    9 
   10 class TwentyMinutenIE(InfoExtractor):
   11     IE_NAME = '20min'
   12     _VALID_URL = r'https?://(?:www\.)?20min\.ch/(?:videotv/*\?.*\bvid=(?P<id>\d+)|(?:[^/]+/)*(?P<display_id>[^/#?]+))'
   13     _TESTS = [{
   14         # regular video
   15         'url': 'http://www.20min.ch/videotv/?vid=469148&cid=2',
   16         'md5': 'b52d6bc6ea6398e6a38f12cfd418149c',
   17         'info_dict': {
   18             'id': '469148',
   19             'ext': 'flv',
   20             'title': '85 000 Franken für 15 perfekte Minuten',
   21             'description': 'Was die Besucher vom Silvesterzauber erwarten können. (Video: Alice Grosjean/Murat Temel)',
   22             'thumbnail': 'http://thumbnails.20min-tv.ch/server063/469148/frame-72-469148.jpg'
   23         }
   24     }, {
   25         # news article with video
   26         'url': 'http://www.20min.ch/schweiz/news/story/-Wir-muessen-mutig-nach-vorne-schauen--22050469',
   27         'md5': 'cd4cbb99b94130cff423e967cd275e5e',
   28         'info_dict': {
   29             'id': '469408',
   30             'display_id': '-Wir-muessen-mutig-nach-vorne-schauen--22050469',
   31             'ext': 'flv',
   32             'title': '«Wir müssen mutig nach vorne schauen»',
   33             'description': 'Kein Land sei innovativer als die Schweiz, sagte Johann Schneider-Ammann in seiner Neujahrsansprache. Das Land müsse aber seine Hausaufgaben machen.',
   34             'thumbnail': 'http://www.20min.ch/images/content/2/2/0/22050469/10/teaserbreit.jpg'
   35         },
   36         'skip': '"This video is no longer available" is shown both on the web page and in the downloaded file.',
   37     }, {
   38         # YouTube embed
   39         'url': 'http://www.20min.ch/ro/sports/football/story/Il-marque-une-bicyclette-de-plus-de-30-metres--21115184',
   40         'md5': 'cec64d59aa01c0ed9dbba9cf639dd82f',
   41         'info_dict': {
   42             'id': 'ivM7A7SpDOs',
   43             'ext': 'mp4',
   44             'title': 'GOLAZO DE CHILENA DE JAVI GÓMEZ, FINALISTA AL BALÓN DE CLM 2016',
   45             'description': 'md5:903c92fbf2b2f66c09de514bc25e9f5a',
   46             'upload_date': '20160424',
   47             'uploader': 'RTVCM Castilla-La Mancha',
   48             'uploader_id': 'RTVCM',
   49         },
   50         'add_ie': ['Youtube'],
   51     }, {
   52         'url': 'http://www.20min.ch/videotv/?cid=44&vid=468738',
   53         'only_matching': True,
   54     }, {
   55         'url': 'http://www.20min.ch/ro/sortir/cinema/story/Grandir-au-bahut--c-est-dur-18927411',
   56         'only_matching': True,
   57     }]
   58 
   59     def _real_extract(self, url):
   60         mobj = re.match(self._VALID_URL, url)
   61         video_id = mobj.group('id')
   62         display_id = mobj.group('display_id') or video_id
   63 
   64         webpage = self._download_webpage(url, display_id)
   65 
   66         youtube_url = self._html_search_regex(
   67             r'<iframe[^>]+src="((?:https?:)?//www\.youtube\.com/embed/[^"]+)"',
   68             webpage, 'YouTube embed URL', default=None)
   69         if youtube_url is not None:
   70             return self.url_result(youtube_url, 'Youtube')
   71 
   72         title = self._html_search_regex(
   73             r'<h1>.*?<span>(.+?)</span></h1>',
   74             webpage, 'title', default=None)
   75         if not title:
   76             title = remove_end(re.sub(
   77                 r'^20 [Mm]inuten.*? -', '', self._og_search_title(webpage)), ' - News')
   78 
   79         if not video_id:
   80             video_id = self._search_regex(
   81                 r'"file\d?"\s*,\s*\"(\d+)', webpage, 'video id')
   82 
   83         description = self._html_search_meta(
   84             'description', webpage, 'description')
   85         thumbnail = self._og_search_thumbnail(webpage)
   86 
   87         return {
   88             'id': video_id,
   89             'display_id': display_id,
   90             'url': 'http://speed.20min-tv.ch/%sm.flv' % video_id,
   91             'title': title,
   92             'description': description,
   93             'thumbnail': thumbnail,
   94         }

Generated by cgit