summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor/ynet.py
blob: c4ae4d88eb0f64dd24d63c7e882ab49552a962c2 (plain)
    1 # coding: utf-8
    2 from __future__ import unicode_literals
    3 
    4 import re
    5 import json
    6 
    7 from .common import InfoExtractor
    8 from ..compat import compat_urllib_parse_unquote_plus
    9 
   10 
   11 class YnetIE(InfoExtractor):
   12     _VALID_URL = r'https?://(?:.+?\.)?ynet\.co\.il/(?:.+?/)?0,7340,(?P<id>L(?:-[0-9]+)+),00\.html'
   13     _TESTS = [
   14         {
   15             'url': 'http://hot.ynet.co.il/home/0,7340,L-11659-99244,00.html',
   16             'info_dict': {
   17                 'id': 'L-11659-99244',
   18                 'ext': 'flv',
   19                 'title': 'איש לא יודע מאיפה באנו',
   20                 'thumbnail': r're:^https?://.*\.jpg',
   21             }
   22         }, {
   23             'url': 'http://hot.ynet.co.il/home/0,7340,L-8859-84418,00.html',
   24             'info_dict': {
   25                 'id': 'L-8859-84418',
   26                 'ext': 'flv',
   27                 'title': "צפו: הנשיקה הלוהטת של תורגי' ויוליה פלוטקין",
   28                 'thumbnail': r're:^https?://.*\.jpg',
   29             }
   30         }
   31     ]
   32 
   33     def _real_extract(self, url):
   34         video_id = self._match_id(url)
   35         webpage = self._download_webpage(url, video_id)
   36 
   37         content = compat_urllib_parse_unquote_plus(self._og_search_video_url(webpage))
   38         config = json.loads(self._search_regex(r'config=({.+?})$', content, 'video config'))
   39         f4m_url = config['clip']['url']
   40         title = self._og_search_title(webpage)
   41         m = re.search(r'ynet - HOT -- (["\']+)(?P<title>.+?)\1', title)
   42         if m:
   43             title = m.group('title')
   44         formats = self._extract_f4m_formats(f4m_url, video_id)
   45         self._sort_formats(formats)
   46 
   47         return {
   48             'id': video_id,
   49             'title': title,
   50             'formats': formats,
   51             'thumbnail': self._og_search_thumbnail(webpage),
   52         }

Generated by cgit