summaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/cbslocal.py
blob: 8d5f11dd11de8bb85a9f6a2ddc86710a65c56a94 (plain)
    1 # coding: utf-8
    2 from __future__ import unicode_literals
    3 
    4 from .anvato import AnvatoIE
    5 from .sendtonews import SendtoNewsIE
    6 from ..compat import compat_urlparse
    7 from ..utils import (
    8     parse_iso8601,
    9     unified_timestamp,
   10 )
   11 
   12 
   13 class CBSLocalIE(AnvatoIE):
   14     _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)'
   15 
   16     _TESTS = [{
   17         # Anvato backend
   18         'url': 'http://losangeles.cbslocal.com/2016/05/16/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis',
   19         'md5': 'f0ee3081e3843f575fccef901199b212',
   20         'info_dict': {
   21             'id': '3401037',
   22             'ext': 'mp4',
   23             'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'',
   24             'description': 'Collapsing seats have been the focus of scrutiny for decades, though experts say remarkably little has been done to address the issue. Randy Paige reports.',
   25             'thumbnail': 're:^https?://.*',
   26             'timestamp': 1463440500,
   27             'upload_date': '20160516',
   28             'uploader': 'CBS',
   29             'subtitles': {
   30                 'en': 'mincount:5',
   31             },
   32             'categories': [
   33                 'Stations\\Spoken Word\\KCBSTV',
   34                 'Syndication\\MSN',
   35                 'Syndication\\NDN',
   36                 'Syndication\\AOL',
   37                 'Syndication\\Yahoo',
   38                 'Syndication\\Tribune',
   39                 'Syndication\\Curb.tv',
   40                 'Content\\News'
   41             ],
   42             'tags': ['CBS 2 News Evening'],
   43         },
   44     }, {
   45         # SendtoNews embed
   46         'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
   47         'info_dict': {
   48             'id': 'GxfCe0Zo7D-175909-5588',
   49         },
   50         'playlist_count': 9,
   51         'params': {
   52             # m3u8 download
   53             'skip_download': True,
   54         },
   55     }, {
   56         'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
   57         'info_dict': {
   58             'id': '3580809',
   59             'ext': 'mp4',
   60             'title': 'A Very Blue Anniversary',
   61             'description': 'CBS2’s Cindy Hsu has more.',
   62             'thumbnail': 're:^https?://.*',
   63             'timestamp': 1479962220,
   64             'upload_date': '20161124',
   65             'uploader': 'CBS',
   66             'subtitles': {
   67                 'en': 'mincount:5',
   68             },
   69             'categories': [
   70                 'Stations\\Spoken Word\\WCBSTV',
   71                 'Syndication\\AOL',
   72                 'Syndication\\MSN',
   73                 'Syndication\\NDN',
   74                 'Syndication\\Yahoo',
   75                 'Content\\News',
   76                 'Content\\News\\Local News',
   77             ],
   78             'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
   79         },
   80     }]
   81 
   82     def _real_extract(self, url):
   83         display_id = self._match_id(url)
   84         webpage = self._download_webpage(url, display_id)
   85 
   86         sendtonews_url = SendtoNewsIE._extract_url(webpage)
   87         if sendtonews_url:
   88             return self.url_result(
   89                 compat_urlparse.urljoin(url, sendtonews_url),
   90                 ie=SendtoNewsIE.ie_key())
   91 
   92         info_dict = self._extract_anvato_videos(webpage, display_id)
   93 
   94         time_str = self._html_search_regex(
   95             r'class="entry-date">([^<]+)<', webpage, 'released date', default=None)
   96         if time_str:
   97             timestamp = unified_timestamp(time_str)
   98         else:
   99             timestamp = parse_iso8601(self._html_search_meta('uploadDate', webpage))
  100 
  101         info_dict.update({
  102             'display_id': display_id,
  103             'timestamp': timestamp,
  104         })
  105 
  106         return info_dict

Generated by cgit