summaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/cbslocal.py
blob: 3b7e1a8b9f655dc7fcde1e663b538e02175f055b (plain)
    1 # coding: utf-8
    2 from __future__ import unicode_literals
    3 
    4 from .anvato import AnvatoIE
    5 from .sendtonews import SendtoNewsIE
    6 from ..compat import compat_urlparse
    7 from ..utils import (
    8     parse_iso8601,
    9     unified_timestamp,
   10 )
   11 
   12 
   13 class CBSLocalIE(AnvatoIE):
   14     _VALID_URL_BASE = r'https?://[a-z]+\.cbslocal\.com/'
   15     _VALID_URL = _VALID_URL_BASE + r'video/(?P<id>\d+)'
   16 
   17     _TESTS = [{
   18         'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
   19         'info_dict': {
   20             'id': '3580809',
   21             'ext': 'mp4',
   22             'title': 'A Very Blue Anniversary',
   23             'description': 'CBS2’s Cindy Hsu has more.',
   24             'thumbnail': 're:^https?://.*',
   25             'timestamp': int,
   26             'upload_date': r're:^\d{8}$',
   27             'uploader': 'CBS',
   28             'subtitles': {
   29                 'en': 'mincount:5',
   30             },
   31             'categories': [
   32                 'Stations\\Spoken Word\\WCBSTV',
   33                 'Syndication\\AOL',
   34                 'Syndication\\MSN',
   35                 'Syndication\\NDN',
   36                 'Syndication\\Yahoo',
   37                 'Content\\News',
   38                 'Content\\News\\Local News',
   39             ],
   40             'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
   41         },
   42         'params': {
   43             'skip_download': True,
   44         },
   45     }]
   46 
   47     def _real_extract(self, url):
   48         mcp_id = self._match_id(url)
   49         return self.url_result(
   50             'anvato:anvato_cbslocal_app_web_prod_547f3e49241ef0e5d30c79b2efbca5d92c698f67:' + mcp_id, 'Anvato', mcp_id)
   51 
   52 
   53 class CBSLocalArticleIE(AnvatoIE):
   54     _VALID_URL = CBSLocalIE._VALID_URL_BASE + r'\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
   55 
   56     _TESTS = [{
   57         # Anvato backend
   58         'url': 'http://losangeles.cbslocal.com/2016/05/16/safety-advocates-say-fatal-car-seat-failures-are-public-health-crisis',
   59         'md5': 'f0ee3081e3843f575fccef901199b212',
   60         'info_dict': {
   61             'id': '3401037',
   62             'ext': 'mp4',
   63             'title': 'Safety Advocates Say Fatal Car Seat Failures Are \'Public Health Crisis\'',
   64             'description': 'Collapsing seats have been the focus of scrutiny for decades, though experts say remarkably little has been done to address the issue. Randy Paige reports.',
   65             'thumbnail': 're:^https?://.*',
   66             'timestamp': 1463440500,
   67             'upload_date': '20160516',
   68             'uploader': 'CBS',
   69             'subtitles': {
   70                 'en': 'mincount:5',
   71             },
   72             'categories': [
   73                 'Stations\\Spoken Word\\KCBSTV',
   74                 'Syndication\\MSN',
   75                 'Syndication\\NDN',
   76                 'Syndication\\AOL',
   77                 'Syndication\\Yahoo',
   78                 'Syndication\\Tribune',
   79                 'Syndication\\Curb.tv',
   80                 'Content\\News'
   81             ],
   82             'tags': ['CBS 2 News Evening'],
   83         },
   84     }, {
   85         # SendtoNews embed
   86         'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
   87         'info_dict': {
   88             'id': 'GxfCe0Zo7D-175909-5588',
   89         },
   90         'playlist_count': 9,
   91         'params': {
   92             # m3u8 download
   93             'skip_download': True,
   94         },
   95     }]
   96 
   97     def _real_extract(self, url):
   98         display_id = self._match_id(url)
   99         webpage = self._download_webpage(url, display_id)
  100 
  101         sendtonews_url = SendtoNewsIE._extract_url(webpage)
  102         if sendtonews_url:
  103             return self.url_result(
  104                 compat_urlparse.urljoin(url, sendtonews_url),
  105                 ie=SendtoNewsIE.ie_key())
  106 
  107         info_dict = self._extract_anvato_videos(webpage, display_id)
  108 
  109         timestamp = unified_timestamp(self._html_search_regex(
  110             r'class="(?:entry|post)-date"[^>]*>([^<]+)', webpage,
  111             'released date', default=None)) or parse_iso8601(
  112             self._html_search_meta('uploadDate', webpage))
  113 
  114         info_dict.update({
  115             'display_id': display_id,
  116             'timestamp': timestamp,
  117         })
  118 
  119         return info_dict

Generated by cgit