summaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/cspan.py
blob: d5730684dc497b37d7ff57098f5a156ff620e40e (plain)
    1 import re
    2 
    3 from .common import InfoExtractor
    4 from ..utils import (
    5     compat_urllib_parse,
    6 )
    7 
    8 class CSpanIE(InfoExtractor):
    9     _VALID_URL = r'http://www\.c-spanvideo\.org/program/(.*)'
   10     _TEST = {
   11         u'url': u'http://www.c-spanvideo.org/program/HolderonV',
   12         u'file': u'315139.flv',
   13         u'md5': u'74a623266956f69e4df0068ab6c80fe4',
   14         u'info_dict': {
   15             u"title": u"Attorney General Eric Holder on Voting Rights Act Decision"
   16         },
   17         u'skip': u'Requires rtmpdump'
   18     }
   19 
   20     def _real_extract(self, url):
   21         mobj = re.match(self._VALID_URL, url)
   22         prog_name = mobj.group(1)
   23         webpage = self._download_webpage(url, prog_name)
   24         video_id = self._search_regex(r'programid=(.*?)&', webpage, 'video id')
   25         data = compat_urllib_parse.urlencode({'programid': video_id,
   26                                               'dynamic':'1'})
   27         info_url = 'http://www.c-spanvideo.org/common/services/flashXml.php?' + data
   28         video_info = self._download_webpage(info_url, video_id, u'Downloading video info')
   29 
   30         self.report_extraction(video_id)
   31 
   32         title = self._html_search_regex(r'<string name="title">(.*?)</string>',
   33                                         video_info, 'title')
   34         description = self._html_search_regex(r'<meta (?:property="og:|name=")description" content="(.*?)"',
   35                                               webpage, 'description',
   36                                               flags=re.MULTILINE|re.DOTALL)
   37 
   38         url = self._search_regex(r'<string name="URL">(.*?)</string>',
   39                                  video_info, 'video url')
   40         url = url.replace('$(protocol)', 'rtmp').replace('$(port)', '443')
   41         path = self._search_regex(r'<string name="path">(.*?)</string>',
   42                             video_info, 'rtmp play path')
   43 
   44         return {'id': video_id,
   45                 'title': title,
   46                 'ext': 'flv',
   47                 'url': url,
   48                 'play_path': path,
   49                 'description': description,
   50                 'thumbnail': self._og_search_thumbnail(webpage),
   51                 }

Generated by cgit