summaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/infoq.py
blob: cca0b8a9323c0d2412c65610a3acb3ef2943ba6f (plain)
    1 # coding: utf-8
    2 
    3 from __future__ import unicode_literals
    4 
    5 import base64
    6 
    7 from ..compat import compat_urllib_parse_unquote
    8 from ..utils import determine_ext
    9 from .bokecc import BokeCCBaseIE
   10 
   11 
   12 class InfoQIE(BokeCCBaseIE):
   13     _VALID_URL = r'https?://(?:www\.)?infoq\.com/(?:[^/]+/)+(?P<id>[^/]+)'
   14 
   15     _TESTS = [{
   16         'url': 'http://www.infoq.com/presentations/A-Few-of-My-Favorite-Python-Things',
   17         'md5': 'b5ca0e0a8c1fed93b0e65e48e462f9a2',
   18         'info_dict': {
   19             'id': 'A-Few-of-My-Favorite-Python-Things',
   20             'ext': 'mp4',
   21             'description': 'Mike Pirnat presents some tips and tricks, standard libraries and third party packages that make programming in Python a richer experience.',
   22             'title': 'A Few of My Favorite [Python] Things',
   23         },
   24     }, {
   25         'url': 'http://www.infoq.com/fr/presentations/changez-avis-sur-javascript',
   26         'only_matching': True,
   27     }, {
   28         'url': 'http://www.infoq.com/cn/presentations/openstack-continued-delivery',
   29         'md5': '4918d0cca1497f2244572caf626687ef',
   30         'info_dict': {
   31             'id': 'openstack-continued-delivery',
   32             'title': 'OpenStack持续交付之路',
   33             'ext': 'flv',
   34             'description': 'md5:308d981fb28fa42f49f9568322c683ff',
   35         },
   36     }]
   37 
   38     def _extract_rtmp_videos(self, webpage):
   39         # The server URL is hardcoded
   40         video_url = 'rtmpe://video.infoq.com/cfx/st/'
   41 
   42         # Extract video URL
   43         encoded_id = self._search_regex(
   44             r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id', default=None)
   45 
   46         real_id = compat_urllib_parse_unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8'))
   47         playpath = 'mp4:' + real_id
   48 
   49         return [{
   50             'format_id': 'rtmp',
   51             'url': video_url,
   52             'ext': determine_ext(playpath),
   53             'play_path': playpath,
   54         }]
   55 
   56     def _extract_http_videos(self, webpage):
   57         http_video_url = self._search_regex(r'P\.s\s*=\s*\'([^\']+)\'', webpage, 'video URL')
   58 
   59         policy = self._search_regex(r'InfoQConstants.scp\s*=\s*\'([^\']+)\'', webpage, 'policy')
   60         signature = self._search_regex(r'InfoQConstants.scs\s*=\s*\'([^\']+)\'', webpage, 'signature')
   61         key_pair_id = self._search_regex(r'InfoQConstants.sck\s*=\s*\'([^\']+)\'', webpage, 'key-pair-id')
   62 
   63         return [{
   64             'format_id': 'http',
   65             'url': http_video_url,
   66             'http_headers': {
   67                 'Cookie': 'CloudFront-Policy=%s; CloudFront-Signature=%s; CloudFront-Key-Pair-Id=%s' % (
   68                     policy, signature, key_pair_id),
   69             },
   70         }]
   71 
   72     def _real_extract(self, url):
   73         video_id = self._match_id(url)
   74         webpage = self._download_webpage(url, video_id)
   75 
   76         video_title = self._html_search_regex(r'<title>(.*?)</title>', webpage, 'title')
   77         video_description = self._html_search_meta('description', webpage, 'description')
   78 
   79         if '/cn/' in url:
   80             # for China videos, HTTP video URL exists but always fails with 403
   81             formats = self._extract_bokecc_formats(webpage, video_id)
   82         else:
   83             formats = self._extract_rtmp_videos(webpage) + self._extract_http_videos(webpage)
   84 
   85         self._sort_formats(formats)
   86 
   87         return {
   88             'id': video_id,
   89             'title': video_title,
   90             'description': video_description,
   91             'formats': formats,
   92         }

Generated by cgit