youtube_dl/extractor/steam.py



    1 from __future__ import unicode_literals
    2 
    3 import re
    4 
    5 from .common import InfoExtractor
    6 from ..utils import (
    7     ExtractorError,
    8     unescapeHTML,
    9 )
   10 
   11 
   12 class SteamIE(InfoExtractor):
   13     _VALID_URL = r"""(?x)
   14         https?://store\.steampowered\.com/
   15             (agecheck/)?
   16             (?P<urltype>video|app)/ #If the page is only for videos or for a game
   17             (?P<gameID>\d+)/?
   18             (?P<videoID>\d*)(?P<extra>\??) # For urltype == video we sometimes get the videoID
   19         |
   20         https?://(?:www\.)?steamcommunity\.com/sharedfiles/filedetails/\?id=(?P<fileID>[0-9]+)
   21     """
   22     _VIDEO_PAGE_TEMPLATE = 'http://store.steampowered.com/video/%s/'
   23     _AGECHECK_TEMPLATE = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970'
   24     _TESTS = [{
   25         'url': 'http://store.steampowered.com/video/105600/',
   26         'playlist': [
   27             {
   28                 'md5': 'f870007cee7065d7c76b88f0a45ecc07',
   29                 'info_dict': {
   30                     'id': '81300',
   31                     'ext': 'flv',
   32                     'title': 'Terraria 1.1 Trailer',
   33                     'playlist_index': 1,
   34                 }
   35             },
   36             {
   37                 'md5': '61aaf31a5c5c3041afb58fb83cbb5751',
   38                 'info_dict': {
   39                     'id': '80859',
   40                     'ext': 'flv',
   41                     'title': 'Terraria Trailer',
   42                     'playlist_index': 2,
   43                 }
   44             }
   45         ],
   46         'params': {
   47             'playlistend': 2,
   48         }
   49     }, {
   50         'url': 'http://steamcommunity.com/sharedfiles/filedetails/?id=242472205',
   51         'info_dict': {
   52             'id': 'WB5DvDOOvAY',
   53             'ext': 'mp4',
   54             'upload_date': '20140329',
   55             'title': 'FRONTIERS - Final Greenlight Trailer',
   56             'description': 'md5:dc96a773669d0ca1b36c13c1f30250d9',
   57             'uploader': 'AAD Productions',
   58             'uploader_id': 'AtomicAgeDogGames',
   59         }
   60     }]
   61 
   62     def _real_extract(self, url):
   63         m = re.match(self._VALID_URL, url)
   64         fileID = m.group('fileID')
   65         if fileID:
   66             videourl = url
   67             playlist_id = fileID
   68         else:
   69             gameID = m.group('gameID')
   70             playlist_id = gameID
   71             videourl = self._VIDEO_PAGE_TEMPLATE % playlist_id
   72         webpage = self._download_webpage(videourl, playlist_id)
   73 
   74         if re.search('<h2>Please enter your birth date to continue:</h2>', webpage) is not None:
   75             videourl = self._AGECHECK_TEMPLATE % playlist_id
   76             self.report_age_confirmation()
   77             webpage = self._download_webpage(videourl, playlist_id)
   78 
   79         if fileID:
   80             playlist_title = self._html_search_regex(
   81                 r'<div class="workshopItemTitle">(.+)</div>', webpage, 'title')
   82             mweb = re.finditer(r'''(?x)
   83                 'movie_(?P<videoID>[0-9]+)':\s*\{\s*
   84                 YOUTUBE_VIDEO_ID:\s*"(?P<youtube_id>[^"]+)",
   85                 ''', webpage)
   86             videos = [{
   87                 '_type': 'url',
   88                 'url': vid.group('youtube_id'),
   89                 'ie_key': 'Youtube',
   90             } for vid in mweb]
   91         else:
   92             playlist_title = self._html_search_regex(
   93                 r'<h2 class="pageheader">(.*?)</h2>', webpage, 'game title')
   94 
   95             mweb = re.finditer(r'''(?x)
   96                 'movie_(?P<videoID>[0-9]+)':\s*\{\s*
   97                 FILENAME:\s*"(?P<videoURL>[\w:/\.\?=]+)"
   98                 (,\s*MOVIE_NAME:\s*\"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},
   99                 ''', webpage)
  100             titles = re.finditer(
  101                 r'<span class="title">(?P<videoName>.+?)</span>', webpage)
  102             thumbs = re.finditer(
  103                 r'<img class="movie_thumb" src="(?P<thumbnail>.+?)">', webpage)
  104             videos = []
  105 
  106             for vid, vtitle, thumb in zip(mweb, titles, thumbs):
  107                 video_id = vid.group('videoID')
  108                 title = vtitle.group('videoName')
  109                 video_url = vid.group('videoURL')
  110                 video_thumb = thumb.group('thumbnail')
  111                 if not video_url:
  112                     raise ExtractorError('Cannot find video url for %s' % video_id)
  113                 videos.append({
  114                     'id': video_id,
  115                     'url': video_url,
  116                     'ext': 'flv',
  117                     'title': unescapeHTML(title),
  118                     'thumbnail': video_thumb
  119                 })
  120         if not videos:
  121             raise ExtractorError('Could not find any videos')
  122 
  123         return self.playlist_result(videos, playlist_id, playlist_title)