youtube_dl/extractor/bambuser.py



    1 from __future__ import unicode_literals
    2 
    3 import re
    4 import itertools
    5 
    6 from .common import InfoExtractor
    7 from ..compat import compat_str
    8 from ..utils import (
    9     ExtractorError,
   10     float_or_none,
   11     int_or_none,
   12     sanitized_Request,
   13     urlencode_postdata,
   14 )
   15 
   16 
   17 class BambuserIE(InfoExtractor):
   18     IE_NAME = 'bambuser'
   19     _VALID_URL = r'https?://bambuser\.com/v/(?P<id>\d+)'
   20     _API_KEY = '005f64509e19a868399060af746a00aa'
   21     _LOGIN_URL = 'https://bambuser.com/user'
   22     _NETRC_MACHINE = 'bambuser'
   23 
   24     _TEST = {
   25         'url': 'http://bambuser.com/v/4050584',
   26         # MD5 seems to be flaky, see https://travis-ci.org/rg3/youtube-dl/jobs/14051016#L388
   27         # 'md5': 'fba8f7693e48fd4e8641b3fd5539a641',
   28         'info_dict': {
   29             'id': '4050584',
   30             'ext': 'flv',
   31             'title': 'Education engineering days - lightning talks',
   32             'duration': 3741,
   33             'uploader': 'pixelversity',
   34             'uploader_id': '344706',
   35             'timestamp': 1382976692,
   36             'upload_date': '20131028',
   37             'view_count': int,
   38         },
   39         'params': {
   40             # It doesn't respect the 'Range' header, it would download the whole video
   41             # caused the travis builds to fail: https://travis-ci.org/rg3/youtube-dl/jobs/14493845#L59
   42             'skip_download': True,
   43         },
   44     }
   45 
   46     def _login(self):
   47         (username, password) = self._get_login_info()
   48         if username is None:
   49             return
   50 
   51         login_form = {
   52             'form_id': 'user_login',
   53             'op': 'Log in',
   54             'name': username,
   55             'pass': password,
   56         }
   57 
   58         request = sanitized_Request(
   59             self._LOGIN_URL, urlencode_postdata(login_form))
   60         request.add_header('Referer', self._LOGIN_URL)
   61         response = self._download_webpage(
   62             request, None, 'Logging in')
   63 
   64         login_error = self._html_search_regex(
   65             r'(?s)<div class="messages error">(.+?)</div>',
   66             response, 'login error', default=None)
   67         if login_error:
   68             raise ExtractorError(
   69                 'Unable to login: %s' % login_error, expected=True)
   70 
   71     def _real_initialize(self):
   72         self._login()
   73 
   74     def _real_extract(self, url):
   75         video_id = self._match_id(url)
   76 
   77         info = self._download_json(
   78             'http://player-c.api.bambuser.com/getVideo.json?api_key=%s&vid=%s'
   79             % (self._API_KEY, video_id), video_id)
   80 
   81         error = info.get('error')
   82         if error:
   83             raise ExtractorError(
   84                 '%s returned error: %s' % (self.IE_NAME, error), expected=True)
   85 
   86         result = info['result']
   87 
   88         return {
   89             'id': video_id,
   90             'title': result['title'],
   91             'url': result['url'],
   92             'thumbnail': result.get('preview'),
   93             'duration': int_or_none(result.get('length')),
   94             'uploader': result.get('username'),
   95             'uploader_id': compat_str(result.get('owner', {}).get('uid')),
   96             'timestamp': int_or_none(result.get('created')),
   97             'fps': float_or_none(result.get('framerate')),
   98             'view_count': int_or_none(result.get('views_total')),
   99             'comment_count': int_or_none(result.get('comment_count')),
  100         }
  101 
  102 
  103 class BambuserChannelIE(InfoExtractor):
  104     IE_NAME = 'bambuser:channel'
  105     _VALID_URL = r'https?://bambuser\.com/channel/(?P<user>.*?)(?:/|#|\?|$)'
  106     # The maximum number we can get with each request
  107     _STEP = 50
  108     _TEST = {
  109         'url': 'http://bambuser.com/channel/pixelversity',
  110         'info_dict': {
  111             'title': 'pixelversity',
  112         },
  113         'playlist_mincount': 60,
  114     }
  115 
  116     def _real_extract(self, url):
  117         mobj = re.match(self._VALID_URL, url)
  118         user = mobj.group('user')
  119         urls = []
  120         last_id = ''
  121         for i in itertools.count(1):
  122             req_url = (
  123                 'http://bambuser.com/xhr-api/index.php?username={user}'
  124                 '&sort=created&access_mode=0%2C1%2C2&limit={count}'
  125                 '&method=broadcast&format=json&vid_older_than={last}'
  126             ).format(user=user, count=self._STEP, last=last_id)
  127             req = sanitized_Request(req_url)
  128             # Without setting this header, we wouldn't get any result
  129             req.add_header('Referer', 'http://bambuser.com/channel/%s' % user)
  130             data = self._download_json(
  131                 req, user, 'Downloading page %d' % i)
  132             results = data['result']
  133             if not results:
  134                 break
  135             last_id = results[-1]['vid']
  136             urls.extend(self.url_result(v['page'], 'Bambuser') for v in results)
  137 
  138         return {
  139             '_type': 'playlist',
  140             'title': user,
  141             'entries': urls,
  142         }