youtube_dl/extractor/spankbang.py



    1 from __future__ import unicode_literals
    2 
    3 import re
    4 
    5 from .common import InfoExtractor
    6 
    7 
    8 class SpankBangIE(InfoExtractor):
    9     _VALID_URL = r'https?://(?:(?:www|[a-z]{2})\.)?spankbang\.com/(?P<id>[\da-z]+)/video'
   10     _TESTS = [{
   11         'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
   12         'md5': '1cc433e1d6aa14bc376535b8679302f7',
   13         'info_dict': {
   14             'id': '3vvn',
   15             'ext': 'mp4',
   16             'title': 'fantasy solo',
   17             'description': 'Watch fantasy solo free HD porn video - 05 minutes - dillion harper masturbates on a bed free adult movies.',
   18             'thumbnail': r're:^https?://.*\.jpg$',
   19             'uploader': 'silly2587',
   20             'age_limit': 18,
   21         }
   22     }, {
   23         # 480p only
   24         'url': 'http://spankbang.com/1vt0/video/solvane+gangbang',
   25         'only_matching': True,
   26     }]
   27 
   28     def _real_extract(self, url):
   29         video_id = self._match_id(url)
   30         webpage = self._download_webpage(url, video_id)
   31 
   32         stream_key = self._html_search_regex(
   33             r'''var\s+stream_key\s*=\s*['"](.+?)['"]''',
   34             webpage, 'stream key')
   35 
   36         formats = [{
   37             'url': 'http://spankbang.com/_%s/%s/title/%sp__mp4' % (video_id, stream_key, height),
   38             'ext': 'mp4',
   39             'format_id': '%sp' % height,
   40             'height': int(height),
   41         } for height in re.findall(r'<(?:span|li|p)[^>]+[qb]_(\d+)p', webpage)]
   42         self._check_formats(formats, video_id)
   43         self._sort_formats(formats)
   44 
   45         title = self._html_search_regex(
   46             r'(?s)<h1[^>]*>(.+?)</h1>', webpage, 'title')
   47         description = self._og_search_description(webpage)
   48         thumbnail = self._og_search_thumbnail(webpage)
   49         uploader = self._search_regex(
   50             r'class="user"[^>]*><img[^>]+>([^<]+)',
   51             webpage, 'uploader', fatal=False)
   52 
   53         age_limit = self._rta_search(webpage)
   54 
   55         return {
   56             'id': video_id,
   57             'title': title,
   58             'description': description,
   59             'thumbnail': thumbnail,
   60             'uploader': uploader,
   61             'formats': formats,
   62             'age_limit': age_limit,
   63         }