youtube_dl/extractor/sunporno.py



    1 from __future__ import unicode_literals
    2 
    3 import re
    4 
    5 from .common import InfoExtractor
    6 from ..utils import (
    7     parse_duration,
    8     int_or_none,
    9     qualities,
   10     determine_ext,
   11 )
   12 
   13 
   14 class SunPornoIE(InfoExtractor):
   15     _VALID_URL = r'https?://(?:(?:www\.)?sunporno\.com/videos|embeds\.sunporno\.com/embed)/(?P<id>\d+)'
   16     _TESTS = [{
   17         'url': 'http://www.sunporno.com/videos/807778/',
   18         'md5': '507887e29033502f29dba69affeebfc9',
   19         'info_dict': {
   20             'id': '807778',
   21             'ext': 'mp4',
   22             'title': 'md5:0a400058e8105d39e35c35e7c5184164',
   23             'description': 'md5:a31241990e1bd3a64e72ae99afb325fb',
   24             'thumbnail': r're:^https?://.*\.jpg$',
   25             'duration': 302,
   26             'age_limit': 18,
   27         }
   28     }, {
   29         'url': 'http://embeds.sunporno.com/embed/807778',
   30         'only_matching': True,
   31     }]
   32 
   33     def _real_extract(self, url):
   34         video_id = self._match_id(url)
   35 
   36         webpage = self._download_webpage(
   37             'http://www.sunporno.com/videos/%s' % video_id, video_id)
   38 
   39         title = self._html_search_regex(
   40             r'<title>([^<]+)</title>', webpage, 'title')
   41         description = self._html_search_meta(
   42             'description', webpage, 'description')
   43         thumbnail = self._html_search_regex(
   44             r'poster="([^"]+)"', webpage, 'thumbnail', fatal=False)
   45 
   46         duration = parse_duration(self._search_regex(
   47             (r'itemprop="duration"[^>]*>\s*(\d+:\d+)\s*<',
   48              r'>Duration:\s*<span[^>]+>\s*(\d+:\d+)\s*<'),
   49             webpage, 'duration', fatal=False))
   50 
   51         view_count = int_or_none(self._html_search_regex(
   52             r'class="views">(?:<noscript>)?\s*(\d+)\s*<',
   53             webpage, 'view count', fatal=False))
   54         comment_count = int_or_none(self._html_search_regex(
   55             r'(\d+)</b> Comments?',
   56             webpage, 'comment count', fatal=False, default=None))
   57 
   58         formats = []
   59         quality = qualities(['mp4', 'flv'])
   60         for video_url in re.findall(r'<(?:source|video) src="([^"]+)"', webpage):
   61             video_ext = determine_ext(video_url)
   62             formats.append({
   63                 'url': video_url,
   64                 'format_id': video_ext,
   65                 'quality': quality(video_ext),
   66             })
   67         self._sort_formats(formats)
   68 
   69         return {
   70             'id': video_id,
   71             'title': title,
   72             'description': description,
   73             'thumbnail': thumbnail,
   74             'duration': duration,
   75             'view_count': view_count,
   76             'comment_count': comment_count,
   77             'formats': formats,
   78             'age_limit': 18,
   79         }