summaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/pornoxo.py
blob: 1a0cce7e0274bb4a06bf9b0604d9ebdf75cf3df5 (plain)
    1 from __future__ import unicode_literals
    2 
    3 import re
    4 
    5 from .jwplatform import JWPlatformBaseIE
    6 from ..utils import (
    7     str_to_int,
    8 )
    9 
   10 
   11 class PornoXOIE(JWPlatformBaseIE):
   12     _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html'
   13     _TEST = {
   14         'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html',
   15         'md5': '582f28ecbaa9e6e24cb90f50f524ce87',
   16         'info_dict': {
   17             'id': '7564',
   18             'ext': 'flv',
   19             'title': 'Striptease From Sexy Secretary!',
   20             'display_id': 'striptease-from-sexy-secretary',
   21             'description': 'md5:0ee35252b685b3883f4a1d38332f9980',
   22             'categories': list,  # NSFW
   23             'thumbnail': r're:https?://.*\.jpg$',
   24             'age_limit': 18,
   25         }
   26     }
   27 
   28     def _real_extract(self, url):
   29         mobj = re.match(self._VALID_URL, url)
   30         video_id, display_id = mobj.groups()
   31 
   32         webpage = self._download_webpage(url, video_id)
   33         video_data = self._extract_jwplayer_data(webpage, video_id, require_title=False)
   34 
   35         title = self._html_search_regex(
   36             r'<title>([^<]+)\s*-\s*PornoXO', webpage, 'title')
   37 
   38         view_count = str_to_int(self._html_search_regex(
   39             r'[vV]iews:\s*([0-9,]+)', webpage, 'view count', fatal=False))
   40 
   41         categories_str = self._html_search_regex(
   42             r'<meta name="description" content=".*featuring\s*([^"]+)"',
   43             webpage, 'categories', fatal=False)
   44         categories = (
   45             None if categories_str is None
   46             else categories_str.split(','))
   47 
   48         video_data.update({
   49             'id': video_id,
   50             'title': title,
   51             'display_id': display_id,
   52             'description': self._html_search_meta('description', webpage),
   53             'categories': categories,
   54             'view_count': view_count,
   55             'age_limit': 18,
   56         })
   57 
   58         return video_data

Generated by cgit