summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor/sapo.py
blob: 49a9b313a87a5bf9b80fa8a3b8d78c104722cd5b (plain)
    1 # coding: utf-8
    2 from __future__ import unicode_literals
    3 
    4 import re
    5 
    6 from .common import InfoExtractor
    7 from ..utils import (
    8     parse_duration,
    9     unified_strdate,
   10 )
   11 
   12 
   13 class SapoIE(InfoExtractor):
   14     IE_DESC = 'SAPO Vídeos'
   15     _VALID_URL = r'https?://(?:(?:v2|www)\.)?videos\.sapo\.(?:pt|cv|ao|mz|tl)/(?P<id>[\da-zA-Z]{20})'
   16 
   17     _TESTS = [
   18         {
   19             'url': 'http://videos.sapo.pt/UBz95kOtiWYUMTA5Ghfi',
   20             'md5': '79ee523f6ecb9233ac25075dee0eda83',
   21             'note': 'SD video',
   22             'info_dict': {
   23                 'id': 'UBz95kOtiWYUMTA5Ghfi',
   24                 'ext': 'mp4',
   25                 'title': 'Benfica - Marcas na Hitória',
   26                 'description': 'md5:c9082000a128c3fd57bf0299e1367f22',
   27                 'duration': 264,
   28                 'uploader': 'tiago_1988',
   29                 'upload_date': '20080229',
   30                 'categories': ['benfica', 'cabral', 'desporto', 'futebol', 'geovanni', 'hooijdonk', 'joao', 'karel', 'lisboa', 'miccoli'],
   31             },
   32         },
   33         {
   34             'url': 'http://videos.sapo.pt/IyusNAZ791ZdoCY5H5IF',
   35             'md5': '90a2f283cfb49193fe06e861613a72aa',
   36             'note': 'HD video',
   37             'info_dict': {
   38                 'id': 'IyusNAZ791ZdoCY5H5IF',
   39                 'ext': 'mp4',
   40                 'title': 'Codebits VII - Report',
   41                 'description': 'md5:6448d6fd81ce86feac05321f354dbdc8',
   42                 'duration': 144,
   43                 'uploader': 'codebits',
   44                 'upload_date': '20140427',
   45                 'categories': ['codebits', 'codebits2014'],
   46             },
   47         },
   48         {
   49             'url': 'http://v2.videos.sapo.pt/yLqjzPtbTimsn2wWBKHz',
   50             'md5': 'e5aa7cc0bdc6db9b33df1a48e49a15ac',
   51             'note': 'v2 video',
   52             'info_dict': {
   53                 'id': 'yLqjzPtbTimsn2wWBKHz',
   54                 'ext': 'mp4',
   55                 'title': 'Hipnose Condicionativa 4',
   56                 'description': 'md5:ef0481abf8fb4ae6f525088a6dadbc40',
   57                 'duration': 692,
   58                 'uploader': 'sapozen',
   59                 'upload_date': '20090609',
   60                 'categories': ['condicionativa', 'heloisa', 'hipnose', 'miranda', 'sapo', 'zen'],
   61             },
   62         },
   63     ]
   64 
   65     def _real_extract(self, url):
   66         mobj = re.match(self._VALID_URL, url)
   67         video_id = mobj.group('id')
   68 
   69         item = self._download_xml(
   70             'http://rd3.videos.sapo.pt/%s/rss2' % video_id, video_id).find('./channel/item')
   71 
   72         title = item.find('./title').text
   73         description = item.find('./{http://videos.sapo.pt/mrss/}synopse').text
   74         thumbnail = item.find('./{http://search.yahoo.com/mrss/}content').get('url')
   75         duration = parse_duration(item.find('./{http://videos.sapo.pt/mrss/}time').text)
   76         uploader = item.find('./{http://videos.sapo.pt/mrss/}author').text
   77         upload_date = unified_strdate(item.find('./pubDate').text)
   78         view_count = int(item.find('./{http://videos.sapo.pt/mrss/}views').text)
   79         comment_count = int(item.find('./{http://videos.sapo.pt/mrss/}comment_count').text)
   80         tags = item.find('./{http://videos.sapo.pt/mrss/}tags').text
   81         categories = tags.split() if tags else []
   82         age_limit = 18 if item.find('./{http://videos.sapo.pt/mrss/}m18').text == 'true' else 0
   83 
   84         video_url = item.find('./{http://videos.sapo.pt/mrss/}videoFile').text
   85         video_size = item.find('./{http://videos.sapo.pt/mrss/}videoSize').text.split('x')
   86 
   87         formats = [{
   88             'url': video_url,
   89             'ext': 'mp4',
   90             'format_id': 'sd',
   91             'width': int(video_size[0]),
   92             'height': int(video_size[1]),
   93         }]
   94 
   95         if item.find('./{http://videos.sapo.pt/mrss/}HD').text == 'true':
   96             formats.append({
   97                 'url': re.sub(r'/mov/1$', '/mov/39', video_url),
   98                 'ext': 'mp4',
   99                 'format_id': 'hd',
  100                 'width': 1280,
  101                 'height': 720,
  102             })
  103 
  104         self._sort_formats(formats)
  105 
  106         return {
  107             'id': video_id,
  108             'title': title,
  109             'description': description,
  110             'thumbnail': thumbnail,
  111             'duration': duration,
  112             'uploader': uploader,
  113             'upload_date': upload_date,
  114             'view_count': view_count,
  115             'comment_count': comment_count,
  116             'categories': categories,
  117             'age_limit': age_limit,
  118             'formats': formats,
  119         }

Generated by cgit