summaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/medaltv.py
blob: 67bb4debb6440e490d1e2b9e7ee5b72fc3a9f0fa (plain)
    1 # coding: utf-8
    2 from __future__ import unicode_literals
    3 
    4 import re
    5 
    6 from .common import InfoExtractor
    7 from ..compat import compat_str
    8 from ..utils import (
    9     ExtractorError,
   10     float_or_none,
   11     int_or_none,
   12     str_or_none,
   13     try_get,
   14 )
   15 
   16 
   17 class MedalTVIE(InfoExtractor):
   18     _VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[^/?#&]+)'
   19     _TESTS = [{
   20         'url': 'https://medal.tv/clips/2mA60jWAGQCBH',
   21         'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
   22         'info_dict': {
   23             'id': '2mA60jWAGQCBH',
   24             'ext': 'mp4',
   25             'title': 'Quad Cold',
   26             'description': 'Medal,https://medal.tv/desktop/',
   27             'uploader': 'MowgliSB',
   28             'timestamp': 1603165266,
   29             'upload_date': '20201020',
   30             'uploader_id': '10619174',
   31         }
   32     }, {
   33         'url': 'https://medal.tv/clips/2um24TWdty0NA',
   34         'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
   35         'info_dict': {
   36             'id': '2um24TWdty0NA',
   37             'ext': 'mp4',
   38             'title': 'u tk me i tk u bigger',
   39             'description': 'Medal,https://medal.tv/desktop/',
   40             'uploader': 'Mimicc',
   41             'timestamp': 1605580939,
   42             'upload_date': '20201117',
   43             'uploader_id': '5156321',
   44         }
   45     }, {
   46         'url': 'https://medal.tv/clips/37rMeFpryCC-9',
   47         'only_matching': True,
   48     }, {
   49         'url': 'https://medal.tv/clips/2WRj40tpY_EU9',
   50         'only_matching': True,
   51     }]
   52 
   53     def _real_extract(self, url):
   54         video_id = self._match_id(url)
   55         webpage = self._download_webpage(url, video_id)
   56 
   57         hydration_data = self._parse_json(self._search_regex(
   58             r'<script[^>]*>\s*(?:var\s*)?hydrationData\s*=\s*({.+?})\s*</script>',
   59             webpage, 'hydration data', default='{}'), video_id)
   60 
   61         clip = try_get(
   62             hydration_data, lambda x: x['clips'][video_id], dict) or {}
   63         if not clip:
   64             raise ExtractorError(
   65                 'Could not find video information.', video_id=video_id)
   66 
   67         title = clip['contentTitle']
   68 
   69         source_width = int_or_none(clip.get('sourceWidth'))
   70         source_height = int_or_none(clip.get('sourceHeight'))
   71 
   72         aspect_ratio = source_width / source_height if source_width and source_height else 16 / 9
   73 
   74         def add_item(container, item_url, height, id_key='format_id', item_id=None):
   75             item_id = item_id or '%dp' % height
   76             if item_id not in item_url:
   77                 return
   78             width = int(round(aspect_ratio * height))
   79             container.append({
   80                 'url': item_url,
   81                 id_key: item_id,
   82                 'width': width,
   83                 'height': height
   84             })
   85 
   86         formats = []
   87         thumbnails = []
   88         for k, v in clip.items():
   89             if not (v and isinstance(v, compat_str)):
   90                 continue
   91             mobj = re.match(r'(contentUrl|thumbnail)(?:(\d+)p)?$', k)
   92             if not mobj:
   93                 continue
   94             prefix = mobj.group(1)
   95             height = int_or_none(mobj.group(2))
   96             if prefix == 'contentUrl':
   97                 add_item(
   98                     formats, v, height or source_height,
   99                     item_id=None if height else 'source')
  100             elif prefix == 'thumbnail':
  101                 add_item(thumbnails, v, height, 'id')
  102 
  103         error = clip.get('error')
  104         if not formats and error:
  105             if error == 404:
  106                 raise ExtractorError(
  107                     'That clip does not exist.',
  108                     expected=True, video_id=video_id)
  109             else:
  110                 raise ExtractorError(
  111                     'An unknown error occurred ({0}).'.format(error),
  112                     video_id=video_id)
  113 
  114         self._sort_formats(formats)
  115 
  116         # Necessary because the id of the author is not known in advance.
  117         # Won't raise an issue if no profile can be found as this is optional.
  118         author = try_get(
  119             hydration_data, lambda x: list(x['profiles'].values())[0], dict) or {}
  120         author_id = str_or_none(author.get('id'))
  121         author_url = 'https://medal.tv/users/{0}'.format(author_id) if author_id else None
  122 
  123         return {
  124             'id': video_id,
  125             'title': title,
  126             'formats': formats,
  127             'thumbnails': thumbnails,
  128             'description': clip.get('contentDescription'),
  129             'uploader': author.get('displayName'),
  130             'timestamp': float_or_none(clip.get('created'), 1000),
  131             'uploader_id': author_id,
  132             'uploader_url': author_url,
  133             'duration': int_or_none(clip.get('videoLengthSeconds')),
  134             'view_count': int_or_none(clip.get('views')),
  135             'like_count': int_or_none(clip.get('likes')),
  136             'comment_count': int_or_none(clip.get('comments')),
  137         }

Generated by cgit