youtube_dl/extractor/cartoonnetwork.py



    1 # coding: utf-8
    2 from __future__ import unicode_literals
    3 
    4 from .turner import TurnerBaseIE
    5 from ..utils import int_or_none
    6 
    7 
    8 class CartoonNetworkIE(TurnerBaseIE):
    9     _VALID_URL = r'https?://(?:www\.)?cartoonnetwork\.com/video/(?:[^/]+/)+(?P<id>[^/?#]+)-(?:clip|episode)\.html'
   10     _TEST = {
   11         'url': 'https://www.cartoonnetwork.com/video/ben-10/how-to-draw-upgrade-episode.html',
   12         'info_dict': {
   13             'id': '6e3375097f63874ebccec7ef677c1c3845fa850e',
   14             'ext': 'mp4',
   15             'title': 'How to Draw Upgrade',
   16             'description': 'md5:2061d83776db7e8be4879684eefe8c0f',
   17         },
   18         'params': {
   19             # m3u8 download
   20             'skip_download': True,
   21         },
   22     }
   23 
   24     def _real_extract(self, url):
   25         display_id = self._match_id(url)
   26         webpage = self._download_webpage(url, display_id)
   27 
   28         def find_field(global_re, name, content_re=None, value_re='[^"]+', fatal=False):
   29             metadata_re = ''
   30             if content_re:
   31                 metadata_re = r'|video_metadata\.content_' + content_re
   32             return self._search_regex(
   33                 r'(?:_cnglobal\.currentVideo\.%s%s)\s*=\s*"(%s)";' % (global_re, metadata_re, value_re),
   34                 webpage, name, fatal=fatal)
   35 
   36         media_id = find_field('mediaId', 'media id', 'id', '[0-9a-f]{40}', True)
   37         title = find_field('episodeTitle', 'title', '(?:episodeName|name)', fatal=True)
   38 
   39         info = self._extract_ngtv_info(
   40             media_id, {'networkId': 'cartoonnetwork'}, {
   41                 'url': url,
   42                 'site_name': 'CartoonNetwork',
   43                 'auth_required': find_field('authType', 'auth type') != 'unauth',
   44             })
   45 
   46         series = find_field(
   47             'propertyName', 'series', 'showName') or self._html_search_meta('partOfSeries', webpage)
   48         info.update({
   49             'id': media_id,
   50             'display_id': display_id,
   51             'title': title,
   52             'description': self._html_search_meta('description', webpage),
   53             'series': series,
   54             'episode': title,
   55         })
   56 
   57         for field in ('season', 'episode'):
   58             field_name = field + 'Number'
   59             info[field + '_number'] = int_or_none(find_field(
   60                 field_name, field + ' number', value_re=r'\d+') or self._html_search_meta(field_name, webpage))
   61 
   62         return info