youtube_dl/extractor/kinopoisk.py



    1 # coding: utf-8
    2 from __future__ import unicode_literals
    3 
    4 from .common import InfoExtractor
    5 from ..utils import (
    6     dict_get,
    7     int_or_none,
    8 )
    9 
   10 
   11 class KinoPoiskIE(InfoExtractor):
   12     _GEO_COUNTRIES = ['RU']
   13     _VALID_URL = r'https?://(?:www\.)?kinopoisk\.ru/film/(?P<id>\d+)'
   14     _TESTS = [{
   15         'url': 'https://www.kinopoisk.ru/film/81041/watch/',
   16         'md5': '4f71c80baea10dfa54a837a46111d326',
   17         'info_dict': {
   18             'id': '81041',
   19             'ext': 'mp4',
   20             'title': 'Алеша попович и тугарин змей',
   21             'description': 'md5:43787e673d68b805d0aa1df5a5aea701',
   22             'thumbnail': r're:^https?://.*',
   23             'duration': 4533,
   24             'age_limit': 12,
   25         },
   26         'params': {
   27             'format': 'bestvideo',
   28         },
   29     }, {
   30         'url': 'https://www.kinopoisk.ru/film/81041',
   31         'only_matching': True,
   32     }]
   33 
   34     def _real_extract(self, url):
   35         video_id = self._match_id(url)
   36 
   37         webpage = self._download_webpage(
   38             'https://ott-widget.kinopoisk.ru/v1/kp/', video_id,
   39             query={'kpId': video_id})
   40 
   41         data = self._parse_json(
   42             self._search_regex(
   43                 r'(?s)<script[^>]+\btype=["\']application/json[^>]+>(.+?)<',
   44                 webpage, 'data'),
   45             video_id)['models']
   46 
   47         film = data['filmStatus']
   48         title = film.get('title') or film['originalTitle']
   49 
   50         formats = self._extract_m3u8_formats(
   51             data['playlistEntity']['uri'], video_id, 'mp4',
   52             entry_protocol='m3u8_native', m3u8_id='hls')
   53         self._sort_formats(formats)
   54 
   55         description = dict_get(
   56             film, ('descriptscription', 'description',
   57                    'shortDescriptscription', 'shortDescription'))
   58         thumbnail = film.get('coverUrl') or film.get('posterUrl')
   59         duration = int_or_none(film.get('duration'))
   60         age_limit = int_or_none(film.get('restrictionAge'))
   61 
   62         return {
   63             'id': video_id,
   64             'title': title,
   65             'description': description,
   66             'thumbnail': thumbnail,
   67             'duration': duration,
   68             'age_limit': age_limit,
   69             'formats': formats,
   70         }