youtube_dl/extractor/shahid.py



    1 # coding: utf-8
    2 from __future__ import unicode_literals
    3 
    4 import json
    5 import math
    6 import re
    7 
    8 from .aws import AWSIE
    9 from ..compat import compat_HTTPError
   10 from ..utils import (
   11     clean_html,
   12     ExtractorError,
   13     InAdvancePagedList,
   14     int_or_none,
   15     parse_iso8601,
   16     str_or_none,
   17     urlencode_postdata,
   18 )
   19 
   20 
   21 class ShahidBaseIE(AWSIE):
   22     _AWS_PROXY_HOST = 'api2.shahid.net'
   23     _AWS_API_KEY = '2RRtuMHx95aNI1Kvtn2rChEuwsCogUd4samGPjLh'
   24     _VALID_URL_BASE = r'https?://shahid\.mbc\.net/[a-z]{2}/'
   25 
   26     def _handle_error(self, e):
   27         fail_data = self._parse_json(
   28             e.cause.read().decode('utf-8'), None, fatal=False)
   29         if fail_data:
   30             faults = fail_data.get('faults', [])
   31             faults_message = ', '.join([clean_html(fault['userMessage']) for fault in faults if fault.get('userMessage')])
   32             if faults_message:
   33                 raise ExtractorError(faults_message, expected=True)
   34 
   35     def _call_api(self, path, video_id, request=None):
   36         query = {}
   37         if request:
   38             query['request'] = json.dumps(request)
   39         try:
   40             return self._aws_execute_api({
   41                 'uri': '/proxy/v2/' + path,
   42                 'access_key': 'AKIAI6X4TYCIXM2B7MUQ',
   43                 'secret_key': '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',
   44             }, video_id, query)
   45         except ExtractorError as e:
   46             if isinstance(e.cause, compat_HTTPError):
   47                 self._handle_error(e)
   48             raise
   49 
   50 
   51 class ShahidIE(ShahidBaseIE):
   52     _NETRC_MACHINE = 'shahid'
   53     _VALID_URL = ShahidBaseIE._VALID_URL_BASE + r'(?:serie|show|movie)s/[^/]+/(?P<type>episode|clip|movie)-(?P<id>\d+)'
   54     _TESTS = [{
   55         'url': 'https://shahid.mbc.net/ar/shows/%D9%85%D8%AA%D8%AD%D9%81-%D8%A7%D9%84%D8%AF%D8%AD%D9%8A%D8%AD-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D9%83%D9%84%D9%8A%D8%A8-1/clip-816924',
   56         'info_dict': {
   57             'id': '816924',
   58             'ext': 'mp4',
   59             'title': 'متحف الدحيح الموسم 1 كليب 1',
   60             'timestamp': 1602806400,
   61             'upload_date': '20201016',
   62             'description': 'برومو',
   63             'duration': 22,
   64             'categories': ['كوميديا'],
   65         },
   66         'params': {
   67             # m3u8 download
   68             'skip_download': True,
   69         }
   70     }, {
   71         'url': 'https://shahid.mbc.net/ar/movies/%D8%A7%D9%84%D9%82%D9%86%D8%A7%D8%B5%D8%A9/movie-151746',
   72         'only_matching': True
   73     }, {
   74         # shahid plus subscriber only
   75         'url': 'https://shahid.mbc.net/ar/series/%D9%85%D8%B1%D8%A7%D9%8A%D8%A7-2011-%D8%A7%D9%84%D9%85%D9%88%D8%B3%D9%85-1-%D8%A7%D9%84%D8%AD%D9%84%D9%82%D8%A9-1/episode-90511',
   76         'only_matching': True
   77     }, {
   78         'url': 'https://shahid.mbc.net/en/shows/Ramez-Fi-Al-Shallal-season-1-episode-1/episode-359319',
   79         'only_matching': True
   80     }]
   81 
   82     def _real_initialize(self):
   83         email, password = self._get_login_info()
   84         if email is None:
   85             return
   86 
   87         try:
   88             user_data = self._download_json(
   89                 'https://shahid.mbc.net/wd/service/users/login',
   90                 None, 'Logging in', data=json.dumps({
   91                     'email': email,
   92                     'password': password,
   93                     'basic': 'false',
   94                 }).encode('utf-8'), headers={
   95                     'Content-Type': 'application/json; charset=UTF-8',
   96                 })['user']
   97         except ExtractorError as e:
   98             if isinstance(e.cause, compat_HTTPError):
   99                 self._handle_error(e)
  100             raise
  101 
  102         self._download_webpage(
  103             'https://shahid.mbc.net/populateContext',
  104             None, 'Populate Context', data=urlencode_postdata({
  105                 'firstName': user_data['firstName'],
  106                 'lastName': user_data['lastName'],
  107                 'userName': user_data['email'],
  108                 'csg_user_name': user_data['email'],
  109                 'subscriberId': user_data['id'],
  110                 'sessionId': user_data['sessionId'],
  111             }))
  112 
  113     def _real_extract(self, url):
  114         page_type, video_id = re.match(self._VALID_URL, url).groups()
  115         if page_type == 'clip':
  116             page_type = 'episode'
  117 
  118         playout = self._call_api(
  119             'playout/new/url/' + video_id, video_id)['playout']
  120 
  121         if playout.get('drm'):
  122             raise ExtractorError('This video is DRM protected.', expected=True)
  123 
  124         formats = self._extract_m3u8_formats(re.sub(
  125             # https://docs.aws.amazon.com/mediapackage/latest/ug/manifest-filtering.html
  126             r'aws\.manifestfilter=[\w:;,-]+&?',
  127             '', playout['url']), video_id, 'mp4')
  128         self._sort_formats(formats)
  129 
  130         # video = self._call_api(
  131         #     'product/id', video_id, {
  132         #         'id': video_id,
  133         #         'productType': 'ASSET',
  134         #         'productSubType': page_type.upper()
  135         #     })['productModel']
  136 
  137         response = self._download_json(
  138             'http://api.shahid.net/api/v1_1/%s/%s' % (page_type, video_id),
  139             video_id, 'Downloading video JSON', query={
  140                 'apiKey': 'sh@hid0nlin3',
  141                 'hash': 'b2wMCTHpSmyxGqQjJFOycRmLSex+BpTK/ooxy6vHaqs=',
  142             })
  143         data = response.get('data', {})
  144         error = data.get('error')
  145         if error:
  146             raise ExtractorError(
  147                 '%s returned error: %s' % (self.IE_NAME, '\n'.join(error.values())),
  148                 expected=True)
  149 
  150         video = data[page_type]
  151         title = video['title']
  152         categories = [
  153             category['name']
  154             for category in video.get('genres', []) if 'name' in category]
  155 
  156         return {
  157             'id': video_id,
  158             'title': title,
  159             'description': video.get('description'),
  160             'thumbnail': video.get('thumbnailUrl'),
  161             'duration': int_or_none(video.get('duration')),
  162             'timestamp': parse_iso8601(video.get('referenceDate')),
  163             'categories': categories,
  164             'series': video.get('showTitle') or video.get('showName'),
  165             'season': video.get('seasonTitle'),
  166             'season_number': int_or_none(video.get('seasonNumber')),
  167             'season_id': str_or_none(video.get('seasonId')),
  168             'episode_number': int_or_none(video.get('number')),
  169             'episode_id': video_id,
  170             'formats': formats,
  171         }
  172 
  173 
  174 class ShahidShowIE(ShahidBaseIE):
  175     _VALID_URL = ShahidBaseIE._VALID_URL_BASE + r'(?:show|serie)s/[^/]+/(?:show|series)-(?P<id>\d+)'
  176     _TESTS = [{
  177         'url': 'https://shahid.mbc.net/ar/shows/%D8%B1%D8%A7%D9%85%D8%B2-%D9%82%D8%B1%D8%B4-%D8%A7%D9%84%D8%A8%D8%AD%D8%B1/show-79187',
  178         'info_dict': {
  179             'id': '79187',
  180             'title': 'رامز قرش البحر',
  181             'description': 'md5:c88fa7e0f02b0abd39d417aee0d046ff',
  182         },
  183         'playlist_mincount': 32,
  184     }, {
  185         'url': 'https://shahid.mbc.net/ar/series/How-to-live-Longer-(The-Big-Think)/series-291861',
  186         'only_matching': True
  187     }]
  188     _PAGE_SIZE = 30
  189 
  190     def _real_extract(self, url):
  191         show_id = self._match_id(url)
  192 
  193         product = self._call_api(
  194             'playableAsset', show_id, {'showId': show_id})['productModel']
  195         playlist = product['playlist']
  196         playlist_id = playlist['id']
  197         show = product.get('show', {})
  198 
  199         def page_func(page_num):
  200             playlist = self._call_api(
  201                 'product/playlist', show_id, {
  202                     'playListId': playlist_id,
  203                     'pageNumber': page_num,
  204                     'pageSize': 30,
  205                     'sorts': [{
  206                         'order': 'DESC',
  207                         'type': 'SORTDATE'
  208                     }],
  209                 })
  210             for product in playlist.get('productList', {}).get('products', []):
  211                 product_url = product.get('productUrl', []).get('url')
  212                 if not product_url:
  213                     continue
  214                 yield self.url_result(
  215                     product_url, 'Shahid',
  216                     str_or_none(product.get('id')),
  217                     product.get('title'))
  218 
  219         entries = InAdvancePagedList(
  220             page_func,
  221             math.ceil(playlist['count'] / self._PAGE_SIZE),
  222             self._PAGE_SIZE)
  223 
  224         return self.playlist_result(
  225             entries, show_id, show.get('title'), show.get('description'))