summaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/neteasemusic.py
blob: 2bbfc78585f882e73a510185762edd40d2cda5e3 (plain)
    1 # coding: utf-8
    2 from __future__ import unicode_literals
    3 
    4 from base64 import b64encode
    5 from binascii import hexlify
    6 from datetime import datetime
    7 from hashlib import md5
    8 from random import randint
    9 import json
   10 import re
   11 import time
   12 
   13 from .common import InfoExtractor
   14 from ..aes import aes_ecb_encrypt, pkcs7_padding
   15 from ..compat import (
   16     compat_urllib_parse_urlencode,
   17     compat_str,
   18     compat_itertools_count,
   19 )
   20 from ..utils import (
   21     ExtractorError,
   22     bytes_to_intlist,
   23     error_to_compat_str,
   24     float_or_none,
   25     int_or_none,
   26     intlist_to_bytes,
   27     sanitized_Request,
   28     std_headers,
   29     try_get,
   30 )
   31 
   32 
   33 class NetEaseMusicBaseIE(InfoExtractor):
   34     _FORMATS = ['bMusic', 'mMusic', 'hMusic']
   35     _NETEASE_SALT = '3go8&$8*3*3h0k(2)2'
   36     _API_BASE = 'http://music.163.com/api/'
   37 
   38     @classmethod
   39     def _encrypt(cls, dfsid):
   40         salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8'))
   41         string_bytes = bytearray(compat_str(dfsid).encode('ascii'))
   42         salt_len = len(salt_bytes)
   43         for i in range(len(string_bytes)):
   44             string_bytes[i] = string_bytes[i] ^ salt_bytes[i % salt_len]
   45         m = md5()
   46         m.update(bytes(string_bytes))
   47         result = b64encode(m.digest()).decode('ascii')
   48         return result.replace('/', '_').replace('+', '-')
   49 
   50     @classmethod
   51     def make_player_api_request_data_and_headers(cls, song_id, bitrate):
   52         KEY = b'e82ckenh8dichen8'
   53         URL = '/api/song/enhance/player/url'
   54         now = int(time.time() * 1000)
   55         rand = randint(0, 1000)
   56         cookie = {
   57             'osver': None,
   58             'deviceId': None,
   59             'appver': '8.0.0',
   60             'versioncode': '140',
   61             'mobilename': None,
   62             'buildver': '1623435496',
   63             'resolution': '1920x1080',
   64             '__csrf': '',
   65             'os': 'pc',
   66             'channel': None,
   67             'requestId': '{0}_{1:04}'.format(now, rand),
   68         }
   69         request_text = json.dumps(
   70             {'ids': '[{0}]'.format(song_id), 'br': bitrate, 'header': cookie},
   71             separators=(',', ':'))
   72         message = 'nobody{0}use{1}md5forencrypt'.format(
   73             URL, request_text).encode('latin1')
   74         msg_digest = md5(message).hexdigest()
   75 
   76         data = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format(
   77             URL, request_text, msg_digest)
   78         data = pkcs7_padding(bytes_to_intlist(data))
   79         encrypted = intlist_to_bytes(aes_ecb_encrypt(data, bytes_to_intlist(KEY)))
   80         encrypted_params = hexlify(encrypted).decode('ascii').upper()
   81 
   82         cookie = '; '.join(
   83             ['{0}={1}'.format(k, v if v is not None else 'undefined')
   84              for [k, v] in cookie.items()])
   85 
   86         headers = {
   87             'User-Agent': std_headers['User-Agent'],
   88             'Content-Type': 'application/x-www-form-urlencoded',
   89             'Referer': 'https://music.163.com',
   90             'Cookie': cookie,
   91         }
   92         return ('params={0}'.format(encrypted_params), headers)
   93 
   94     def _call_player_api(self, song_id, bitrate):
   95         url = 'https://interface3.music.163.com/eapi/song/enhance/player/url'
   96         data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate)
   97         try:
   98             msg = 'empty result'
   99             result = self._download_json(
  100                 url, song_id, data=data.encode('ascii'), headers=headers)
  101             if result:
  102                 return result
  103         except ExtractorError as e:
  104             if type(e.cause) in (ValueError, TypeError):
  105                 # JSON load failure
  106                 raise
  107         except Exception as e:
  108             msg = error_to_compat_str(e)
  109             self.report_warning('%s API call (%s) failed: %s' % (
  110                 song_id, bitrate, msg))
  111         return {}
  112 
  113     def extract_formats(self, info):
  114         err = 0
  115         formats = []
  116         song_id = info['id']
  117         for song_format in self._FORMATS:
  118             details = info.get(song_format)
  119             if not details:
  120                 continue
  121 
  122             bitrate = int_or_none(details.get('bitrate')) or 999000
  123             data = self._call_player_api(song_id, bitrate)
  124             for song in try_get(data, lambda x: x['data'], list) or []:
  125                 song_url = try_get(song, lambda x: x['url'])
  126                 if not song_url:
  127                     continue
  128                 if self._is_valid_url(song_url, info['id'], 'song'):
  129                     formats.append({
  130                         'url': song_url,
  131                         'ext': details.get('extension'),
  132                         'abr': float_or_none(song.get('br'), scale=1000),
  133                         'format_id': song_format,
  134                         'filesize': int_or_none(song.get('size')),
  135                         'asr': int_or_none(details.get('sr')),
  136                     })
  137                 elif err == 0:
  138                     err = try_get(song, lambda x: x['code'], int)
  139 
  140         if not formats:
  141             msg = 'No media links found'
  142             if err != 0 and (err < 200 or err >= 400):
  143                 raise ExtractorError(
  144                     '%s (site code %d)' % (msg, err, ), expected=True)
  145             else:
  146                 self.raise_geo_restricted(
  147                     msg + ': probably this video is not available from your location due to geo restriction.',
  148                     countries=['CN'])
  149 
  150         return formats
  151 
  152     @classmethod
  153     def convert_milliseconds(cls, ms):
  154         return int(round(ms / 1000.0))
  155 
  156     def query_api(self, endpoint, video_id, note):
  157         req = sanitized_Request('%s%s' % (self._API_BASE, endpoint))
  158         req.add_header('Referer', self._API_BASE)
  159         return self._download_json(req, video_id, note)
  160 
  161 
  162 class NetEaseMusicIE(NetEaseMusicBaseIE):
  163     IE_NAME = 'netease:song'
  164     IE_DESC = '网易云音乐'
  165     _VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)'
  166     _TESTS = [{
  167         'url': 'http://music.163.com/#/song?id=32102397',
  168         'md5': '3e909614ce09b1ccef4a3eb205441190',
  169         'info_dict': {
  170             'id': '32102397',
  171             'ext': 'mp3',
  172             'title': 'Bad Blood',
  173             'creator': 'Taylor Swift / Kendrick Lamar',
  174             'upload_date': '20150516',
  175             'timestamp': 1431792000,
  176             'description': 'md5:25fc5f27e47aad975aa6d36382c7833c',
  177         },
  178     }, {
  179         'note': 'No lyrics.',
  180         'url': 'http://music.163.com/song?id=17241424',
  181         'info_dict': {
  182             'id': '17241424',
  183             'ext': 'mp3',
  184             'title': 'Opus 28',
  185             'creator': 'Dustin O\'Halloran',
  186             'upload_date': '20080211',
  187             'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4',
  188             'timestamp': 1202745600,
  189         },
  190     }, {
  191         'note': 'Has translated name.',
  192         'url': 'http://music.163.com/#/song?id=22735043',
  193         'info_dict': {
  194             'id': '22735043',
  195             'ext': 'mp3',
  196             'title': '소원을 말해봐 (Genie)',
  197             'creator': '少女时代',
  198             'description': 'md5:79d99cc560e4ca97e0c4d86800ee4184',
  199             'upload_date': '20100127',
  200             'timestamp': 1264608000,
  201             'alt_title': '说出愿望吧(Genie)',
  202         },
  203     }]
  204 
  205     def _process_lyrics(self, lyrics_info):
  206         original = lyrics_info.get('lrc', {}).get('lyric')
  207         translated = lyrics_info.get('tlyric', {}).get('lyric')
  208 
  209         if not translated:
  210             return original
  211 
  212         lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)'
  213         original_ts_texts = re.findall(lyrics_expr, original)
  214         translation_ts_dict = dict(
  215             (time_stamp, text) for time_stamp, text in re.findall(lyrics_expr, translated)
  216         )
  217         lyrics = '\n'.join([
  218             '%s%s / %s' % (time_stamp, text, translation_ts_dict.get(time_stamp, ''))
  219             for time_stamp, text in original_ts_texts
  220         ])
  221         return lyrics
  222 
  223     def _real_extract(self, url):
  224         song_id = self._match_id(url)
  225 
  226         params = {
  227             'id': song_id,
  228             'ids': '[%s]' % song_id
  229         }
  230         info = self.query_api(
  231             'song/detail?' + compat_urllib_parse_urlencode(params),
  232             song_id, 'Downloading song info')['songs'][0]
  233 
  234         formats = self.extract_formats(info)
  235         self._sort_formats(formats)
  236 
  237         lyrics_info = self.query_api(
  238             'song/lyric?id=%s&lv=-1&tv=-1' % song_id,
  239             song_id, 'Downloading lyrics data')
  240         lyrics = self._process_lyrics(lyrics_info)
  241 
  242         alt_title = None
  243         if info.get('transNames'):
  244             alt_title = '/'.join(info.get('transNames'))
  245 
  246         return {
  247             'id': song_id,
  248             'title': info['name'],
  249             'alt_title': alt_title,
  250             'creator': ' / '.join([artist['name'] for artist in info.get('artists', [])]),
  251             'timestamp': self.convert_milliseconds(info.get('album', {}).get('publishTime')),
  252             'thumbnail': info.get('album', {}).get('picUrl'),
  253             'duration': self.convert_milliseconds(info.get('duration', 0)),
  254             'description': lyrics,
  255             'formats': formats,
  256         }
  257 
  258 
  259 class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
  260     IE_NAME = 'netease:album'
  261     IE_DESC = '网易云音乐 - 专辑'
  262     _VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)'
  263     _TEST = {
  264         'url': 'http://music.163.com/#/album?id=220780',
  265         'info_dict': {
  266             'id': '220780',
  267             'title': 'B\'day',
  268         },
  269         'playlist_count': 23,
  270         'skip': 'Blocked outside Mainland China',
  271     }
  272 
  273     def _real_extract(self, url):
  274         album_id = self._match_id(url)
  275 
  276         info = self.query_api(
  277             'album/%s?id=%s' % (album_id, album_id),
  278             album_id, 'Downloading album data')['album']
  279 
  280         name = info['name']
  281         desc = info.get('description')
  282         entries = [
  283             self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
  284                             'NetEaseMusic', song['id'])
  285             for song in info['songs']
  286         ]
  287         return self.playlist_result(entries, album_id, name, desc)
  288 
  289 
  290 class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
  291     IE_NAME = 'netease:singer'
  292     IE_DESC = '网易云音乐 - 歌手'
  293     _VALID_URL = r'https?://music\.163\.com/(#/)?artist\?id=(?P<id>[0-9]+)'
  294     _TESTS = [{
  295         'note': 'Singer has aliases.',
  296         'url': 'http://music.163.com/#/artist?id=10559',
  297         'info_dict': {
  298             'id': '10559',
  299             'title': '张惠妹 - aMEI;阿密特',
  300         },
  301         'playlist_count': 50,
  302         'skip': 'Blocked outside Mainland China',
  303     }, {
  304         'note': 'Singer has translated name.',
  305         'url': 'http://music.163.com/#/artist?id=124098',
  306         'info_dict': {
  307             'id': '124098',
  308             'title': '李昇基 - 이승기',
  309         },
  310         'playlist_count': 50,
  311         'skip': 'Blocked outside Mainland China',
  312     }]
  313 
  314     def _real_extract(self, url):
  315         singer_id = self._match_id(url)
  316 
  317         info = self.query_api(
  318             'artist/%s?id=%s' % (singer_id, singer_id),
  319             singer_id, 'Downloading singer data')
  320 
  321         name = info['artist']['name']
  322         if info['artist']['trans']:
  323             name = '%s - %s' % (name, info['artist']['trans'])
  324         if info['artist']['alias']:
  325             name = '%s - %s' % (name, ';'.join(info['artist']['alias']))
  326 
  327         entries = [
  328             self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
  329                             'NetEaseMusic', song['id'])
  330             for song in info['hotSongs']
  331         ]
  332         return self.playlist_result(entries, singer_id, name)
  333 
  334 
  335 class NetEaseMusicListIE(NetEaseMusicBaseIE):
  336     IE_NAME = 'netease:playlist'
  337     IE_DESC = '网易云音乐 - 歌单'
  338     _VALID_URL = r'https?://music\.163\.com/(#/)?(playlist|discover/toplist)\?id=(?P<id>[0-9]+)'
  339     _TESTS = [{
  340         'url': 'http://music.163.com/#/playlist?id=79177352',
  341         'info_dict': {
  342             'id': '79177352',
  343             'title': 'Billboard 2007 Top 100',
  344             'description': 'md5:12fd0819cab2965b9583ace0f8b7b022'
  345         },
  346         'playlist_count': 99,
  347         'skip': 'Blocked outside Mainland China',
  348     }, {
  349         'note': 'Toplist/Charts sample',
  350         'url': 'http://music.163.com/#/discover/toplist?id=3733003',
  351         'info_dict': {
  352             'id': '3733003',
  353             'title': 're:韩国Melon排行榜周榜 [0-9]{4}-[0-9]{2}-[0-9]{2}',
  354             'description': 'md5:73ec782a612711cadc7872d9c1e134fc',
  355         },
  356         'playlist_count': 50,
  357         'skip': 'Blocked outside Mainland China',
  358     }]
  359 
  360     def _real_extract(self, url):
  361         list_id = self._match_id(url)
  362 
  363         info = self.query_api(
  364             'playlist/detail?id=%s&lv=-1&tv=-1' % list_id,
  365             list_id, 'Downloading playlist data')['result']
  366 
  367         name = info['name']
  368         desc = info.get('description')
  369 
  370         if info.get('specialType') == 10:  # is a chart/toplist
  371             datestamp = datetime.fromtimestamp(
  372                 self.convert_milliseconds(info['updateTime'])).strftime('%Y-%m-%d')
  373             name = '%s %s' % (name, datestamp)
  374 
  375         entries = [
  376             self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
  377                             'NetEaseMusic', song['id'])
  378             for song in info['tracks']
  379         ]
  380         return self.playlist_result(entries, list_id, name, desc)
  381 
  382 
  383 class NetEaseMusicMvIE(NetEaseMusicBaseIE):
  384     IE_NAME = 'netease:mv'
  385     IE_DESC = '网易云音乐 - MV'
  386     _VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)'
  387     _TEST = {
  388         'url': 'http://music.163.com/#/mv?id=415350',
  389         'info_dict': {
  390             'id': '415350',
  391             'ext': 'mp4',
  392             'title': '이럴거면 그러지말지',
  393             'description': '白雅言自作曲唱甜蜜爱情',
  394             'creator': '白雅言',
  395             'upload_date': '20150520',
  396         },
  397         'skip': 'Blocked outside Mainland China',
  398     }
  399 
  400     def _real_extract(self, url):
  401         mv_id = self._match_id(url)
  402 
  403         info = self.query_api(
  404             'mv/detail?id=%s&type=mp4' % mv_id,
  405             mv_id, 'Downloading mv info')['data']
  406 
  407         formats = [
  408             {'url': mv_url, 'ext': 'mp4', 'format_id': '%sp' % brs, 'height': int(brs)}
  409             for brs, mv_url in info['brs'].items()
  410         ]
  411         self._sort_formats(formats)
  412 
  413         return {
  414             'id': mv_id,
  415             'title': info['name'],
  416             'description': info.get('desc') or info.get('briefDesc'),
  417             'creator': info['artistName'],
  418             'upload_date': info['publishTime'].replace('-', ''),
  419             'formats': formats,
  420             'thumbnail': info.get('cover'),
  421             'duration': self.convert_milliseconds(info.get('duration', 0)),
  422         }
  423 
  424 
  425 class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
  426     IE_NAME = 'netease:program'
  427     IE_DESC = '网易云音乐 - 电台节目'
  428     _VALID_URL = r'https?://music\.163\.com/(#/?)program\?id=(?P<id>[0-9]+)'
  429     _TESTS = [{
  430         'url': 'http://music.163.com/#/program?id=10109055',
  431         'info_dict': {
  432             'id': '10109055',
  433             'ext': 'mp3',
  434             'title': '不丹足球背后的故事',
  435             'description': '喜马拉雅人的足球梦 ...',
  436             'creator': '大话西藏',
  437             'timestamp': 1434179342,
  438             'upload_date': '20150613',
  439             'duration': 900,
  440         },
  441         'skip': 'Blocked outside Mainland China',
  442     }, {
  443         'note': 'This program has accompanying songs.',
  444         'url': 'http://music.163.com/#/program?id=10141022',
  445         'info_dict': {
  446             'id': '10141022',
  447             'title': '25岁,你是自在如风的少年<27°C>',
  448             'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
  449         },
  450         'playlist_count': 4,
  451         'skip': 'Blocked outside Mainland China',
  452     }, {
  453         'note': 'This program has accompanying songs.',
  454         'url': 'http://music.163.com/#/program?id=10141022',
  455         'info_dict': {
  456             'id': '10141022',
  457             'ext': 'mp3',
  458             'title': '25岁,你是自在如风的少年<27°C>',
  459             'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
  460             'timestamp': 1434450841,
  461             'upload_date': '20150616',
  462         },
  463         'params': {
  464             'noplaylist': True
  465         },
  466         'skip': 'Blocked outside Mainland China',
  467     }]
  468 
  469     def _real_extract(self, url):
  470         program_id = self._match_id(url)
  471 
  472         info = self.query_api(
  473             'dj/program/detail?id=%s' % program_id,
  474             program_id, 'Downloading program info')['program']
  475 
  476         name = info['name']
  477         description = info['description']
  478 
  479         if not info['songs'] or self._downloader.params.get('noplaylist'):
  480             if info['songs']:
  481                 self.to_screen(
  482                     'Downloading just the main audio %s because of --no-playlist'
  483                     % info['mainSong']['id'])
  484 
  485             formats = self.extract_formats(info['mainSong'])
  486             self._sort_formats(formats)
  487 
  488             return {
  489                 'id': program_id,
  490                 'title': name,
  491                 'description': description,
  492                 'creator': info['dj']['brand'],
  493                 'timestamp': self.convert_milliseconds(info['createTime']),
  494                 'thumbnail': info['coverUrl'],
  495                 'duration': self.convert_milliseconds(info.get('duration', 0)),
  496                 'formats': formats,
  497             }
  498 
  499         self.to_screen(
  500             'Downloading playlist %s - add --no-playlist to just download the main audio %s'
  501             % (program_id, info['mainSong']['id']))
  502 
  503         song_ids = [info['mainSong']['id']]
  504         song_ids.extend([song['id'] for song in info['songs']])
  505         entries = [
  506             self.url_result('http://music.163.com/#/song?id=%s' % song_id,
  507                             'NetEaseMusic', song_id)
  508             for song_id in song_ids
  509         ]
  510         return self.playlist_result(entries, program_id, name, description)
  511 
  512 
  513 class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
  514     IE_NAME = 'netease:djradio'
  515     IE_DESC = '网易云音乐 - 电台'
  516     _VALID_URL = r'https?://music\.163\.com/(#/)?djradio\?id=(?P<id>[0-9]+)'
  517     _TEST = {
  518         'url': 'http://music.163.com/#/djradio?id=42',
  519         'info_dict': {
  520             'id': '42',
  521             'title': '声音蔓延',
  522             'description': 'md5:766220985cbd16fdd552f64c578a6b15'
  523         },
  524         'playlist_mincount': 40,
  525         'skip': 'Blocked outside Mainland China',
  526     }
  527     _PAGE_SIZE = 1000
  528 
  529     def _real_extract(self, url):
  530         dj_id = self._match_id(url)
  531 
  532         name = None
  533         desc = None
  534         entries = []
  535         for offset in compat_itertools_count(start=0, step=self._PAGE_SIZE):
  536             info = self.query_api(
  537                 'dj/program/byradio?asc=false&limit=%d&radioId=%s&offset=%d'
  538                 % (self._PAGE_SIZE, dj_id, offset),
  539                 dj_id, 'Downloading dj programs - %d' % offset)
  540 
  541             entries.extend([
  542                 self.url_result(
  543                     'http://music.163.com/#/program?id=%s' % program['id'],
  544                     'NetEaseMusicProgram', program['id'])
  545                 for program in info['programs']
  546             ])
  547 
  548             if name is None:
  549                 radio = info['programs'][0]['radio']
  550                 name = radio['name']
  551                 desc = radio['desc']
  552 
  553             if not info['more']:
  554                 break
  555 
  556         return self.playlist_result(entries, dj_id, name, desc)

Generated by cgit