summaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/neteasemusic.py
blob: 5e5c6271bbedeb8aa9e35dc0977426be8322d425 (plain)
    1 # coding: utf-8
    2 from __future__ import unicode_literals
    3 
    4 from base64 import b64encode
    5 from binascii import hexlify
    6 from datetime import datetime
    7 from hashlib import md5
    8 from random import randint
    9 import json
   10 import re
   11 import time
   12 
   13 from .common import InfoExtractor
   14 from ..aes import aes_ecb_encrypt, pkcs7_padding
   15 from ..compat import (
   16     compat_urllib_parse_urlencode,
   17     compat_str,
   18     compat_itertools_count,
   19 )
   20 from ..utils import (
   21     ExtractorError,
   22     bytes_to_intlist,
   23     error_to_compat_str,
   24     float_or_none,
   25     int_or_none,
   26     intlist_to_bytes,
   27     sanitized_Request,
   28     std_headers,
   29     try_get,
   30 )
   31 
   32 
   33 class NetEaseMusicBaseIE(InfoExtractor):
   34     _FORMATS = ['bMusic', 'mMusic', 'hMusic']
   35     _NETEASE_SALT = '3go8&$8*3*3h0k(2)2'
   36     _API_BASE = 'http://music.163.com/api/'
   37 
   38     @classmethod
   39     def _encrypt(cls, dfsid):
   40         salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8'))
   41         string_bytes = bytearray(compat_str(dfsid).encode('ascii'))
   42         salt_len = len(salt_bytes)
   43         for i in range(len(string_bytes)):
   44             string_bytes[i] = string_bytes[i] ^ salt_bytes[i % salt_len]
   45         m = md5()
   46         m.update(bytes(string_bytes))
   47         result = b64encode(m.digest()).decode('ascii')
   48         return result.replace('/', '_').replace('+', '-')
   49 
   50     @classmethod
   51     def make_player_api_request_data_and_headers(cls, song_id, bitrate):
   52         KEY = b'e82ckenh8dichen8'
   53         URL = '/api/song/enhance/player/url'
   54         now = int(time.time() * 1000)
   55         rand = randint(0, 1000)
   56         cookie = {
   57             'osver': None,
   58             'deviceId': None,
   59             'appver': '8.0.0',
   60             'versioncode': '140',
   61             'mobilename': None,
   62             'buildver': '1623435496',
   63             'resolution': '1920x1080',
   64             '__csrf': '',
   65             'os': 'pc',
   66             'channel': None,
   67             'requestId': '{0}_{1:04}'.format(now, rand),
   68         }
   69         request_text = json.dumps(
   70             {'ids': '[{0}]'.format(song_id), 'br': bitrate, 'header': cookie},
   71             separators=(',', ':'))
   72         message = 'nobody{0}use{1}md5forencrypt'.format(
   73             URL, request_text).encode('latin1')
   74         msg_digest = md5(message).hexdigest()
   75 
   76         data = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format(
   77             URL, request_text, msg_digest)
   78         data = pkcs7_padding(bytes_to_intlist(data))
   79         encrypted = intlist_to_bytes(aes_ecb_encrypt(data, bytes_to_intlist(KEY)))
   80         encrypted_params = hexlify(encrypted).decode('ascii').upper()
   81 
   82         cookie = '; '.join(
   83             ['{0}={1}'.format(k, v if v is not None else 'undefined')
   84              for [k, v] in cookie.items()])
   85 
   86         headers = {
   87             'User-Agent': std_headers['User-Agent'],
   88             'Content-Type': 'application/x-www-form-urlencoded',
   89             'Referer': 'https://music.163.com',
   90             'Cookie': cookie,
   91         }
   92         return ('params={0}'.format(encrypted_params), headers)
   93 
   94     def _call_player_api(self, song_id, bitrate):
   95         url = 'https://interface3.music.163.com/eapi/song/enhance/player/url'
   96         data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate)
   97         try:
   98             msg = 'empty result'
   99             result = self._download_json(
  100                 url, song_id, data=data.encode('ascii'), headers=headers)
  101             if result:
  102                 return result
  103         except ExtractorError as e:
  104             if type(e.cause) in (ValueError, TypeError):
  105                 # JSON load failure
  106                 raise
  107         except Exception as e:
  108             msg = error_to_compat_str(e)
  109             self.report_warning('%s API call (%s) failed: %s' % (
  110                 song_id, bitrate, msg))
  111         return {}
  112 
  113     def extract_formats(self, info):
  114         err = 0
  115         formats = []
  116         song_id = info['id']
  117         for song_format in self._FORMATS:
  118             details = info.get(song_format)
  119             if not details:
  120                 continue
  121 
  122             bitrate = int_or_none(details.get('bitrate')) or 999000
  123             data = self._call_player_api(song_id, bitrate)
  124             for song in try_get(data, lambda x: x['data'], list) or []:
  125                 song_url = try_get(song, lambda x: x['url'])
  126                 if not song_url:
  127                     continue
  128                 if self._is_valid_url(song_url, info['id'], 'song'):
  129                     formats.append({
  130                         'url': song_url,
  131                         'ext': details.get('extension'),
  132                         'abr': float_or_none(song.get('br'), scale=1000),
  133                         'format_id': song_format,
  134                         'filesize': int_or_none(song.get('size')),
  135                         'asr': int_or_none(details.get('sr')),
  136                     })
  137                 elif err == 0:
  138                     err = try_get(song, lambda x: x['code'], int)
  139 
  140         if not formats:
  141             msg = 'No media links found'
  142             if err != 0 and (err < 200 or err >= 400):
  143                 raise ExtractorError(
  144                     '%s (site code %d)' % (msg, err, ), expected=True)
  145             else:
  146                 self.raise_geo_restricted(
  147                     msg + ': probably this video is not available from your location due to geo restriction.',
  148                     countries=['CN'])
  149 
  150         return formats
  151 
  152     @classmethod
  153     def convert_milliseconds(cls, ms):
  154         return int(round(ms / 1000.0))
  155 
  156     def query_api(self, endpoint, video_id, note):
  157         req = sanitized_Request('%s%s' % (self._API_BASE, endpoint))
  158         req.add_header('Referer', self._API_BASE)
  159         return self._download_json(req, video_id, note)
  160 
  161 
  162 class NetEaseMusicIE(NetEaseMusicBaseIE):
  163     IE_NAME = 'netease:song'
  164     IE_DESC = '网易云音乐'
  165     _VALID_URL = r'https?://(y\.)?music\.163\.com/(?:[#m]/)?song\?.*?\bid=(?P<id>[0-9]+)'
  166     _TESTS = [{
  167         'url': 'http://music.163.com/#/song?id=32102397',
  168         'md5': '3e909614ce09b1ccef4a3eb205441190',
  169         'info_dict': {
  170             'id': '32102397',
  171             'ext': 'mp3',
  172             'title': 'Bad Blood',
  173             'creator': 'Taylor Swift / Kendrick Lamar',
  174             'upload_date': '20150516',
  175             'timestamp': 1431792000,
  176             'description': 'md5:25fc5f27e47aad975aa6d36382c7833c',
  177         },
  178     }, {
  179         'note': 'No lyrics.',
  180         'url': 'http://music.163.com/song?id=17241424',
  181         'info_dict': {
  182             'id': '17241424',
  183             'ext': 'mp3',
  184             'title': 'Opus 28',
  185             'creator': 'Dustin O\'Halloran',
  186             'upload_date': '20080211',
  187             'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4',
  188             'timestamp': 1202745600,
  189         },
  190     }, {
  191         'note': 'Has translated name.',
  192         'url': 'http://music.163.com/#/song?id=22735043',
  193         'info_dict': {
  194             'id': '22735043',
  195             'ext': 'mp3',
  196             'title': '소원을 말해봐 (Genie)',
  197             'creator': '少女时代',
  198             'description': 'md5:79d99cc560e4ca97e0c4d86800ee4184',
  199             'upload_date': '20100127',
  200             'timestamp': 1264608000,
  201             'alt_title': '说出愿望吧(Genie)',
  202         },
  203     }, {
  204         'url': 'https://y.music.163.com/m/song?app_version=8.8.45&id=95670&uct2=sKnvS4+0YStsWkqsPhFijw%3D%3D&dlt=0846',
  205         'md5': '95826c73ea50b1c288b22180ec9e754d',
  206         'info_dict': {
  207             'id': '95670',
  208             'ext': 'mp3',
  209             'title': '国际歌',
  210             'creator': '马备',
  211             'upload_date': '19911130',
  212             'timestamp': 691516800,
  213             'description': 'md5:1ba2f911a2b0aa398479f595224f2141',
  214         },
  215     }]
  216 
  217     def _process_lyrics(self, lyrics_info):
  218         original = lyrics_info.get('lrc', {}).get('lyric')
  219         translated = lyrics_info.get('tlyric', {}).get('lyric')
  220 
  221         if not translated:
  222             return original
  223 
  224         lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)'
  225         original_ts_texts = re.findall(lyrics_expr, original)
  226         translation_ts_dict = dict(
  227             (time_stamp, text) for time_stamp, text in re.findall(lyrics_expr, translated)
  228         )
  229         lyrics = '\n'.join([
  230             '%s%s / %s' % (time_stamp, text, translation_ts_dict.get(time_stamp, ''))
  231             for time_stamp, text in original_ts_texts
  232         ])
  233         return lyrics
  234 
  235     def _real_extract(self, url):
  236         song_id = self._match_id(url)
  237 
  238         params = {
  239             'id': song_id,
  240             'ids': '[%s]' % song_id
  241         }
  242         info = self.query_api(
  243             'song/detail?' + compat_urllib_parse_urlencode(params),
  244             song_id, 'Downloading song info')['songs'][0]
  245 
  246         formats = self.extract_formats(info)
  247         self._sort_formats(formats)
  248 
  249         lyrics_info = self.query_api(
  250             'song/lyric?id=%s&lv=-1&tv=-1' % song_id,
  251             song_id, 'Downloading lyrics data')
  252         lyrics = self._process_lyrics(lyrics_info)
  253 
  254         alt_title = None
  255         if info.get('transNames'):
  256             alt_title = '/'.join(info.get('transNames'))
  257 
  258         return {
  259             'id': song_id,
  260             'title': info['name'],
  261             'alt_title': alt_title,
  262             'creator': ' / '.join([artist['name'] for artist in info.get('artists', [])]),
  263             'timestamp': self.convert_milliseconds(info.get('album', {}).get('publishTime')),
  264             'thumbnail': info.get('album', {}).get('picUrl'),
  265             'duration': self.convert_milliseconds(info.get('duration', 0)),
  266             'description': lyrics,
  267             'formats': formats,
  268         }
  269 
  270 
  271 class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
  272     IE_NAME = 'netease:album'
  273     IE_DESC = '网易云音乐 - 专辑'
  274     _VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)'
  275     _TEST = {
  276         'url': 'http://music.163.com/#/album?id=220780',
  277         'info_dict': {
  278             'id': '220780',
  279             'title': 'B\'day',
  280         },
  281         'playlist_count': 23,
  282         'skip': 'Blocked outside Mainland China',
  283     }
  284 
  285     def _real_extract(self, url):
  286         album_id = self._match_id(url)
  287 
  288         info = self.query_api(
  289             'album/%s?id=%s' % (album_id, album_id),
  290             album_id, 'Downloading album data')['album']
  291 
  292         name = info['name']
  293         desc = info.get('description')
  294         entries = [
  295             self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
  296                             'NetEaseMusic', song['id'])
  297             for song in info['songs']
  298         ]
  299         return self.playlist_result(entries, album_id, name, desc)
  300 
  301 
  302 class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
  303     IE_NAME = 'netease:singer'
  304     IE_DESC = '网易云音乐 - 歌手'
  305     _VALID_URL = r'https?://music\.163\.com/(#/)?artist\?id=(?P<id>[0-9]+)'
  306     _TESTS = [{
  307         'note': 'Singer has aliases.',
  308         'url': 'http://music.163.com/#/artist?id=10559',
  309         'info_dict': {
  310             'id': '10559',
  311             'title': '张惠妹 - aMEI;阿密特',
  312         },
  313         'playlist_count': 50,
  314         'skip': 'Blocked outside Mainland China',
  315     }, {
  316         'note': 'Singer has translated name.',
  317         'url': 'http://music.163.com/#/artist?id=124098',
  318         'info_dict': {
  319             'id': '124098',
  320             'title': '李昇基 - 이승기',
  321         },
  322         'playlist_count': 50,
  323         'skip': 'Blocked outside Mainland China',
  324     }]
  325 
  326     def _real_extract(self, url):
  327         singer_id = self._match_id(url)
  328 
  329         info = self.query_api(
  330             'artist/%s?id=%s' % (singer_id, singer_id),
  331             singer_id, 'Downloading singer data')
  332 
  333         name = info['artist']['name']
  334         if info['artist']['trans']:
  335             name = '%s - %s' % (name, info['artist']['trans'])
  336         if info['artist']['alias']:
  337             name = '%s - %s' % (name, ';'.join(info['artist']['alias']))
  338 
  339         entries = [
  340             self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
  341                             'NetEaseMusic', song['id'])
  342             for song in info['hotSongs']
  343         ]
  344         return self.playlist_result(entries, singer_id, name)
  345 
  346 
  347 class NetEaseMusicListIE(NetEaseMusicBaseIE):
  348     IE_NAME = 'netease:playlist'
  349     IE_DESC = '网易云音乐 - 歌单'
  350     _VALID_URL = r'https?://music\.163\.com/(#/)?(playlist|discover/toplist)\?id=(?P<id>[0-9]+)'
  351     _TESTS = [{
  352         'url': 'http://music.163.com/#/playlist?id=79177352',
  353         'info_dict': {
  354             'id': '79177352',
  355             'title': 'Billboard 2007 Top 100',
  356             'description': 'md5:12fd0819cab2965b9583ace0f8b7b022'
  357         },
  358         'playlist_count': 99,
  359         'skip': 'Blocked outside Mainland China',
  360     }, {
  361         'note': 'Toplist/Charts sample',
  362         'url': 'http://music.163.com/#/discover/toplist?id=3733003',
  363         'info_dict': {
  364             'id': '3733003',
  365             'title': 're:韩国Melon排行榜周榜 [0-9]{4}-[0-9]{2}-[0-9]{2}',
  366             'description': 'md5:73ec782a612711cadc7872d9c1e134fc',
  367         },
  368         'playlist_count': 50,
  369         'skip': 'Blocked outside Mainland China',
  370     }]
  371 
  372     def _real_extract(self, url):
  373         list_id = self._match_id(url)
  374 
  375         info = self.query_api(
  376             'playlist/detail?id=%s&lv=-1&tv=-1' % list_id,
  377             list_id, 'Downloading playlist data')['result']
  378 
  379         name = info['name']
  380         desc = info.get('description')
  381 
  382         if info.get('specialType') == 10:  # is a chart/toplist
  383             datestamp = datetime.fromtimestamp(
  384                 self.convert_milliseconds(info['updateTime'])).strftime('%Y-%m-%d')
  385             name = '%s %s' % (name, datestamp)
  386 
  387         entries = [
  388             self.url_result('http://music.163.com/#/song?id=%s' % song['id'],
  389                             'NetEaseMusic', song['id'])
  390             for song in info['tracks']
  391         ]
  392         return self.playlist_result(entries, list_id, name, desc)
  393 
  394 
  395 class NetEaseMusicMvIE(NetEaseMusicBaseIE):
  396     IE_NAME = 'netease:mv'
  397     IE_DESC = '网易云音乐 - MV'
  398     _VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)'
  399     _TEST = {
  400         'url': 'http://music.163.com/#/mv?id=415350',
  401         'info_dict': {
  402             'id': '415350',
  403             'ext': 'mp4',
  404             'title': '이럴거면 그러지말지',
  405             'description': '白雅言自作曲唱甜蜜爱情',
  406             'creator': '白雅言',
  407             'upload_date': '20150520',
  408         },
  409         'skip': 'Blocked outside Mainland China',
  410     }
  411 
  412     def _real_extract(self, url):
  413         mv_id = self._match_id(url)
  414 
  415         info = self.query_api(
  416             'mv/detail?id=%s&type=mp4' % mv_id,
  417             mv_id, 'Downloading mv info')['data']
  418 
  419         formats = [
  420             {'url': mv_url, 'ext': 'mp4', 'format_id': '%sp' % brs, 'height': int(brs)}
  421             for brs, mv_url in info['brs'].items()
  422         ]
  423         self._sort_formats(formats)
  424 
  425         return {
  426             'id': mv_id,
  427             'title': info['name'],
  428             'description': info.get('desc') or info.get('briefDesc'),
  429             'creator': info['artistName'],
  430             'upload_date': info['publishTime'].replace('-', ''),
  431             'formats': formats,
  432             'thumbnail': info.get('cover'),
  433             'duration': self.convert_milliseconds(info.get('duration', 0)),
  434         }
  435 
  436 
  437 class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
  438     IE_NAME = 'netease:program'
  439     IE_DESC = '网易云音乐 - 电台节目'
  440     _VALID_URL = r'https?://music\.163\.com/(#/?)program\?id=(?P<id>[0-9]+)'
  441     _TESTS = [{
  442         'url': 'http://music.163.com/#/program?id=10109055',
  443         'info_dict': {
  444             'id': '10109055',
  445             'ext': 'mp3',
  446             'title': '不丹足球背后的故事',
  447             'description': '喜马拉雅人的足球梦 ...',
  448             'creator': '大话西藏',
  449             'timestamp': 1434179342,
  450             'upload_date': '20150613',
  451             'duration': 900,
  452         },
  453         'skip': 'Blocked outside Mainland China',
  454     }, {
  455         'note': 'This program has accompanying songs.',
  456         'url': 'http://music.163.com/#/program?id=10141022',
  457         'info_dict': {
  458             'id': '10141022',
  459             'title': '25岁,你是自在如风的少年<27°C>',
  460             'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
  461         },
  462         'playlist_count': 4,
  463         'skip': 'Blocked outside Mainland China',
  464     }, {
  465         'note': 'This program has accompanying songs.',
  466         'url': 'http://music.163.com/#/program?id=10141022',
  467         'info_dict': {
  468             'id': '10141022',
  469             'ext': 'mp3',
  470             'title': '25岁,你是自在如风的少年<27°C>',
  471             'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
  472             'timestamp': 1434450841,
  473             'upload_date': '20150616',
  474         },
  475         'params': {
  476             'noplaylist': True
  477         },
  478         'skip': 'Blocked outside Mainland China',
  479     }]
  480 
  481     def _real_extract(self, url):
  482         program_id = self._match_id(url)
  483 
  484         info = self.query_api(
  485             'dj/program/detail?id=%s' % program_id,
  486             program_id, 'Downloading program info')['program']
  487 
  488         name = info['name']
  489         description = info['description']
  490 
  491         if not info['songs'] or self._downloader.params.get('noplaylist'):
  492             if info['songs']:
  493                 self.to_screen(
  494                     'Downloading just the main audio %s because of --no-playlist'
  495                     % info['mainSong']['id'])
  496 
  497             formats = self.extract_formats(info['mainSong'])
  498             self._sort_formats(formats)
  499 
  500             return {
  501                 'id': program_id,
  502                 'title': name,
  503                 'description': description,
  504                 'creator': info['dj']['brand'],
  505                 'timestamp': self.convert_milliseconds(info['createTime']),
  506                 'thumbnail': info['coverUrl'],
  507                 'duration': self.convert_milliseconds(info.get('duration', 0)),
  508                 'formats': formats,
  509             }
  510 
  511         self.to_screen(
  512             'Downloading playlist %s - add --no-playlist to just download the main audio %s'
  513             % (program_id, info['mainSong']['id']))
  514 
  515         song_ids = [info['mainSong']['id']]
  516         song_ids.extend([song['id'] for song in info['songs']])
  517         entries = [
  518             self.url_result('http://music.163.com/#/song?id=%s' % song_id,
  519                             'NetEaseMusic', song_id)
  520             for song_id in song_ids
  521         ]
  522         return self.playlist_result(entries, program_id, name, description)
  523 
  524 
  525 class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
  526     IE_NAME = 'netease:djradio'
  527     IE_DESC = '网易云音乐 - 电台'
  528     _VALID_URL = r'https?://music\.163\.com/(#/)?djradio\?id=(?P<id>[0-9]+)'
  529     _TEST = {
  530         'url': 'http://music.163.com/#/djradio?id=42',
  531         'info_dict': {
  532             'id': '42',
  533             'title': '声音蔓延',
  534             'description': 'md5:766220985cbd16fdd552f64c578a6b15'
  535         },
  536         'playlist_mincount': 40,
  537         'skip': 'Blocked outside Mainland China',
  538     }
  539     _PAGE_SIZE = 1000
  540 
  541     def _real_extract(self, url):
  542         dj_id = self._match_id(url)
  543 
  544         name = None
  545         desc = None
  546         entries = []
  547         for offset in compat_itertools_count(start=0, step=self._PAGE_SIZE):
  548             info = self.query_api(
  549                 'dj/program/byradio?asc=false&limit=%d&radioId=%s&offset=%d'
  550                 % (self._PAGE_SIZE, dj_id, offset),
  551                 dj_id, 'Downloading dj programs - %d' % offset)
  552 
  553             entries.extend([
  554                 self.url_result(
  555                     'http://music.163.com/#/program?id=%s' % program['id'],
  556                     'NetEaseMusicProgram', program['id'])
  557                 for program in info['programs']
  558             ])
  559 
  560             if name is None:
  561                 radio = info['programs'][0]['radio']
  562                 name = radio['name']
  563                 desc = radio['desc']
  564 
  565             if not info['more']:
  566                 break
  567 
  568         return self.playlist_result(entries, dj_id, name, desc)

Generated by cgit