1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from base64 import b64encode
5 from binascii import hexlify
6 from datetime import datetime
7 from hashlib import md5
8 from random import randint
9 import json
10 import re
11 import time
12
13 from .common import InfoExtractor
14 from ..aes import aes_ecb_encrypt, pkcs7_padding
15 from ..compat import (
16 compat_urllib_parse_urlencode,
17 compat_str,
18 compat_itertools_count,
19 )
20 from ..utils import (
21 ExtractorError,
22 bytes_to_intlist,
23 float_or_none,
24 int_or_none,
25 intlist_to_bytes,
26 sanitized_Request,
27 std_headers,
28 try_get,
29 )
30
31
class NetEaseMusicBaseIE(InfoExtractor):
    """Common helpers shared by the NetEase Cloud Music extractors.

    Provides the request signing/encryption used by the player API, format
    extraction for a song description, and a thin wrapper around the plain
    JSON API rooted at ``_API_BASE``.
    """
    # Keys of a song description that may hold per-quality details.
    _FORMATS = ['bMusic', 'mMusic', 'hMusic']
    _NETEASE_SALT = '3go8&$8*3*3h0k(2)2'
    _API_BASE = 'http://music.163.com/api/'

    @classmethod
    def _encrypt(cls, dfsid):
        """Return the URL-safe base64 MD5 of *dfsid* XOR-ed with the salt.

        The ASCII bytes of ``dfsid`` are XOR-ed with the bytes of
        ``_NETEASE_SALT`` (repeated cyclically), hashed with MD5 and
        base64-encoded; ``/`` and ``+`` are then replaced by ``_`` and ``-``.
        """
        salt_bytes = bytearray(cls._NETEASE_SALT.encode('utf-8'))
        string_bytes = bytearray(compat_str(dfsid).encode('ascii'))
        salt_len = len(salt_bytes)
        xored = bytearray(
            byte ^ salt_bytes[i % salt_len]
            for i, byte in enumerate(string_bytes))
        digest = md5(bytes(xored)).digest()
        result = b64encode(digest).decode('ascii')
        return result.replace('/', '_').replace('+', '-')

    @classmethod
    def make_player_api_request_data_and_headers(cls, song_id, bitrate):
        """Build the encrypted POST body and headers for the player URL API.

        The JSON request is signed with an MD5 digest, joined with the API
        path using a fixed separator, PKCS#7-padded and AES-ECB encrypted;
        the ciphertext is sent as upper-case hex.

        Returns a tuple ``(body, headers)`` where ``body`` is the text
        ``'params=<HEX>'`` and ``headers`` is a dict of HTTP headers.
        """
        KEY = b'e82ckenh8dichen8'
        URL = '/api/song/enhance/player/url'
        now = int(time.time() * 1000)
        rand = randint(0, 1000)
        cookie_fields = {
            'osver': None,
            'deviceId': None,
            'appver': '8.0.0',
            'versioncode': '140',
            'mobilename': None,
            'buildver': '1623435496',
            'resolution': '1920x1080',
            '__csrf': '',
            'os': 'pc',
            'channel': None,
            'requestId': '{0}_{1:04}'.format(now, rand),
        }
        # The same fields are sent twice: inside the encrypted JSON payload
        # (as 'header') and as the Cookie header below.
        request_text = json.dumps(
            {'ids': '[{0}]'.format(song_id), 'br': bitrate, 'header': cookie_fields},
            separators=(',', ':'))
        message = 'nobody{0}use{1}md5forencrypt'.format(
            URL, request_text).encode('latin1')
        msg_digest = md5(message).hexdigest()

        data = '{0}-36cd479b6b5-{1}-36cd479b6b5-{2}'.format(
            URL, request_text, msg_digest)
        data = pkcs7_padding(bytes_to_intlist(data))
        encrypted = intlist_to_bytes(aes_ecb_encrypt(data, bytes_to_intlist(KEY)))
        encrypted_params = hexlify(encrypted).decode('ascii').upper()

        # Unset fields are transmitted as the literal text 'undefined'.
        cookie_header = '; '.join(
            ['{0}={1}'.format(k, v if v is not None else 'undefined')
             for k, v in cookie_fields.items()])

        headers = {
            'User-Agent': std_headers['User-Agent'],
            'Content-Type': 'application/x-www-form-urlencoded',
            'Referer': 'https://music.163.com',
            'Cookie': cookie_header,
        }
        return ('params={0}'.format(encrypted_params), headers)

    def _call_player_api(self, song_id, bitrate):
        """Query the player API for *song_id* at *bitrate*.

        Returns the decoded JSON response, or an empty dict when the request
        fails for any reason other than a JSON load failure, which is
        re-raised as the original ExtractorError.
        """
        url = 'https://interface3.music.163.com/eapi/song/enhance/player/url'
        data, headers = self.make_player_api_request_data_and_headers(song_id, bitrate)
        try:
            return self._download_json(
                url, song_id, data=data.encode('ascii'), headers=headers)
        except ExtractorError as e:
            # isinstance() rather than an exact type match: on Python 3 a
            # JSON parse error is json.JSONDecodeError, a ValueError
            # subclass, which an exact type comparison never matches.
            if isinstance(e.cause, (ValueError, TypeError)):
                # JSON load failure
                raise
        except Exception:
            # Deliberate best effort: any other failure yields no data.
            pass
        return {}

    def extract_formats(self, info):
        """Return a list of format dicts for the song described by *info*.

        For every key of ``_FORMATS`` present in *info*, the player API is
        queried with that quality's bitrate (999000 when none is given) and
        each returned entry with a usable URL becomes one format.
        """
        formats = []
        song_id = info['id']
        for song_format in self._FORMATS:
            details = info.get(song_format)
            if not details:
                continue

            bitrate = int_or_none(details.get('bitrate')) or 999000
            data = self._call_player_api(song_id, bitrate)
            for song in try_get(data, lambda x: x['data'], list) or []:
                song_url = try_get(song, lambda x: x['url'])
                if not song_url:
                    # No URL in this entry (or the entry is not a mapping);
                    # there is nothing to validate or download.
                    continue
                if self._is_valid_url(song_url, info['id'], 'song'):
                    formats.append({
                        'url': song_url,
                        'ext': details.get('extension'),
                        'abr': float_or_none(song.get('br'), scale=1000),
                        'format_id': song_format,
                        'filesize': int_or_none(song.get('size')),
                        'asr': int_or_none(details.get('sr')),
                    })
        return formats

    @classmethod
    def convert_milliseconds(cls, ms):
        """Convert *ms* milliseconds to whole seconds, rounded.

        Returns None when *ms* is None so that missing API fields do not
        raise a TypeError.
        """
        if ms is None:
            return None
        return int(round(ms / 1000.0))

    def query_api(self, endpoint, video_id, note):
        """Download and return JSON from ``_API_BASE + endpoint``.

        The request carries ``_API_BASE`` as its Referer header; *video_id*
        and *note* are passed through to ``_download_json``.
        """
        req = sanitized_Request('%s%s' % (self._API_BASE, endpoint))
        req.add_header('Referer', self._API_BASE)
        return self._download_json(req, video_id, note)
138
139
class NetEaseMusicIE(NetEaseMusicBaseIE):
    """Extractor for a single song page on music.163.com."""
    IE_NAME = 'netease:song'
    IE_DESC = '网易云音乐'
    _VALID_URL = r'https?://music\.163\.com/(#/)?song\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://music.163.com/#/song?id=32102397',
        'md5': '3e909614ce09b1ccef4a3eb205441190',
        'info_dict': {
            'id': '32102397',
            'ext': 'mp3',
            'title': 'Bad Blood',
            'creator': 'Taylor Swift / Kendrick Lamar',
            'upload_date': '20150516',
            'timestamp': 1431792000,
            'description': 'md5:25fc5f27e47aad975aa6d36382c7833c',
        },
    }, {
        'note': 'No lyrics.',
        'url': 'http://music.163.com/song?id=17241424',
        'info_dict': {
            'id': '17241424',
            'ext': 'mp3',
            'title': 'Opus 28',
            'creator': 'Dustin O\'Halloran',
            'upload_date': '20080211',
            'description': 'md5:f12945b0f6e0365e3b73c5032e1b0ff4',
            'timestamp': 1202745600,
        },
    }, {
        'note': 'Has translated name.',
        'url': 'http://music.163.com/#/song?id=22735043',
        'info_dict': {
            'id': '22735043',
            'ext': 'mp3',
            'title': '소원을 말해봐 (Genie)',
            'creator': '少女时代',
            'description': 'md5:79d99cc560e4ca97e0c4d86800ee4184',
            'upload_date': '20100127',
            'timestamp': 1264608000,
            'alt_title': '说出愿望吧(Genie)',
        },
    }]

    def _process_lyrics(self, lyrics_info):
        """Return the lyrics text built from the lyrics API response.

        Without a translation the original lyrics are returned unchanged
        (possibly None). Otherwise each time-stamped original line is
        rendered as ``<timestamp><text> / <translation>``, matching the
        translation by identical time stamp.
        """
        # 'lrc'/'tlyric' may be absent or null, so fall back to an empty
        # mapping instead of calling .get() on None.
        original = (lyrics_info.get('lrc') or {}).get('lyric')
        translated = (lyrics_info.get('tlyric') or {}).get('lyric')

        # Nothing to merge unless both texts are present; this also keeps
        # re.findall from being called on None.
        if not original or not translated:
            return original

        lyrics_expr = r'(\[[0-9]{2}:[0-9]{2}\.[0-9]{2,}\])([^\n]+)'
        original_ts_texts = re.findall(lyrics_expr, original)
        translation_ts_dict = dict(
            (time_stamp, text) for time_stamp, text in re.findall(lyrics_expr, translated)
        )
        lyrics = '\n'.join([
            '%s%s / %s' % (time_stamp, text, translation_ts_dict.get(time_stamp, ''))
            for time_stamp, text in original_ts_texts
        ])
        return lyrics

    def _real_extract(self, url):
        """Extract metadata, formats and lyrics for the song at *url*."""
        song_id = self._match_id(url)

        params = {
            'id': song_id,
            'ids': '[%s]' % song_id
        }
        info = self.query_api(
            'song/detail?' + compat_urllib_parse_urlencode(params),
            song_id, 'Downloading song info')['songs'][0]

        formats = self.extract_formats(info)
        self._sort_formats(formats)

        lyrics_info = self.query_api(
            'song/lyric?id=%s&lv=-1&tv=-1' % song_id,
            song_id, 'Downloading lyrics data')
        lyrics = self._process_lyrics(lyrics_info)

        alt_title = None
        if info.get('transNames'):
            alt_title = '/'.join(info.get('transNames'))

        # Album data and its publish time are optional; only convert the
        # time stamp when one is actually present.
        album = info.get('album') or {}
        publish_time = album.get('publishTime')
        timestamp = None
        if publish_time is not None:
            timestamp = self.convert_milliseconds(publish_time)

        return {
            'id': song_id,
            'title': info['name'],
            'alt_title': alt_title,
            'creator': ' / '.join([artist['name'] for artist in info.get('artists') or []]),
            'timestamp': timestamp,
            'thumbnail': album.get('picUrl'),
            'duration': self.convert_milliseconds(info.get('duration') or 0),
            'description': lyrics,
            'formats': formats,
        }
235
236
class NetEaseMusicAlbumIE(NetEaseMusicBaseIE):
    """Extractor for album pages; yields a playlist of the album's songs."""
    IE_NAME = 'netease:album'
    IE_DESC = '网易云音乐 - 专辑'
    _VALID_URL = r'https?://music\.163\.com/(#/)?album\?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://music.163.com/#/album?id=220780',
        'info_dict': {
            'id': '220780',
            'title': 'B\'day',
        },
        'playlist_count': 23,
        'skip': 'Blocked outside Mainland China',
    }

    def _real_extract(self, url):
        """Return a playlist result with one song entry per album track."""
        album_id = self._match_id(url)

        endpoint = 'album/%s?id=%s' % (album_id, album_id)
        response = self.query_api(endpoint, album_id, 'Downloading album data')
        album = response['album']

        title = album['name']
        description = album.get('description')

        entries = []
        for track in album['songs']:
            track_id = track['id']
            entries.append(self.url_result(
                'http://music.163.com/#/song?id=%s' % track_id,
                'NetEaseMusic', track_id))

        return self.playlist_result(entries, album_id, title, description)
266
267
class NetEaseMusicSingerIE(NetEaseMusicBaseIE):
    """Extractor for artist pages; yields a playlist of the artist's hot songs."""
    IE_NAME = 'netease:singer'
    IE_DESC = '网易云音乐 - 歌手'
    _VALID_URL = r'https?://music\.163\.com/(#/)?artist\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'note': 'Singer has aliases.',
        'url': 'http://music.163.com/#/artist?id=10559',
        'info_dict': {
            'id': '10559',
            'title': '张惠妹 - aMEI;阿密特',
        },
        'playlist_count': 50,
        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'Singer has translated name.',
        'url': 'http://music.163.com/#/artist?id=124098',
        'info_dict': {
            'id': '124098',
            'title': '李昇基 - 이승기',
        },
        'playlist_count': 50,
        'skip': 'Blocked outside Mainland China',
    }]

    def _real_extract(self, url):
        """Return a playlist titled with the artist's name, translation and aliases."""
        singer_id = self._match_id(url)

        data = self.query_api(
            'artist/%s?id=%s' % (singer_id, singer_id),
            singer_id, 'Downloading singer data')

        # The title starts as the plain name; the translated name and the
        # ';'-joined aliases are appended when they are non-empty.
        title = data['artist']['name']
        translated = data['artist']['trans']
        if translated:
            title = '%s - %s' % (title, translated)
        aliases = data['artist']['alias']
        if aliases:
            title = '%s - %s' % (title, ';'.join(aliases))

        entries = []
        for track in data['hotSongs']:
            track_id = track['id']
            entries.append(self.url_result(
                'http://music.163.com/#/song?id=%s' % track_id,
                'NetEaseMusic', track_id))

        return self.playlist_result(entries, singer_id, title)
311
312
class NetEaseMusicListIE(NetEaseMusicBaseIE):
    """Extractor for user playlists and toplists (charts)."""
    IE_NAME = 'netease:playlist'
    IE_DESC = '网易云音乐 - 歌单'
    _VALID_URL = r'https?://music\.163\.com/(#/)?(playlist|discover/toplist)\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://music.163.com/#/playlist?id=79177352',
        'info_dict': {
            'id': '79177352',
            'title': 'Billboard 2007 Top 100',
            'description': 'md5:12fd0819cab2965b9583ace0f8b7b022'
        },
        'playlist_count': 99,
        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'Toplist/Charts sample',
        'url': 'http://music.163.com/#/discover/toplist?id=3733003',
        'info_dict': {
            'id': '3733003',
            'title': 're:韩国Melon排行榜周榜 [0-9]{4}-[0-9]{2}-[0-9]{2}',
            'description': 'md5:73ec782a612711cadc7872d9c1e134fc',
        },
        'playlist_count': 50,
        'skip': 'Blocked outside Mainland China',
    }]

    def _real_extract(self, url):
        """Return a playlist result with one song entry per track in the list."""
        list_id = self._match_id(url)

        response = self.query_api(
            'playlist/detail?id=%s&lv=-1&tv=-1' % list_id,
            list_id, 'Downloading playlist data')
        playlist = response['result']

        title = playlist['name']
        description = playlist.get('description')

        # A specialType of 10 marks a chart/toplist; its title gets the
        # date of the last update appended.
        if playlist.get('specialType') == 10:
            updated_at = datetime.fromtimestamp(
                self.convert_milliseconds(playlist['updateTime']))
            title = '%s %s' % (title, updated_at.strftime('%Y-%m-%d'))

        entries = []
        for track in playlist['tracks']:
            track_id = track['id']
            entries.append(self.url_result(
                'http://music.163.com/#/song?id=%s' % track_id,
                'NetEaseMusic', track_id))

        return self.playlist_result(entries, list_id, title, description)
359
360
class NetEaseMusicMvIE(NetEaseMusicBaseIE):
    """Extractor for music video (MV) pages."""
    IE_NAME = 'netease:mv'
    IE_DESC = '网易云音乐 - MV'
    _VALID_URL = r'https?://music\.163\.com/(#/)?mv\?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://music.163.com/#/mv?id=415350',
        'info_dict': {
            'id': '415350',
            'ext': 'mp4',
            'title': '이럴거면 그러지말지',
            'description': '白雅言自作曲唱甜蜜爱情',
            'creator': '白雅言',
            'upload_date': '20150520',
        },
        'skip': 'Blocked outside Mainland China',
    }

    def _real_extract(self, url):
        """Return the info dict for the music video at *url*."""
        mv_id = self._match_id(url)

        response = self.query_api(
            'mv/detail?id=%s&type=mp4' % mv_id,
            mv_id, 'Downloading mv info')
        info = response['data']

        # Each key of 'brs' is used both for the format id and, converted
        # to int, as the video height.
        formats = []
        for resolution, mv_url in info['brs'].items():
            formats.append({
                'url': mv_url,
                'ext': 'mp4',
                'format_id': '%sp' % resolution,
                'height': int(resolution),
            })
        self._sort_formats(formats)

        return {
            'id': mv_id,
            'title': info['name'],
            'description': info.get('desc') or info.get('briefDesc'),
            'creator': info['artistName'],
            'upload_date': info['publishTime'].replace('-', ''),
            'formats': formats,
            'thumbnail': info.get('cover'),
            'duration': self.convert_milliseconds(info.get('duration', 0)),
        }
401
402
class NetEaseMusicProgramIE(NetEaseMusicBaseIE):
    """Extractor for radio programs, optionally with their accompanying songs."""
    IE_NAME = 'netease:program'
    IE_DESC = '网易云音乐 - 电台节目'
    # The '#/' fragment prefix is optional, as in the sibling extractors.
    # '(#/?)?' also keeps accepting the bare '#program' form that the
    # previous pattern '(#/?)' matched.
    _VALID_URL = r'https?://music\.163\.com/(#/?)?program\?id=(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://music.163.com/#/program?id=10109055',
        'info_dict': {
            'id': '10109055',
            'ext': 'mp3',
            'title': '不丹足球背后的故事',
            'description': '喜马拉雅人的足球梦 ...',
            'creator': '大话西藏',
            'timestamp': 1434179342,
            'upload_date': '20150613',
            'duration': 900,
        },
        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'This program has accompanying songs.',
        'url': 'http://music.163.com/#/program?id=10141022',
        'info_dict': {
            'id': '10141022',
            'title': '25岁,你是自在如风的少年<27°C>',
            'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
        },
        'playlist_count': 4,
        'skip': 'Blocked outside Mainland China',
    }, {
        'note': 'This program has accompanying songs.',
        'url': 'http://music.163.com/#/program?id=10141022',
        'info_dict': {
            'id': '10141022',
            'ext': 'mp3',
            'title': '25岁,你是自在如风的少年<27°C>',
            'description': 'md5:8d594db46cc3e6509107ede70a4aaa3b',
            'timestamp': 1434450841,
            'upload_date': '20150616',
        },
        'params': {
            'noplaylist': True
        },
        'skip': 'Blocked outside Mainland China',
    }]

    def _real_extract(self, url):
        """Extract a program as a single audio or as a playlist.

        When the program has no accompanying songs, or the 'noplaylist'
        option is set, only the main audio is returned. Otherwise the result
        is a playlist of the main song followed by the accompanying songs.
        """
        program_id = self._match_id(url)

        info = self.query_api(
            'dj/program/detail?id=%s' % program_id,
            program_id, 'Downloading program info')['program']

        name = info['name']
        description = info['description']

        if not info['songs'] or self._downloader.params.get('noplaylist'):
            if info['songs']:
                # Songs exist, so only the noplaylist option led here.
                self.to_screen(
                    'Downloading just the main audio %s because of --no-playlist'
                    % info['mainSong']['id'])

            formats = self.extract_formats(info['mainSong'])
            self._sort_formats(formats)

            return {
                'id': program_id,
                'title': name,
                'description': description,
                'creator': info['dj']['brand'],
                'timestamp': self.convert_milliseconds(info['createTime']),
                'thumbnail': info['coverUrl'],
                'duration': self.convert_milliseconds(info.get('duration', 0)),
                'formats': formats,
            }

        self.to_screen(
            'Downloading playlist %s - add --no-playlist to just download the main audio %s'
            % (program_id, info['mainSong']['id']))

        song_ids = [info['mainSong']['id']]
        song_ids.extend([song['id'] for song in info['songs']])
        entries = [
            self.url_result('http://music.163.com/#/song?id=%s' % song_id,
                            'NetEaseMusic', song_id)
            for song_id in song_ids
        ]
        return self.playlist_result(entries, program_id, name, description)
489
490
class NetEaseMusicDjRadioIE(NetEaseMusicBaseIE):
    """Extractor for DJ radio pages; yields a playlist of the radio's programs."""
    IE_NAME = 'netease:djradio'
    IE_DESC = '网易云音乐 - 电台'
    _VALID_URL = r'https?://music\.163\.com/(#/)?djradio\?id=(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://music.163.com/#/djradio?id=42',
        'info_dict': {
            'id': '42',
            'title': '声音蔓延',
            'description': 'md5:766220985cbd16fdd552f64c578a6b15'
        },
        'playlist_mincount': 40,
        'skip': 'Blocked outside Mainland China',
    }
    # Number of programs requested per API call.
    _PAGE_SIZE = 1000

    def _real_extract(self, url):
        """Page through the radio's programs and return them as a playlist.

        The playlist title and description come from the 'radio' entry of
        the first program seen; both stay None for a radio without programs.
        """
        dj_id = self._match_id(url)

        name = None
        desc = None
        entries = []
        for offset in compat_itertools_count(start=0, step=self._PAGE_SIZE):
            info = self.query_api(
                'dj/program/byradio?asc=false&limit=%d&radioId=%s&offset=%d'
                % (self._PAGE_SIZE, dj_id, offset),
                dj_id, 'Downloading dj programs - %d' % offset)

            programs = info['programs']
            entries.extend([
                self.url_result(
                    'http://music.163.com/#/program?id=%s' % program['id'],
                    'NetEaseMusicProgram', program['id'])
                for program in programs
            ])

            # Guard against an empty page: indexing programs[0] would
            # raise IndexError for a radio without any program.
            if name is None and programs:
                radio = programs[0]['radio']
                name = radio['name']
                desc = radio['desc']

            # An empty page means there is nothing further to fetch, even
            # if the response still claims there is more.
            if not programs or not info['more']:
                break

        return self.playlist_result(entries, dj_id, name, desc)
|