youtube_dl/extractor/eighttracks.py



    1 import itertools
    2 import json
    3 import random
    4 import re
    5 
    6 from .common import InfoExtractor
    7 from ..utils import (
    8     ExtractorError,
    9 )
   10 
   11 
   12 class EightTracksIE(InfoExtractor):
   13     IE_NAME = '8tracks'
   14     _VALID_URL = r'https?://8tracks.com/(?P<user>[^/]+)/(?P<id>[^/#]+)(?:#.*)?$'
   15     _TEST = {
   16         u"name": u"EightTracks",
   17         u"url": u"http://8tracks.com/ytdl/youtube-dl-test-tracks-a",
   18         u"playlist": [
   19             {
   20                 u"file": u"11885610.m4a",
   21                 u"md5": u"96ce57f24389fc8734ce47f4c1abcc55",
   22                 u"info_dict": {
   23                     u"title": u"youtue-dl project<>\"' - youtube-dl test track 1 \"'/\\\u00e4\u21ad",
   24                     u"uploader_id": u"ytdl"
   25                 }
   26             },
   27             {
   28                 u"file": u"11885608.m4a",
   29                 u"md5": u"4ab26f05c1f7291ea460a3920be8021f",
   30                 u"info_dict": {
   31                     u"title": u"youtube-dl project - youtube-dl test track 2 \"'/\\\u00e4\u21ad",
   32                     u"uploader_id": u"ytdl"
   33                 }
   34             },
   35             {
   36                 u"file": u"11885679.m4a",
   37                 u"md5": u"d30b5b5f74217410f4689605c35d1fd7",
   38                 u"info_dict": {
   39                     u"title": u"youtube-dl project as well - youtube-dl test track 3 \"'/\\\u00e4\u21ad",
   40                     u"uploader_id": u"ytdl"
   41                 }
   42             },
   43             {
   44                 u"file": u"11885680.m4a",
   45                 u"md5": u"4eb0a669317cd725f6bbd336a29f923a",
   46                 u"info_dict": {
   47                     u"title": u"youtube-dl project as well - youtube-dl test track 4 \"'/\\\u00e4\u21ad",
   48                     u"uploader_id": u"ytdl"
   49                 }
   50             },
   51             {
   52                 u"file": u"11885682.m4a",
   53                 u"md5": u"1893e872e263a2705558d1d319ad19e8",
   54                 u"info_dict": {
   55                     u"title": u"PH - youtube-dl test track 5 \"'/\\\u00e4\u21ad",
   56                     u"uploader_id": u"ytdl"
   57                 }
   58             },
   59             {
   60                 u"file": u"11885683.m4a",
   61                 u"md5": u"b673c46f47a216ab1741ae8836af5899",
   62                 u"info_dict": {
   63                     u"title": u"PH - youtube-dl test track 6 \"'/\\\u00e4\u21ad",
   64                     u"uploader_id": u"ytdl"
   65                 }
   66             },
   67             {
   68                 u"file": u"11885684.m4a",
   69                 u"md5": u"1d74534e95df54986da7f5abf7d842b7",
   70                 u"info_dict": {
   71                     u"title": u"phihag - youtube-dl test track 7 \"'/\\\u00e4\u21ad",
   72                     u"uploader_id": u"ytdl"
   73                 }
   74             },
   75             {
   76                 u"file": u"11885685.m4a",
   77                 u"md5": u"f081f47af8f6ae782ed131d38b9cd1c0",
   78                 u"info_dict": {
   79                     u"title": u"phihag - youtube-dl test track 8 \"'/\\\u00e4\u21ad",
   80                     u"uploader_id": u"ytdl"
   81                 }
   82             }
   83         ]
   84     }
   85 
   86 
   87     def _real_extract(self, url):
   88         mobj = re.match(self._VALID_URL, url)
   89         if mobj is None:
   90             raise ExtractorError(u'Invalid URL: %s' % url)
   91         playlist_id = mobj.group('id')
   92 
   93         webpage = self._download_webpage(url, playlist_id)
   94 
   95         json_like = self._search_regex(r"PAGE.mix = (.*?);\n", webpage, u'trax information', flags=re.DOTALL)
   96         data = json.loads(json_like)
   97 
   98         session = str(random.randint(0, 1000000000))
   99         mix_id = data['id']
  100         track_count = data['tracks_count']
  101         first_url = 'http://8tracks.com/sets/%s/play?player=sm&mix_id=%s&format=jsonh' % (session, mix_id)
  102         next_url = first_url
  103         res = []
  104         for i in itertools.count():
  105             api_json = self._download_webpage(next_url, playlist_id,
  106                 note=u'Downloading song information %s/%s' % (str(i+1), track_count),
  107                 errnote=u'Failed to download song information')
  108             api_data = json.loads(api_json)
  109             track_data = api_data[u'set']['track']
  110             info = {
  111                 'id': track_data['id'],
  112                 'url': track_data['track_file_stream_url'],
  113                 'title': track_data['performer'] + u' - ' + track_data['name'],
  114                 'raw_title': track_data['name'],
  115                 'uploader_id': data['user']['login'],
  116                 'ext': 'm4a',
  117             }
  118             res.append(info)
  119             if api_data['set']['at_last_track']:
  120                 break
  121             next_url = 'http://8tracks.com/sets/%s/next?player=sm&mix_id=%s&format=jsonh&track_id=%s' % (session, mix_id, track_data['id'])
  122         return res