youtube_dl/extractor/soundgasm.py



    1 # coding: utf-8
    2 from __future__ import unicode_literals
    3 
    4 import re
    5 
    6 from .common import InfoExtractor
    7 
    8 
    9 class SoundgasmIE(InfoExtractor):
   10     IE_NAME = 'soundgasm'
   11     _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<user>[0-9a-zA-Z_-]+)/(?P<display_id>[0-9a-zA-Z_-]+)'
   12     _TEST = {
   13         'url': 'http://soundgasm.net/u/ytdl/Piano-sample',
   14         'md5': '010082a2c802c5275bb00030743e75ad',
   15         'info_dict': {
   16             'id': '88abd86ea000cafe98f96321b23cc1206cbcbcc9',
   17             'ext': 'm4a',
   18             'title': 'Piano sample',
   19             'description': 'Royalty Free Sample Music',
   20             'uploader': 'ytdl',
   21         }
   22     }
   23 
   24     def _real_extract(self, url):
   25         mobj = re.match(self._VALID_URL, url)
   26         display_id = mobj.group('display_id')
   27 
   28         webpage = self._download_webpage(url, display_id)
   29 
   30         audio_url = self._html_search_regex(
   31             r'(?s)m4a\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
   32             'audio URL', group='url')
   33 
   34         title = self._search_regex(
   35             r'<div[^>]+\bclass=["\']jp-title[^>]+>([^<]+)',
   36             webpage, 'title', default=display_id)
   37 
   38         description = self._html_search_regex(
   39             (r'(?s)<div[^>]+\bclass=["\']jp-description[^>]+>(.+?)</div>',
   40              r'(?s)<li>Description:\s(.*?)<\/li>'),
   41             webpage, 'description', fatal=False)
   42 
   43         audio_id = self._search_regex(
   44             r'/([^/]+)\.m4a', audio_url, 'audio id', default=display_id)
   45 
   46         return {
   47             'id': audio_id,
   48             'display_id': display_id,
   49             'url': audio_url,
   50             'vcodec': 'none',
   51             'title': title,
   52             'description': description,
   53             'uploader': mobj.group('user'),
   54         }
   55 
   56 
   57 class SoundgasmProfileIE(InfoExtractor):
   58     IE_NAME = 'soundgasm:profile'
   59     _VALID_URL = r'https?://(?:www\.)?soundgasm\.net/u/(?P<id>[^/]+)/?(?:\#.*)?$'
   60     _TEST = {
   61         'url': 'http://soundgasm.net/u/ytdl',
   62         'info_dict': {
   63             'id': 'ytdl',
   64         },
   65         'playlist_count': 1,
   66     }
   67 
   68     def _real_extract(self, url):
   69         profile_id = self._match_id(url)
   70 
   71         webpage = self._download_webpage(url, profile_id)
   72 
   73         entries = [
   74             self.url_result(audio_url, 'Soundgasm')
   75             for audio_url in re.findall(r'href="([^"]+/u/%s/[^"]+)' % profile_id, webpage)]
   76 
   77         return self.playlist_result(entries, profile_id)