summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor/vimeo.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/vimeo.py')
-rw-r--r-- youtube_dl/extractor/vimeo.py | 102
1 files changed, 73 insertions, 29 deletions
diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py
index 2e98b0e6f..6af705657 100644
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -21,12 +21,12 @@ from ..utils import (
sanitized_Request,
smuggle_url,
std_headers,
- unified_strdate,
+ try_get,
+ unified_timestamp,
unsmuggle_url,
urlencode_postdata,
unescapeHTML,
parse_filesize,
- try_get,
)
@@ -142,10 +142,25 @@ class VimeoBaseInfoExtractor(InfoExtractor):
note='Downloading %s m3u8 information' % cdn_name,
fatal=False))
elif files_type == 'dash':
- formats.extend(self._extract_mpd_formats(
- manifest_url.replace('/master.json', '/master.mpd'), video_id, format_id,
- 'Downloading %s MPD information' % cdn_name,
- fatal=False))
+ mpd_pattern = r'/%s/(?:sep/)?video/' % video_id
+ mpd_manifest_urls = []
+ if re.search(mpd_pattern, manifest_url):
+ for suffix, repl in (('', 'video'), ('_sep', 'sep/video')):
+ mpd_manifest_urls.append((format_id + suffix, re.sub(
+ mpd_pattern, '/%s/%s/' % (video_id, repl), manifest_url)))
+ else:
+ mpd_manifest_urls = [(format_id, manifest_url)]
+ for f_id, m_url in mpd_manifest_urls:
+ mpd_formats = self._extract_mpd_formats(
+ m_url.replace('/master.json', '/master.mpd'), video_id, f_id,
+ 'Downloading %s MPD information' % cdn_name,
+ fatal=False)
+ for f in mpd_formats:
+ if f.get('vcodec') == 'none':
+ f['preference'] = -50
+ elif f.get('acodec') == 'none':
+ f['preference'] = -40
+ formats.extend(mpd_formats)
subtitles = {}
text_tracks = config['request'].get('text_tracks')
@@ -204,11 +219,13 @@ class VimeoIE(VimeoBaseInfoExtractor):
'ext': 'mp4',
'title': "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",
'description': 'md5:2d3305bad981a06ff79f027f19865021',
+ 'timestamp': 1355990239,
'upload_date': '20121220',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user7108434',
'uploader_id': 'user7108434',
'uploader': 'Filippo Valsorda',
'duration': 10,
+ 'license': 'by-sa',
},
},
{
@@ -249,6 +266,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'id': '68375962',
'ext': 'mp4',
'title': 'youtube-dl password protected test video',
+ 'timestamp': 1371200155,
'upload_date': '20130614',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/user18948128',
'uploader_id': 'user18948128',
@@ -271,7 +289,8 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/atencio',
'uploader_id': 'atencio',
'uploader': 'Peter Atencio',
- 'upload_date': '20130927',
+ 'timestamp': 1380339469,
+ 'upload_date': '20130928',
'duration': 187,
},
},
@@ -283,6 +302,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'ext': 'mp4',
'title': 'The New Vimeo Player (You Know, For Videos)',
'description': 'md5:2ec900bf97c3f389378a96aee11260ea',
+ 'timestamp': 1381846109,
'upload_date': '20131015',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/staff',
'uploader_id': 'staff',
@@ -314,6 +334,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader': 'The DMCI',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/dmci',
'uploader_id': 'dmci',
+ 'timestamp': 1324343742,
'upload_date': '20111220',
'description': 'md5:ae23671e82d05415868f7ad1aec21147',
},
@@ -329,6 +350,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'uploader': 'Casey Donahue',
'uploader_url': r're:https?://(?:www\.)?vimeo\.com/caseydonahue',
'uploader_id': 'caseydonahue',
+ 'timestamp': 1250886430,
'upload_date': '20090821',
'description': 'md5:bdbf314014e58713e6e5b66eb252f4a6',
},
@@ -338,7 +360,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
'expected_warnings': ['Unable to download JSON metadata'],
},
{
- # redirects to ondemand extractor and should be passed throught it
+ # redirects to ondemand extractor and should be passed through it
# for successful extraction
'url': 'https://vimeo.com/73445910',
'info_dict': {
@@ -390,7 +412,7 @@ class VimeoIE(VimeoBaseInfoExtractor):
urls = []
# Look for embedded (iframe) Vimeo player
for mobj in re.finditer(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/.+?)\1',
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//player\.vimeo\.com/video/\d+.*?)\1',
webpage):
urls.append(VimeoIE._smuggle_referrer(unescapeHTML(mobj.group('url')), url))
PLAIN_EMBED_RE = (
@@ -446,11 +468,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
request = sanitized_Request(url, headers=headers)
try:
webpage, urlh = self._download_webpage_handle(request, video_id)
+ redirect_url = compat_str(urlh.geturl())
# Some URLs redirect to ondemand can't be extracted with
# this extractor right away thus should be passed through
# ondemand extractor (e.g. https://vimeo.com/73445910)
- if VimeoOndemandIE.suitable(urlh.geturl()):
- return self.url_result(urlh.geturl(), VimeoOndemandIE.ie_key())
+ if VimeoOndemandIE.suitable(redirect_url):
+ return self.url_result(redirect_url, VimeoOndemandIE.ie_key())
except ExtractorError as ee:
if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403:
errmsg = ee.cause.read()
@@ -477,6 +500,9 @@ class VimeoIE(VimeoBaseInfoExtractor):
'%s said: %s' % (self.IE_NAME, seed_status['title']),
expected=True)
+ cc_license = None
+ timestamp = None
+
# Extract the config JSON
try:
try:
@@ -490,8 +516,12 @@ class VimeoIE(VimeoBaseInfoExtractor):
vimeo_clip_page_config = self._search_regex(
r'vimeo\.clip_page_config\s*=\s*({.+?});', webpage,
'vimeo clip page config')
- config_url = self._parse_json(
- vimeo_clip_page_config, video_id)['player']['config_url']
+ page_config = self._parse_json(vimeo_clip_page_config, video_id)
+ config_url = page_config['player']['config_url']
+ cc_license = page_config.get('cc_license')
+ timestamp = try_get(
+ page_config, lambda x: x['clip']['uploaded_on'],
+ compat_str)
config_json = self._download_webpage(config_url, video_id)
config = json.loads(config_json)
except RegexNotFoundError:
@@ -512,15 +542,15 @@ class VimeoIE(VimeoBaseInfoExtractor):
if re.search(r'<form[^>]+?id="pw_form"', webpage) is not None:
if '_video_password_verified' in data:
raise ExtractorError('video password verification failed!')
- self._verify_video_password(url, video_id, webpage)
+ self._verify_video_password(redirect_url, video_id, webpage)
return self._real_extract(
- smuggle_url(url, {'_video_password_verified': 'verified'}))
+ smuggle_url(redirect_url, {'_video_password_verified': 'verified'}))
else:
raise ExtractorError('Unable to extract info section',
cause=e)
else:
if config.get('view') == 4:
- config = self._verify_player_video_password(url, video_id)
+ config = self._verify_player_video_password(redirect_url, video_id)
def is_rented():
if '>You rented this title.<' in webpage:
@@ -560,10 +590,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
self._downloader.report_warning('Cannot find video description')
# Extract upload date
- video_upload_date = None
- mobj = re.search(r'<time[^>]+datetime="([^"]+)"', webpage)
- if mobj is not None:
- video_upload_date = unified_strdate(mobj.group(1))
+ if not timestamp:
+ timestamp = self._search_regex(
+ r'<time[^>]+datetime="([^"]+)"', webpage,
+ 'timestamp', default=None)
try:
view_count = int(self._search_regex(r'UserPlays:(\d+)', webpage, 'view count'))
@@ -586,7 +616,10 @@ class VimeoIE(VimeoBaseInfoExtractor):
if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
source_name = source_file.get('public_name', 'Original')
if self._is_valid_url(download_url, video_id, '%s video' % source_name):
- ext = source_file.get('extension', determine_ext(download_url)).lower()
+ ext = (try_get(
+ source_file, lambda x: x['extension'],
+ compat_str) or determine_ext(
+ download_url, None) or 'mp4').lower()
formats.append({
'url': download_url,
'ext': ext,
@@ -600,15 +633,22 @@ class VimeoIE(VimeoBaseInfoExtractor):
info_dict = self._parse_config(config, video_id)
formats.extend(info_dict['formats'])
self._vimeo_sort_formats(formats)
+
+ if not cc_license:
+ cc_license = self._search_regex(
+ r'<link[^>]+rel=["\']license["\'][^>]+href=(["\'])(?P<license>(?:(?!\1).)+)\1',
+ webpage, 'license', default=None, group='license')
+
info_dict.update({
'id': video_id,
'formats': formats,
- 'upload_date': video_upload_date,
+ 'timestamp': unified_timestamp(timestamp),
'description': video_description,
'webpage_url': url,
'view_count': view_count,
'like_count': like_count,
'comment_count': comment_count,
+ 'license': cc_license,
})
return info_dict
@@ -730,12 +770,12 @@ class VimeoChannelIE(VimeoBaseInfoExtractor):
# Try extracting href first since not all videos are available via
# short https://vimeo.com/id URL (e.g. https://vimeo.com/channels/tributes/6213729)
clips = re.findall(
- r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)', webpage)
+ r'id="clip_(\d+)"[^>]*>\s*<a[^>]+href="(/(?:[^/]+/)*\1)(?:[^>]+\btitle="([^"]+)")?', webpage)
if clips:
- for video_id, video_url in clips:
+ for video_id, video_url, video_title in clips:
yield self.url_result(
compat_urlparse.urljoin(base_url, video_url),
- VimeoIE.ie_key(), video_id=video_id)
+ VimeoIE.ie_key(), video_id=video_id, video_title=video_title)
# More relaxed fallback
else:
for video_id in re.findall(r'id=["\']clip_(\d+)', webpage):
@@ -884,10 +924,14 @@ class VimeoReviewIE(VimeoBaseInfoExtractor):
def _get_config_url(self, webpage_url, video_id, video_password_verified=False):
webpage = self._download_webpage(webpage_url, video_id)
- data = self._parse_json(self._search_regex(
- r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data',
- default=NO_DEFAULT if video_password_verified else '{}'), video_id)
- config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl')
+ config_url = self._html_search_regex(
+ r'data-config-url=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
+ 'config URL', default=None, group='url')
+ if not config_url:
+ data = self._parse_json(self._search_regex(
+ r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data',
+ default=NO_DEFAULT if video_password_verified else '{}'), video_id)
+ config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl')
if config_url is None:
self._verify_video_password(webpage_url, video_id, webpage)
config_url = self._get_config_url(

Generated by cgit