1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import absolute_import, unicode_literals
5
6 import collections
7 import contextlib
8 import copy
9 import datetime
10 import errno
11 import fileinput
12 import io
13 import itertools
14 import json
15 import locale
16 import operator
17 import os
18 import platform
19 import re
20 import shutil
21 import subprocess
22 import socket
23 import sys
24 import time
25 import tokenize
26 import traceback
27 import random
28
29 from string import ascii_letters
30
31 from .compat import (
32 compat_basestring,
33 compat_cookiejar,
34 compat_get_terminal_size,
35 compat_http_client,
36 compat_kwargs,
37 compat_numeric_types,
38 compat_os_name,
39 compat_str,
40 compat_tokenize_tokenize,
41 compat_urllib_error,
42 compat_urllib_request,
43 compat_urllib_request_DataHandler,
44 )
45 from .utils import (
46 age_restricted,
47 args_to_str,
48 ContentTooShortError,
49 date_from_str,
50 DateRange,
51 DEFAULT_OUTTMPL,
52 determine_ext,
53 determine_protocol,
54 DownloadError,
55 encode_compat_str,
56 encodeFilename,
57 error_to_compat_str,
58 expand_path,
59 ExtractorError,
60 format_bytes,
61 formatSeconds,
62 GeoRestrictedError,
63 int_or_none,
64 ISO3166Utils,
65 locked_file,
66 make_HTTPS_handler,
67 MaxDownloadsReached,
68 orderedSet,
69 PagedList,
70 parse_filesize,
71 PerRequestProxyHandler,
72 platform_name,
73 PostProcessingError,
74 preferredencoding,
75 prepend_extension,
76 process_communicate_or_kill,
77 register_socks_protocols,
78 render_table,
79 replace_extension,
80 SameFileError,
81 sanitize_filename,
82 sanitize_path,
83 sanitize_url,
84 sanitized_Request,
85 std_headers,
86 str_or_none,
87 subtitles_filename,
88 UnavailableVideoError,
89 url_basename,
90 version_tuple,
91 write_json_file,
92 write_string,
93 YoutubeDLCookieJar,
94 YoutubeDLCookieProcessor,
95 YoutubeDLHandler,
96 YoutubeDLRedirectHandler,
97 )
98 from .cache import Cache
99 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
100 from .extractor.openload import PhantomJSwrapper
101 from .downloader import get_suitable_downloader
102 from .downloader.rtmp import rtmpdump_version
103 from .postprocessor import (
104 FFmpegFixupM3u8PP,
105 FFmpegFixupM4aPP,
106 FFmpegFixupStretchedPP,
107 FFmpegMergerPP,
108 FFmpegPostProcessor,
109 get_postprocessor,
110 )
111 from .version import __version__
112
113 if compat_os_name == 'nt':
114 import ctypes
115
116
117 class YoutubeDL(object):
118 """YoutubeDL class.
119
120 YoutubeDL objects are the ones responsible of downloading the
121 actual video file and writing it to disk if the user has requested
122 it, among some other tasks. In most cases there should be one per
123 program. As, given a video URL, the downloader doesn't know how to
124 extract all the needed information, task that InfoExtractors do, it
125 has to pass the URL to one of them.
126
127 For this, YoutubeDL objects have a method that allows
128 InfoExtractors to be registered in a given order. When it is passed
129 a URL, the YoutubeDL object handles it to the first InfoExtractor it
130 finds that reports being able to handle it. The InfoExtractor extracts
131 all the information about the video or videos the URL refers to, and
132 YoutubeDL process the extracted information, possibly using a File
133 Downloader to download the video.
134
135 YoutubeDL objects accept a lot of parameters. In order not to saturate
136 the object constructor with arguments, it receives a dictionary of
137 options instead. These options are available through the params
138 attribute for the InfoExtractors to use. The YoutubeDL also
139 registers itself as the downloader in charge for the InfoExtractors
140 that are added to it, so this is a "mutual registration".
141
142 Available options:
143
144 username: Username for authentication purposes.
145 password: Password for authentication purposes.
146 videopassword: Password for accessing a video.
147 ap_mso: Adobe Pass multiple-system operator identifier.
148 ap_username: Multiple-system operator account username.
149 ap_password: Multiple-system operator account password.
150 usenetrc: Use netrc for authentication instead.
151 verbose: Print additional info to stdout.
152 quiet: Do not print messages to stdout.
153 no_warnings: Do not print out anything for warnings.
154 forceurl: Force printing final URL.
155 forcetitle: Force printing title.
156 forceid: Force printing ID.
157 forcethumbnail: Force printing thumbnail URL.
158 forcedescription: Force printing description.
159 forcefilename: Force printing final filename.
160 forceduration: Force printing duration.
161 forcejson: Force printing info_dict as JSON.
162 dump_single_json: Force printing the info_dict of the whole playlist
163 (or video) as a single JSON line.
164 simulate: Do not download the video files.
165 format: Video format code. See options.py for more information.
166 outtmpl: Template for output names.
167 outtmpl_na_placeholder: Placeholder for unavailable meta fields.
168 restrictfilenames: Do not allow "&" and spaces in file names
169 ignoreerrors: Do not stop on download errors.
170 force_generic_extractor: Force downloader to use the generic extractor
171 nooverwrites: Prevent overwriting files.
172 playliststart: Playlist item to start at.
173 playlistend: Playlist item to end at.
174 playlist_items: Specific indices of playlist to download.
175 playlistreverse: Download playlist items in reverse order.
176 playlistrandom: Download playlist items in random order.
177 matchtitle: Download only matching titles.
178 rejecttitle: Reject downloads for matching titles.
179 logger: Log messages to a logging.Logger instance.
180 logtostderr: Log messages to stderr instead of stdout.
181 writedescription: Write the video description to a .description file
182 writeinfojson: Write the video description to a .info.json file
183 writeannotations: Write the video annotations to a .annotations.xml file
184 writethumbnail: Write the thumbnail image to a file
185 write_all_thumbnails: Write all thumbnail formats to files
186 writesubtitles: Write the video subtitles to a file
187 writeautomaticsub: Write the automatically generated subtitles to a file
188 allsubtitles: Downloads all the subtitles of the video
189 (requires writesubtitles or writeautomaticsub)
190 listsubtitles: Lists all available subtitles for the video
191 subtitlesformat: The format code for subtitles
192 subtitleslangs: List of languages of the subtitles to download
193 keepvideo: Keep the video file after post-processing
194 daterange: A DateRange object, download only if the upload_date is in the range.
195 skip_download: Skip the actual download of the video file
196 cachedir: Location of the cache files in the filesystem.
197 False to disable filesystem cache.
198 noplaylist: Download single video instead of a playlist if in doubt.
199 age_limit: An integer representing the user's age in years.
200 Unsuitable videos for the given age are skipped.
201 min_views: An integer representing the minimum view count the video
202 must have in order to not be skipped.
203 Videos without view count information are always
204 downloaded. None for no limit.
205 max_views: An integer representing the maximum view count.
206 Videos that are more popular than that are not
207 downloaded.
208 Videos without view count information are always
209 downloaded. None for no limit.
210 download_archive: File name of a file where all downloads are recorded.
211 Videos already present in the file are not downloaded
212 again.
213 cookiefile: File name where cookies should be read from and dumped to.
214 nocheckcertificate:Do not verify SSL certificates
215 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
216 At the moment, this is only supported by YouTube.
217 proxy: URL of the proxy server to use
218 geo_verification_proxy: URL of the proxy to use for IP address verification
219 on geo-restricted sites.
220 socket_timeout: Time to wait for unresponsive hosts, in seconds
221 bidi_workaround: Work around buggy terminals without bidirectional text
222 support, using fridibi
223 debug_printtraffic:Print out sent and received HTTP traffic
224 include_ads: Download ads as well
225 default_search: Prepend this string if an input url is not valid.
226 'auto' for elaborate guessing
227 encoding: Use this encoding instead of the system-specified.
228 extract_flat: Do not resolve URLs, return the immediate result.
229 Pass in 'in_playlist' to only show this behavior for
230 playlist items.
231 postprocessors: A list of dictionaries, each with an entry
232 * key: The name of the postprocessor. See
233 youtube_dl/postprocessor/__init__.py for a list.
234 as well as any further keyword arguments for the
235 postprocessor.
236 progress_hooks: A list of functions that get called on download
237 progress, with a dictionary with the entries
238 * status: One of "downloading", "error", or "finished".
239 Check this first and ignore unknown values.
240
241 If status is one of "downloading", or "finished", the
242 following properties may also be present:
243 * filename: The final filename (always present)
244 * tmpfilename: The filename we're currently writing to
245 * downloaded_bytes: Bytes on disk
246 * total_bytes: Size of the whole file, None if unknown
247 * total_bytes_estimate: Guess of the eventual file size,
248 None if unavailable.
249 * elapsed: The number of seconds since download started.
250 * eta: The estimated time in seconds, None if unknown
251 * speed: The download speed in bytes/second, None if
252 unknown
253 * fragment_index: The counter of the currently
254 downloaded video fragment.
255 * fragment_count: The number of fragments (= individual
256 files that will be merged)
257
258 Progress hooks are guaranteed to be called at least once
259 (with status "finished") if the download is successful.
260 merge_output_format: Extension to use when merging formats.
261 fixup: Automatically correct known faults of the file.
262 One of:
263 - "never": do nothing
264 - "warn": only emit a warning
265 - "detect_or_warn": check whether we can do anything
266 about it, warn otherwise (default)
267 source_address: Client-side IP address to bind to.
268 call_home: Boolean, true iff we are allowed to contact the
269 youtube-dl servers for debugging.
270 sleep_interval: Number of seconds to sleep before each download when
271 used alone or a lower bound of a range for randomized
272 sleep before each download (minimum possible number
273 of seconds to sleep) when used along with
274 max_sleep_interval.
275 max_sleep_interval:Upper bound of a range for randomized sleep before each
276 download (maximum possible number of seconds to sleep).
277 Must only be used along with sleep_interval.
278 Actual sleep time will be a random float from range
279 [sleep_interval; max_sleep_interval].
280 listformats: Print an overview of available video formats and exit.
281 list_thumbnails: Print a table of all thumbnails and exit.
282 match_filter: A function that gets called with the info_dict of
283 every video.
284 If it returns a message, the video is ignored.
285 If it returns None, the video is downloaded.
286 match_filter_func in utils.py is one example for this.
287 no_color: Do not emit color codes in output.
288 geo_bypass: Bypass geographic restriction via faking X-Forwarded-For
289 HTTP header
290 geo_bypass_country:
291 Two-letter ISO 3166-2 country code that will be used for
292 explicit geographic restriction bypassing via faking
293 X-Forwarded-For HTTP header
294 geo_bypass_ip_block:
295 IP range in CIDR notation that will be used similarly to
296 geo_bypass_country
297
298 The following options determine which downloader is picked:
299 external_downloader: Executable of the external downloader to call.
300 None or unset for standard (built-in) downloader.
301 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
302 if True, otherwise use ffmpeg/avconv if False, otherwise
303 use downloader suggested by extractor if None.
304
305 The following parameters are not used by YoutubeDL itself, they are used by
306 the downloader (see youtube_dl/downloader/common.py):
307 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
308 noresizebuffer, retries, continuedl, noprogress, consoletitle,
309 xattr_set_filesize, external_downloader_args, hls_use_mpegts,
310 http_chunk_size.
311
312 The following options are used by the post processors:
313 prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
314 otherwise prefer ffmpeg.
315 ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
316 to the binary or its containing directory.
317 postprocessor_args: A list of additional command-line arguments for the
318 postprocessor.
319
320 The following options are used by the Youtube extractor:
321 youtube_include_dash_manifest: If True (default), DASH manifests and related
322 data will be downloaded and processed by extractor.
323 You can reduce network I/O by disabling it if you don't
324 care about DASH.
325 """
326
    # Fields whose values are numeric: prepare_filename() leaves these
    # untouched for %-formatting while non-numeric values are passed
    # through sanitize_filename().
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level defaults; the real per-instance values are assigned in
    # __init__().
    params = None  # dict of user options (see the class docstring)
    _ies = []  # registered InfoExtractors (classes or instances)
    _pps = []  # registered post-processors
    _download_retcode = None  # exit code accumulated across downloads
    _num_downloads = None  # count of files downloaded by this instance
    _playlist_level = 0  # presumably nesting depth of playlist extraction — not used in the code visible here
    _playlist_urls = set()  # presumably playlist URLs already seen — not used in the code visible here
    _screen_file = None  # stream used for normal (non-error) output
346
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params -- dictionary of options (see the class docstring); None
                  means all defaults.
        auto_init -- when True, print the debug header and register the
                     default info extractors immediately.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Bool-indexing: logtostderr selects stderr for normal output.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)

        def check_deprecated(param, option, suggestion):
            # Warn if a deprecated option is present; True iff it is in use.
            if self.params.get(param) is not None:
                self.report_warning(
                    '%s is deprecated. Use %s instead.' % (option, suggestion))
                return True
            return False

        # cn_verification_proxy is superseded by geo_verification_proxy but
        # is still honoured when the new option is unset.
        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

        if params.get('bidi_workaround', False):
            # Spawn an external bidi filter ('bidiv', falling back to
            # 'fribidi') writing into a pty; _bidi_workaround() feeds text
            # through it line by line.
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate configured post-processors: each dict names its class
        # via 'key'; the remaining entries become constructor kwargs.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
440
441 def warn_if_short_id(self, argv):
442 # short YouTube ID starting with dash?
443 idxs = [
444 i for i, a in enumerate(argv)
445 if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
446 if idxs:
447 correct_argv = (
448 ['youtube-dl']
449 + [a for i, a in enumerate(argv) if i not in idxs]
450 + ['--'] + [argv[i] for i in idxs]
451 )
452 self.report_warning(
453 'Long argument string detected. '
454 'Use -- to separate parameters and URLs, like this:\n%s\n' %
455 args_to_str(correct_argv))
456
457 def add_info_extractor(self, ie):
458 """Add an InfoExtractor object to the end of the list."""
459 self._ies.append(ie)
460 if not isinstance(ie, type):
461 self._ies_instances[ie.ie_key()] = ie
462 ie.set_downloader(self)
463
464 def get_info_extractor(self, ie_key):
465 """
466 Get an instance of an IE with name ie_key, it will try to get one from
467 the _ies list, if there's no instance it will create a new one and add
468 it to the extractor list.
469 """
470 ie = self._ies_instances.get(ie_key)
471 if ie is None:
472 ie = get_info_extractor(ie_key)()
473 self.add_info_extractor(ie)
474 return ie
475
476 def add_default_info_extractors(self):
477 """
478 Add the InfoExtractors returned by gen_extractors to the end of the list
479 """
480 for ie in gen_extractor_classes():
481 self.add_info_extractor(ie)
482
483 def add_post_processor(self, pp):
484 """Add a PostProcessor object to the end of the chain."""
485 self._pps.append(pp)
486 pp.set_downloader(self)
487
488 def add_progress_hook(self, ph):
489 """Add the progress hook (currently only for the file downloader)"""
490 self._progress_hooks.append(ph)
491
492 def _bidi_workaround(self, message):
493 if not hasattr(self, '_output_channel'):
494 return message
495
496 assert hasattr(self, '_output_process')
497 assert isinstance(message, compat_str)
498 line_count = message.count('\n') + 1
499 self._output_process.stdin.write((message + '\n').encode('utf-8'))
500 self._output_process.stdin.flush()
501 res = ''.join(self._output_channel.readline().decode('utf-8')
502 for _ in range(line_count))
503 return res[:-len('\n')]
504
505 def to_screen(self, message, skip_eol=False):
506 """Print message to stdout if not in quiet mode."""
507 return self.to_stdout(message, skip_eol, check_quiet=True)
508
509 def _write_string(self, s, out=None):
510 write_string(s, out=out, encoding=self.params.get('encoding'))
511
512 def to_stdout(self, message, skip_eol=False, check_quiet=False):
513 """Print message to stdout if not in quiet mode."""
514 if self.params.get('logger'):
515 self.params['logger'].debug(message)
516 elif not check_quiet or not self.params.get('quiet', False):
517 message = self._bidi_workaround(message)
518 terminator = ['\n', ''][skip_eol]
519 output = message + terminator
520
521 self._write_string(output, self._screen_file)
522
523 def to_stderr(self, message):
524 """Print message to stderr."""
525 assert isinstance(message, compat_str)
526 if self.params.get('logger'):
527 self.params['logger'].error(message)
528 else:
529 message = self._bidi_workaround(message)
530 output = message + '\n'
531 self._write_string(output, self._err_file)
532
533 def to_console_title(self, message):
534 if not self.params.get('consoletitle', False):
535 return
536 if compat_os_name == 'nt':
537 if ctypes.windll.kernel32.GetConsoleWindow():
538 # c_wchar_p() might not be necessary if `message` is
539 # already of type unicode()
540 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
541 elif 'TERM' in os.environ:
542 self._write_string('\033]0;%s\007' % message, self._screen_file)
543
544 def save_console_title(self):
545 if not self.params.get('consoletitle', False):
546 return
547 if self.params.get('simulate', False):
548 return
549 if compat_os_name != 'nt' and 'TERM' in os.environ:
550 # Save the title on stack
551 self._write_string('\033[22;0t', self._screen_file)
552
553 def restore_console_title(self):
554 if not self.params.get('consoletitle', False):
555 return
556 if self.params.get('simulate', False):
557 return
558 if compat_os_name != 'nt' and 'TERM' in os.environ:
559 # Restore the title from stack
560 self._write_string('\033[23;0t', self._screen_file)
561
562 def __enter__(self):
563 self.save_console_title()
564 return self
565
566 def __exit__(self, *args):
567 self.restore_console_title()
568
569 if self.params.get('cookiefile') is not None:
570 self.cookiejar.save(ignore_discard=True, ignore_expires=True)
571
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Some wrapped exceptions carry the original exc_info of
                    # the underlying error; include that traceback first.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: show the current call stack.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError, preferring the wrapped exception's
            # original exc_info when one is attached.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1
601
602 def report_warning(self, message):
603 '''
604 Print the message to stderr, it will be prefixed with 'WARNING:'
605 If stderr is a tty file the 'WARNING:' will be colored
606 '''
607 if self.params.get('logger') is not None:
608 self.params['logger'].warning(message)
609 else:
610 if self.params.get('no_warnings'):
611 return
612 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
613 _msg_header = '\033[0;33mWARNING:\033[0m'
614 else:
615 _msg_header = 'WARNING:'
616 warning_message = '%s %s' % (_msg_header, message)
617 self.to_stderr(warning_message)
618
619 def report_error(self, message, tb=None):
620 '''
621 Do the same as trouble, but prefixes the message with 'ERROR:', colored
622 in red if stderr is a tty file.
623 '''
624 if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
625 _msg_header = '\033[0;31mERROR:\033[0m'
626 else:
627 _msg_header = 'ERROR:'
628 error_message = '%s %s' % (_msg_header, message)
629 self.trouble(error_message, tb)
630
631 def report_file_already_downloaded(self, file_name):
632 """Report file has already been fully downloaded."""
633 try:
634 self.to_screen('[download] %s has already been downloaded' % file_name)
635 except UnicodeEncodeError:
636 self.to_screen('[download] The file has already been downloaded')
637
    def prepare_filename(self, info_dict):
        """Generate the output filename.

        Expands the 'outtmpl' option with a sanitized copy of info_dict;
        returns the sanitized path, or None if the template is invalid.
        """
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
            # Derive 'resolution' from width/height when the extractor did
            # not provide it.
            if template_dict.get('resolution') is None:
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '%dx?' % template_dict['width']

            # Numeric values pass through unchanged; everything else is
            # sanitized for file-name use ('id'-like fields keep more chars).
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None and not isinstance(v, (list, tuple, dict)))
            # Missing fields substitute the NA placeholder.
            template_dict = collections.defaultdict(lambda: self.params.get('outtmpl_na_placeholder', 'NA'), template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)

            # For fields playlist_index and autonumber convert all occurrences
            # of %(field)s to %(field)0Nd for backward compatibility
            field_size_compat_map = {
                'playlist_index': len(str(template_dict['n_entries'])),
                'autonumber': autonumber_size,
            }
            FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
            mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
            if mobj:
                outtmpl = re.sub(
                    FIELD_SIZE_COMPAT_RE,
                    r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
                    outtmpl)

            # Missing numeric fields used together with integer presentation types
            # in format specification will break the argument substitution since
            # string NA placeholder is returned for missing fields. We will patch
            # output template for missing fields to meet string presentation type.
            for numeric_field in self._NUMERIC_FIELDS:
                if numeric_field not in template_dict:
                    # As of [1] format syntax is:
                    # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
                    # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
                    FORMAT_RE = r'''(?x)
                        (?<!%)
                        %
                        \({0}\)  # mapping key
                        (?:[#0\-+ ]+)?  # conversion flags (optional)
                        (?:\d+)?  # minimum field width (optional)
                        (?:\.\d+)?  # precision (optional)
                        [hlL]?  # length modifier (optional)
                        [diouxXeEfFgGcrs%]  # conversion type
                    '''
                    outtmpl = re.sub(
                        FORMAT_RE.format(numeric_field),
                        r'%({0})s'.format(numeric_field), outtmpl)

            # expand_path translates '%%' into '%' and '$$' into '$'
            # correspondingly that is not what we want since we need to keep
            # '%%' intact for template dict substitution step. Working around
            # with boundary-alike separator hack.
            sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
            outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

            # outtmpl should be expand_path'ed before template dict substitution
            # because meta fields may contain env variables we don't want to
            # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
            # title "Hello $PATH", we don't want `$PATH` to be expanded.
            filename = expand_path(outtmpl).replace(sep, '') % template_dict

            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to workaround encoding issues with subprocess on python2 @ Windows
            if sys.version_info < (3, 0) and sys.platform == 'win32':
                filename = encodeFilename(filename, True).decode(preferredencoding())
            return sanitize_path(filename)
        except ValueError as err:
            self.report_error('Error in output template: ' + error_to_compat_str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
726
727 def _match_entry(self, info_dict, incomplete):
728 """ Returns None iff the file should be downloaded """
729
730 video_title = info_dict.get('title', info_dict.get('id', 'video'))
731 if 'title' in info_dict:
732 # This can happen when we're just evaluating the playlist
733 title = info_dict['title']
734 matchtitle = self.params.get('matchtitle', False)
735 if matchtitle:
736 if not re.search(matchtitle, title, re.IGNORECASE):
737 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
738 rejecttitle = self.params.get('rejecttitle', False)
739 if rejecttitle:
740 if re.search(rejecttitle, title, re.IGNORECASE):
741 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
742 date = info_dict.get('upload_date')
743 if date is not None:
744 dateRange = self.params.get('daterange', DateRange())
745 if date not in dateRange:
746 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
747 view_count = info_dict.get('view_count')
748 if view_count is not None:
749 min_views = self.params.get('min_views')
750 if min_views is not None and view_count < min_views:
751 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
752 max_views = self.params.get('max_views')
753 if max_views is not None and view_count > max_views:
754 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
755 if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
756 return 'Skipping "%s" because it is age restricted' % video_title
757 if self.in_download_archive(info_dict):
758 return '%s has already been recorded in archive' % video_title
759
760 if not incomplete:
761 match_filter = self.params.get('match_filter')
762 if match_filter is not None:
763 ret = match_filter(info_dict)
764 if ret is not None:
765 return ret
766
767 return None
768
769 @staticmethod
770 def add_extra_info(info_dict, extra_info):
771 '''Set the keys from extra_info in info dict if they are missing'''
772 for key, value in extra_info.items():
773 info_dict.setdefault(key, value)
774
775 def extract_info(self, url, download=True, ie_key=None, extra_info={},
776 process=True, force_generic_extractor=False):
777 """
778 Return a list with a dictionary for each video extracted.
779
780 Arguments:
781 url -- URL to extract
782
783 Keyword arguments:
784 download -- whether to download videos during extraction
785 ie_key -- extractor key hint
786 extra_info -- dictionary containing the extra values to add to each result
787 process -- whether to resolve all unresolved references (URLs, playlist items),
788 must be True for download to work.
789 force_generic_extractor -- force using the generic extractor
790 """
791
792 if not ie_key and force_generic_extractor:
793 ie_key = 'Generic'
794
795 if ie_key:
796 ies = [self.get_info_extractor(ie_key)]
797 else:
798 ies = self._ies
799
800 for ie in ies:
801 if not ie.suitable(url):
802 continue
803
804 ie = self.get_info_extractor(ie.ie_key())
805 if not ie.working():
806 self.report_warning('The program functionality for this site has been marked as broken, '
807 'and will probably not work.')
808
809 return self.__extract_info(url, ie, download, extra_info, process)
810 else:
811 self.report_error('no suitable InfoExtractor for URL %s' % url)
812
    # Class-body decorator: wraps an extraction method so that expected
    # extraction failures are reported instead of propagating.
    def __handle_extraction_exceptions(func):
        def wrapper(self, *args, **kwargs):
            try:
                return func(self, *args, **kwargs)
            except GeoRestrictedError as e:
                # Enrich the message with the list of countries the video
                # is available in, when known.
                msg = e.msg
                if e.countries:
                    msg += '\nThis video is available in %s.' % ', '.join(
                        map(ISO3166Utils.short2full, e.countries))
                msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
                self.report_error(msg)
            except ExtractorError as e:  # An error we somewhat expected
                self.report_error(compat_str(e), e.format_traceback())
            except MaxDownloadsReached:
                # Must propagate so the download loop stops.
                raise
            except Exception as e:
                # Unexpected errors are only swallowed (and reported) when
                # the user asked to ignore errors.
                if self.params.get('ignoreerrors', False):
                    self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                else:
                    raise
        return wrapper
834
835 @__handle_extraction_exceptions
836 def __extract_info(self, url, ie, download, extra_info, process):
837 ie_result = ie.extract(url)
838 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
839 return
840 if isinstance(ie_result, list):
841 # Backwards compatibility: old IE result format
842 ie_result = {
843 '_type': 'compat_list',
844 'entries': ie_result,
845 }
846 self.add_default_extra_info(ie_result, ie, url)
847 if process:
848 return self.process_ie_result(ie_result, download, extra_info)
849 else:
850 return ie_result
851
852 def add_default_extra_info(self, ie_result, ie, url):
853 self.add_extra_info(ie_result, {
854 'extractor': ie.IE_NAME,
855 'webpage_url': url,
856 'webpage_url_basename': url_basename(url),
857 'extractor_key': ie.ie_key(),
858 })
859
860 def process_ie_result(self, ie_result, download=True, extra_info={}):
861 """
862 Take the result of the ie(may be modified) and resolve all unresolved
863 references (URLs, playlist items).
864
865 It will also download the videos if 'download'.
866 Returns the resolved ie_result.
867 """
868 result_type = ie_result.get('_type', 'video')
869
870 if result_type in ('url', 'url_transparent'):
871 ie_result['url'] = sanitize_url(ie_result['url'])
872 extract_flat = self.params.get('extract_flat', False)
873 if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
874 or extract_flat is True):
875 self.__forced_printings(
876 ie_result, self.prepare_filename(ie_result),
877 incomplete=True)
878 return ie_result
879
880 if result_type == 'video':
881 self.add_extra_info(ie_result, extra_info)
882 return self.process_video_result(ie_result, download=download)
883 elif result_type == 'url':
884 # We have to add extra_info to the results because it may be
885 # contained in a playlist
886 return self.extract_info(ie_result['url'],
887 download,
888 ie_key=ie_result.get('ie_key'),
889 extra_info=extra_info)
890 elif result_type == 'url_transparent':
891 # Use the information from the embedding page
892 info = self.extract_info(
893 ie_result['url'], ie_key=ie_result.get('ie_key'),
894 extra_info=extra_info, download=False, process=False)
895
896 # extract_info may return None when ignoreerrors is enabled and
897 # extraction failed with an error, don't crash and return early
898 # in this case
899 if not info:
900 return info
901
902 force_properties = dict(
903 (k, v) for k, v in ie_result.items() if v is not None)
904 for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
905 if f in force_properties:
906 del force_properties[f]
907 new_result = info.copy()
908 new_result.update(force_properties)
909
910 # Extracted info may not be a video result (i.e.
911 # info.get('_type', 'video') != video) but rather an url or
912 # url_transparent. In such cases outer metadata (from ie_result)
913 # should be propagated to inner one (info). For this to happen
914 # _type of info should be overridden with url_transparent. This
915 # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
916 if new_result.get('_type') == 'url':
917 new_result['_type'] = 'url_transparent'
918
919 return self.process_ie_result(
920 new_result, download=download, extra_info=extra_info)
921 elif result_type in ('playlist', 'multi_video'):
922 # Protect from infinite recursion due to recursively nested playlists
923 # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
924 webpage_url = ie_result['webpage_url']
925 if webpage_url in self._playlist_urls:
926 self.to_screen(
927 '[download] Skipping already downloaded playlist: %s'
928 % ie_result.get('title') or ie_result.get('id'))
929 return
930
931 self._playlist_level += 1
932 self._playlist_urls.add(webpage_url)
933 try:
934 return self.__process_playlist(ie_result, download)
935 finally:
936 self._playlist_level -= 1
937 if not self._playlist_level:
938 self._playlist_urls.clear()
939 elif result_type == 'compat_list':
940 self.report_warning(
941 'Extractor %s returned a compat_list result. '
942 'It needs to be updated.' % ie_result.get('extractor'))
943
944 def _fixup(r):
945 self.add_extra_info(
946 r,
947 {
948 'extractor': ie_result['extractor'],
949 'webpage_url': ie_result['webpage_url'],
950 'webpage_url_basename': url_basename(ie_result['webpage_url']),
951 'extractor_key': ie_result['extractor_key'],
952 }
953 )
954 return r
955 ie_result['entries'] = [
956 self.process_ie_result(_fixup(r), download, extra_info)
957 for r in ie_result['entries']
958 ]
959 return ie_result
960 else:
961 raise Exception('Invalid result type: %s' % result_type)
962
    def __process_playlist(self, ie_result, download):
        """Resolve a playlist/multi_video result: select the requested
        entries (playliststart/end or playlist_items), then process each
        entry individually, attaching playlist-level metadata."""
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')

        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        # playliststart is 1-based in params; convert to 0-based slice start.
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(format):
                # Expand a spec like "1-3,7" into individual 1-based indices.
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            # orderedSet deduplicates while preserving the given order.
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']

        def make_playlistitems_entries(list_ie_entries):
            # Map 1-based playlist_items indices onto the entry list,
            # silently dropping out-of-range indices (negative allowed).
            num_entries = len(list_ie_entries)
            return [
                list_ie_entries[i - 1] for i in playlistitems
                if -num_entries <= i - 1 < num_entries]

        def report_download(num_entries):
            self.to_screen(
                '[%s] playlist %s: Downloading %d videos' %
                (ie_result['extractor'], playlist, num_entries))

        # Entries may come as a concrete list, a lazily-paged PagedList,
        # or a plain iterable; each needs its own slicing strategy.
        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = make_playlistitems_entries(ie_entries)
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems:
                entries = []
                for item in playlistitems:
                    # getslice fetches only the requested page range.
                    entries.extend(ie_entries.getslice(
                        item - 1, item
                    ))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            report_download(n_entries)
        else:  # iterable
            if playlistitems:
                # Materialize only up to the largest requested index.
                entries = make_playlistitems_entries(list(itertools.islice(
                    ie_entries, 0, max(playlistitems))))
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            report_download(n_entries)

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            # Playlist-level metadata inherited by every entry.
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                # Report the original 1-based position within the playlist.
                'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry, incomplete=True)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # TODO: skip failed (empty) entries?
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
1074
1075 @__handle_extraction_exceptions
1076 def __process_iterable_entry(self, entry, download, extra_info):
1077 return self.process_ie_result(
1078 entry, download=download, extra_info=extra_info)
1079
1080 def _build_format_filter(self, filter_spec):
1081 " Returns a function to filter the formats according to the filter_spec "
1082
1083 OPERATORS = {
1084 '<': operator.lt,
1085 '<=': operator.le,
1086 '>': operator.gt,
1087 '>=': operator.ge,
1088 '=': operator.eq,
1089 '!=': operator.ne,
1090 }
1091 operator_rex = re.compile(r'''(?x)\s*
1092 (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
1093 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
1094 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
1095 $
1096 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
1097 m = operator_rex.search(filter_spec)
1098 if m:
1099 try:
1100 comparison_value = int(m.group('value'))
1101 except ValueError:
1102 comparison_value = parse_filesize(m.group('value'))
1103 if comparison_value is None:
1104 comparison_value = parse_filesize(m.group('value') + 'B')
1105 if comparison_value is None:
1106 raise ValueError(
1107 'Invalid value %r in format specification %r' % (
1108 m.group('value'), filter_spec))
1109 op = OPERATORS[m.group('op')]
1110
1111 if not m:
1112 STR_OPERATORS = {
1113 '=': operator.eq,
1114 '^=': lambda attr, value: attr.startswith(value),
1115 '$=': lambda attr, value: attr.endswith(value),
1116 '*=': lambda attr, value: value in attr,
1117 }
1118 str_operator_rex = re.compile(r'''(?x)
1119 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id|language)
1120 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
1121 \s*(?P<value>[a-zA-Z0-9._-]+)
1122 \s*$
1123 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
1124 m = str_operator_rex.search(filter_spec)
1125 if m:
1126 comparison_value = m.group('value')
1127 str_op = STR_OPERATORS[m.group('op')]
1128 if m.group('negation'):
1129 op = lambda attr, value: not str_op(attr, value)
1130 else:
1131 op = str_op
1132
1133 if not m:
1134 raise ValueError('Invalid filter specification %r' % filter_spec)
1135
1136 def _filter(f):
1137 actual_value = f.get(m.group('key'))
1138 if actual_value is None:
1139 return m.group('none_inclusive')
1140 return op(actual_value, comparison_value)
1141 return _filter
1142
1143 def _default_format_spec(self, info_dict, download=True):
1144
1145 def can_merge():
1146 merger = FFmpegMergerPP(self)
1147 return merger.available and merger.can_merge()
1148
1149 def prefer_best():
1150 if self.params.get('simulate', False):
1151 return False
1152 if not download:
1153 return False
1154 if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
1155 return True
1156 if info_dict.get('is_live'):
1157 return True
1158 if not can_merge():
1159 return True
1160 return False
1161
1162 req_format_list = ['bestvideo+bestaudio', 'best']
1163 if prefer_best():
1164 req_format_list.reverse()
1165 return '/'.join(req_format_list)
1166
    def build_format_selector(self, format_spec):
        """Compile *format_spec* (e.g. 'bestvideo[height<=720]+bestaudio/best')
        into a selector function.

        The spec is tokenized with Python's tokenizer, parsed into a tree of
        FormatSelector nodes (SINGLE / PICKFIRST '/' / MERGE '+' / GROUP '()',
        each with optional '[...]' filters) and compiled into a function that
        maps a context dict {'formats': ..., 'incomplete_formats': ...} to an
        iterable of the chosen format dicts."""
        def syntax_error(note, start):
            # start is a tokenizer (row, col) pair; col positions the caret.
            message = (
                'Invalid format specification: '
                '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
            return SyntaxError(message)

        PICKFIRST = 'PICKFIRST'
        MERGE = 'MERGE'
        SINGLE = 'SINGLE'
        GROUP = 'GROUP'
        FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

        def _parse_filter(tokens):
            # Consume tokens up to the closing ']' and return the raw
            # filter text for _build_format_filter.
            filter_parts = []
            for type, string, start, _, _ in tokens:
                if type == tokenize.OP and string == ']':
                    return ''.join(filter_parts)
                else:
                    filter_parts.append(string)

        def _remove_unused_ops(tokens):
            # Remove operators that we don't use and join them with the surrounding strings
            # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
            ALLOWED_OPS = ('/', '+', ',', '(', ')')
            last_string, last_start, last_end, last_line = None, None, None, None
            for type, string, start, end, line in tokens:
                if type == tokenize.OP and string == '[':
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                    # everything inside brackets will be handled by _parse_filter
                    for type, string, start, end, line in tokens:
                        yield type, string, start, end, line
                        if type == tokenize.OP and string == ']':
                            break
                elif type == tokenize.OP and string in ALLOWED_OPS:
                    if last_string:
                        yield tokenize.NAME, last_string, last_start, last_end, last_line
                        last_string = None
                    yield type, string, start, end, line
                elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                    if not last_string:
                        last_string = string
                        last_start = start
                        last_end = end
                    else:
                        last_string += string
            if last_string:
                yield tokenize.NAME, last_string, last_start, last_end, last_line

        def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
            # Recursive-descent parse of one selection; the inside_* flags
            # tell which delimiters end the current nesting level.
            selectors = []
            current_selector = None
            for type, string, start, _, _ in tokens:
                # ENCODING is only defined in python 3.x
                if type == getattr(tokenize, 'ENCODING', None):
                    continue
                elif type in [tokenize.NAME, tokenize.NUMBER]:
                    current_selector = FormatSelector(SINGLE, string, [])
                elif type == tokenize.OP:
                    if string == ')':
                        if not inside_group:
                            # ')' will be handled by the parentheses group
                            tokens.restore_last_token()
                        break
                    elif inside_merge and string in ['/', ',']:
                        tokens.restore_last_token()
                        break
                    elif inside_choice and string == ',':
                        tokens.restore_last_token()
                        break
                    elif string == ',':
                        if not current_selector:
                            raise syntax_error('"," must follow a format selector', start)
                        selectors.append(current_selector)
                        current_selector = None
                    elif string == '/':
                        if not current_selector:
                            raise syntax_error('"/" must follow a format selector', start)
                        first_choice = current_selector
                        second_choice = _parse_format_selection(tokens, inside_choice=True)
                        current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                    elif string == '[':
                        if not current_selector:
                            # A bare filter implicitly applies to 'best'.
                            current_selector = FormatSelector(SINGLE, 'best', [])
                        format_filter = _parse_filter(tokens)
                        current_selector.filters.append(format_filter)
                    elif string == '(':
                        if current_selector:
                            raise syntax_error('Unexpected "("', start)
                        group = _parse_format_selection(tokens, inside_group=True)
                        current_selector = FormatSelector(GROUP, group, [])
                    elif string == '+':
                        if inside_merge:
                            raise syntax_error('Unexpected "+"', start)
                        video_selector = current_selector
                        audio_selector = _parse_format_selection(tokens, inside_merge=True)
                        if not video_selector or not audio_selector:
                            raise syntax_error('"+" must be between two format selectors', start)
                        current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                    else:
                        raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
                elif type == tokenize.ENDMARKER:
                    break
            if current_selector:
                selectors.append(current_selector)
            return selectors

        def _build_selector_function(selector):
            # Compile a parse node (or a list of alternatives) into a
            # function ctx -> iterable of format dicts.
            if isinstance(selector, list):
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        for format in f(ctx):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    # Return the first alternative that yields any format.
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                def selector_function(ctx):
                    # NOTE: formats are assumed sorted worst-to-best, so
                    # index -1 is 'best' and 0 is 'worst'.
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) we will fallback to best/worst
                        # {video,audio}-only format
                        elif ctx['incomplete_formats']:
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # Otherwise the spec is an extension or a format_id.
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    # Combine a (video, audio) pair into one synthetic
                    # format dict carrying both streams' metadata.
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    # Formats must be opposite (video+audio)
                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                        self.report_error(
                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                            % (format_1, format_2))
                        return
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # Cartesian product of video and audio candidates; each
                    # side gets its own deep copy so filters don't interfere.
                    for pair in itertools.product(
                            video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
                        yield _merge(pair)

            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                # Apply the node's [...] filters before selection proper.
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector

        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Iterator with one-token pushback (restore_last_token), which
            # the recursive parser needs to hand delimiters back upward.
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            next = __next__

            def restore_last_token(self):
                self.counter -= 1

        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
1433
1434 def _calc_headers(self, info_dict):
1435 res = std_headers.copy()
1436
1437 add_headers = info_dict.get('http_headers')
1438 if add_headers:
1439 res.update(add_headers)
1440
1441 cookies = self._calc_cookies(info_dict)
1442 if cookies:
1443 res['Cookie'] = cookies
1444
1445 if 'X-Forwarded-For' not in res:
1446 x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
1447 if x_forwarded_for_ip:
1448 res['X-Forwarded-For'] = x_forwarded_for_ip
1449
1450 return res
1451
    def _calc_cookies(self, info_dict):
        """Return the Cookie header value the cookiejar would send for
        info_dict's URL, or None if no cookies match."""
        # Build a throwaway request purely so the cookiejar can match it.
        pr = sanitized_Request(info_dict['url'])
        self.cookiejar.add_cookie_header(pr)
        return pr.get_header('Cookie')
1456
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single video result in place, pick the formats matching
        the requested format spec and, when *download* is set, hand each
        selected format to process_info(). Returns the updated info_dict."""
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        def report_force_conversion(field, field_not, conversion):
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string field to str, warning about the extractor bug.
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce every known numeric field to int (or None), warning once each.
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        # --- Thumbnails: normalize single 'thumbnail' into 'thumbnails',
        # sort worst-to-best, sanitize URLs and assign ids. ---
        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            thumbnails.sort(key=lambda t: (
                t.get('preference') if t.get('preference') is not None else -1,
                t.get('width') if t.get('width') is not None else -1,
                t.get('height') if t.get('height') is not None else -1,
                t.get('id') if t.get('id') is not None else '', t.get('url')))
            for i, t in enumerate(thumbnails):
                t['url'] = sanitize_url(t['url'])
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    t['id'] = '%d' % i

        if self.params.get('list_thumbnails'):
            # Listing mode: print and stop (returns None, nothing downloaded).
            self.list_thumbnails(info_dict)
            return

        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            # List is sorted worst-to-best, so the last entry is the best one.
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive YYYYMMDD date fields from their epoch-timestamp counterparts.
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = compat_str(upload_date.strftime('%Y%m%d'))
                except (ValueError, OverflowError, OSError):
                    pass

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # --- Subtitles: sanitize URLs / fill in extensions, then select. ---
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        if self.params.get('listsubtitles', False):
            # Listing mode: print and stop (returns None, nothing downloaded).
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        def is_wellformed(f):
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats or []))

        if not formats:
            raise ExtractorError('No video formats found!')

        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            # Listing mode: print and stop (returns None, nothing downloaded).
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = self._default_format_spec(info_dict, download=download)
            if self.params.get('verbose'):
                self._write_string('[debug] Default format spec: %s\n' % req_format)

        format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
1697
1698 def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
1699 """Select the requested subtitles and their format"""
1700 available_subs = {}
1701 if normal_subtitles and self.params.get('writesubtitles'):
1702 available_subs.update(normal_subtitles)
1703 if automatic_captions and self.params.get('writeautomaticsub'):
1704 for lang, cap_info in automatic_captions.items():
1705 if lang not in available_subs:
1706 available_subs[lang] = cap_info
1707
1708 if (not self.params.get('writesubtitles') and not
1709 self.params.get('writeautomaticsub') or not
1710 available_subs):
1711 return None
1712
1713 if self.params.get('allsubtitles', False):
1714 requested_langs = available_subs.keys()
1715 else:
1716 if self.params.get('subtitleslangs', False):
1717 requested_langs = self.params.get('subtitleslangs')
1718 elif 'en' in available_subs:
1719 requested_langs = ['en']
1720 else:
1721 requested_langs = [list(available_subs.keys())[0]]
1722
1723 formats_query = self.params.get('subtitlesformat', 'best')
1724 formats_preference = formats_query.split('/') if formats_query else []
1725 subs = {}
1726 for lang in requested_langs:
1727 formats = available_subs.get(lang)
1728 if formats is None:
1729 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
1730 continue
1731 for ext in formats_preference:
1732 if ext == 'best':
1733 f = formats[-1]
1734 break
1735 matches = list(filter(lambda f: f['ext'] == ext, formats))
1736 if matches:
1737 f = matches[-1]
1738 break
1739 else:
1740 f = formats[-1]
1741 self.report_warning(
1742 'No subtitle format found matching "%s" for language %s, '
1743 'using %s' % (formats_query, lang, f['ext']))
1744 subs[lang] = f
1745 return subs
1746
1747 def __forced_printings(self, info_dict, filename, incomplete):
1748 def print_mandatory(field):
1749 if (self.params.get('force%s' % field, False)
1750 and (not incomplete or info_dict.get(field) is not None)):
1751 self.to_stdout(info_dict[field])
1752
1753 def print_optional(field):
1754 if (self.params.get('force%s' % field, False)
1755 and info_dict.get(field) is not None):
1756 self.to_stdout(info_dict[field])
1757
1758 print_mandatory('title')
1759 print_mandatory('id')
1760 if self.params.get('forceurl', False) and not incomplete:
1761 if info_dict.get('requested_formats') is not None:
1762 for f in info_dict['requested_formats']:
1763 self.to_stdout(f['url'] + f.get('play_path', ''))
1764 else:
1765 # For RTMP URLs, also include the playpath
1766 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
1767 print_optional('thumbnail')
1768 print_optional('description')
1769 if self.params.get('forcefilename', False) and filename is not None:
1770 self.to_stdout(filename)
1771 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1772 self.to_stdout(formatSeconds(info_dict['duration']))
1773 print_mandatory('format')
1774 if self.params.get('forcejson', False):
1775 self.to_stdout(json.dumps(info_dict))
1776
    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Runs the whole per-video pipeline on an already-extracted result:
        download-limit and filter checks, forced printings, optional writing
        of description/annotations/subtitles/info-JSON/thumbnails, the actual
        download (including multi-format download-and-merge), post-download
        fixups and postprocessing, and recording in the download archive.
        """

        assert info_dict.get('_type', 'video') == 'video'

        # --max-downloads: a missing/non-numeric value means "no limit".
        max_downloads = int_or_none(self.params.get('max_downloads')) or float('inf')
        if self._num_downloads >= max_downloads:
            raise MaxDownloadsReached()

        # TODO: backward compatibility, to be removed
        info_dict['fulltitle'] = info_dict['title']

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        # Apply the configured filters (match_filter, date range, ...);
        # a non-None return value is the human-readable skip reason.
        reason = self._match_entry(info_dict, incomplete=False)
        if reason is not None:
            self.to_screen('[download] ' + reason)
            return

        self._num_downloads += 1

        info_dict['_filename'] = filename = self.prepare_filename(info_dict)

        # Forced printings
        self.__forced_printings(info_dict, filename, incomplete=False)

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        def ensure_dir_exists(path):
            # Create the parent directory of `path`; a racy EEXIST from a
            # concurrent creation is treated as success.
            try:
                dn = os.path.dirname(path)
                if dn and not os.path.exists(dn):
                    os.makedirs(dn)
                return True
            except (OSError, IOError) as err:
                if isinstance(err, OSError) and err.errno == errno.EEXIST:
                    return True
                self.report_error('unable to create directory ' + error_to_compat_str(err))
                return False

        if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
            return

        # --write-description
        if self.params.get('writedescription', False):
            descfn = replace_extension(filename, 'description', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
                self.to_screen('[info] Video description is already present')
            elif info_dict.get('description') is None:
                self.report_warning('There\'s no description to write.')
            else:
                try:
                    self.to_screen('[info] Writing video description to: ' + descfn)
                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                        descfile.write(info_dict['description'])
                except (OSError, IOError):
                    self.report_error('Cannot write description file ' + descfn)
                    return

        # --write-annotations
        if self.params.get('writeannotations', False):
            annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                self.to_screen('[info] Video annotations are already present')
            elif not info_dict.get('annotations'):
                self.report_warning('There are no annotations to write.')
            else:
                try:
                    self.to_screen('[info] Writing video annotations to: ' + annofn)
                    with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                        annofile.write(info_dict['annotations'])
                except (KeyError, TypeError):
                    self.report_warning('There are no annotations to write.')
                except (OSError, IOError):
                    self.report_error('Cannot write annotations file: ' + annofn)
                    return

        subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                       self.params.get('writeautomaticsub')])

        if subtitles_are_requested and info_dict.get('requested_subtitles'):
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            subtitles = info_dict['requested_subtitles']
            ie = self.get_info_extractor(info_dict['extractor_key'])
            for sub_lang, sub_info in subtitles.items():
                sub_format = sub_info['ext']
                sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                else:
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    # Inline subtitle data (if the extractor supplied it)
                    # avoids a second HTTP request.
                    if sub_info.get('data') is not None:
                        try:
                            # Use newline='' to prevent conversion of newline characters
                            # See https://github.com/ytdl-org/youtube-dl/issues/10268
                            with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
                                subfile.write(sub_info['data'])
                        except (OSError, IOError):
                            self.report_error('Cannot write subtitles file ' + sub_filename)
                            return
                    else:
                        try:
                            sub_data = ie._request_webpage(
                                sub_info['url'], info_dict['id'], note=False).read()
                            with io.open(encodeFilename(sub_filename), 'wb') as subfile:
                                subfile.write(sub_data)
                        except (ExtractorError, IOError, OSError, ValueError) as err:
                            self.report_warning('Unable to download subtitle for "%s": %s' %
                                                (sub_lang, error_to_compat_str(err)))
                            continue

        # --write-info-json
        if self.params.get('writeinfojson', False):
            infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
                self.to_screen('[info] Video description metadata is already present')
            else:
                self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
                try:
                    write_json_file(self.filter_requested_info(info_dict), infofn)
                except (OSError, IOError):
                    self.report_error('Cannot write metadata to JSON file ' + infofn)
                    return

        self._write_thumbnails(info_dict, filename)

        if not self.params.get('skip_download', False):
            try:
                def checked_get_suitable_downloader(info_dict, params):
                    # NOTE(review): this assumes get_suitable_downloader
                    # clears 'external_downloader_args' in `params` when the
                    # requested external downloader is rejected — confirm
                    # against downloader/__init__.py.
                    ed_args = params.get('external_downloader_args')
                    dler = get_suitable_downloader(info_dict, params)
                    if ed_args and not params.get('external_downloader_args'):
                        # external_downloader_args was cleared because external_downloader was rejected
                        self.report_warning('Requested external downloader cannot be used: '
                                            'ignoring --external-downloader-args.')
                    return dler

                def dl(name, info):
                    # Instantiate the appropriate FileDownloader, attach the
                    # registered progress hooks and start the download.
                    fd = checked_get_suitable_downloader(info, self.params)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)

                if info_dict.get('requested_formats') is not None:
                    # Multiple formats were selected: download each one and
                    # merge them afterwards (if ffmpeg/avconv is available).
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self)
                    if not merger.available:
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged.')
                    else:
                        postprocessors = [merger]

                    def compatible_formats(formats):
                        # `formats` is the selected (video, audio) pair.
                        video, audio = formats
                        # Check extension
                        video_ext, audio_ext = video.get('ext'), audio.get('ext')
                        if video_ext and audio_ext:
                            COMPATIBLE_EXTS = (
                                ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
                                # NOTE(review): ('webm') is a plain string, not
                                # a 1-tuple, so the `in` tests below perform
                                # substring matching for it — exact 'webm'
                                # works, but e.g. 'web' would also match;
                                # confirm intent.
                                ('webm')
                            )
                            for exts in COMPATIBLE_EXTS:
                                if video_ext in exts and audio_ext in exts:
                                    return True
                        # TODO: Check acodec/vcodec
                        return False

                    filename_real_ext = os.path.splitext(filename)[1][1:]
                    filename_wo_ext = (
                        os.path.splitext(filename)[0]
                        if filename_real_ext == info_dict['ext']
                        else filename)
                    requested_formats = info_dict['requested_formats']
                    if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
                        info_dict['ext'] = 'mkv'
                        self.report_warning(
                            'Requested formats are incompatible for merge and will be merged into mkv.')
                    # Ensure filename always has a correct extension for successful merge
                    filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
                    if os.path.exists(encodeFilename(filename)):
                        self.to_screen(
                            '[download] %s has already been downloaded and '
                            'merged' % filename)
                    else:
                        for f in requested_formats:
                            new_info = dict(info_dict)
                            new_info.update(f)
                            # Each part is written to an intermediate
                            # 'f<format_id>' file, merged later by the PP.
                            fname = prepend_extension(
                                self.prepare_filename(new_info),
                                'f%s' % f['format_id'], new_info['ext'])
                            if not ensure_dir_exists(fname):
                                return
                            downloaded.append(fname)
                            partial_success = dl(fname, new_info)
                            success = success and partial_success
                        info_dict['__postprocessors'] = postprocessors
                        info_dict['__files_to_merge'] = downloaded
                else:
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % error_to_compat_str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

            if success and filename != '-':
                # Fixup content
                fixup_policy = self.params.get('fixup')
                if fixup_policy is None:
                    fixup_policy = 'detect_or_warn'

                INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'

                # Fixup 1: non-uniform pixel aspect ratio.
                stretched_ratio = info_dict.get('stretched_ratio')
                if stretched_ratio is not None and stretched_ratio != 1:
                    if fixup_policy == 'warn':
                        self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                            info_dict['id'], stretched_ratio))
                    elif fixup_policy == 'detect_or_warn':
                        stretched_pp = FFmpegFixupStretchedPP(self)
                        if stretched_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(stretched_pp)
                        else:
                            self.report_warning(
                                '%s: Non-uniform pixel ratio (%s). %s'
                                % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                # Fixup 2: DASH m4a container.
                if (info_dict.get('requested_formats') is None
                        and info_dict.get('container') == 'm4a_dash'):
                    if fixup_policy == 'warn':
                        self.report_warning(
                            '%s: writing DASH m4a. '
                            'Only some players support this container.'
                            % info_dict['id'])
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM4aPP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                        else:
                            self.report_warning(
                                '%s: writing DASH m4a. '
                                'Only some players support this container. %s'
                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                # Fixup 3: malformed AAC bitstream in HLS downloads.
                # Operator precedence makes this read as:
                # m3u8_native or (m3u8 and hls_prefer_native).
                if (info_dict.get('protocol') == 'm3u8_native'
                        or info_dict.get('protocol') == 'm3u8'
                        and self.params.get('hls_prefer_native')):
                    if fixup_policy == 'warn':
                        self.report_warning('%s: malformed AAC bitstream detected.' % (
                            info_dict['id']))
                    elif fixup_policy == 'detect_or_warn':
                        fixup_pp = FFmpegFixupM3u8PP(self)
                        if fixup_pp.available:
                            info_dict.setdefault('__postprocessors', [])
                            info_dict['__postprocessors'].append(fixup_pp)
                        else:
                            self.report_warning(
                                '%s: malformed AAC bitstream detected. %s'
                                % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
                    else:
                        assert fixup_policy in ('ignore', 'never')

                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error('postprocessing: %s' % error_to_compat_str(err))
                    return
        self.record_download_archive(info_dict)
        # avoid possible nugatory search for further items (PR #26638)
        if self._num_downloads >= max_downloads:
            raise MaxDownloadsReached()
2067
2068 def download(self, url_list):
2069 """Download a given list of URLs."""
2070 outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
2071 if (len(url_list) > 1
2072 and outtmpl != '-'
2073 and '%' not in outtmpl
2074 and self.params.get('max_downloads') != 1):
2075 raise SameFileError(outtmpl)
2076
2077 for url in url_list:
2078 try:
2079 # It also downloads the videos
2080 res = self.extract_info(
2081 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
2082 except UnavailableVideoError:
2083 self.report_error('unable to download video')
2084 except MaxDownloadsReached:
2085 self.to_screen('[info] Maximum number of downloaded files reached.')
2086 raise
2087 else:
2088 if self.params.get('dump_single_json', False):
2089 self.to_stdout(json.dumps(res))
2090
2091 return self._download_retcode
2092
2093 def download_with_info_file(self, info_filename):
2094 with contextlib.closing(fileinput.FileInput(
2095 [info_filename], mode='r',
2096 openhook=fileinput.hook_encoded('utf-8'))) as f:
2097 # FileInput doesn't have a read method, we can't call json.load
2098 info = self.filter_requested_info(json.loads('\n'.join(f)))
2099 try:
2100 self.process_ie_result(info, download=True)
2101 except DownloadError:
2102 webpage_url = info.get('webpage_url')
2103 if webpage_url is not None:
2104 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
2105 return self.download([webpage_url])
2106 else:
2107 raise
2108 return self._download_retcode
2109
2110 @staticmethod
2111 def filter_requested_info(info_dict):
2112 return dict(
2113 (k, v) for k, v in info_dict.items()
2114 if k not in ['requested_formats', 'requested_subtitles'])
2115
2116 def post_process(self, filename, ie_info):
2117 """Run all the postprocessors on the given file."""
2118 info = dict(ie_info)
2119 info['filepath'] = filename
2120 pps_chain = []
2121 if ie_info.get('__postprocessors') is not None:
2122 pps_chain.extend(ie_info['__postprocessors'])
2123 pps_chain.extend(self._pps)
2124 for pp in pps_chain:
2125 files_to_delete = []
2126 try:
2127 files_to_delete, info = pp.run(info)
2128 except PostProcessingError as e:
2129 self.report_error(e.msg)
2130 if files_to_delete and not self.params.get('keepvideo', False):
2131 for old_filename in files_to_delete:
2132 self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
2133 try:
2134 os.remove(encodeFilename(old_filename))
2135 except (IOError, OSError):
2136 self.report_warning('Unable to remove downloaded original file')
2137
2138 def _make_archive_id(self, info_dict):
2139 video_id = info_dict.get('id')
2140 if not video_id:
2141 return
2142 # Future-proof against any change in case
2143 # and backwards compatibility with prior versions
2144 extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist
2145 if extractor is None:
2146 url = str_or_none(info_dict.get('url'))
2147 if not url:
2148 return
2149 # Try to find matching extractor for the URL and take its ie_key
2150 for ie in self._ies:
2151 if ie.suitable(url):
2152 extractor = ie.ie_key()
2153 break
2154 else:
2155 return
2156 return extractor.lower() + ' ' + video_id
2157
2158 def in_download_archive(self, info_dict):
2159 fn = self.params.get('download_archive')
2160 if fn is None:
2161 return False
2162
2163 vid_id = self._make_archive_id(info_dict)
2164 if not vid_id:
2165 return False # Incomplete video information
2166
2167 try:
2168 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
2169 for line in archive_file:
2170 if line.strip() == vid_id:
2171 return True
2172 except IOError as ioe:
2173 if ioe.errno != errno.ENOENT:
2174 raise
2175 return False
2176
2177 def record_download_archive(self, info_dict):
2178 fn = self.params.get('download_archive')
2179 if fn is None:
2180 return
2181 vid_id = self._make_archive_id(info_dict)
2182 assert vid_id
2183 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
2184 archive_file.write(vid_id + '\n')
2185
2186 @staticmethod
2187 def format_resolution(format, default='unknown'):
2188 if format.get('vcodec') == 'none':
2189 return 'audio only'
2190 if format.get('resolution') is not None:
2191 return format['resolution']
2192 if format.get('height') is not None:
2193 if format.get('width') is not None:
2194 res = '%sx%s' % (format['width'], format['height'])
2195 else:
2196 res = '%sp' % format['height']
2197 elif format.get('width') is not None:
2198 res = '%dx?' % format['width']
2199 else:
2200 res = default
2201 return res
2202
2203 def _format_note(self, fdict):
2204 res = ''
2205 if fdict.get('ext') in ['f4f', 'f4m']:
2206 res += '(unsupported) '
2207 if fdict.get('language'):
2208 if res:
2209 res += ' '
2210 res += '[%s] ' % fdict['language']
2211 if fdict.get('format_note') is not None:
2212 res += fdict['format_note'] + ' '
2213 if fdict.get('tbr') is not None:
2214 res += '%4dk ' % fdict['tbr']
2215 if fdict.get('container') is not None:
2216 if res:
2217 res += ', '
2218 res += '%s container' % fdict['container']
2219 if (fdict.get('vcodec') is not None
2220 and fdict.get('vcodec') != 'none'):
2221 if res:
2222 res += ', '
2223 res += fdict['vcodec']
2224 if fdict.get('vbr') is not None:
2225 res += '@'
2226 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
2227 res += 'video@'
2228 if fdict.get('vbr') is not None:
2229 res += '%4dk' % fdict['vbr']
2230 if fdict.get('fps') is not None:
2231 if res:
2232 res += ', '
2233 res += '%sfps' % fdict['fps']
2234 if fdict.get('acodec') is not None:
2235 if res:
2236 res += ', '
2237 if fdict['acodec'] == 'none':
2238 res += 'video only'
2239 else:
2240 res += '%-5s' % fdict['acodec']
2241 elif fdict.get('abr') is not None:
2242 if res:
2243 res += ', '
2244 res += 'audio'
2245 if fdict.get('abr') is not None:
2246 res += '@%3dk' % fdict['abr']
2247 if fdict.get('asr') is not None:
2248 res += ' (%5dHz)' % fdict['asr']
2249 if fdict.get('filesize') is not None:
2250 if res:
2251 res += ', '
2252 res += format_bytes(fdict['filesize'])
2253 elif fdict.get('filesize_approx') is not None:
2254 if res:
2255 res += ', '
2256 res += '~' + format_bytes(fdict['filesize_approx'])
2257 return res
2258
2259 def list_formats(self, info_dict):
2260 formats = info_dict.get('formats', [info_dict])
2261 table = [
2262 [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
2263 for f in formats
2264 if f.get('preference') is None or f['preference'] >= -1000]
2265 if len(formats) > 1:
2266 table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
2267
2268 header_line = ['format code', 'extension', 'resolution', 'note']
2269 self.to_screen(
2270 '[info] Available formats for %s:\n%s' %
2271 (info_dict['id'], render_table(header_line, table)))
2272
2273 def list_thumbnails(self, info_dict):
2274 thumbnails = info_dict.get('thumbnails')
2275 if not thumbnails:
2276 self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
2277 return
2278
2279 self.to_screen(
2280 '[info] Thumbnails for %s:' % info_dict['id'])
2281 self.to_screen(render_table(
2282 ['ID', 'width', 'height', 'URL'],
2283 [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
2284
2285 def list_subtitles(self, video_id, subtitles, name='subtitles'):
2286 if not subtitles:
2287 self.to_screen('%s has no %s' % (video_id, name))
2288 return
2289 self.to_screen(
2290 'Available %s for %s:' % (name, video_id))
2291 self.to_screen(render_table(
2292 ['Language', 'formats'],
2293 [[lang, ', '.join(f['ext'] for f in reversed(formats))]
2294 for lang, formats in subtitles.items()]))
2295
2296 def urlopen(self, req):
2297 """ Start an HTTP download """
2298 if isinstance(req, compat_basestring):
2299 req = sanitized_Request(req)
2300 return self._opener.open(req, timeout=self._socket_timeout)
2301
    def print_debug_header(self):
        """Write the '[debug] ...' startup header (verbose mode only).

        Reports encodings, youtube-dl version, git revision (when run from
        a checkout), Python/platform info, helper-binary versions, the
        proxy map and optionally (with --call-home) the public IP and a
        latest-version check.
        """
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled' + '\n')
        # Best-effort: report the git revision when running from a checkout.
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = process_communicate_or_kill(sp)
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            # sys.exc_clear only exists on Python 2; ignore its absence.
            try:
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # Append the PyPy version since platform.python_version()
            # reports the emulated CPython version there.
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s) - %s\n' % (
            platform.python_version(), python_implementation(),
            platform_name()))

        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the effective proxies from every opener handler that has any.
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
2377
    def _setup_opener(self):
        """Build the urllib opener used by all HTTP(S) requests.

        Configures the socket timeout, cookie jar (optionally backed by
        --cookies file), proxies, and the custom handler chain; stores the
        result in self._opener and the jar in self.cookiejar.
        """
        timeout_val = self.params.get('socket_timeout')
        # Default timeout is 600s when --socket-timeout was not given.
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # In-memory jar only; nothing persisted.
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            opts_cookiefile = expand_path(opts_cookiefile)
            self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
            # Load existing cookies only if the file is readable; it will
            # be created on save otherwise.
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load(ignore_discard=True, ignore_expires=True)

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            # --proxy '' explicitly disables all proxies (including env vars).
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        redirect_handler = YoutubeDLRedirectHandler()
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
2430
2431 def encode(self, s):
2432 if isinstance(s, bytes):
2433 return s # Already encoded
2434
2435 try:
2436 return s.encode(self.get_encoding())
2437 except UnicodeEncodeError as err:
2438 err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
2439 raise
2440
2441 def get_encoding(self):
2442 encoding = self.params.get('encoding')
2443 if encoding is None:
2444 encoding = preferredencoding()
2445 return encoding
2446
    def _write_thumbnails(self, info_dict, filename):
        """Download thumbnail image(s) next to the video file.

        With --write-thumbnail only the last (best) thumbnail is written;
        with --write-all-thumbnails every entry of info_dict['thumbnails']
        is. Download failures only produce a warning.
        """
        if self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails')
            if thumbnails:
                # The list is ordered worst-to-best; keep only the best one.
                thumbnails = [thumbnails[-1]]
        elif self.params.get('write_all_thumbnails', False):
            thumbnails = info_dict.get('thumbnails')
        else:
            return

        if not thumbnails:
            # No thumbnails present, so return immediately
            return

        for t in thumbnails:
            thumb_ext = determine_ext(t['url'], 'jpg')
            # Disambiguate filenames/messages only when writing several.
            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
            thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
            t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))

            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                try:
                    uf = self.urlopen(t['url'])
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], error_to_compat_str(err)))
|