1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import
5
6 import errno
7 import io
8 import json
9 import os
10 import platform
11 import re
12 import shutil
13 import subprocess
14 import socket
15 import sys
16 import time
17 import traceback
18
19 if os.name == 'nt':
20 import ctypes
21
22 from .utils import (
23 compat_cookiejar,
24 compat_http_client,
25 compat_print,
26 compat_str,
27 compat_urllib_error,
28 compat_urllib_request,
29 ContentTooShortError,
30 date_from_str,
31 DateRange,
32 determine_ext,
33 DownloadError,
34 encodeFilename,
35 ExtractorError,
36 format_bytes,
37 locked_file,
38 make_HTTPS_handler,
39 MaxDownloadsReached,
40 PostProcessingError,
41 platform_name,
42 preferredencoding,
43 SameFileError,
44 sanitize_filename,
45 subtitles_filename,
46 takewhile_inclusive,
47 UnavailableVideoError,
48 write_json_file,
49 write_string,
50 YoutubeDLHandler,
51 )
52 from .extractor import get_info_extractor, gen_extractors
53 from .FileDownloader import FileDownloader
54 from .version import __version__
55
56
57 class YoutubeDL(object):
58 """YoutubeDL class.
59
60 YoutubeDL objects are the ones responsible for downloading the
61 actual video file and writing it to disk if the user has requested
62 it, among some other tasks. In most cases there should be one per
63 program. As, given a video URL, the downloader doesn't know how to
64 extract all the needed information, task that InfoExtractors do, it
65 has to pass the URL to one of them.
66
67 For this, YoutubeDL objects have a method that allows
68 InfoExtractors to be registered in a given order. When it is passed
69 a URL, the YoutubeDL object handles it to the first InfoExtractor it
70 finds that reports being able to handle it. The InfoExtractor extracts
71 all the information about the video or videos the URL refers to, and
72 YoutubeDL process the extracted information, possibly using a File
73 Downloader to download the video.
74
75 YoutubeDL objects accept a lot of parameters. In order not to saturate
76 the object constructor with arguments, it receives a dictionary of
77 options instead. These options are available through the params
78 attribute for the InfoExtractors to use. The YoutubeDL also
79 registers itself as the downloader in charge for the InfoExtractors
80 that are added to it, so this is a "mutual registration".
81
82 Available options:
83
84 username: Username for authentication purposes.
85 password: Password for authentication purposes.
86 videopassword: Password for accessing a video.
87 usenetrc: Use netrc for authentication instead.
88 verbose: Print additional info to stdout.
89 quiet: Do not print messages to stdout.
90 forceurl: Force printing final URL.
91 forcetitle: Force printing title.
92 forceid: Force printing ID.
93 forcethumbnail: Force printing thumbnail URL.
94 forcedescription: Force printing description.
95 forcefilename: Force printing final filename.
96 forcejson: Force printing info_dict as JSON.
97 simulate: Do not download the video files.
98 format: Video format code.
99 format_limit: Highest quality format to try.
100 outtmpl: Template for output names.
101 restrictfilenames: Do not allow "&" and spaces in file names
102 ignoreerrors: Do not stop on download errors.
103 nooverwrites: Prevent overwriting files.
104 playliststart: Playlist item to start at.
105 playlistend: Playlist item to end at.
106 matchtitle: Download only matching titles.
107 rejecttitle: Reject downloads for matching titles.
108 logger: Log messages to a logging.Logger instance.
109 logtostderr: Log messages to stderr instead of stdout.
110 writedescription: Write the video description to a .description file
111 writeinfojson: Write the video metadata to a .info.json file
112 writeannotations: Write the video annotations to a .annotations.xml file
113 writethumbnail: Write the thumbnail image to a file
114 writesubtitles: Write the video subtitles to a file
115 writeautomaticsub: Write the automatic subtitles to a file
116 allsubtitles: Downloads all the subtitles of the video
117 (requires writesubtitles or writeautomaticsub)
118 listsubtitles: Lists all available subtitles for the video
119 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
120 subtitleslangs: List of languages of the subtitles to download
121 keepvideo: Keep the video file after post-processing
122 daterange: A DateRange object, download only if the upload_date is in the range.
123 skip_download: Skip the actual download of the video file
124 cachedir: Location of the cache files in the filesystem.
125 None to disable filesystem cache.
126 noplaylist: Download single video instead of a playlist if in doubt.
127 age_limit: An integer representing the user's age in years.
128 Unsuitable videos for the given age are skipped.
129 download_archive: File name of a file where all downloads are recorded.
130 Videos already present in the file are not downloaded
131 again.
132 cookiefile: File name where cookies should be read from and dumped to.
133 nocheckcertificate:Do not verify SSL certificates
134 proxy: URL of the proxy server to use
135
136 The following parameters are not used by YoutubeDL itself, they are used by
137 the FileDownloader:
138 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
139 noresizebuffer, retries, continuedl, noprogress, consoletitle
140 """
141
142 params = None
143 _ies = []
144 _pps = []
145 _download_retcode = None
146 _num_downloads = None
147 _screen_file = None
148
def __init__(self, params):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary described in the class docstring.
    It is stored as self.params (not copied) and may be modified here:
    'restrictfilenames' is forced to True when the file system encoding
    is ASCII-only on Python 3.
    """
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout

    # self.params must be assigned before the first report_warning() call:
    # report_warning() -> to_stderr() reads self.params.
    self.params = params

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames')):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            u'Assuming --restrict-filenames since file system encoding '
            u'cannot encode all charactes. '
            u'Set the LC_ALL environment variable to fix this.')
        params['restrictfilenames'] = True

    self.fd = FileDownloader(self, self.params)

    if '%(stitle)s' in self.params['outtmpl']:
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    self._setup_opener()
176
def add_info_extractor(self, ie):
    """Register an InfoExtractor at the end of the extractor list.

    The extractor is also indexed by its ie_key() and told that this
    object is its downloader.
    """
    self._ies.append(ie)
    self._ies_instances.update({ie.ie_key(): ie})
    ie.set_downloader(self)
182
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key.

    When no instance is registered yet, one is created through the
    module-level get_info_extractor(), added to this downloader and
    returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
194
def add_default_info_extractors(self):
    """
    Register every InfoExtractor produced by gen_extractors(), in order.
    """
    default_extractors = gen_extractors()
    for extractor in default_extractors:
        self.add_info_extractor(extractor)
201
def add_post_processor(self, pp):
    """Append a PostProcessor to the chain and set this object as its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
206
def to_screen(self, message, skip_eol=False):
    """Print message to the screen file unless in quiet mode.

    When a 'logger' option is set the message goes to logger.debug()
    instead and the 'quiet' option is not consulted.  skip_eol
    suppresses the trailing newline.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if self.params.get('quiet', False):
        return
    terminator = u'' if skip_eol else u'\n'
    write_string(message + terminator, self._screen_file)
215
def to_stderr(self, message):
    """Print message to stderr.

    message must be a unicode string.  When a 'logger' option is set the
    message goes to logger.error() instead of sys.stderr.
    """
    assert isinstance(message, type(u''))
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    output = message + u'\n'
    # Decide whether to encode by looking at the stream that is actually
    # written to (sys.stderr), not at self._screen_file.
    # Python 2 lies about the mode of sys.stdout/sys.stderr
    if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
        output = output.encode(preferredencoding())
    sys.stderr.write(output)
226
def to_console_title(self, message):
    """Set the console title to message when the 'consoletitle' option is on.

    On Windows (with a console window) the kernel32 API is used; elsewhere
    an escape sequence is written to the screen file when TERM is set.
    """
    if not self.params.get('consoletitle', False):
        return
    has_windows_console = os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow()
    if has_windows_console:
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        write_string(u'\033]0;%s\007' % message, self._screen_file)
236
def save_console_title(self):
    """Push the current console title on the terminal's title stack.

    Does nothing unless the 'consoletitle' option is on and TERM is set.
    """
    enabled = self.params.get('consoletitle', False)
    if enabled and 'TERM' in os.environ:
        # Save the title on stack
        write_string(u'\033[22;0t', self._screen_file)
243
def restore_console_title(self):
    """Pop the console title from the terminal's title stack.

    Does nothing unless the 'consoletitle' option is on and TERM is set.
    """
    enabled = self.params.get('consoletitle', False)
    if enabled and 'TERM' in os.environ:
        # Restore the title from stack
        write_string(u'\033[23;0t', self._screen_file)
250
def __enter__(self):
    """Enter the context manager: save the console title, return self."""
    save_title = self.save_console_title
    save_title()
    return self
254
def __exit__(self, *args):
    """Leave the context manager.

    Restores the console title, then saves the cookie jar when a
    'cookiefile' option was given.  Exceptions are not suppressed.
    """
    self.restore_console_title()

    if self.params.get('cookiefile') is None:
        return
    self.cookiejar.save()
260
def fixed_template(self):
    """Return True when the output template contains no %(...)s field."""
    placeholder = re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl'])
    return placeholder is None
264
def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem appears.

    The message (if any) is printed to stderr.  In verbose mode a
    traceback is printed too: tb when given, otherwise the traceback of
    the exception being handled (plus the one stored in its exc_info
    attribute, if any), or the current call stack when no exception is
    being handled.

    Unless the 'ignoreerrors' option is set, a DownloadError carrying the
    message and the exception info is raised.  Otherwise the download
    return code is set to 1.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            exc_type, exc_value = sys.exc_info()[:2]
            if exc_type:  # if .trouble has been called from an except block
                parts = []
                if hasattr(exc_value, 'exc_info') and exc_value.exc_info[0]:
                    parts.append(u''.join(traceback.format_exception(*exc_value.exc_info)))
                parts.append(compat_str(traceback.format_exc()))
                tb = u''.join(parts)
            else:
                tb = u''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if not self.params.get('ignoreerrors', False):
        current = sys.exc_info()
        if current[0] and hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
            # Prefer the original exception info wrapped by the current exception
            exc_info = current[1].exc_info
        else:
            exc_info = current
        raise DownloadError(message, exc_info)
    self._download_retcode = 1
294
def report_warning(self, message):
    '''
    Print message to stderr prefixed with 'WARNING:'.
    The prefix is colored when stderr is a tty and the OS is not Windows.
    '''
    colorize = sys.stderr.isatty() and os.name != 'nt'
    header = u'\033[0;33mWARNING:\033[0m' if colorize else u'WARNING:'
    self.to_stderr(u'%s %s' % (header, message))
306
def report_error(self, message, tb=None):
    '''
    Prefix message with 'ERROR:' and hand it, with tb, to trouble().
    The prefix is colored red when stderr is a tty and the OS is not Windows.
    '''
    colorize = sys.stderr.isatty() and os.name != 'nt'
    header = u'\033[0;31mERROR:\033[0m' if colorize else u'ERROR:'
    self.trouble(u'%s %s' % (header, message), tb)
318
def report_writedescription(self, descfn):
    """Tell the user that the description is being written to descfn."""
    notice = u'[info] Writing video description to: ' + descfn
    self.to_screen(notice)
322
def report_writesubtitles(self, sub_filename):
    """Tell the user that the subtitles are being written to sub_filename."""
    notice = u'[info] Writing video subtitles to: ' + sub_filename
    self.to_screen(notice)
326
def report_writeinfojson(self, infofn):
    """Tell the user that the JSON metadata is being written to infofn."""
    notice = u'[info] Video description metadata as JSON to: ' + infofn
    self.to_screen(notice)
330
def report_writeannotations(self, annofn):
    """Tell the user that the annotations are being written to annofn."""
    notice = u'[info] Writing video annotations to: ' + annofn
    self.to_screen(notice)
334
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name was already fully downloaded.

    Falls back to a message without the file name when printing it
    raises UnicodeEncodeError.
    """
    try:
        notice = u'[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen(u'[download] The file has already been downloaded')
341
def increment_downloads(self):
    """Advance the counter used to number downloaded files."""
    self._num_downloads = self._num_downloads + 1
345
def prepare_filename(self, info_dict):
    """Build the output filename for info_dict from the 'outtmpl' option.

    Besides the info_dict fields, the template may use 'epoch' (current
    time) and 'autonumber' (download counter, zero padded to the
    'autonumber_size' option, default 5).  Every value is passed through
    sanitize_filename(); None becomes 'NA'.

    Returns the filename, or None after reporting an error when the
    template cannot be expanded.
    """
    try:
        fields = dict(info_dict)
        fields['epoch'] = int(time.time())

        width = self.params.get('autonumber_size')
        if width is None:
            width = 5
        fields['autonumber'] = (u'%0' + str(width) + u'd') % self._num_downloads

        if fields.get('playlist_index') is not None:
            fields['playlist_index'] = u'%05d' % fields['playlist_index']

        restricted = self.params.get('restrictfilenames')

        def _clean(key, value):
            text = u'NA' if value is None else compat_str(value)
            return sanitize_filename(text, restricted=restricted, is_id=(key == u'id'))

        safe_fields = dict((key, _clean(key, value))
                           for key, value in fields.items())

        # ~ in the template is expanded before the fields are filled in
        return os.path.expanduser(self.params['outtmpl']) % safe_fields
    except KeyError:
        self.report_error(u'Erroneous output template')
        return None
    except ValueError as err:
        self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
        return None
376
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded

    Otherwise returns a unicode string with the reason for skipping it.
    The reason carries no '[download] ' prefix; callers add it.

    Checks, in order: the 'matchtitle' and 'rejecttitle' options (only
    when the entry has a title), the 'daterange' option (only when the
    entry has an upload_date), the 'age_limit' option and the download
    archive.
    """

    if 'title' in info_dict:
        # The title can be missing when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
            return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
            return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'

    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange')
        if dateRange is None:
            # Also covers an explicit daterange=None option
            dateRange = DateRange()
        if date not in dateRange:
            return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)

    age_limit = self.params.get('age_limit')
    if age_limit is not None:
        video_age_limit = info_dict.get('age_limit') or 0
        if age_limit < video_age_limit:
            # The title may be absent here, so fall back to the id
            label = info_dict.get('title', info_dict.get('id', u'video'))
            return u'Skipping "%s" because it is age restricted' % label

    if self.in_download_archive(info_dict):
        return (u'%s has already been recorded in archive'
                % info_dict.get('title', info_dict.get('id', u'video')))
    return None
404
@staticmethod
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict every key of extra_info it does not have yet'''
    for key, value in extra_info.items():
        if key not in info_dict:
            info_dict[key] = value
410
def extract_info(self, url, download=True, ie_key=None, extra_info=None):
    '''
    Extract the information for url with the first suitable InfoExtractor
    and return the result of process_ie_result() for it.

    If 'download', also downloads the videos.
    ie_key restricts the extraction to the extractor with that key.
    extra_info is a dict containing the extra values to add to each result.

    Returns None when extraction was already finished by the extractor,
    when an error was reported, or when no extractor is suitable.
    '''
    if extra_info is None:
        extra_info = {}

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning(u'The program functionality for this site has been marked as broken, '
                                u'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_extra_info(ie_result, {
                'extractor': ie.IE_NAME,
                'webpage_url': url,
                'extractor_key': ie.ie_key(),
            })
            return self.process_ie_result(ie_result, download, extra_info)
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # The loop ended without a break: no extractor accepted the URL
        self.report_error(u'no suitable InfoExtractor: %s' % url)
459
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie (may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    extra_info holds values added to each video result that lacks them.
    Returns the resolved ie_result.

    Raises Exception for an unknown '_type'.
    """
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'playlist':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen(u'[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        n_all_entries = len(ie_result['entries'])
        playliststart = self.params.get('playliststart', 1)
        if playliststart is None:
            playliststart = 1
        playliststart -= 1  # the option is 1-based
        playlistend = self.params.get('playlistend', -1)

        if playlistend is None or playlistend == -1:
            entries = ie_result['entries'][playliststart:]
        else:
            entries = ie_result['entries'][playliststart:playlistend]

        n_entries = len(entries)

        self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                       (ie_result['extractor'], playlist, n_all_entries, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
            extra = {
                'playlist': playlist,
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen(u'[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        def _fixup(r):
            # Entries of the old list format inherit the extractor data
            self.add_extra_info(r, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'extractor_key': ie_result['extractor_key'],
            })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
539
def select_format(self, format_spec, available_formats):
    """Pick the format matching format_spec from available_formats.

    available_formats is ordered from worst to best.  format_spec is
    'best' (or None), 'worst', one of the extensions mp4/flv/webm/3gp
    (best format with that extension) or a format_id.

    Returns the selected format dict, or None when nothing matches or
    no formats are available.
    """
    if not available_formats:
        return None
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    extensions = [u'mp4', u'flv', u'webm', u'3gp']
    field = 'ext' if format_spec in extensions else 'format_id'
    matches = [f for f in available_formats if f[field] == format_spec]
    if matches:
        # Later entries are better, so take the last match
        return matches[-1]
    return None
555
def process_video_result(self, info_dict, download=True):
    """Select the format(s) of a single video result and process them.

    Fills in missing 'playlist', 'format_id', 'format' and 'ext' fields,
    applies the 'format_limit', 'prefer_free_formats' and 'format'
    options and, if 'download', calls process_info() once per selected
    format.

    Returns info_dict updated with the last selected format, or None
    when only the format list was printed ('listformats' option).
    Raises ExtractorError when no requested format is available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    # These extractors handle format selection themselves
    if info_dict['extractor'] in [u'youtube', u'Youku']:
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    # We check that all the formats have the format and format_id fields
    for i, fmt in enumerate(formats):
        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)
        if fmt.get('format') is None:
            fmt['format'] = u'{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=u' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url'])

    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        return

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))
    if self.params.get('prefer_free_formats'):
        def _free_formats_key(f):
            try:
                ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
            except ValueError:
                ext_ord = -1
            # Unknown dimensions sort first; None cannot be compared with
            # an int on Python 3, so it is replaced by -1.
            height = f.get('height')
            width = f.get('width')
            # We only compare the extension if they have the same height and width
            return (-1 if height is None else height,
                    -1 if width is None else width,
                    ext_ord)
        formats = sorted(formats, key=_free_formats_key)

    req_format = self.params.get('format', 'best')
    if req_format is None:
        req_format = 'best'
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        # We can accept formats requested in the format: 34/5/best, we pick
        # the first that is available, starting from left
        for rf in req_format.split('/'):
            selected_format = self.select_format(rf, formats)
            if selected_format is not None:
                formats_to_download = [selected_format]
                break
    if not formats_to_download:
        raise ExtractorError(u'requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
640
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Increments the download counter, normalises the title fields,
    applies the match filters and the 'max_downloads' option, prints the
    forced fields and, unless simulating, writes the requested side
    files (description, annotations, subtitles, info JSON, thumbnail),
    downloads the video, runs the post-processors and records the video
    in the download archive.

    Raises MaxDownloadsReached when the download counter exceeds
    'max_downloads' and UnavailableVideoError when the download fails
    with an OSError/IOError.
    """

    assert info_dict.get('_type', 'video') == 'video'
    # We increment the download count here to match the previous behaviour.
    self.increment_downloads()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + u'...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen(u'[download] ' + reason)
        return

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads > int(max_downloads):
            raise MaxDownloadsReached()

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        compat_print(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        compat_print(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        compat_print(info_dict['url'] + info_dict.get('play_path', u''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        compat_print(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        compat_print(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        compat_print(filename)
    if self.params.get('forceformat', False):
        compat_print(info_dict['format'])
    if self.params.get('forcejson', False):
        compat_print(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error(u'unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        try:
            descfn = filename + u'.description'
            self.report_writedescription(descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
        except (KeyError, TypeError):
            self.report_warning(u'There\'s no description to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write description file ' + descfn)
            return

    if self.params.get('writeannotations', False):
        try:
            annofn = filename + u'.annotations.xml'
            self.report_writeannotations(annofn)
            with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                annofile.write(info_dict['annotations'])
        except (KeyError, TypeError):
            self.report_warning(u'There are no annotations to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write annotations file: ' + annofn)
            return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            if sub is None:
                continue
            # Computed outside the try block so that the error message
            # below can always name the file.
            sub_filename = subtitles_filename(filename, sub_lang, sub_format)
            try:
                self.report_writesubtitles(sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                    subfile.write(sub)
            except (OSError, IOError):
                self.report_error(u'Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + u'.info.json'
        self.report_writeinfojson(infofn)
        try:
            # 'urlhandle' is left out of the JSON file
            json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ['urlhandle'])
            write_json_file(json_info_dict, encodeFilename(infofn))
        except (OSError, IOError):
            self.report_error(u'Cannot write metadata to JSON file ' + infofn)
            return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
            thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
            self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                           (info_dict['extractor'], info_dict['id']))
            try:
                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                try:
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                finally:
                    # Release the connection even when writing fails
                    uf.close()
                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_filename))
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_warning(u'Unable to download thumbnail "%s": %s' %
                                    (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            success = True
        else:
            try:
                success = self.fd._do_download(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error(u'postprocessing: %s' % str(err))
                return

    self.record_download_archive(info_dict)
798
def download(self, url_list):
    """Download every URL in url_list and return the download return code.

    Raises SameFileError when several URLs are given together with an
    output template that has no fields.  MaxDownloadsReached is
    re-raised after informing the user.
    """
    several_urls = len(url_list) > 1
    if several_urls and self.fixed_template():
        raise SameFileError(self.params['outtmpl'])

    for url in url_list:
        try:
            # extract_info() downloads the videos as well
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
815
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Each postprocessor receives a copy of ie_info extended with
    'filepath'.  The downloaded file is removed when the postprocessors'
    combined wish is False and the 'keepvideo' option is not set.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    for pp in self._pps:
        try:
            keep_video_wish, _new_info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
            continue
        if keep_video_wish is None:
            continue
        if keep_video_wish:
            # A wish to keep the file wins over earlier wishes
            keep_video = keep_video_wish
        elif keep_video is None:
            # No clear decision yet, take this postprocessor's wish
            keep_video = keep_video_wish

    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning(u'Unable to remove downloaded video file')
838
def in_download_archive(self, info_dict):
    """Return True when info_dict's video is listed in the download archive.

    The archive is the file named by the 'download_archive' option, one
    "<extractor> <id>" entry per line.  Returns False when the option is
    not set, when info_dict lacks an id or extractor information, or
    when the archive file does not exist yet.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return False
    if 'id' not in info_dict:
        return False  # Incomplete video information

    # 'extractor_key' is what extract_info() stores; 'extractor_id' is
    # still honoured, and 'ie_key' is the key of an entry in a playlist.
    extractor = info_dict.get('extractor_key')
    if extractor is None:
        extractor = info_dict.get('extractor_id')
    if extractor is None:
        extractor = info_dict.get('ie_key')
    if extractor is None:
        return False  # Incomplete video information

    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    wanted = set([extractor.lower() + u' ' + info_dict['id']])
    extractor_name = info_dict.get('extractor')
    if extractor_name is not None:
        # Also accept entries recorded as "<extractor name> <id>"
        wanted.add(extractor_name + u' ' + info_dict['id'])

    try:
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() in wanted:
                    return True
    except IOError as ioe:
        # A missing archive file just means nothing was recorded yet
        if ioe.errno != errno.ENOENT:
            raise
    return False
862
def record_download_archive(self, info_dict):
    """Append info_dict's video to the download archive.

    Does nothing when the 'download_archive' option is not set.  The
    entry is "<extractor> <id>", with the extractor key (or, failing
    that, the extractor name) in lower case so that it compares equal to
    the lowercased id checked by in_download_archive().
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return
    extractor = info_dict.get('extractor_key')
    if extractor is None:
        extractor = info_dict['extractor']
    vid_id = extractor.lower() + u' ' + info_dict['id']
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + u'\n')
870
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a printable resolution for the given format dictionary.

    An explicit '_resolution' entry is returned as is.  Otherwise the
    result is 'WIDTHxHEIGHT' when both dimensions are known, 'HEIGHTp'
    when only the height is known, and default when the height is missing.
    """
    explicit = format.get('_resolution')
    if explicit is not None:
        return explicit

    height = format.get('height')
    if height is None:
        return default

    width = format.get('width')
    if width is None:
        return u'%sp' % height
    return u'%sx%s' % (width, height)
883
def list_formats(self, info_dict):
    """Print a table of the formats available for a video.

    The formats are taken from info_dict['formats']; when that key is
    absent, info_dict itself is treated as the only format.  One row is
    written per format (format code, extension, resolution, note), the
    first row being tagged '(worst)' and the last one '(best)' when there
    is more than one.  The table is emitted through self.to_screen().

    An empty 'formats' list produces a table with only the header row.
    """
    def format_note(fdict):
        """Build the text of the 'note' column for one format."""
        res = u''
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + u' '
        if fdict.get('vcodec') is not None:
            res += u'%-5s' % fdict['vcodec']
        elif fdict.get('vbr') is not None:
            # Bitrate known but codec unknown: use a generic label
            res += u'video'
        if fdict.get('vbr') is not None:
            res += u'@%4dk' % fdict['vbr']
        if fdict.get('acodec') is not None:
            if res:
                res += u', '
            res += u'%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += u', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += u'@%3dk' % fdict['abr']
        if fdict.get('filesize') is not None:
            if res:
                res += u', '
            res += format_bytes(fdict['filesize'])
        return res

    def line(format, idlen=20):
        """Render one table row, padding the id column to idlen + 1."""
        return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            format_note(format),
        ))

    formats = info_dict.get('formats', [info_dict])
    # Seed the candidates with the header width so that an empty format
    # list does not make max() fail on an empty sequence
    idlen = max([len(u'format code')] +
                [len(f['format_id']) for f in formats])
    formats_s = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

    header_line = line({
        'format_id': u'format code', 'ext': u'extension',
        '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
    self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, u"\n".join(formats_s)))
932
def urlopen(self, req):
    """Open req with this object's opener and return the result."""
    opener = self._opener
    return opener.open(req)
936
def print_debug_header(self):
    """Write debugging information when the 'verbose' parameter is set.

    Outputs the program version, the short git revision of the directory
    containing this file (only if git reports one), the Python version
    with the platform name, and the proxies of the opener's handlers.
    Returns immediately when 'verbose' is not set.
    """
    if not self.params.get('verbose'):
        return
    write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out = sp.communicate()[0]
        out = out.decode().strip()
        # Only accept output that consists entirely of hex digits
        if re.match(r'[0-9a-f]+$', out):
            write_string(u'[debug] Git HEAD: ' + out + u'\n')
    except Exception:
        # The git lookup is best effort (git missing, not a checkout, ...);
        # KeyboardInterrupt and SystemExit are deliberately not swallowed.
        # sys.exc_clear only exists on Python 2.
        exc_clear = getattr(sys, 'exc_clear', None)
        if exc_clear is not None:
            exc_clear()
    write_string(u'[debug] Python version %s - %s' %
                 (platform.python_version(), platform_name()) + u'\n')

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
963
def _setup_opener(self, timeout=300):
    """Create the URL opener and store it in self._opener.

    Reads the 'cookiefile', 'proxy' and 'nocheckcertificate' parameters,
    sets self.cookiejar, installs the opener globally and sets the default
    socket timeout to timeout.
    """
    cookie_path = self.params.get('cookiefile')
    proxy_opt = self.params.get('proxy')

    if cookie_path is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(cookie_path)
        # Only load cookies when the file is actually readable
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    if proxy_opt is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_opt == '':
        # An empty proxy option results in an empty proxy mapping
        proxies = {}
    else:
        proxies = {'http': proxy_opt, 'https': proxy_opt}

    proxy_handler = compat_urllib_request.ProxyHandler(proxies)
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False))
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener

    # TODO remove this global modification
    compat_urllib_request.install_opener(opener)
    socket.setdefaulttimeout(timeout)
|