youtube_dl/YoutubeDL.py



    1 #!/usr/bin/env python
    2 # coding: utf-8
    3 
    4 from __future__ import absolute_import, unicode_literals
    5 
    6 import collections
    7 import contextlib
    8 import copy
    9 import datetime
   10 import errno
   11 import fileinput
   12 import io
   13 import itertools
   14 import json
   15 import locale
   16 import operator
   17 import os
   18 import platform
   19 import re
   20 import shutil
   21 import subprocess
   22 import socket
   23 import sys
   24 import time
   25 import tokenize
   26 import traceback
   27 
   28 from .compat import (
   29     compat_basestring,
   30     compat_cookiejar,
   31     compat_expanduser,
   32     compat_get_terminal_size,
   33     compat_http_client,
   34     compat_kwargs,
   35     compat_os_name,
   36     compat_str,
   37     compat_tokenize_tokenize,
   38     compat_urllib_error,
   39     compat_urllib_request,
   40     compat_urllib_request_DataHandler,
   41 )
   42 from .utils import (
   43     age_restricted,
   44     args_to_str,
   45     ContentTooShortError,
   46     date_from_str,
   47     DateRange,
   48     DEFAULT_OUTTMPL,
   49     determine_ext,
   50     determine_protocol,
   51     DownloadError,
   52     encode_compat_str,
   53     encodeFilename,
   54     error_to_compat_str,
   55     ExtractorError,
   56     format_bytes,
   57     formatSeconds,
   58     locked_file,
   59     make_HTTPS_handler,
   60     MaxDownloadsReached,
   61     PagedList,
   62     parse_filesize,
   63     PerRequestProxyHandler,
   64     platform_name,
   65     PostProcessingError,
   66     preferredencoding,
   67     prepend_extension,
   68     register_socks_protocols,
   69     render_table,
   70     replace_extension,
   71     SameFileError,
   72     sanitize_filename,
   73     sanitize_path,
   74     sanitize_url,
   75     sanitized_Request,
   76     std_headers,
   77     subtitles_filename,
   78     UnavailableVideoError,
   79     url_basename,
   80     version_tuple,
   81     write_json_file,
   82     write_string,
   83     YoutubeDLCookieProcessor,
   84     YoutubeDLHandler,
   85 )
   86 from .cache import Cache
   87 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
   88 from .downloader import get_suitable_downloader
   89 from .downloader.rtmp import rtmpdump_version
   90 from .postprocessor import (
   91     FFmpegFixupM3u8PP,
   92     FFmpegFixupM4aPP,
   93     FFmpegFixupStretchedPP,
   94     FFmpegMergerPP,
   95     FFmpegPostProcessor,
   96     get_postprocessor,
   97 )
   98 from .version import __version__
   99 
  100 if compat_os_name == 'nt':
  101     import ctypes
  102 
  103 
  104 class YoutubeDL(object):
  105     """YoutubeDL class.
  106 
  107     YoutubeDL objects are the ones responsible of downloading the
  108     actual video file and writing it to disk if the user has requested
  109     it, among some other tasks. In most cases there should be one per
  110     program. As, given a video URL, the downloader doesn't know how to
  111     extract all the needed information, task that InfoExtractors do, it
  112     has to pass the URL to one of them.
  113 
  114     For this, YoutubeDL objects have a method that allows
  115     InfoExtractors to be registered in a given order. When it is passed
  116     a URL, the YoutubeDL object handles it to the first InfoExtractor it
  117     finds that reports being able to handle it. The InfoExtractor extracts
  118     all the information about the video or videos the URL refers to, and
  119     YoutubeDL process the extracted information, possibly using a File
  120     Downloader to download the video.
  121 
  122     YoutubeDL objects accept a lot of parameters. In order not to saturate
  123     the object constructor with arguments, it receives a dictionary of
  124     options instead. These options are available through the params
  125     attribute for the InfoExtractors to use. The YoutubeDL also
  126     registers itself as the downloader in charge for the InfoExtractors
  127     that are added to it, so this is a "mutual registration".
  128 
  129     Available options:
  130 
  131     username:          Username for authentication purposes.
  132     password:          Password for authentication purposes.
  133     videopassword:     Password for accessing a video.
  134     ap_mso:            Adobe Pass multiple-system operator identifier.
  135     ap_username:       Multiple-system operator account username.
  136     ap_password:       Multiple-system operator account password.
  137     usenetrc:          Use netrc for authentication instead.
  138     verbose:           Print additional info to stdout.
  139     quiet:             Do not print messages to stdout.
  140     no_warnings:       Do not print out anything for warnings.
  141     forceurl:          Force printing final URL.
  142     forcetitle:        Force printing title.
  143     forceid:           Force printing ID.
  144     forcethumbnail:    Force printing thumbnail URL.
  145     forcedescription:  Force printing description.
  146     forcefilename:     Force printing final filename.
  147     forceduration:     Force printing duration.
  148     forcejson:         Force printing info_dict as JSON.
  149     dump_single_json:  Force printing the info_dict of the whole playlist
  150                        (or video) as a single JSON line.
  151     simulate:          Do not download the video files.
  152     format:            Video format code. See options.py for more information.
  153     outtmpl:           Template for output names.
  154     restrictfilenames: Do not allow "&" and spaces in file names
  155     ignoreerrors:      Do not stop on download errors.
  156     force_generic_extractor: Force downloader to use the generic extractor
  157     nooverwrites:      Prevent overwriting files.
  158     playliststart:     Playlist item to start at.
  159     playlistend:       Playlist item to end at.
  160     playlist_items:    Specific indices of playlist to download.
  161     playlistreverse:   Download playlist items in reverse order.
  162     matchtitle:        Download only matching titles.
  163     rejecttitle:       Reject downloads for matching titles.
  164     logger:            Log messages to a logging.Logger instance.
  165     logtostderr:       Log messages to stderr instead of stdout.
  166     writedescription:  Write the video description to a .description file
  167     writeinfojson:     Write the video description to a .info.json file
  168     writeannotations:  Write the video annotations to a .annotations.xml file
  169     writethumbnail:    Write the thumbnail image to a file
  170     write_all_thumbnails:  Write all thumbnail formats to files
  171     writesubtitles:    Write the video subtitles to a file
  172     writeautomaticsub: Write the automatically generated subtitles to a file
  173     allsubtitles:      Downloads all the subtitles of the video
  174                        (requires writesubtitles or writeautomaticsub)
  175     listsubtitles:     Lists all available subtitles for the video
  176     subtitlesformat:   The format code for subtitles
  177     subtitleslangs:    List of languages of the subtitles to download
  178     keepvideo:         Keep the video file after post-processing
  179     daterange:         A DateRange object, download only if the upload_date is in the range.
  180     skip_download:     Skip the actual download of the video file
  181     cachedir:          Location of the cache files in the filesystem.
  182                        False to disable filesystem cache.
  183     noplaylist:        Download single video instead of a playlist if in doubt.
  184     age_limit:         An integer representing the user's age in years.
  185                        Unsuitable videos for the given age are skipped.
  186     min_views:         An integer representing the minimum view count the video
  187                        must have in order to not be skipped.
  188                        Videos without view count information are always
  189                        downloaded. None for no limit.
  190     max_views:         An integer representing the maximum view count.
  191                        Videos that are more popular than that are not
  192                        downloaded.
  193                        Videos without view count information are always
  194                        downloaded. None for no limit.
  195     download_archive:  File name of a file where all downloads are recorded.
  196                        Videos already present in the file are not downloaded
  197                        again.
  198     cookiefile:        File name where cookies should be read from and dumped to.
  199     nocheckcertificate:Do not verify SSL certificates
  200     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
  201                        At the moment, this is only supported by YouTube.
  202     proxy:             URL of the proxy server to use
  203     geo_verification_proxy:  URL of the proxy to use for IP address verification
  204                        on geo-restricted sites. (Experimental)
  205     socket_timeout:    Time to wait for unresponsive hosts, in seconds
  206     bidi_workaround:   Work around buggy terminals without bidirectional text
  207                        support, using fridibi
  208     debug_printtraffic:Print out sent and received HTTP traffic
  209     include_ads:       Download ads as well
  210     default_search:    Prepend this string if an input url is not valid.
  211                        'auto' for elaborate guessing
  212     encoding:          Use this encoding instead of the system-specified.
  213     extract_flat:      Do not resolve URLs, return the immediate result.
  214                        Pass in 'in_playlist' to only show this behavior for
  215                        playlist items.
  216     postprocessors:    A list of dictionaries, each with an entry
  217                        * key:  The name of the postprocessor. See
  218                                youtube_dl/postprocessor/__init__.py for a list.
  219                        as well as any further keyword arguments for the
  220                        postprocessor.
  221     progress_hooks:    A list of functions that get called on download
  222                        progress, with a dictionary with the entries
  223                        * status: One of "downloading", "error", or "finished".
  224                                  Check this first and ignore unknown values.
  225 
  226                        If status is one of "downloading", or "finished", the
  227                        following properties may also be present:
  228                        * filename: The final filename (always present)
  229                        * tmpfilename: The filename we're currently writing to
  230                        * downloaded_bytes: Bytes on disk
  231                        * total_bytes: Size of the whole file, None if unknown
  232                        * total_bytes_estimate: Guess of the eventual file size,
  233                                                None if unavailable.
  234                        * elapsed: The number of seconds since download started.
  235                        * eta: The estimated time in seconds, None if unknown
  236                        * speed: The download speed in bytes/second, None if
  237                                 unknown
  238                        * fragment_index: The counter of the currently
  239                                          downloaded video fragment.
  240                        * fragment_count: The number of fragments (= individual
  241                                          files that will be merged)
  242 
  243                        Progress hooks are guaranteed to be called at least once
  244                        (with status "finished") if the download is successful.
  245     merge_output_format: Extension to use when merging formats.
  246     fixup:             Automatically correct known faults of the file.
  247                        One of:
  248                        - "never": do nothing
  249                        - "warn": only emit a warning
  250                        - "detect_or_warn": check whether we can do anything
  251                                            about it, warn otherwise (default)
  252     source_address:    (Experimental) Client-side IP address to bind to.
  253     call_home:         Boolean, true iff we are allowed to contact the
  254                        youtube-dl servers for debugging.
  255     sleep_interval:    Number of seconds to sleep before each download when
  256                        used alone or a lower bound of a range for randomized
  257                        sleep before each download (minimum possible number
  258                        of seconds to sleep) when used along with
  259                        max_sleep_interval.
  260     max_sleep_interval:Upper bound of a range for randomized sleep before each
  261                        download (maximum possible number of seconds to sleep).
  262                        Must only be used along with sleep_interval.
  263                        Actual sleep time will be a random float from range
  264                        [sleep_interval; max_sleep_interval].
  265     listformats:       Print an overview of available video formats and exit.
  266     list_thumbnails:   Print a table of all thumbnails and exit.
  267     match_filter:      A function that gets called with the info_dict of
  268                        every video.
  269                        If it returns a message, the video is ignored.
  270                        If it returns None, the video is downloaded.
  271                        match_filter_func in utils.py is one example for this.
  272     no_color:          Do not emit color codes in output.
  273 
  274     The following options determine which downloader is picked:
  275     external_downloader: Executable of the external downloader to call.
  276                        None or unset for standard (built-in) downloader.
  277     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
  278                        if True, otherwise use ffmpeg/avconv if False, otherwise
  279                        use downloader suggested by extractor if None.
  280 
  281     The following parameters are not used by YoutubeDL itself, they are used by
  282     the downloader (see youtube_dl/downloader/common.py):
  283     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
  284     noresizebuffer, retries, continuedl, noprogress, consoletitle,
  285     xattr_set_filesize, external_downloader_args, hls_use_mpegts.
  286 
  287     The following options are used by the post processors:
  288     prefer_ffmpeg:     If True, use ffmpeg instead of avconv if both are available,
  289                        otherwise prefer avconv.
  290     postprocessor_args: A list of additional command-line arguments for the
  291                         postprocessor.
  292     """
  293 
  294     params = None
  295     _ies = []
  296     _pps = []
  297     _download_retcode = None
  298     _num_downloads = None
  299     _screen_file = None
  300 
  301     def __init__(self, params=None, auto_init=True):
  302         """Create a FileDownloader object with the given options."""
  303         if params is None:
  304             params = {}
  305         self._ies = []
  306         self._ies_instances = {}
  307         self._pps = []
  308         self._progress_hooks = []
  309         self._download_retcode = 0
  310         self._num_downloads = 0
  311         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
  312         self._err_file = sys.stderr
  313         self.params = {
  314             # Default parameters
  315             'nocheckcertificate': False,
  316         }
  317         self.params.update(params)
  318         self.cache = Cache(self)
  319 
  320         if self.params.get('cn_verification_proxy') is not None:
  321             self.report_warning('--cn-verification-proxy is deprecated. Use --geo-verification-proxy instead.')
  322             if self.params.get('geo_verification_proxy') is None:
  323                 self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
  324 
  325         if params.get('bidi_workaround', False):
  326             try:
  327                 import pty
  328                 master, slave = pty.openpty()
  329                 width = compat_get_terminal_size().columns
  330                 if width is None:
  331                     width_args = []
  332                 else:
  333                     width_args = ['-w', str(width)]
  334                 sp_kwargs = dict(
  335                     stdin=subprocess.PIPE,
  336                     stdout=slave,
  337                     stderr=self._err_file)
  338                 try:
  339                     self._output_process = subprocess.Popen(
  340                         ['bidiv'] + width_args, **sp_kwargs
  341                     )
  342                 except OSError:
  343                     self._output_process = subprocess.Popen(
  344                         ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
  345                 self._output_channel = os.fdopen(master, 'rb')
  346             except OSError as ose:
  347                 if ose.errno == errno.ENOENT:
  348                     self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
  349                 else:
  350                     raise
  351 
  352         if (sys.version_info >= (3,) and sys.platform != 'win32' and
  353                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
  354                 not params.get('restrictfilenames', False)):
  355             # On Python 3, the Unicode filesystem API will throw errors (#1474)
  356             self.report_warning(
  357                 'Assuming --restrict-filenames since file system encoding '
  358                 'cannot encode all characters. '
  359                 'Set the LC_ALL environment variable to fix this.')
  360             self.params['restrictfilenames'] = True
  361 
  362         if isinstance(params.get('outtmpl'), bytes):
  363             self.report_warning(
  364                 'Parameter outtmpl is bytes, but should be a unicode string. '
  365                 'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
  366 
  367         self._setup_opener()
  368 
  369         if auto_init:
  370             self.print_debug_header()
  371             self.add_default_info_extractors()
  372 
  373         for pp_def_raw in self.params.get('postprocessors', []):
  374             pp_class = get_postprocessor(pp_def_raw['key'])
  375             pp_def = dict(pp_def_raw)
  376             del pp_def['key']
  377             pp = pp_class(self, **compat_kwargs(pp_def))
  378             self.add_post_processor(pp)
  379 
  380         for ph in self.params.get('progress_hooks', []):
  381             self.add_progress_hook(ph)
  382 
  383         register_socks_protocols()
  384 
  385     def warn_if_short_id(self, argv):
  386         # short YouTube ID starting with dash?
  387         idxs = [
  388             i for i, a in enumerate(argv)
  389             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
  390         if idxs:
  391             correct_argv = (
  392                 ['youtube-dl'] +
  393                 [a for i, a in enumerate(argv) if i not in idxs] +
  394                 ['--'] + [argv[i] for i in idxs]
  395             )
  396             self.report_warning(
  397                 'Long argument string detected. '
  398                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
  399                 args_to_str(correct_argv))
  400 
  401     def add_info_extractor(self, ie):
  402         """Add an InfoExtractor object to the end of the list."""
  403         self._ies.append(ie)
  404         if not isinstance(ie, type):
  405             self._ies_instances[ie.ie_key()] = ie
  406             ie.set_downloader(self)
  407 
  408     def get_info_extractor(self, ie_key):
  409         """
  410         Get an instance of an IE with name ie_key, it will try to get one from
  411         the _ies list, if there's no instance it will create a new one and add
  412         it to the extractor list.
  413         """
  414         ie = self._ies_instances.get(ie_key)
  415         if ie is None:
  416             ie = get_info_extractor(ie_key)()
  417             self.add_info_extractor(ie)
  418         return ie
  419 
  420     def add_default_info_extractors(self):
  421         """
  422         Add the InfoExtractors returned by gen_extractors to the end of the list
  423         """
  424         for ie in gen_extractor_classes():
  425             self.add_info_extractor(ie)
  426 
  427     def add_post_processor(self, pp):
  428         """Add a PostProcessor object to the end of the chain."""
  429         self._pps.append(pp)
  430         pp.set_downloader(self)
  431 
  432     def add_progress_hook(self, ph):
  433         """Add the progress hook (currently only for the file downloader)"""
  434         self._progress_hooks.append(ph)
  435 
  436     def _bidi_workaround(self, message):
  437         if not hasattr(self, '_output_channel'):
  438             return message
  439 
  440         assert hasattr(self, '_output_process')
  441         assert isinstance(message, compat_str)
  442         line_count = message.count('\n') + 1
  443         self._output_process.stdin.write((message + '\n').encode('utf-8'))
  444         self._output_process.stdin.flush()
  445         res = ''.join(self._output_channel.readline().decode('utf-8')
  446                       for _ in range(line_count))
  447         return res[:-len('\n')]
  448 
  449     def to_screen(self, message, skip_eol=False):
  450         """Print message to stdout if not in quiet mode."""
  451         return self.to_stdout(message, skip_eol, check_quiet=True)
  452 
  453     def _write_string(self, s, out=None):
  454         write_string(s, out=out, encoding=self.params.get('encoding'))
  455 
  456     def to_stdout(self, message, skip_eol=False, check_quiet=False):
  457         """Print message to stdout if not in quiet mode."""
  458         if self.params.get('logger'):
  459             self.params['logger'].debug(message)
  460         elif not check_quiet or not self.params.get('quiet', False):
  461             message = self._bidi_workaround(message)
  462             terminator = ['\n', ''][skip_eol]
  463             output = message + terminator
  464 
  465             self._write_string(output, self._screen_file)
  466 
  467     def to_stderr(self, message):
  468         """Print message to stderr."""
  469         assert isinstance(message, compat_str)
  470         if self.params.get('logger'):
  471             self.params['logger'].error(message)
  472         else:
  473             message = self._bidi_workaround(message)
  474             output = message + '\n'
  475             self._write_string(output, self._err_file)
  476 
  477     def to_console_title(self, message):
  478         if not self.params.get('consoletitle', False):
  479             return
  480         if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
  481             # c_wchar_p() might not be necessary if `message` is
  482             # already of type unicode()
  483             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
  484         elif 'TERM' in os.environ:
  485             self._write_string('\033]0;%s\007' % message, self._screen_file)
  486 
  487     def save_console_title(self):
  488         if not self.params.get('consoletitle', False):
  489             return
  490         if 'TERM' in os.environ:
  491             # Save the title on stack
  492             self._write_string('\033[22;0t', self._screen_file)
  493 
  494     def restore_console_title(self):
  495         if not self.params.get('consoletitle', False):
  496             return
  497         if 'TERM' in os.environ:
  498             # Restore the title from stack
  499             self._write_string('\033[23;0t', self._screen_file)
  500 
  501     def __enter__(self):
  502         self.save_console_title()
  503         return self
  504 
  505     def __exit__(self, *args):
  506         self.restore_console_title()
  507 
  508         if self.params.get('cookiefile') is not None:
  509             self.cookiejar.save()
  510 
  511     def trouble(self, message=None, tb=None):
  512         """Determine action to take when a download problem appears.
  513 
  514         Depending on if the downloader has been configured to ignore
  515         download errors or not, this method may throw an exception or
  516         not when errors are found, after printing the message.
  517 
  518         tb, if given, is additional traceback information.
  519         """
  520         if message is not None:
  521             self.to_stderr(message)
  522         if self.params.get('verbose'):
  523             if tb is None:
  524                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
  525                     tb = ''
  526                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
  527                         tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
  528                     tb += encode_compat_str(traceback.format_exc())
  529                 else:
  530                     tb_data = traceback.format_list(traceback.extract_stack())
  531                     tb = ''.join(tb_data)
  532             self.to_stderr(tb)
  533         if not self.params.get('ignoreerrors', False):
  534             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
  535                 exc_info = sys.exc_info()[1].exc_info
  536             else:
  537                 exc_info = sys.exc_info()
  538             raise DownloadError(message, exc_info)
  539         self._download_retcode = 1
  540 
  541     def report_warning(self, message):
  542         '''
  543         Print the message to stderr, it will be prefixed with 'WARNING:'
  544         If stderr is a tty file the 'WARNING:' will be colored
  545         '''
  546         if self.params.get('logger') is not None:
  547             self.params['logger'].warning(message)
  548         else:
  549             if self.params.get('no_warnings'):
  550                 return
  551             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
  552                 _msg_header = '\033[0;33mWARNING:\033[0m'
  553             else:
  554                 _msg_header = 'WARNING:'
  555             warning_message = '%s %s' % (_msg_header, message)
  556             self.to_stderr(warning_message)
  557 
  558     def report_error(self, message, tb=None):
  559         '''
  560         Do the same as trouble, but prefixes the message with 'ERROR:', colored
  561         in red if stderr is a tty file.
  562         '''
  563         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
  564             _msg_header = '\033[0;31mERROR:\033[0m'
  565         else:
  566             _msg_header = 'ERROR:'
  567         error_message = '%s %s' % (_msg_header, message)
  568         self.trouble(error_message, tb)
  569 
  570     def report_file_already_downloaded(self, file_name):
  571         """Report file has already been fully downloaded."""
  572         try:
  573             self.to_screen('[download] %s has already been downloaded' % file_name)
  574         except UnicodeEncodeError:
  575             self.to_screen('[download] The file has already been downloaded')
  576 
  577     def prepare_filename(self, info_dict):
  578         """Generate the output filename."""
  579         try:
  580             template_dict = dict(info_dict)
  581 
  582             template_dict['epoch'] = int(time.time())
  583             autonumber_size = self.params.get('autonumber_size')
  584             if autonumber_size is None:
  585                 autonumber_size = 5
  586             autonumber_templ = '%0' + str(autonumber_size) + 'd'
  587             template_dict['autonumber'] = autonumber_templ % self._num_downloads
  588             if template_dict.get('playlist_index') is not None:
  589                 template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
  590             if template_dict.get('resolution') is None:
  591                 if template_dict.get('width') and template_dict.get('height'):
  592                     template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
  593                 elif template_dict.get('height'):
  594                     template_dict['resolution'] = '%sp' % template_dict['height']
  595                 elif template_dict.get('width'):
  596                     template_dict['resolution'] = '%dx?' % template_dict['width']
  597 
  598             sanitize = lambda k, v: sanitize_filename(
  599                 compat_str(v),
  600                 restricted=self.params.get('restrictfilenames'),
  601                 is_id=(k == 'id'))
  602             template_dict = dict((k, sanitize(k, v))
  603                                  for k, v in template_dict.items()
  604                                  if v is not None and not isinstance(v, (list, tuple, dict)))
  605             template_dict = collections.defaultdict(lambda: 'NA', template_dict)
  606 
  607             outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
  608             tmpl = compat_expanduser(outtmpl)
  609             filename = tmpl % template_dict
  610             # Temporary fix for #4787
  611             # 'Treat' all problem characters by passing filename through preferredencoding
  612             # to workaround encoding issues with subprocess on python2 @ Windows
  613             if sys.version_info < (3, 0) and sys.platform == 'win32':
  614                 filename = encodeFilename(filename, True).decode(preferredencoding())
  615             return sanitize_path(filename)
  616         except ValueError as err:
  617             self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
  618             return None
  619 
  620     def _match_entry(self, info_dict, incomplete):
  621         """ Returns None iff the file should be downloaded """
  622 
  623         video_title = info_dict.get('title', info_dict.get('id', 'video'))
  624         if 'title' in info_dict:
  625             # This can happen when we're just evaluating the playlist
  626             title = info_dict['title']
  627             matchtitle = self.params.get('matchtitle', False)
  628             if matchtitle:
  629                 if not re.search(matchtitle, title, re.IGNORECASE):
  630                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
  631             rejecttitle = self.params.get('rejecttitle', False)
  632             if rejecttitle:
  633                 if re.search(rejecttitle, title, re.IGNORECASE):
  634                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
  635         date = info_dict.get('upload_date')
  636         if date is not None:
  637             dateRange = self.params.get('daterange', DateRange())
  638             if date not in dateRange:
  639                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
  640         view_count = info_dict.get('view_count')
  641         if view_count is not None:
  642             min_views = self.params.get('min_views')
  643             if min_views is not None and view_count < min_views:
  644                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
  645             max_views = self.params.get('max_views')
  646             if max_views is not None and view_count > max_views:
  647                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
  648         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
  649             return 'Skipping "%s" because it is age restricted' % video_title
  650         if self.in_download_archive(info_dict):
  651             return '%s has already been recorded in archive' % video_title
  652 
  653         if not incomplete:
  654             match_filter = self.params.get('match_filter')
  655             if match_filter is not None:
  656                 ret = match_filter(info_dict)
  657                 if ret is not None:
  658                     return ret
  659 
  660         return None
  661 
  662     @staticmethod
  663     def add_extra_info(info_dict, extra_info):
  664         '''Set the keys from extra_info in info dict if they are missing'''
  665         for key, value in extra_info.items():
  666             info_dict.setdefault(key, value)
  667 
  668     def extract_info(self, url, download=True, ie_key=None, extra_info={},
  669                      process=True, force_generic_extractor=False):
  670         '''
  671         Returns a list with a dictionary for each video we find.
  672         If 'download', also downloads the videos.
  673         extra_info is a dict containing the extra values to add to each result
  674         '''
  675 
  676         if not ie_key and force_generic_extractor:
  677             ie_key = 'Generic'
  678 
  679         if ie_key:
  680             ies = [self.get_info_extractor(ie_key)]
  681         else:
  682             ies = self._ies
  683 
  684         for ie in ies:
  685             if not ie.suitable(url):
  686                 continue
  687 
  688             ie = self.get_info_extractor(ie.ie_key())
  689             if not ie.working():
  690                 self.report_warning('The program functionality for this site has been marked as broken, '
  691                                     'and will probably not work.')
  692 
  693             try:
  694                 ie_result = ie.extract(url)
  695                 if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
  696                     break
  697                 if isinstance(ie_result, list):
  698                     # Backwards compatibility: old IE result format
  699                     ie_result = {
  700                         '_type': 'compat_list',
  701                         'entries': ie_result,
  702                     }
  703                 self.add_default_extra_info(ie_result, ie, url)
  704                 if process:
  705                     return self.process_ie_result(ie_result, download, extra_info)
  706                 else:
  707                     return ie_result
  708             except ExtractorError as e:  # An error we somewhat expected
  709                 self.report_error(compat_str(e), e.format_traceback())
  710                 break
  711             except MaxDownloadsReached:
  712                 raise
  713             except Exception as e:
  714                 if self.params.get('ignoreerrors', False):
  715                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
  716                     break
  717                 else:
  718                     raise
  719         else:
  720             self.report_error('no suitable InfoExtractor for URL %s' % url)
  721 
  722     def add_default_extra_info(self, ie_result, ie, url):
  723         self.add_extra_info(ie_result, {
  724             'extractor': ie.IE_NAME,
  725             'webpage_url': url,
  726             'webpage_url_basename': url_basename(url),
  727             'extractor_key': ie.ie_key(),
  728         })
  729 
  730     def process_ie_result(self, ie_result, download=True, extra_info={}):
  731         """
  732         Take the result of the ie(may be modified) and resolve all unresolved
  733         references (URLs, playlist items).
  734 
  735         It will also download the videos if 'download'.
  736         Returns the resolved ie_result.
  737         """
  738         result_type = ie_result.get('_type', 'video')
  739 
  740         if result_type in ('url', 'url_transparent'):
  741             ie_result['url'] = sanitize_url(ie_result['url'])
  742             extract_flat = self.params.get('extract_flat', False)
  743             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
  744                     extract_flat is True):
  745                 if self.params.get('forcejson', False):
  746                     self.to_stdout(json.dumps(ie_result))
  747                 return ie_result
  748 
  749         if result_type == 'video':
  750             self.add_extra_info(ie_result, extra_info)
  751             return self.process_video_result(ie_result, download=download)
  752         elif result_type == 'url':
  753             # We have to add extra_info to the results because it may be
  754             # contained in a playlist
  755             return self.extract_info(ie_result['url'],
  756                                      download,
  757                                      ie_key=ie_result.get('ie_key'),
  758                                      extra_info=extra_info)
  759         elif result_type == 'url_transparent':
  760             # Use the information from the embedding page
  761             info = self.extract_info(
  762                 ie_result['url'], ie_key=ie_result.get('ie_key'),
  763                 extra_info=extra_info, download=False, process=False)
  764 
  765             force_properties = dict(
  766                 (k, v) for k, v in ie_result.items() if v is not None)
  767             for f in ('_type', 'url', 'ie_key'):
  768                 if f in force_properties:
  769                     del force_properties[f]
  770             new_result = info.copy()
  771             new_result.update(force_properties)
  772 
  773             assert new_result.get('_type') != 'url_transparent'
  774 
  775             return self.process_ie_result(
  776                 new_result, download=download, extra_info=extra_info)
  777         elif result_type == 'playlist' or result_type == 'multi_video':
  778             # We process each entry in the playlist
  779             playlist = ie_result.get('title') or ie_result.get('id')
  780             self.to_screen('[download] Downloading playlist: %s' % playlist)
  781 
  782             playlist_results = []
  783 
  784             playliststart = self.params.get('playliststart', 1) - 1
  785             playlistend = self.params.get('playlistend')
  786             # For backwards compatibility, interpret -1 as whole list
  787             if playlistend == -1:
  788                 playlistend = None
  789 
  790             playlistitems_str = self.params.get('playlist_items')
  791             playlistitems = None
  792             if playlistitems_str is not None:
  793                 def iter_playlistitems(format):
  794                     for string_segment in format.split(','):
  795                         if '-' in string_segment:
  796                             start, end = string_segment.split('-')
  797                             for item in range(int(start), int(end) + 1):
  798                                 yield int(item)
  799                         else:
  800                             yield int(string_segment)
  801                 playlistitems = iter_playlistitems(playlistitems_str)
  802 
  803             ie_entries = ie_result['entries']
  804             if isinstance(ie_entries, list):
  805                 n_all_entries = len(ie_entries)
  806                 if playlistitems:
  807                     entries = [
  808                         ie_entries[i - 1] for i in playlistitems
  809                         if -n_all_entries <= i - 1 < n_all_entries]
  810                 else:
  811                     entries = ie_entries[playliststart:playlistend]
  812                 n_entries = len(entries)
  813                 self.to_screen(
  814                     '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
  815                     (ie_result['extractor'], playlist, n_all_entries, n_entries))
  816             elif isinstance(ie_entries, PagedList):
  817                 if playlistitems:
  818                     entries = []
  819                     for item in playlistitems:
  820                         entries.extend(ie_entries.getslice(
  821                             item - 1, item
  822                         ))
  823                 else:
  824                     entries = ie_entries.getslice(
  825                         playliststart, playlistend)
  826                 n_entries = len(entries)
  827                 self.to_screen(
  828                     '[%s] playlist %s: Downloading %d videos' %
  829                     (ie_result['extractor'], playlist, n_entries))
  830             else:  # iterable
  831                 if playlistitems:
  832                     entry_list = list(ie_entries)
  833                     entries = [entry_list[i - 1] for i in playlistitems]
  834                 else:
  835                     entries = list(itertools.islice(
  836                         ie_entries, playliststart, playlistend))
  837                 n_entries = len(entries)
  838                 self.to_screen(
  839                     '[%s] playlist %s: Downloading %d videos' %
  840                     (ie_result['extractor'], playlist, n_entries))
  841 
  842             if self.params.get('playlistreverse', False):
  843                 entries = entries[::-1]
  844 
  845             for i, entry in enumerate(entries, 1):
  846                 self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
  847                 extra = {
  848                     'n_entries': n_entries,
  849                     'playlist': playlist,
  850                     'playlist_id': ie_result.get('id'),
  851                     'playlist_title': ie_result.get('title'),
  852                     'playlist_index': i + playliststart,
  853                     'extractor': ie_result['extractor'],
  854                     'webpage_url': ie_result['webpage_url'],
  855                     'webpage_url_basename': url_basename(ie_result['webpage_url']),
  856                     'extractor_key': ie_result['extractor_key'],
  857                 }
  858 
  859                 reason = self._match_entry(entry, incomplete=True)
  860                 if reason is not None:
  861                     self.to_screen('[download] ' + reason)
  862                     continue
  863 
  864                 entry_result = self.process_ie_result(entry,
  865                                                       download=download,
  866                                                       extra_info=extra)
  867                 playlist_results.append(entry_result)
  868             ie_result['entries'] = playlist_results
  869             self.to_screen('[download] Finished downloading playlist: %s' % playlist)
  870             return ie_result
  871         elif result_type == 'compat_list':
  872             self.report_warning(
  873                 'Extractor %s returned a compat_list result. '
  874                 'It needs to be updated.' % ie_result.get('extractor'))
  875 
  876             def _fixup(r):
  877                 self.add_extra_info(
  878                     r,
  879                     {
  880                         'extractor': ie_result['extractor'],
  881                         'webpage_url': ie_result['webpage_url'],
  882                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
  883                         'extractor_key': ie_result['extractor_key'],
  884                     }
  885                 )
  886                 return r
  887             ie_result['entries'] = [
  888                 self.process_ie_result(_fixup(r), download, extra_info)
  889                 for r in ie_result['entries']
  890             ]
  891             return ie_result
  892         else:
  893             raise Exception('Invalid result type: %s' % result_type)
  894 
  895     def _build_format_filter(self, filter_spec):
  896         " Returns a function to filter the formats according to the filter_spec "
  897 
  898         OPERATORS = {
  899             '<': operator.lt,
  900             '<=': operator.le,
  901             '>': operator.gt,
  902             '>=': operator.ge,
  903             '=': operator.eq,
  904             '!=': operator.ne,
  905         }
  906         operator_rex = re.compile(r'''(?x)\s*
  907             (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
  908             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
  909             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
  910             $
  911             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
  912         m = operator_rex.search(filter_spec)
  913         if m:
  914             try:
  915                 comparison_value = int(m.group('value'))
  916             except ValueError:
  917                 comparison_value = parse_filesize(m.group('value'))
  918                 if comparison_value is None:
  919                     comparison_value = parse_filesize(m.group('value') + 'B')
  920                 if comparison_value is None:
  921                     raise ValueError(
  922                         'Invalid value %r in format specification %r' % (
  923                             m.group('value'), filter_spec))
  924             op = OPERATORS[m.group('op')]
  925 
  926         if not m:
  927             STR_OPERATORS = {
  928                 '=': operator.eq,
  929                 '!=': operator.ne,
  930                 '^=': lambda attr, value: attr.startswith(value),
  931                 '$=': lambda attr, value: attr.endswith(value),
  932                 '*=': lambda attr, value: value in attr,
  933             }
  934             str_operator_rex = re.compile(r'''(?x)
  935                 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
  936                 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
  937                 \s*(?P<value>[a-zA-Z0-9._-]+)
  938                 \s*$
  939                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
  940             m = str_operator_rex.search(filter_spec)
  941             if m:
  942                 comparison_value = m.group('value')
  943                 op = STR_OPERATORS[m.group('op')]
  944 
  945         if not m:
  946             raise ValueError('Invalid filter specification %r' % filter_spec)
  947 
  948         def _filter(f):
  949             actual_value = f.get(m.group('key'))
  950             if actual_value is None:
  951                 return m.group('none_inclusive')
  952             return op(actual_value, comparison_value)
  953         return _filter
  954 
  955     def build_format_selector(self, format_spec):
  956         def syntax_error(note, start):
  957             message = (
  958                 'Invalid format specification: '
  959                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
  960             return SyntaxError(message)
  961 
  962         PICKFIRST = 'PICKFIRST'
  963         MERGE = 'MERGE'
  964         SINGLE = 'SINGLE'
  965         GROUP = 'GROUP'
  966         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
  967 
  968         def _parse_filter(tokens):
  969             filter_parts = []
  970             for type, string, start, _, _ in tokens:
  971                 if type == tokenize.OP and string == ']':
  972                     return ''.join(filter_parts)
  973                 else:
  974                     filter_parts.append(string)
  975 
  976         def _remove_unused_ops(tokens):
  977             # Remove operators that we don't use and join them with the surrounding strings
  978             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
  979             ALLOWED_OPS = ('/', '+', ',', '(', ')')
  980             last_string, last_start, last_end, last_line = None, None, None, None
  981             for type, string, start, end, line in tokens:
  982                 if type == tokenize.OP and string == '[':
  983                     if last_string:
  984                         yield tokenize.NAME, last_string, last_start, last_end, last_line
  985                         last_string = None
  986                     yield type, string, start, end, line
  987                     # everything inside brackets will be handled by _parse_filter
  988                     for type, string, start, end, line in tokens:
  989                         yield type, string, start, end, line
  990                         if type == tokenize.OP and string == ']':
  991                             break
  992                 elif type == tokenize.OP and string in ALLOWED_OPS:
  993                     if last_string:
  994                         yield tokenize.NAME, last_string, last_start, last_end, last_line
  995                         last_string = None
  996                     yield type, string, start, end, line
  997                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
  998                     if not last_string:
  999                         last_string = string
 1000                         last_start = start
 1001                         last_end = end
 1002                     else:
 1003                         last_string += string
 1004             if last_string:
 1005                 yield tokenize.NAME, last_string, last_start, last_end, last_line
 1006 
 1007         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
 1008             selectors = []
 1009             current_selector = None
 1010             for type, string, start, _, _ in tokens:
 1011                 # ENCODING is only defined in python 3.x
 1012                 if type == getattr(tokenize, 'ENCODING', None):
 1013                     continue
 1014                 elif type in [tokenize.NAME, tokenize.NUMBER]:
 1015                     current_selector = FormatSelector(SINGLE, string, [])
 1016                 elif type == tokenize.OP:
 1017                     if string == ')':
 1018                         if not inside_group:
 1019                             # ')' will be handled by the parentheses group
 1020                             tokens.restore_last_token()
 1021                         break
 1022                     elif inside_merge and string in ['/', ',']:
 1023                         tokens.restore_last_token()
 1024                         break
 1025                     elif inside_choice and string == ',':
 1026                         tokens.restore_last_token()
 1027                         break
 1028                     elif string == ',':
 1029                         if not current_selector:
 1030                             raise syntax_error('"," must follow a format selector', start)
 1031                         selectors.append(current_selector)
 1032                         current_selector = None
 1033                     elif string == '/':
 1034                         if not current_selector:
 1035                             raise syntax_error('"/" must follow a format selector', start)
 1036                         first_choice = current_selector
 1037                         second_choice = _parse_format_selection(tokens, inside_choice=True)
 1038                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
 1039                     elif string == '[':
 1040                         if not current_selector:
 1041                             current_selector = FormatSelector(SINGLE, 'best', [])
 1042                         format_filter = _parse_filter(tokens)
 1043                         current_selector.filters.append(format_filter)
 1044                     elif string == '(':
 1045                         if current_selector:
 1046                             raise syntax_error('Unexpected "("', start)
 1047                         group = _parse_format_selection(tokens, inside_group=True)
 1048                         current_selector = FormatSelector(GROUP, group, [])
 1049                     elif string == '+':
 1050                         video_selector = current_selector
 1051                         audio_selector = _parse_format_selection(tokens, inside_merge=True)
 1052                         if not video_selector or not audio_selector:
 1053                             raise syntax_error('"+" must be between two format selectors', start)
 1054                         current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
 1055                     else:
 1056                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
 1057                 elif type == tokenize.ENDMARKER:
 1058                     break
 1059             if current_selector:
 1060                 selectors.append(current_selector)
 1061             return selectors
 1062 
 1063         def _build_selector_function(selector):
 1064             if isinstance(selector, list):
 1065                 fs = [_build_selector_function(s) for s in selector]
 1066 
 1067                 def selector_function(ctx):
 1068                     for f in fs:
 1069                         for format in f(ctx):
 1070                             yield format
 1071                 return selector_function
 1072             elif selector.type == GROUP:
 1073                 selector_function = _build_selector_function(selector.selector)
 1074             elif selector.type == PICKFIRST:
 1075                 fs = [_build_selector_function(s) for s in selector.selector]
 1076 
 1077                 def selector_function(ctx):
 1078                     for f in fs:
 1079                         picked_formats = list(f(ctx))
 1080                         if picked_formats:
 1081                             return picked_formats
 1082                     return []
 1083             elif selector.type == SINGLE:
 1084                 format_spec = selector.selector
 1085 
 1086                 def selector_function(ctx):
 1087                     formats = list(ctx['formats'])
 1088                     if not formats:
 1089                         return
 1090                     if format_spec == 'all':
 1091                         for f in formats:
 1092                             yield f
 1093                     elif format_spec in ['best', 'worst', None]:
 1094                         format_idx = 0 if format_spec == 'worst' else -1
 1095                         audiovideo_formats = [
 1096                             f for f in formats
 1097                             if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
 1098                         if audiovideo_formats:
 1099                             yield audiovideo_formats[format_idx]
 1100                         # for extractors with incomplete formats (audio only (soundcloud)
 1101                         # or video only (imgur)) we will fallback to best/worst
 1102                         # {video,audio}-only format
 1103                         elif ctx['incomplete_formats']:
 1104                             yield formats[format_idx]
 1105                     elif format_spec == 'bestaudio':
 1106                         audio_formats = [
 1107                             f for f in formats
 1108                             if f.get('vcodec') == 'none']
 1109                         if audio_formats:
 1110                             yield audio_formats[-1]
 1111                     elif format_spec == 'worstaudio':
 1112                         audio_formats = [
 1113                             f for f in formats
 1114                             if f.get('vcodec') == 'none']
 1115                         if audio_formats:
 1116                             yield audio_formats[0]
 1117                     elif format_spec == 'bestvideo':
 1118                         video_formats = [
 1119                             f for f in formats
 1120                             if f.get('acodec') == 'none']
 1121                         if video_formats:
 1122                             yield video_formats[-1]
 1123                     elif format_spec == 'worstvideo':
 1124                         video_formats = [
 1125                             f for f in formats
 1126                             if f.get('acodec') == 'none']
 1127                         if video_formats:
 1128                             yield video_formats[0]
 1129                     else:
 1130                         extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
 1131                         if format_spec in extensions:
 1132                             filter_f = lambda f: f['ext'] == format_spec
 1133                         else:
 1134                             filter_f = lambda f: f['format_id'] == format_spec
 1135                         matches = list(filter(filter_f, formats))
 1136                         if matches:
 1137                             yield matches[-1]
 1138             elif selector.type == MERGE:
 1139                 def _merge(formats_info):
 1140                     format_1, format_2 = [f['format_id'] for f in formats_info]
 1141                     # The first format must contain the video and the
 1142                     # second the audio
 1143                     if formats_info[0].get('vcodec') == 'none':
 1144                         self.report_error('The first format must '
 1145                                           'contain the video, try using '
 1146                                           '"-f %s+%s"' % (format_2, format_1))
 1147                         return
 1148                     # Formats must be opposite (video+audio)
 1149                     if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
 1150                         self.report_error(
 1151                             'Both formats %s and %s are video-only, you must specify "-f video+audio"'
 1152                             % (format_1, format_2))
 1153                         return
 1154                     output_ext = (
 1155                         formats_info[0]['ext']
 1156                         if self.params.get('merge_output_format') is None
 1157                         else self.params['merge_output_format'])
 1158                     return {
 1159                         'requested_formats': formats_info,
 1160                         'format': '%s+%s' % (formats_info[0].get('format'),
 1161                                              formats_info[1].get('format')),
 1162                         'format_id': '%s+%s' % (formats_info[0].get('format_id'),
 1163                                                 formats_info[1].get('format_id')),
 1164                         'width': formats_info[0].get('width'),
 1165                         'height': formats_info[0].get('height'),
 1166                         'resolution': formats_info[0].get('resolution'),
 1167                         'fps': formats_info[0].get('fps'),
 1168                         'vcodec': formats_info[0].get('vcodec'),
 1169                         'vbr': formats_info[0].get('vbr'),
 1170                         'stretched_ratio': formats_info[0].get('stretched_ratio'),
 1171                         'acodec': formats_info[1].get('acodec'),
 1172                         'abr': formats_info[1].get('abr'),
 1173                         'ext': output_ext,
 1174                     }
 1175                 video_selector, audio_selector = map(_build_selector_function, selector.selector)
 1176 
 1177                 def selector_function(ctx):
 1178                     for pair in itertools.product(
 1179                             video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
 1180                         yield _merge(pair)
 1181 
 1182             filters = [self._build_format_filter(f) for f in selector.filters]
 1183 
 1184             def final_selector(ctx):
 1185                 ctx_copy = copy.deepcopy(ctx)
 1186                 for _filter in filters:
 1187                     ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
 1188                 return selector_function(ctx_copy)
 1189             return final_selector
 1190 
 1191         stream = io.BytesIO(format_spec.encode('utf-8'))
 1192         try:
 1193             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
 1194         except tokenize.TokenError:
 1195             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
 1196 
 1197         class TokenIterator(object):
 1198             def __init__(self, tokens):
 1199                 self.tokens = tokens
 1200                 self.counter = 0
 1201 
 1202             def __iter__(self):
 1203                 return self
 1204 
 1205             def __next__(self):
 1206                 if self.counter >= len(self.tokens):
 1207                     raise StopIteration()
 1208                 value = self.tokens[self.counter]
 1209                 self.counter += 1
 1210                 return value
 1211 
 1212             next = __next__
 1213 
 1214             def restore_last_token(self):
 1215                 self.counter -= 1
 1216 
 1217         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
 1218         return _build_selector_function(parsed_selector)
 1219 
 1220     def _calc_headers(self, info_dict):
 1221         res = std_headers.copy()
 1222 
 1223         add_headers = info_dict.get('http_headers')
 1224         if add_headers:
 1225             res.update(add_headers)
 1226 
 1227         cookies = self._calc_cookies(info_dict)
 1228         if cookies:
 1229             res['Cookie'] = cookies
 1230 
 1231         return res
 1232 
 1233     def _calc_cookies(self, info_dict):
 1234         pr = sanitized_Request(info_dict['url'])
 1235         self.cookiejar.add_cookie_header(pr)
 1236         return pr.get_header('Cookie')
 1237 
 1238     def process_video_result(self, info_dict, download=True):
 1239         assert info_dict.get('_type', 'video') == 'video'
 1240 
 1241         if 'id' not in info_dict:
 1242             raise ExtractorError('Missing "id" field in extractor result')
 1243         if 'title' not in info_dict:
 1244             raise ExtractorError('Missing "title" field in extractor result')
 1245 
 1246         if not isinstance(info_dict['id'], compat_str):
 1247             self.report_warning('"id" field is not a string - forcing string conversion')
 1248             info_dict['id'] = compat_str(info_dict['id'])
 1249 
 1250         if 'playlist' not in info_dict:
 1251             # It isn't part of a playlist
 1252             info_dict['playlist'] = None
 1253             info_dict['playlist_index'] = None
 1254 
 1255         thumbnails = info_dict.get('thumbnails')
 1256         if thumbnails is None:
 1257             thumbnail = info_dict.get('thumbnail')
 1258             if thumbnail:
 1259                 info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
 1260         if thumbnails:
 1261             thumbnails.sort(key=lambda t: (
 1262                 t.get('preference') if t.get('preference') is not None else -1,
 1263                 t.get('width') if t.get('width') is not None else -1,
 1264                 t.get('height') if t.get('height') is not None else -1,
 1265                 t.get('id') if t.get('id') is not None else '', t.get('url')))
 1266             for i, t in enumerate(thumbnails):
 1267                 t['url'] = sanitize_url(t['url'])
 1268                 if t.get('width') and t.get('height'):
 1269                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
 1270                 if t.get('id') is None:
 1271                     t['id'] = '%d' % i
 1272 
 1273         if self.params.get('list_thumbnails'):
 1274             self.list_thumbnails(info_dict)
 1275             return
 1276 
 1277         thumbnail = info_dict.get('thumbnail')
 1278         if thumbnail:
 1279             info_dict['thumbnail'] = sanitize_url(thumbnail)
 1280         elif thumbnails:
 1281             info_dict['thumbnail'] = thumbnails[-1]['url']
 1282 
 1283         if 'display_id' not in info_dict and 'id' in info_dict:
 1284             info_dict['display_id'] = info_dict['id']
 1285 
 1286         if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
 1287             # Working around out-of-range timestamp values (e.g. negative ones on Windows,
 1288             # see http://bugs.python.org/issue1646728)
 1289             try:
 1290                 upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
 1291                 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
 1292             except (ValueError, OverflowError, OSError):
 1293                 pass
 1294 
 1295         # Auto generate title fields corresponding to the *_number fields when missing
 1296         # in order to always have clean titles. This is very common for TV series.
 1297         for field in ('chapter', 'season', 'episode'):
 1298             if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
 1299                 info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])
 1300 
 1301         subtitles = info_dict.get('subtitles')
 1302         if subtitles:
 1303             for _, subtitle in subtitles.items():
 1304                 for subtitle_format in subtitle:
 1305                     if subtitle_format.get('url'):
 1306                         subtitle_format['url'] = sanitize_url(subtitle_format['url'])
 1307                     if subtitle_format.get('ext') is None:
 1308                         subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()
 1309 
 1310         if self.params.get('listsubtitles', False):
 1311             if 'automatic_captions' in info_dict:
 1312                 self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
 1313             self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
 1314             return
 1315         info_dict['requested_subtitles'] = self.process_subtitles(
 1316             info_dict['id'], subtitles,
 1317             info_dict.get('automatic_captions'))
 1318 
 1319         # We now pick which formats have to be downloaded
 1320         if info_dict.get('formats') is None:
 1321             # There's only one format available
 1322             formats = [info_dict]
 1323         else:
 1324             formats = info_dict['formats']
 1325 
 1326         if not formats:
 1327             raise ExtractorError('No video formats found!')
 1328 
 1329         formats_dict = {}
 1330 
 1331         # We check that all the formats have the format and format_id fields
 1332         for i, format in enumerate(formats):
 1333             if 'url' not in format:
 1334                 raise ExtractorError('Missing "url" key in result (index %d)' % i)
 1335 
 1336             format['url'] = sanitize_url(format['url'])
 1337 
 1338             if format.get('format_id') is None:
 1339                 format['format_id'] = compat_str(i)
 1340             else:
 1341                 # Sanitize format_id from characters used in format selector expression
 1342                 format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
 1343             format_id = format['format_id']
 1344             if format_id not in formats_dict:
 1345                 formats_dict[format_id] = []
 1346             formats_dict[format_id].append(format)
 1347 
 1348         # Make sure all formats have unique format_id
 1349         for format_id, ambiguous_formats in formats_dict.items():
 1350             if len(ambiguous_formats) > 1:
 1351                 for i, format in enumerate(ambiguous_formats):
 1352                     format['format_id'] = '%s-%d' % (format_id, i)
 1353 
 1354         for i, format in enumerate(formats):
 1355             if format.get('format') is None:
 1356                 format['format'] = '{id} - {res}{note}'.format(
 1357                     id=format['format_id'],
 1358                     res=self.format_resolution(format),
 1359                     note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
 1360                 )
 1361             # Automatically determine file extension if missing
 1362             if format.get('ext') is None:
 1363                 format['ext'] = determine_ext(format['url']).lower()
 1364             # Automatically determine protocol if missing (useful for format
 1365             # selection purposes)
 1366             if 'protocol' not in format:
 1367                 format['protocol'] = determine_protocol(format)
 1368             # Add HTTP headers, so that external programs can use them from the
 1369             # json output
 1370             full_format_info = info_dict.copy()
 1371             full_format_info.update(format)
 1372             format['http_headers'] = self._calc_headers(full_format_info)
 1373 
 1374         # TODO Central sorting goes here
 1375 
 1376         if formats[0] is not info_dict:
 1377             # only set the 'formats' fields if the original info_dict list them
 1378             # otherwise we end up with a circular reference, the first (and unique)
 1379             # element in the 'formats' field in info_dict is info_dict itself,
 1380             # which can't be exported to json
 1381             info_dict['formats'] = formats
 1382         if self.params.get('listformats'):
 1383             self.list_formats(info_dict)
 1384             return
 1385 
 1386         req_format = self.params.get('format')
 1387         if req_format is None:
 1388             req_format_list = []
 1389             if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
 1390                     not info_dict.get('is_live')):
 1391                 merger = FFmpegMergerPP(self)
 1392                 if merger.available and merger.can_merge():
 1393                     req_format_list.append('bestvideo+bestaudio')
 1394             req_format_list.append('best')
 1395             req_format = '/'.join(req_format_list)
 1396         format_selector = self.build_format_selector(req_format)
 1397 
 1398         # While in format selection we may need to have an access to the original
 1399         # format set in order to calculate some metrics or do some processing.
 1400         # For now we need to be able to guess whether original formats provided
 1401         # by extractor are incomplete or not (i.e. whether extractor provides only
 1402         # video-only or audio-only formats) for proper formats selection for
 1403         # extractors with such incomplete formats (see
 1404         # https://github.com/rg3/youtube-dl/pull/5556).
 1405         # Since formats may be filtered during format selection and may not match
 1406         # the original formats the results may be incorrect. Thus original formats
 1407         # or pre-calculated metrics should be passed to format selection routines
 1408         # as well.
 1409         # We will pass a context object containing all necessary additional data
 1410         # instead of just formats.
 1411         # This fixes incorrect format selection issue (see
 1412         # https://github.com/rg3/youtube-dl/issues/10083).
 1413         incomplete_formats = (
 1414             # All formats are video-only or
 1415             all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats) or
 1416             # all formats are audio-only
 1417             all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))
 1418 
 1419         ctx = {
 1420             'formats': formats,
 1421             'incomplete_formats': incomplete_formats,
 1422         }
 1423 
 1424         formats_to_download = list(format_selector(ctx))
 1425         if not formats_to_download:
 1426             raise ExtractorError('requested format not available',
 1427                                  expected=True)
 1428 
 1429         if download:
 1430             if len(formats_to_download) > 1:
 1431                 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
 1432             for format in formats_to_download:
 1433                 new_info = dict(info_dict)
 1434                 new_info.update(format)
 1435                 self.process_info(new_info)
 1436         # We update the info dict with the best quality format (backwards compatibility)
 1437         info_dict.update(formats_to_download[-1])
 1438         return info_dict
 1439 
 1440     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
 1441         """Select the requested subtitles and their format"""
 1442         available_subs = {}
 1443         if normal_subtitles and self.params.get('writesubtitles'):
 1444             available_subs.update(normal_subtitles)
 1445         if automatic_captions and self.params.get('writeautomaticsub'):
 1446             for lang, cap_info in automatic_captions.items():
 1447                 if lang not in available_subs:
 1448                     available_subs[lang] = cap_info
 1449 
 1450         if (not self.params.get('writesubtitles') and not
 1451                 self.params.get('writeautomaticsub') or not
 1452                 available_subs):
 1453             return None
 1454 
 1455         if self.params.get('allsubtitles', False):
 1456             requested_langs = available_subs.keys()
 1457         else:
 1458             if self.params.get('subtitleslangs', False):
 1459                 requested_langs = self.params.get('subtitleslangs')
 1460             elif 'en' in available_subs:
 1461                 requested_langs = ['en']
 1462             else:
 1463                 requested_langs = [list(available_subs.keys())[0]]
 1464 
 1465         formats_query = self.params.get('subtitlesformat', 'best')
 1466         formats_preference = formats_query.split('/') if formats_query else []
 1467         subs = {}
 1468         for lang in requested_langs:
 1469             formats = available_subs.get(lang)
 1470             if formats is None:
 1471                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
 1472                 continue
 1473             for ext in formats_preference:
 1474                 if ext == 'best':
 1475                     f = formats[-1]
 1476                     break
 1477                 matches = list(filter(lambda f: f['ext'] == ext, formats))
 1478                 if matches:
 1479                     f = matches[-1]
 1480                     break
 1481             else:
 1482                 f = formats[-1]
 1483                 self.report_warning(
 1484                     'No subtitle format found matching "%s" for language %s, '
 1485                     'using %s' % (formats_query, lang, f['ext']))
 1486             subs[lang] = f
 1487         return subs
 1488 
 1489     def process_info(self, info_dict):
 1490         """Process a single resolved IE result."""
 1491 
 1492         assert info_dict.get('_type', 'video') == 'video'
 1493 
 1494         max_downloads = self.params.get('max_downloads')
 1495         if max_downloads is not None:
 1496             if self._num_downloads >= int(max_downloads):
 1497                 raise MaxDownloadsReached()
 1498 
 1499         info_dict['fulltitle'] = info_dict['title']
 1500         if len(info_dict['title']) > 200:
 1501             info_dict['title'] = info_dict['title'][:197] + '...'
 1502 
 1503         if 'format' not in info_dict:
 1504             info_dict['format'] = info_dict['ext']
 1505 
 1506         reason = self._match_entry(info_dict, incomplete=False)
 1507         if reason is not None:
 1508             self.to_screen('[download] ' + reason)
 1509             return
 1510 
 1511         self._num_downloads += 1
 1512 
 1513         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
 1514 
 1515         # Forced printings
 1516         if self.params.get('forcetitle', False):
 1517             self.to_stdout(info_dict['fulltitle'])
 1518         if self.params.get('forceid', False):
 1519             self.to_stdout(info_dict['id'])
 1520         if self.params.get('forceurl', False):
 1521             if info_dict.get('requested_formats') is not None:
 1522                 for f in info_dict['requested_formats']:
 1523                     self.to_stdout(f['url'] + f.get('play_path', ''))
 1524             else:
 1525                 # For RTMP URLs, also include the playpath
 1526                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
 1527         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
 1528             self.to_stdout(info_dict['thumbnail'])
 1529         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
 1530             self.to_stdout(info_dict['description'])
 1531         if self.params.get('forcefilename', False) and filename is not None:
 1532             self.to_stdout(filename)
 1533         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
 1534             self.to_stdout(formatSeconds(info_dict['duration']))
 1535         if self.params.get('forceformat', False):
 1536             self.to_stdout(info_dict['format'])
 1537         if self.params.get('forcejson', False):
 1538             self.to_stdout(json.dumps(info_dict))
 1539 
 1540         # Do nothing else if in simulate mode
 1541         if self.params.get('simulate', False):
 1542             return
 1543 
 1544         if filename is None:
 1545             return
 1546 
 1547         try:
 1548             dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
 1549             if dn and not os.path.exists(dn):
 1550                 os.makedirs(dn)
 1551         except (OSError, IOError) as err:
 1552             self.report_error('unable to create directory ' + error_to_compat_str(err))
 1553             return
 1554 
 1555         if self.params.get('writedescription', False):
 1556             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
 1557             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
 1558                 self.to_screen('[info] Video description is already present')
 1559             elif info_dict.get('description') is None:
 1560                 self.report_warning('There\'s no description to write.')
 1561             else:
 1562                 try:
 1563                     self.to_screen('[info] Writing video description to: ' + descfn)
 1564                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 1565                         descfile.write(info_dict['description'])
 1566                 except (OSError, IOError):
 1567                     self.report_error('Cannot write description file ' + descfn)
 1568                     return
 1569 
 1570         if self.params.get('writeannotations', False):
 1571             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
 1572             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
 1573                 self.to_screen('[info] Video annotations are already present')
 1574             else:
 1575                 try:
 1576                     self.to_screen('[info] Writing video annotations to: ' + annofn)
 1577                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 1578                         annofile.write(info_dict['annotations'])
 1579                 except (KeyError, TypeError):
 1580                     self.report_warning('There are no annotations to write.')
 1581                 except (OSError, IOError):
 1582                     self.report_error('Cannot write annotations file: ' + annofn)
 1583                     return
 1584 
 1585         subtitles_are_requested = any([self.params.get('writesubtitles', False),
 1586                                        self.params.get('writeautomaticsub')])
 1587 
 1588         if subtitles_are_requested and info_dict.get('requested_subtitles'):
 1589             # subtitles download errors are already managed as troubles in relevant IE
 1590             # that way it will silently go on when used with unsupporting IE
 1591             subtitles = info_dict['requested_subtitles']
 1592             ie = self.get_info_extractor(info_dict['extractor_key'])
 1593             for sub_lang, sub_info in subtitles.items():
 1594                 sub_format = sub_info['ext']
 1595                 if sub_info.get('data') is not None:
 1596                     sub_data = sub_info['data']
 1597                 else:
 1598                     try:
 1599                         sub_data = ie._download_webpage(
 1600                             sub_info['url'], info_dict['id'], note=False)
 1601                     except ExtractorError as err:
 1602                         self.report_warning('Unable to download subtitle for "%s": %s' %
 1603                                             (sub_lang, error_to_compat_str(err.cause)))
 1604                         continue
 1605                 try:
 1606                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
 1607                     if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
 1608                         self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
 1609                     else:
 1610                         self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
 1611                         # Use newline='' to prevent conversion of newline characters
 1612                         # See https://github.com/rg3/youtube-dl/issues/10268
 1613                         with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
 1614                             subfile.write(sub_data)
 1615                 except (OSError, IOError):
 1616                     self.report_error('Cannot write subtitles file ' + sub_filename)
 1617                     return
 1618 
 1619         if self.params.get('writeinfojson', False):
 1620             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
 1621             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
 1622                 self.to_screen('[info] Video description metadata is already present')
 1623             else:
 1624                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
 1625                 try:
 1626                     write_json_file(self.filter_requested_info(info_dict), infofn)
 1627                 except (OSError, IOError):
 1628                     self.report_error('Cannot write metadata to JSON file ' + infofn)
 1629                     return
 1630 
 1631         self._write_thumbnails(info_dict, filename)
 1632 
 1633         if not self.params.get('skip_download', False):
 1634             try:
 1635                 def dl(name, info):
 1636                     fd = get_suitable_downloader(info, self.params)(self, self.params)
 1637                     for ph in self._progress_hooks:
 1638                         fd.add_progress_hook(ph)
 1639                     if self.params.get('verbose'):
 1640                         self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
 1641                     return fd.download(name, info)
 1642 
 1643                 if info_dict.get('requested_formats') is not None:
 1644                     downloaded = []
 1645                     success = True
 1646                     merger = FFmpegMergerPP(self)
 1647                     if not merger.available:
 1648                         postprocessors = []
 1649                         self.report_warning('You have requested multiple '
 1650                                             'formats but ffmpeg or avconv are not installed.'
 1651                                             ' The formats won\'t be merged.')
 1652                     else:
 1653                         postprocessors = [merger]
 1654 
 1655                     def compatible_formats(formats):
 1656                         video, audio = formats
 1657                         # Check extension
 1658                         video_ext, audio_ext = audio.get('ext'), video.get('ext')
 1659                         if video_ext and audio_ext:
 1660                             COMPATIBLE_EXTS = (
 1661                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
 1662                                 ('webm')
 1663                             )
 1664                             for exts in COMPATIBLE_EXTS:
 1665                                 if video_ext in exts and audio_ext in exts:
 1666                                     return True
 1667                         # TODO: Check acodec/vcodec
 1668                         return False
 1669 
 1670                     filename_real_ext = os.path.splitext(filename)[1][1:]
 1671                     filename_wo_ext = (
 1672                         os.path.splitext(filename)[0]
 1673                         if filename_real_ext == info_dict['ext']
 1674                         else filename)
 1675                     requested_formats = info_dict['requested_formats']
 1676                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
 1677                         info_dict['ext'] = 'mkv'
 1678                         self.report_warning(
 1679                             'Requested formats are incompatible for merge and will be merged into mkv.')
 1680                     # Ensure filename always has a correct extension for successful merge
 1681                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
 1682                     if os.path.exists(encodeFilename(filename)):
 1683                         self.to_screen(
 1684                             '[download] %s has already been downloaded and '
 1685                             'merged' % filename)
 1686                     else:
 1687                         for f in requested_formats:
 1688                             new_info = dict(info_dict)
 1689                             new_info.update(f)
 1690                             fname = self.prepare_filename(new_info)
 1691                             fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
 1692                             downloaded.append(fname)
 1693                             partial_success = dl(fname, new_info)
 1694                             success = success and partial_success
 1695                         info_dict['__postprocessors'] = postprocessors
 1696                         info_dict['__files_to_merge'] = downloaded
 1697                 else:
 1698                     # Just a single file
 1699                     success = dl(filename, info_dict)
 1700             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 1701                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
 1702                 return
 1703             except (OSError, IOError) as err:
 1704                 raise UnavailableVideoError(err)
 1705             except (ContentTooShortError, ) as err:
 1706                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 1707                 return
 1708 
 1709             if success and filename != '-':
 1710                 # Fixup content
 1711                 fixup_policy = self.params.get('fixup')
 1712                 if fixup_policy is None:
 1713                     fixup_policy = 'detect_or_warn'
 1714 
 1715                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
 1716 
 1717                 stretched_ratio = info_dict.get('stretched_ratio')
 1718                 if stretched_ratio is not None and stretched_ratio != 1:
 1719                     if fixup_policy == 'warn':
 1720                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
 1721                             info_dict['id'], stretched_ratio))
 1722                     elif fixup_policy == 'detect_or_warn':
 1723                         stretched_pp = FFmpegFixupStretchedPP(self)
 1724                         if stretched_pp.available:
 1725                             info_dict.setdefault('__postprocessors', [])
 1726                             info_dict['__postprocessors'].append(stretched_pp)
 1727                         else:
 1728                             self.report_warning(
 1729                                 '%s: Non-uniform pixel ratio (%s). %s'
 1730                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
 1731                     else:
 1732                         assert fixup_policy in ('ignore', 'never')
 1733 
 1734                 if (info_dict.get('requested_formats') is None and
 1735                         info_dict.get('container') == 'm4a_dash'):
 1736                     if fixup_policy == 'warn':
 1737                         self.report_warning(
 1738                             '%s: writing DASH m4a. '
 1739                             'Only some players support this container.'
 1740                             % info_dict['id'])
 1741                     elif fixup_policy == 'detect_or_warn':
 1742                         fixup_pp = FFmpegFixupM4aPP(self)
 1743                         if fixup_pp.available:
 1744                             info_dict.setdefault('__postprocessors', [])
 1745                             info_dict['__postprocessors'].append(fixup_pp)
 1746                         else:
 1747                             self.report_warning(
 1748                                 '%s: writing DASH m4a. '
 1749                                 'Only some players support this container. %s'
 1750                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
 1751                     else:
 1752                         assert fixup_policy in ('ignore', 'never')
 1753 
 1754                 if (info_dict.get('protocol') == 'm3u8_native' or
 1755                         info_dict.get('protocol') == 'm3u8' and
 1756                         self.params.get('hls_prefer_native')):
 1757                     if fixup_policy == 'warn':
 1758                         self.report_warning('%s: malformated aac bitstream.' % (
 1759                             info_dict['id']))
 1760                     elif fixup_policy == 'detect_or_warn':
 1761                         fixup_pp = FFmpegFixupM3u8PP(self)
 1762                         if fixup_pp.available:
 1763                             info_dict.setdefault('__postprocessors', [])
 1764                             info_dict['__postprocessors'].append(fixup_pp)
 1765                         else:
 1766                             self.report_warning(
 1767                                 '%s: malformated aac bitstream. %s'
 1768                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
 1769                     else:
 1770                         assert fixup_policy in ('ignore', 'never')
 1771 
 1772                 try:
 1773                     self.post_process(filename, info_dict)
 1774                 except (PostProcessingError) as err:
 1775                     self.report_error('postprocessing: %s' % str(err))
 1776                     return
 1777                 self.record_download_archive(info_dict)
 1778 
 1779     def download(self, url_list):
 1780         """Download a given list of URLs."""
 1781         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 1782         if (len(url_list) > 1 and
 1783                 '%' not in outtmpl and
 1784                 self.params.get('max_downloads') != 1):
 1785             raise SameFileError(outtmpl)
 1786 
 1787         for url in url_list:
 1788             try:
 1789                 # It also downloads the videos
 1790                 res = self.extract_info(
 1791                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
 1792             except UnavailableVideoError:
 1793                 self.report_error('unable to download video')
 1794             except MaxDownloadsReached:
 1795                 self.to_screen('[info] Maximum number of downloaded files reached.')
 1796                 raise
 1797             else:
 1798                 if self.params.get('dump_single_json', False):
 1799                     self.to_stdout(json.dumps(res))
 1800 
 1801         return self._download_retcode
 1802 
 1803     def download_with_info_file(self, info_filename):
 1804         with contextlib.closing(fileinput.FileInput(
 1805                 [info_filename], mode='r',
 1806                 openhook=fileinput.hook_encoded('utf-8'))) as f:
 1807             # FileInput doesn't have a read method, we can't call json.load
 1808             info = self.filter_requested_info(json.loads('\n'.join(f)))
 1809         try:
 1810             self.process_ie_result(info, download=True)
 1811         except DownloadError:
 1812             webpage_url = info.get('webpage_url')
 1813             if webpage_url is not None:
 1814                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
 1815                 return self.download([webpage_url])
 1816             else:
 1817                 raise
 1818         return self._download_retcode
 1819 
 1820     @staticmethod
 1821     def filter_requested_info(info_dict):
 1822         return dict(
 1823             (k, v) for k, v in info_dict.items()
 1824             if k not in ['requested_formats', 'requested_subtitles'])
 1825 
 1826     def post_process(self, filename, ie_info):
 1827         """Run all the postprocessors on the given file."""
 1828         info = dict(ie_info)
 1829         info['filepath'] = filename
 1830         pps_chain = []
 1831         if ie_info.get('__postprocessors') is not None:
 1832             pps_chain.extend(ie_info['__postprocessors'])
 1833         pps_chain.extend(self._pps)
 1834         for pp in pps_chain:
 1835             files_to_delete = []
 1836             try:
 1837                 files_to_delete, info = pp.run(info)
 1838             except PostProcessingError as e:
 1839                 self.report_error(e.msg)
 1840             if files_to_delete and not self.params.get('keepvideo', False):
 1841                 for old_filename in files_to_delete:
 1842                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
 1843                     try:
 1844                         os.remove(encodeFilename(old_filename))
 1845                     except (IOError, OSError):
 1846                         self.report_warning('Unable to remove downloaded original file')
 1847 
 1848     def _make_archive_id(self, info_dict):
 1849         # Future-proof against any change in case
 1850         # and backwards compatibility with prior versions
 1851         extractor = info_dict.get('extractor_key')
 1852         if extractor is None:
 1853             if 'id' in info_dict:
 1854                 extractor = info_dict.get('ie_key')  # key in a playlist
 1855         if extractor is None:
 1856             return None  # Incomplete video information
 1857         return extractor.lower() + ' ' + info_dict['id']
 1858 
 1859     def in_download_archive(self, info_dict):
 1860         fn = self.params.get('download_archive')
 1861         if fn is None:
 1862             return False
 1863 
 1864         vid_id = self._make_archive_id(info_dict)
 1865         if vid_id is None:
 1866             return False  # Incomplete video information
 1867 
 1868         try:
 1869             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 1870                 for line in archive_file:
 1871                     if line.strip() == vid_id:
 1872                         return True
 1873         except IOError as ioe:
 1874             if ioe.errno != errno.ENOENT:
 1875                 raise
 1876         return False
 1877 
 1878     def record_download_archive(self, info_dict):
 1879         fn = self.params.get('download_archive')
 1880         if fn is None:
 1881             return
 1882         vid_id = self._make_archive_id(info_dict)
 1883         assert vid_id
 1884         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
 1885             archive_file.write(vid_id + '\n')
 1886 
 1887     @staticmethod
 1888     def format_resolution(format, default='unknown'):
 1889         if format.get('vcodec') == 'none':
 1890             return 'audio only'
 1891         if format.get('resolution') is not None:
 1892             return format['resolution']
 1893         if format.get('height') is not None:
 1894             if format.get('width') is not None:
 1895                 res = '%sx%s' % (format['width'], format['height'])
 1896             else:
 1897                 res = '%sp' % format['height']
 1898         elif format.get('width') is not None:
 1899             res = '%dx?' % format['width']
 1900         else:
 1901             res = default
 1902         return res
 1903 
 1904     def _format_note(self, fdict):
 1905         res = ''
 1906         if fdict.get('ext') in ['f4f', 'f4m']:
 1907             res += '(unsupported) '
 1908         if fdict.get('language'):
 1909             if res:
 1910                 res += ' '
 1911             res += '[%s] ' % fdict['language']
 1912         if fdict.get('format_note') is not None:
 1913             res += fdict['format_note'] + ' '
 1914         if fdict.get('tbr') is not None:
 1915             res += '%4dk ' % fdict['tbr']
 1916         if fdict.get('container') is not None:
 1917             if res:
 1918                 res += ', '
 1919             res += '%s container' % fdict['container']
 1920         if (fdict.get('vcodec') is not None and
 1921                 fdict.get('vcodec') != 'none'):
 1922             if res:
 1923                 res += ', '
 1924             res += fdict['vcodec']
 1925             if fdict.get('vbr') is not None:
 1926                 res += '@'
 1927         elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
 1928             res += 'video@'
 1929         if fdict.get('vbr') is not None:
 1930             res += '%4dk' % fdict['vbr']
 1931         if fdict.get('fps') is not None:
 1932             if res:
 1933                 res += ', '
 1934             res += '%sfps' % fdict['fps']
 1935         if fdict.get('acodec') is not None:
 1936             if res:
 1937                 res += ', '
 1938             if fdict['acodec'] == 'none':
 1939                 res += 'video only'
 1940             else:
 1941                 res += '%-5s' % fdict['acodec']
 1942         elif fdict.get('abr') is not None:
 1943             if res:
 1944                 res += ', '
 1945             res += 'audio'
 1946         if fdict.get('abr') is not None:
 1947             res += '@%3dk' % fdict['abr']
 1948         if fdict.get('asr') is not None:
 1949             res += ' (%5dHz)' % fdict['asr']
 1950         if fdict.get('filesize') is not None:
 1951             if res:
 1952                 res += ', '
 1953             res += format_bytes(fdict['filesize'])
 1954         elif fdict.get('filesize_approx') is not None:
 1955             if res:
 1956                 res += ', '
 1957             res += '~' + format_bytes(fdict['filesize_approx'])
 1958         return res
 1959 
 1960     def list_formats(self, info_dict):
 1961         formats = info_dict.get('formats', [info_dict])
 1962         table = [
 1963             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
 1964             for f in formats
 1965             if f.get('preference') is None or f['preference'] >= -1000]
 1966         if len(formats) > 1:
 1967             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
 1968 
 1969         header_line = ['format code', 'extension', 'resolution', 'note']
 1970         self.to_screen(
 1971             '[info] Available formats for %s:\n%s' %
 1972             (info_dict['id'], render_table(header_line, table)))
 1973 
 1974     def list_thumbnails(self, info_dict):
 1975         thumbnails = info_dict.get('thumbnails')
 1976         if not thumbnails:
 1977             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
 1978             return
 1979 
 1980         self.to_screen(
 1981             '[info] Thumbnails for %s:' % info_dict['id'])
 1982         self.to_screen(render_table(
 1983             ['ID', 'width', 'height', 'URL'],
 1984             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
 1985 
 1986     def list_subtitles(self, video_id, subtitles, name='subtitles'):
 1987         if not subtitles:
 1988             self.to_screen('%s has no %s' % (video_id, name))
 1989             return
 1990         self.to_screen(
 1991             'Available %s for %s:' % (name, video_id))
 1992         self.to_screen(render_table(
 1993             ['Language', 'formats'],
 1994             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
 1995                 for lang, formats in subtitles.items()]))
 1996 
 1997     def urlopen(self, req):
 1998         """ Start an HTTP download """
 1999         if isinstance(req, compat_basestring):
 2000             req = sanitized_Request(req)
 2001         return self._opener.open(req, timeout=self._socket_timeout)
 2002 
 2003     def print_debug_header(self):
 2004         if not self.params.get('verbose'):
 2005             return
 2006 
 2007         if type('') is not compat_str:
 2008             # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
 2009             self.report_warning(
 2010                 'Your Python is broken! Update to a newer and supported version')
 2011 
 2012         stdout_encoding = getattr(
 2013             sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
 2014         encoding_str = (
 2015             '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
 2016                 locale.getpreferredencoding(),
 2017                 sys.getfilesystemencoding(),
 2018                 stdout_encoding,
 2019                 self.get_encoding()))
 2020         write_string(encoding_str, encoding=None)
 2021 
 2022         self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
 2023         if _LAZY_LOADER:
 2024             self._write_string('[debug] Lazy loading extractors enabled' + '\n')
 2025         try:
 2026             sp = subprocess.Popen(
 2027                 ['git', 'rev-parse', '--short', 'HEAD'],
 2028                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
 2029                 cwd=os.path.dirname(os.path.abspath(__file__)))
 2030             out, err = sp.communicate()
 2031             out = out.decode().strip()
 2032             if re.match('[0-9a-f]+', out):
 2033                 self._write_string('[debug] Git HEAD: ' + out + '\n')
 2034         except Exception:
 2035             try:
 2036                 sys.exc_clear()
 2037             except Exception:
 2038                 pass
 2039         self._write_string('[debug] Python version %s - %s\n' % (
 2040             platform.python_version(), platform_name()))
 2041 
 2042         exe_versions = FFmpegPostProcessor.get_versions(self)
 2043         exe_versions['rtmpdump'] = rtmpdump_version()
 2044         exe_str = ', '.join(
 2045             '%s %s' % (exe, v)
 2046             for exe, v in sorted(exe_versions.items())
 2047             if v
 2048         )
 2049         if not exe_str:
 2050             exe_str = 'none'
 2051         self._write_string('[debug] exe versions: %s\n' % exe_str)
 2052 
 2053         proxy_map = {}
 2054         for handler in self._opener.handlers:
 2055             if hasattr(handler, 'proxies'):
 2056                 proxy_map.update(handler.proxies)
 2057         self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
 2058 
 2059         if self.params.get('call_home', False):
 2060             ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
 2061             self._write_string('[debug] Public IP address: %s\n' % ipaddr)
 2062             latest_version = self.urlopen(
 2063                 'https://yt-dl.org/latest/version').read().decode('utf-8')
 2064             if version_tuple(latest_version) > version_tuple(__version__):
 2065                 self.report_warning(
 2066                     'You are using an outdated version (newest version: %s)! '
 2067                     'See https://yt-dl.org/update if you need help updating.' %
 2068                     latest_version)
 2069 
 2070     def _setup_opener(self):
 2071         timeout_val = self.params.get('socket_timeout')
 2072         self._socket_timeout = 600 if timeout_val is None else float(timeout_val)
 2073 
 2074         opts_cookiefile = self.params.get('cookiefile')
 2075         opts_proxy = self.params.get('proxy')
 2076 
 2077         if opts_cookiefile is None:
 2078             self.cookiejar = compat_cookiejar.CookieJar()
 2079         else:
 2080             opts_cookiefile = compat_expanduser(opts_cookiefile)
 2081             self.cookiejar = compat_cookiejar.MozillaCookieJar(
 2082                 opts_cookiefile)
 2083             if os.access(opts_cookiefile, os.R_OK):
 2084                 self.cookiejar.load()
 2085 
 2086         cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
 2087         if opts_proxy is not None:
 2088             if opts_proxy == '':
 2089                 proxies = {}
 2090             else:
 2091                 proxies = {'http': opts_proxy, 'https': opts_proxy}
 2092         else:
 2093             proxies = compat_urllib_request.getproxies()
 2094             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
 2095             if 'http' in proxies and 'https' not in proxies:
 2096                 proxies['https'] = proxies['http']
 2097         proxy_handler = PerRequestProxyHandler(proxies)
 2098 
 2099         debuglevel = 1 if self.params.get('debug_printtraffic') else 0
 2100         https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
 2101         ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
 2102         data_handler = compat_urllib_request_DataHandler()
 2103 
 2104         # When passing our own FileHandler instance, build_opener won't add the
 2105         # default FileHandler and allows us to disable the file protocol, which
 2106         # can be used for malicious purposes (see
 2107         # https://github.com/rg3/youtube-dl/issues/8227)
 2108         file_handler = compat_urllib_request.FileHandler()
 2109 
 2110         def file_open(*args, **kwargs):
 2111             raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
 2112         file_handler.file_open = file_open
 2113 
 2114         opener = compat_urllib_request.build_opener(
 2115             proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler)
 2116 
 2117         # Delete the default user-agent header, which would otherwise apply in
 2118         # cases where our custom HTTP handler doesn't come into play
 2119         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
 2120         opener.addheaders = []
 2121         self._opener = opener
 2122 
 2123     def encode(self, s):
 2124         if isinstance(s, bytes):
 2125             return s  # Already encoded
 2126 
 2127         try:
 2128             return s.encode(self.get_encoding())
 2129         except UnicodeEncodeError as err:
 2130             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
 2131             raise
 2132 
 2133     def get_encoding(self):
 2134         encoding = self.params.get('encoding')
 2135         if encoding is None:
 2136             encoding = preferredencoding()
 2137         return encoding
 2138 
 2139     def _write_thumbnails(self, info_dict, filename):
 2140         if self.params.get('writethumbnail', False):
 2141             thumbnails = info_dict.get('thumbnails')
 2142             if thumbnails:
 2143                 thumbnails = [thumbnails[-1]]
 2144         elif self.params.get('write_all_thumbnails', False):
 2145             thumbnails = info_dict.get('thumbnails')
 2146         else:
 2147             return
 2148 
 2149         if not thumbnails:
 2150             # No thumbnails present, so return immediately
 2151             return
 2152 
 2153         for t in thumbnails:
 2154             thumb_ext = determine_ext(t['url'], 'jpg')
 2155             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
 2156             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
 2157             t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
 2158 
 2159             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
 2160                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
 2161                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
 2162             else:
 2163                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
 2164                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
 2165                 try:
 2166                     uf = self.urlopen(t['url'])
 2167                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
 2168                         shutil.copyfileobj(uf, thumbf)
 2169                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
 2170                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
 2171                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 2172                     self.report_warning('Unable to download thumbnail "%s": %s' %
 2173                                         (t['url'], error_to_compat_str(err)))