summaryrefslogtreecommitdiff
path: root/youtube_dl/YoutubeDL.py
blob: b03a89390da1a1eac19fcc67e7a4537a7af433f6 (plain)
    1 #!/usr/bin/env python
    2 # coding: utf-8
    3 
    4 from __future__ import absolute_import, unicode_literals
    5 
    6 import collections
    7 import contextlib
    8 import copy
    9 import datetime
   10 import errno
   11 import fileinput
   12 import io
   13 import itertools
   14 import json
   15 import locale
   16 import operator
   17 import os
   18 import platform
   19 import re
   20 import shutil
   21 import subprocess
   22 import socket
   23 import sys
   24 import time
   25 import tokenize
   26 import traceback
   27 import random
   28 
   29 from string import ascii_letters
   30 
   31 from .compat import (
   32     compat_basestring,
   33     compat_cookiejar,
   34     compat_get_terminal_size,
   35     compat_http_client,
   36     compat_kwargs,
   37     compat_numeric_types,
   38     compat_os_name,
   39     compat_str,
   40     compat_tokenize_tokenize,
   41     compat_urllib_error,
   42     compat_urllib_request,
   43     compat_urllib_request_DataHandler,
   44 )
   45 from .utils import (
   46     age_restricted,
   47     args_to_str,
   48     ContentTooShortError,
   49     date_from_str,
   50     DateRange,
   51     DEFAULT_OUTTMPL,
   52     determine_ext,
   53     determine_protocol,
   54     DownloadError,
   55     encode_compat_str,
   56     encodeFilename,
   57     error_to_compat_str,
   58     expand_path,
   59     ExtractorError,
   60     format_bytes,
   61     formatSeconds,
   62     GeoRestrictedError,
   63     int_or_none,
   64     ISO3166Utils,
   65     locked_file,
   66     make_HTTPS_handler,
   67     MaxDownloadsReached,
   68     orderedSet,
   69     PagedList,
   70     parse_filesize,
   71     PerRequestProxyHandler,
   72     platform_name,
   73     PostProcessingError,
   74     preferredencoding,
   75     prepend_extension,
   76     process_communicate_or_kill,
   77     register_socks_protocols,
   78     render_table,
   79     replace_extension,
   80     SameFileError,
   81     sanitize_filename,
   82     sanitize_path,
   83     sanitize_url,
   84     sanitized_Request,
   85     std_headers,
   86     str_or_none,
   87     subtitles_filename,
   88     UnavailableVideoError,
   89     url_basename,
   90     variadic,
   91     version_tuple,
   92     write_json_file,
   93     write_string,
   94     YoutubeDLCookieJar,
   95     YoutubeDLCookieProcessor,
   96     YoutubeDLHandler,
   97     YoutubeDLRedirectHandler,
   98 )
   99 from .cache import Cache
  100 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
  101 from .extractor.openload import PhantomJSwrapper
  102 from .downloader import get_suitable_downloader
  103 from .downloader.rtmp import rtmpdump_version
  104 from .postprocessor import (
  105     FFmpegFixupM3u8PP,
  106     FFmpegFixupM4aPP,
  107     FFmpegFixupStretchedPP,
  108     FFmpegMergerPP,
  109     FFmpegPostProcessor,
  110     get_postprocessor,
  111 )
  112 from .version import __version__
  113 
  114 if compat_os_name == 'nt':
  115     import ctypes
  116 
  117 
  118 class YoutubeDL(object):
  119     """YoutubeDL class.
  120 
  121     YoutubeDL objects are the ones responsible of downloading the
  122     actual video file and writing it to disk if the user has requested
  123     it, among some other tasks. In most cases there should be one per
  124     program. As, given a video URL, the downloader doesn't know how to
  125     extract all the needed information, task that InfoExtractors do, it
  126     has to pass the URL to one of them.
  127 
  128     For this, YoutubeDL objects have a method that allows
  129     InfoExtractors to be registered in a given order. When it is passed
  130     a URL, the YoutubeDL object handles it to the first InfoExtractor it
  131     finds that reports being able to handle it. The InfoExtractor extracts
  132     all the information about the video or videos the URL refers to, and
  133     YoutubeDL process the extracted information, possibly using a File
  134     Downloader to download the video.
  135 
  136     YoutubeDL objects accept a lot of parameters. In order not to saturate
  137     the object constructor with arguments, it receives a dictionary of
  138     options instead. These options are available through the params
  139     attribute for the InfoExtractors to use. The YoutubeDL also
  140     registers itself as the downloader in charge for the InfoExtractors
  141     that are added to it, so this is a "mutual registration".
  142 
  143     Available options:
  144 
  145     username:          Username for authentication purposes.
  146     password:          Password for authentication purposes.
  147     videopassword:     Password for accessing a video.
  148     ap_mso:            Adobe Pass multiple-system operator identifier.
  149     ap_username:       Multiple-system operator account username.
  150     ap_password:       Multiple-system operator account password.
  151     usenetrc:          Use netrc for authentication instead.
  152     verbose:           Print additional info to stdout.
  153     quiet:             Do not print messages to stdout.
  154     no_warnings:       Do not print out anything for warnings.
  155     forceurl:          Force printing final URL.
  156     forcetitle:        Force printing title.
  157     forceid:           Force printing ID.
  158     forcethumbnail:    Force printing thumbnail URL.
  159     forcedescription:  Force printing description.
  160     forcefilename:     Force printing final filename.
  161     forceduration:     Force printing duration.
  162     forcejson:         Force printing info_dict as JSON.
  163     dump_single_json:  Force printing the info_dict of the whole playlist
  164                        (or video) as a single JSON line.
  165     simulate:          Do not download the video files.
  166     format:            Video format code. See options.py for more information.
  167     outtmpl:           Template for output names.
  168     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
  169     restrictfilenames: Do not allow "&" and spaces in file names
  170     ignoreerrors:      Do not stop on download errors.
  171     force_generic_extractor: Force downloader to use the generic extractor
  172     nooverwrites:      Prevent overwriting files.
  173     playliststart:     Playlist item to start at.
  174     playlistend:       Playlist item to end at.
  175     playlist_items:    Specific indices of playlist to download.
  176     playlistreverse:   Download playlist items in reverse order.
  177     playlistrandom:    Download playlist items in random order.
  178     matchtitle:        Download only matching titles.
  179     rejecttitle:       Reject downloads for matching titles.
  180     logger:            Log messages to a logging.Logger instance.
  181     logtostderr:       Log messages to stderr instead of stdout.
  182     writedescription:  Write the video description to a .description file
  183     writeinfojson:     Write the video description to a .info.json file
  184     writeannotations:  Write the video annotations to a .annotations.xml file
  185     writethumbnail:    Write the thumbnail image to a file
  186     write_all_thumbnails:  Write all thumbnail formats to files
  187     writesubtitles:    Write the video subtitles to a file
  188     writeautomaticsub: Write the automatically generated subtitles to a file
  189     allsubtitles:      Downloads all the subtitles of the video
  190                        (requires writesubtitles or writeautomaticsub)
  191     listsubtitles:     Lists all available subtitles for the video
  192     subtitlesformat:   The format code for subtitles
  193     subtitleslangs:    List of languages of the subtitles to download
  194     keepvideo:         Keep the video file after post-processing
  195     daterange:         A DateRange object, download only if the upload_date is in the range.
  196     skip_download:     Skip the actual download of the video file
  197     cachedir:          Location of the cache files in the filesystem.
  198                        False to disable filesystem cache.
  199     noplaylist:        Download single video instead of a playlist if in doubt.
  200     age_limit:         An integer representing the user's age in years.
  201                        Unsuitable videos for the given age are skipped.
  202     min_views:         An integer representing the minimum view count the video
  203                        must have in order to not be skipped.
  204                        Videos without view count information are always
  205                        downloaded. None for no limit.
  206     max_views:         An integer representing the maximum view count.
  207                        Videos that are more popular than that are not
  208                        downloaded.
  209                        Videos without view count information are always
  210                        downloaded. None for no limit.
  211     download_archive:  File name of a file where all downloads are recorded.
  212                        Videos already present in the file are not downloaded
  213                        again.
  214     cookiefile:        File name where cookies should be read from and dumped to.
  215     nocheckcertificate:Do not verify SSL certificates
  216     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
  217                        At the moment, this is only supported by YouTube.
  218     proxy:             URL of the proxy server to use
  219     geo_verification_proxy:  URL of the proxy to use for IP address verification
  220                        on geo-restricted sites.
  221     socket_timeout:    Time to wait for unresponsive hosts, in seconds
  222     bidi_workaround:   Work around buggy terminals without bidirectional text
   223                        support, using fribidi
  224     debug_printtraffic:Print out sent and received HTTP traffic
  225     include_ads:       Download ads as well
  226     default_search:    Prepend this string if an input url is not valid.
  227                        'auto' for elaborate guessing
  228     encoding:          Use this encoding instead of the system-specified.
  229     extract_flat:      Do not resolve URLs, return the immediate result.
  230                        Pass in 'in_playlist' to only show this behavior for
  231                        playlist items.
  232     postprocessors:    A list of dictionaries, each with an entry
  233                        * key:  The name of the postprocessor. See
  234                                youtube_dl/postprocessor/__init__.py for a list.
  235                        as well as any further keyword arguments for the
  236                        postprocessor.
  237     progress_hooks:    A list of functions that get called on download
  238                        progress, with a dictionary with the entries
  239                        * status: One of "downloading", "error", or "finished".
  240                                  Check this first and ignore unknown values.
  241 
  242                        If status is one of "downloading", or "finished", the
  243                        following properties may also be present:
  244                        * filename: The final filename (always present)
  245                        * tmpfilename: The filename we're currently writing to
  246                        * downloaded_bytes: Bytes on disk
  247                        * total_bytes: Size of the whole file, None if unknown
  248                        * total_bytes_estimate: Guess of the eventual file size,
  249                                                None if unavailable.
  250                        * elapsed: The number of seconds since download started.
  251                        * eta: The estimated time in seconds, None if unknown
  252                        * speed: The download speed in bytes/second, None if
  253                                 unknown
  254                        * fragment_index: The counter of the currently
  255                                          downloaded video fragment.
  256                        * fragment_count: The number of fragments (= individual
  257                                          files that will be merged)
  258 
  259                        Progress hooks are guaranteed to be called at least once
  260                        (with status "finished") if the download is successful.
  261     merge_output_format: Extension to use when merging formats.
  262     fixup:             Automatically correct known faults of the file.
  263                        One of:
  264                        - "never": do nothing
  265                        - "warn": only emit a warning
  266                        - "detect_or_warn": check whether we can do anything
  267                                            about it, warn otherwise (default)
  268     source_address:    Client-side IP address to bind to.
  269     call_home:         Boolean, true iff we are allowed to contact the
  270                        youtube-dl servers for debugging.
  271     sleep_interval:    Number of seconds to sleep before each download when
  272                        used alone or a lower bound of a range for randomized
  273                        sleep before each download (minimum possible number
  274                        of seconds to sleep) when used along with
  275                        max_sleep_interval.
  276     max_sleep_interval:Upper bound of a range for randomized sleep before each
  277                        download (maximum possible number of seconds to sleep).
  278                        Must only be used along with sleep_interval.
  279                        Actual sleep time will be a random float from range
  280                        [sleep_interval; max_sleep_interval].
  281     listformats:       Print an overview of available video formats and exit.
  282     list_thumbnails:   Print a table of all thumbnails and exit.
  283     match_filter:      A function that gets called with the info_dict of
  284                        every video.
  285                        If it returns a message, the video is ignored.
  286                        If it returns None, the video is downloaded.
  287                        match_filter_func in utils.py is one example for this.
  288     no_color:          Do not emit color codes in output.
  289     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
  290                        HTTP header
  291     geo_bypass_country:
  292                        Two-letter ISO 3166-2 country code that will be used for
  293                        explicit geographic restriction bypassing via faking
  294                        X-Forwarded-For HTTP header
  295     geo_bypass_ip_block:
  296                        IP range in CIDR notation that will be used similarly to
  297                        geo_bypass_country
  298 
  299     The following options determine which downloader is picked:
  300     external_downloader: Executable of the external downloader to call.
  301                        None or unset for standard (built-in) downloader.
  302     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
  303                        if True, otherwise use ffmpeg/avconv if False, otherwise
  304                        use downloader suggested by extractor if None.
  305 
  306     The following parameters are not used by YoutubeDL itself, they are used by
  307     the downloader (see youtube_dl/downloader/common.py):
  308     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
  309     noresizebuffer, retries, continuedl, noprogress, consoletitle,
  310     xattr_set_filesize, external_downloader_args, hls_use_mpegts,
  311     http_chunk_size.
  312 
  313     The following options are used by the post processors:
  314     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
  315                        otherwise prefer ffmpeg.
  316     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
  317                        to the binary or its containing directory.
  318     postprocessor_args: A list of additional command-line arguments for the
  319                         postprocessor.
  320 
  321     The following options are used by the Youtube extractor:
  322     youtube_include_dash_manifest: If True (default), DASH manifests and related
  323                         data will be downloaded and processed by extractor.
  324                         You can reduce network I/O by disabling it if you don't
  325                         care about DASH.
  326     """
  327 
    # Meta fields with numeric values: these are kept as numbers when
    # building the output-template dict (prepare_filename) instead of
    # being stringified and run through sanitize_filename.
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level defaults. Most are rebound to per-instance values in
    # __init__; NOTE(review): the mutable ones (_ies, _pps,
    # _playlist_urls) are shared between instances until then — do not
    # mutate them in place.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _playlist_level = 0
    _playlist_urls = set()
    _screen_file = None
  347 
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options."""
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Route screen output to stderr when logtostderr is set
        # (the boolean indexes the two-element list).
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)

        def check_deprecated(param, option, suggestion):
            # Warn if a deprecated option is present; True tells the
            # caller the old value exists and may need migrating.
            if self.params.get(param) is not None:
                self.report_warning(
                    '%s is deprecated. Use %s instead.' % (option, suggestion))
                return True
            return False

        # Migrate the deprecated cn_verification_proxy value unless the
        # replacement option was given explicitly.
        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

        if params.get('bidi_workaround', False):
            # Pipe screen output through bidiv/fribidi on a pty so
            # bidirectional (RTL) text renders correctly; consumed by
            # _bidi_workaround().
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # bidiv is unavailable — fall back to fribidi.
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate configured postprocessors: 'key' names the PP
        # class, the remaining entries become keyword arguments.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
  441 
  442     def warn_if_short_id(self, argv):
  443         # short YouTube ID starting with dash?
  444         idxs = [
  445             i for i, a in enumerate(argv)
  446             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
  447         if idxs:
  448             correct_argv = (
  449                 ['youtube-dl']
  450                 + [a for i, a in enumerate(argv) if i not in idxs]
  451                 + ['--'] + [argv[i] for i in idxs]
  452             )
  453             self.report_warning(
  454                 'Long argument string detected. '
  455                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
  456                 args_to_str(correct_argv))
  457 
  458     def add_info_extractor(self, ie):
  459         """Add an InfoExtractor object to the end of the list."""
  460         self._ies.append(ie)
  461         if not isinstance(ie, type):
  462             self._ies_instances[ie.ie_key()] = ie
  463             ie.set_downloader(self)
  464 
  465     def get_info_extractor(self, ie_key):
  466         """
  467         Get an instance of an IE with name ie_key, it will try to get one from
  468         the _ies list, if there's no instance it will create a new one and add
  469         it to the extractor list.
  470         """
  471         ie = self._ies_instances.get(ie_key)
  472         if ie is None:
  473             ie = get_info_extractor(ie_key)()
  474             self.add_info_extractor(ie)
  475         return ie
  476 
  477     def add_default_info_extractors(self):
  478         """
  479         Add the InfoExtractors returned by gen_extractors to the end of the list
  480         """
  481         for ie in gen_extractor_classes():
  482             self.add_info_extractor(ie)
  483 
  484     def add_post_processor(self, pp):
  485         """Add a PostProcessor object to the end of the chain."""
  486         self._pps.append(pp)
  487         pp.set_downloader(self)
  488 
  489     def add_progress_hook(self, ph):
  490         """Add the progress hook (currently only for the file downloader)"""
  491         self._progress_hooks.append(ph)
  492 
  493     def _bidi_workaround(self, message):
  494         if not hasattr(self, '_output_channel'):
  495             return message
  496 
  497         assert hasattr(self, '_output_process')
  498         assert isinstance(message, compat_str)
  499         line_count = message.count('\n') + 1
  500         self._output_process.stdin.write((message + '\n').encode('utf-8'))
  501         self._output_process.stdin.flush()
  502         res = ''.join(self._output_channel.readline().decode('utf-8')
  503                       for _ in range(line_count))
  504         return res[:-len('\n')]
  505 
  506     def to_screen(self, message, skip_eol=False):
  507         """Print message to stdout if not in quiet mode."""
  508         return self.to_stdout(message, skip_eol, check_quiet=True)
  509 
    def _write_string(self, s, out=None):
        # Thin wrapper around utils.write_string that applies the
        # user-selected output encoding from params.
        write_string(s, out=out, encoding=self.params.get('encoding'))
  512 
  513     def to_stdout(self, message, skip_eol=False, check_quiet=False):
  514         """Print message to stdout if not in quiet mode."""
  515         if self.params.get('logger'):
  516             self.params['logger'].debug(message)
  517         elif not check_quiet or not self.params.get('quiet', False):
  518             message = self._bidi_workaround(message)
  519             terminator = ['\n', ''][skip_eol]
  520             output = message + terminator
  521 
  522             self._write_string(output, self._screen_file)
  523 
  524     def to_stderr(self, message):
  525         """Print message to stderr."""
  526         assert isinstance(message, compat_str)
  527         if self.params.get('logger'):
  528             self.params['logger'].error(message)
  529         else:
  530             message = self._bidi_workaround(message)
  531             output = message + '\n'
  532             self._write_string(output, self._err_file)
  533 
    def to_console_title(self, message):
        # Set the terminal/console window title (opt-in via the
        # consoletitle option).
        if not self.params.get('consoletitle', False):
            return
        if compat_os_name == 'nt':
            # Only attempt the Win32 call when a console window is
            # actually attached to this process.
            if ctypes.windll.kernel32.GetConsoleWindow():
                # c_wchar_p() might not be necessary if `message` is
                # already of type unicode()
                ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            # xterm-style OSC escape sequence to set the window title.
            self._write_string('\033]0;%s\007' % message, self._screen_file)
  544 
  545     def save_console_title(self):
  546         if not self.params.get('consoletitle', False):
  547             return
  548         if self.params.get('simulate', False):
  549             return
  550         if compat_os_name != 'nt' and 'TERM' in os.environ:
  551             # Save the title on stack
  552             self._write_string('\033[22;0t', self._screen_file)
  553 
  554     def restore_console_title(self):
  555         if not self.params.get('consoletitle', False):
  556             return
  557         if self.params.get('simulate', False):
  558             return
  559         if compat_os_name != 'nt' and 'TERM' in os.environ:
  560             # Restore the title from stack
  561             self._write_string('\033[23;0t', self._screen_file)
  562 
    def __enter__(self):
        # Context-manager entry: remember the terminal title so
        # __exit__ can restore it.
        self.save_console_title()
        return self
  566 
    def __exit__(self, *args):
        # Context-manager exit: restore the terminal title and, when a
        # cookie file was configured, persist cookies (including
        # session cookies) to it.
        self.restore_console_title()

        if self.params.get('cookiefile') is not None:
            self.cookiejar.save(ignore_discard=True, ignore_expires=True)
  572 
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Prefer the traceback of a wrapped original
                    # exception when the current exception carries one
                    # in an exc_info attribute (e.g. ExtractorError).
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: dump the current stack.
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Re-raise as DownloadError, attaching the most specific
            # exc_info available so callers can inspect the cause.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # ignoreerrors: record failure in the process return code
        # instead of raising.
        self._download_retcode = 1
  602 
  603     def report_warning(self, message):
  604         '''
  605         Print the message to stderr, it will be prefixed with 'WARNING:'
  606         If stderr is a tty file the 'WARNING:' will be colored
  607         '''
  608         if self.params.get('logger') is not None:
  609             self.params['logger'].warning(message)
  610         else:
  611             if self.params.get('no_warnings'):
  612                 return
  613             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
  614                 _msg_header = '\033[0;33mWARNING:\033[0m'
  615             else:
  616                 _msg_header = 'WARNING:'
  617             warning_message = '%s %s' % (_msg_header, message)
  618             self.to_stderr(warning_message)
  619 
  620     def report_error(self, message, tb=None):
  621         '''
  622         Do the same as trouble, but prefixes the message with 'ERROR:', colored
  623         in red if stderr is a tty file.
  624         '''
  625         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
  626             _msg_header = '\033[0;31mERROR:\033[0m'
  627         else:
  628             _msg_header = 'ERROR:'
  629         error_message = '%s %s' % (_msg_header, message)
  630         self.trouble(error_message, tb)
  631 
  632     def report_file_already_downloaded(self, file_name):
  633         """Report file has already been fully downloaded."""
  634         try:
  635             self.to_screen('[download] %s has already been downloaded' % file_name)
  636         except UnicodeEncodeError:
  637             self.to_screen('[download] The file has already been downloaded')
  638 
    def prepare_filename(self, info_dict):
        """Generate the output filename.

        Expands the configured output template ('outtmpl') with the fields of
        info_dict plus a few synthesized ones (epoch, autonumber, resolution),
        sanitizes the values for filesystem use, and returns the resulting
        path. Returns None when the template is invalid (ValueError).
        """
        try:
            template_dict = dict(info_dict)

            # Synthesized fields available to every template.
            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
            # Derive a human-readable 'resolution' when the extractor did not set one.
            if template_dict.get('resolution') is None:
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '%dx?' % template_dict['width']

            # Sanitize non-numeric scalar values for filename use; id-like
            # fields ('id', '*_id') get the gentler is_id treatment.
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            # Drop None values and containers — they cannot be substituted
            # meaningfully into a filename template.
            template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None and not isinstance(v, (list, tuple, dict)))
            # Fields the template references but the dict lacks resolve to the
            # NA placeholder instead of raising KeyError.
            template_dict = collections.defaultdict(lambda: self.params.get('outtmpl_na_placeholder', 'NA'), template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)

            # For fields playlist_index and autonumber convert all occurrences
            # of %(field)s to %(field)0Nd for backward compatibility
            field_size_compat_map = {
                'playlist_index': len(str(template_dict['n_entries'])),
                'autonumber': autonumber_size,
            }
            # (?<!%) skips escaped '%%(' sequences.
            FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
            mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
            if mobj:
                outtmpl = re.sub(
                    FIELD_SIZE_COMPAT_RE,
                    r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
                    outtmpl)

            # Missing numeric fields used together with integer presentation types
            # in format specification will break the argument substitution since
            # string NA placeholder is returned for missing fields. We will patch
            # output template for missing fields to meet string presentation type.
            for numeric_field in self._NUMERIC_FIELDS:
                if numeric_field not in template_dict:
                    # As of [1] format syntax is:
                    #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
                    # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
                    FORMAT_RE = r'''(?x)
                        (?<!%)
                        %
                        \({0}\)  # mapping key
                        (?:[#0\-+ ]+)?  # conversion flags (optional)
                        (?:\d+)?  # minimum field width (optional)
                        (?:\.\d+)?  # precision (optional)
                        [hlL]?  # length modifier (optional)
                        [diouxXeEfFgGcrs%]  # conversion type
                    '''
                    outtmpl = re.sub(
                        FORMAT_RE.format(numeric_field),
                        r'%({0})s'.format(numeric_field), outtmpl)

            # expand_path translates '%%' into '%' and '$$' into '$'
            # correspondingly that is not what we want since we need to keep
            # '%%' intact for template dict substitution step. Working around
            # with boundary-alike separator hack.
            sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
            outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

            # outtmpl should be expand_path'ed before template dict substitution
            # because meta fields may contain env variables we don't want to
            # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
            # title "Hello $PATH", we don't want `$PATH` to be expanded.
            filename = expand_path(outtmpl).replace(sep, '') % template_dict

            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to workaround encoding issues with subprocess on python2 @ Windows
            if sys.version_info < (3, 0) and sys.platform == 'win32':
                filename = encodeFilename(filename, True).decode(preferredencoding())
            return sanitize_path(filename)
        except ValueError as err:
            self.report_error('Error in output template: ' + error_to_compat_str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
  727 
  728     def _match_entry(self, info_dict, incomplete):
  729         """ Returns None iff the file should be downloaded """
  730 
  731         video_title = info_dict.get('title', info_dict.get('id', 'video'))
  732         if 'title' in info_dict:
  733             # This can happen when we're just evaluating the playlist
  734             title = info_dict['title']
  735             matchtitle = self.params.get('matchtitle', False)
  736             if matchtitle:
  737                 if not re.search(matchtitle, title, re.IGNORECASE):
  738                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
  739             rejecttitle = self.params.get('rejecttitle', False)
  740             if rejecttitle:
  741                 if re.search(rejecttitle, title, re.IGNORECASE):
  742                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
  743         date = info_dict.get('upload_date')
  744         if date is not None:
  745             dateRange = self.params.get('daterange', DateRange())
  746             if date not in dateRange:
  747                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
  748         view_count = info_dict.get('view_count')
  749         if view_count is not None:
  750             min_views = self.params.get('min_views')
  751             if min_views is not None and view_count < min_views:
  752                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
  753             max_views = self.params.get('max_views')
  754             if max_views is not None and view_count > max_views:
  755                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
  756         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
  757             return 'Skipping "%s" because it is age restricted' % video_title
  758         if self.in_download_archive(info_dict):
  759             return '%s has already been recorded in archive' % video_title
  760 
  761         if not incomplete:
  762             match_filter = self.params.get('match_filter')
  763             if match_filter is not None:
  764                 ret = match_filter(info_dict)
  765                 if ret is not None:
  766                     return ret
  767 
  768         return None
  769 
  770     @staticmethod
  771     def add_extra_info(info_dict, extra_info):
  772         '''Set the keys from extra_info in info dict if they are missing'''
  773         for key, value in extra_info.items():
  774             info_dict.setdefault(key, value)
  775 
  776     def extract_info(self, url, download=True, ie_key=None, extra_info={},
  777                      process=True, force_generic_extractor=False):
  778         """
  779         Return a list with a dictionary for each video extracted.
  780 
  781         Arguments:
  782         url -- URL to extract
  783 
  784         Keyword arguments:
  785         download -- whether to download videos during extraction
  786         ie_key -- extractor key hint
  787         extra_info -- dictionary containing the extra values to add to each result
  788         process -- whether to resolve all unresolved references (URLs, playlist items),
  789             must be True for download to work.
  790         force_generic_extractor -- force using the generic extractor
  791         """
  792 
  793         if not ie_key and force_generic_extractor:
  794             ie_key = 'Generic'
  795 
  796         if ie_key:
  797             ies = [self.get_info_extractor(ie_key)]
  798         else:
  799             ies = self._ies
  800 
  801         for ie in ies:
  802             if not ie.suitable(url):
  803                 continue
  804 
  805             ie = self.get_info_extractor(ie.ie_key())
  806             if not ie.working():
  807                 self.report_warning('The program functionality for this site has been marked as broken, '
  808                                     'and will probably not work.')
  809 
  810             return self.__extract_info(url, ie, download, extra_info, process)
  811         else:
  812             self.report_error('no suitable InfoExtractor for URL %s' % url)
  813 
  814     def __handle_extraction_exceptions(func):
  815         def wrapper(self, *args, **kwargs):
  816             try:
  817                 return func(self, *args, **kwargs)
  818             except GeoRestrictedError as e:
  819                 msg = e.msg
  820                 if e.countries:
  821                     msg += '\nThis video is available in %s.' % ', '.join(
  822                         map(ISO3166Utils.short2full, e.countries))
  823                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
  824                 self.report_error(msg)
  825             except ExtractorError as e:  # An error we somewhat expected
  826                 self.report_error(compat_str(e), e.format_traceback())
  827             except MaxDownloadsReached:
  828                 raise
  829             except Exception as e:
  830                 if self.params.get('ignoreerrors', False):
  831                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
  832                 else:
  833                     raise
  834         return wrapper
  835 
  836     @__handle_extraction_exceptions
  837     def __extract_info(self, url, ie, download, extra_info, process):
  838         ie_result = ie.extract(url)
  839         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
  840             return
  841         if isinstance(ie_result, list):
  842             # Backwards compatibility: old IE result format
  843             ie_result = {
  844                 '_type': 'compat_list',
  845                 'entries': ie_result,
  846             }
  847         self.add_default_extra_info(ie_result, ie, url)
  848         if process:
  849             return self.process_ie_result(ie_result, download, extra_info)
  850         else:
  851             return ie_result
  852 
  853     def add_default_extra_info(self, ie_result, ie, url):
  854         self.add_extra_info(ie_result, {
  855             'extractor': ie.IE_NAME,
  856             'webpage_url': url,
  857             'webpage_url_basename': url_basename(url),
  858             'extractor_key': ie.ie_key(),
  859         })
  860 
  861     def process_ie_result(self, ie_result, download=True, extra_info={}):
  862         """
  863         Take the result of the ie(may be modified) and resolve all unresolved
  864         references (URLs, playlist items).
  865 
  866         It will also download the videos if 'download'.
  867         Returns the resolved ie_result.
  868         """
  869         result_type = ie_result.get('_type', 'video')
  870 
  871         if result_type in ('url', 'url_transparent'):
  872             ie_result['url'] = sanitize_url(ie_result['url'])
  873             extract_flat = self.params.get('extract_flat', False)
  874             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
  875                     or extract_flat is True):
  876                 self.__forced_printings(
  877                     ie_result, self.prepare_filename(ie_result),
  878                     incomplete=True)
  879                 return ie_result
  880 
  881         if result_type == 'video':
  882             self.add_extra_info(ie_result, extra_info)
  883             return self.process_video_result(ie_result, download=download)
  884         elif result_type == 'url':
  885             # We have to add extra_info to the results because it may be
  886             # contained in a playlist
  887             return self.extract_info(ie_result['url'],
  888                                      download,
  889                                      ie_key=ie_result.get('ie_key'),
  890                                      extra_info=extra_info)
  891         elif result_type == 'url_transparent':
  892             # Use the information from the embedding page
  893             info = self.extract_info(
  894                 ie_result['url'], ie_key=ie_result.get('ie_key'),
  895                 extra_info=extra_info, download=False, process=False)
  896 
  897             # extract_info may return None when ignoreerrors is enabled and
  898             # extraction failed with an error, don't crash and return early
  899             # in this case
  900             if not info:
  901                 return info
  902 
  903             force_properties = dict(
  904                 (k, v) for k, v in ie_result.items() if v is not None)
  905             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
  906                 if f in force_properties:
  907                     del force_properties[f]
  908             new_result = info.copy()
  909             new_result.update(force_properties)
  910 
  911             # Extracted info may not be a video result (i.e.
  912             # info.get('_type', 'video') != video) but rather an url or
  913             # url_transparent. In such cases outer metadata (from ie_result)
  914             # should be propagated to inner one (info). For this to happen
  915             # _type of info should be overridden with url_transparent. This
  916             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
  917             if new_result.get('_type') == 'url':
  918                 new_result['_type'] = 'url_transparent'
  919 
  920             return self.process_ie_result(
  921                 new_result, download=download, extra_info=extra_info)
  922         elif result_type in ('playlist', 'multi_video'):
  923             # Protect from infinite recursion due to recursively nested playlists
  924             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
  925             webpage_url = ie_result['webpage_url']
  926             if webpage_url in self._playlist_urls:
  927                 self.to_screen(
  928                     '[download] Skipping already downloaded playlist: %s'
  929                     % ie_result.get('title') or ie_result.get('id'))
  930                 return
  931 
  932             self._playlist_level += 1
  933             self._playlist_urls.add(webpage_url)
  934             try:
  935                 return self.__process_playlist(ie_result, download)
  936             finally:
  937                 self._playlist_level -= 1
  938                 if not self._playlist_level:
  939                     self._playlist_urls.clear()
  940         elif result_type == 'compat_list':
  941             self.report_warning(
  942                 'Extractor %s returned a compat_list result. '
  943                 'It needs to be updated.' % ie_result.get('extractor'))
  944 
  945             def _fixup(r):
  946                 self.add_extra_info(
  947                     r,
  948                     {
  949                         'extractor': ie_result['extractor'],
  950                         'webpage_url': ie_result['webpage_url'],
  951                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
  952                         'extractor_key': ie_result['extractor_key'],
  953                     }
  954                 )
  955                 return r
  956             ie_result['entries'] = [
  957                 self.process_ie_result(_fixup(r), download, extra_info)
  958                 for r in ie_result['entries']
  959             ]
  960             return ie_result
  961         else:
  962             raise Exception('Invalid result type: %s' % result_type)
  963 
    def __process_playlist(self, ie_result, download):
        """Resolve every entry of a playlist/multi_video result.

        Honors playliststart/playlistend/playlist_items/playlistreverse/
        playlistrandom, applies per-entry match filters, and returns
        ie_result with its 'entries' replaced by the processed results.
        """
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')

        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        # Convert the 1-based user option to a 0-based slice start.
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            # Parse specs like "1-3,7,10-13" into an ordered, de-duplicated
            # sequence of 1-based indices.
            def iter_playlistitems(format):
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']

        # Pick the entries requested via --playlist-items, silently dropping
        # out-of-range indices (negative values count from the end).
        def make_playlistitems_entries(list_ie_entries):
            num_entries = len(list_ie_entries)
            return [
                list_ie_entries[i - 1] for i in playlistitems
                if -num_entries <= i - 1 < num_entries]

        def report_download(num_entries):
            self.to_screen(
                '[%s] playlist %s: Downloading %d videos' %
                (ie_result['extractor'], playlist, num_entries))

        # Materialize the requested slice. Extractors may hand us a plain
        # list, a lazily-fetched PagedList, or an arbitrary iterable.
        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = make_playlistitems_entries(ie_entries)
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            # PagedList fetches pages on demand; slice per requested item to
            # avoid downloading pages we don't need.
            if playlistitems:
                entries = []
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(
                        item - 1, item
                    ))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            report_download(n_entries)
        else:  # iterable
            if playlistitems:
                # Consume only as much of the iterable as the largest
                # requested index needs.
                entries = make_playlistitems_entries(list(itertools.islice(
                    ie_entries, 0, max(playlistitems))))
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            report_download(n_entries)

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        # Spoofed client IP for geo bypass, propagated to every entry.
        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            # Playlist-level metadata made available to each entry (e.g. for
            # the output template).
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            # Apply match filters before extraction so skipped entries cost
            # nothing (incomplete=True: entry metadata may be partial here).
            reason = self._match_entry(entry, incomplete=True)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # TODO: skip failed (empty) entries?
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
 1075 
 1076     @__handle_extraction_exceptions
 1077     def __process_iterable_entry(self, entry, download, extra_info):
 1078         return self.process_ie_result(
 1079             entry, download=download, extra_info=extra_info)
 1080 
 1081     def _build_format_filter(self, filter_spec):
 1082         " Returns a function to filter the formats according to the filter_spec "
 1083 
 1084         OPERATORS = {
 1085             '<': operator.lt,
 1086             '<=': operator.le,
 1087             '>': operator.gt,
 1088             '>=': operator.ge,
 1089             '=': operator.eq,
 1090             '!=': operator.ne,
 1091         }
 1092         operator_rex = re.compile(r'''(?x)\s*
 1093             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
 1094             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 1095             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
 1096             $
 1097             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
 1098         m = operator_rex.search(filter_spec)
 1099         if m:
 1100             try:
 1101                 comparison_value = int(m.group('value'))
 1102             except ValueError:
 1103                 comparison_value = parse_filesize(m.group('value'))
 1104                 if comparison_value is None:
 1105                     comparison_value = parse_filesize(m.group('value') + 'B')
 1106                 if comparison_value is None:
 1107                     raise ValueError(
 1108                         'Invalid value %r in format specification %r' % (
 1109                             m.group('value'), filter_spec))
 1110             op = OPERATORS[m.group('op')]
 1111 
 1112         if not m:
 1113             STR_OPERATORS = {
 1114                 '=': operator.eq,
 1115                 '^=': lambda attr, value: attr.startswith(value),
 1116                 '$=': lambda attr, value: attr.endswith(value),
 1117                 '*=': lambda attr, value: value in attr,
 1118             }
 1119             str_operator_rex = re.compile(r'''(?x)
 1120                 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id|language)
 1121                 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
 1122                 \s*(?P<value>[a-zA-Z0-9._-]+)
 1123                 \s*$
 1124                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
 1125             m = str_operator_rex.search(filter_spec)
 1126             if m:
 1127                 comparison_value = m.group('value')
 1128                 str_op = STR_OPERATORS[m.group('op')]
 1129                 if m.group('negation'):
 1130                     op = lambda attr, value: not str_op(attr, value)
 1131                 else:
 1132                     op = str_op
 1133 
 1134         if not m:
 1135             raise ValueError('Invalid filter specification %r' % filter_spec)
 1136 
 1137         def _filter(f):
 1138             actual_value = f.get(m.group('key'))
 1139             if actual_value is None:
 1140                 return m.group('none_inclusive')
 1141             return op(actual_value, comparison_value)
 1142         return _filter
 1143 
 1144     def _default_format_spec(self, info_dict, download=True):
 1145 
 1146         def can_merge():
 1147             merger = FFmpegMergerPP(self)
 1148             return merger.available and merger.can_merge()
 1149 
 1150         def prefer_best():
 1151             if self.params.get('simulate', False):
 1152                 return False
 1153             if not download:
 1154                 return False
 1155             if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
 1156                 return True
 1157             if info_dict.get('is_live'):
 1158                 return True
 1159             if not can_merge():
 1160                 return True
 1161             return False
 1162 
 1163         req_format_list = ['bestvideo+bestaudio', 'best']
 1164         if prefer_best():
 1165             req_format_list.reverse()
 1166         return '/'.join(req_format_list)
 1167 
 1168     def build_format_selector(self, format_spec):
 1169         def syntax_error(note, start):
 1170             message = (
 1171                 'Invalid format specification: '
 1172                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
 1173             return SyntaxError(message)
 1174 
 1175         PICKFIRST = 'PICKFIRST'
 1176         MERGE = 'MERGE'
 1177         SINGLE = 'SINGLE'
 1178         GROUP = 'GROUP'
 1179         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
 1180 
 1181         def _parse_filter(tokens):
 1182             filter_parts = []
 1183             for type, string, start, _, _ in tokens:
 1184                 if type == tokenize.OP and string == ']':
 1185                     return ''.join(filter_parts)
 1186                 else:
 1187                     filter_parts.append(string)
 1188 
 1189         def _remove_unused_ops(tokens):
 1190             # Remove operators that we don't use and join them with the surrounding strings
 1191             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
 1192             ALLOWED_OPS = ('/', '+', ',', '(', ')')
 1193             last_string, last_start, last_end, last_line = None, None, None, None
 1194             for type, string, start, end, line in tokens:
 1195                 if type == tokenize.OP and string == '[':
 1196                     if last_string:
 1197                         yield tokenize.NAME, last_string, last_start, last_end, last_line
 1198                         last_string = None
 1199                     yield type, string, start, end, line
 1200                     # everything inside brackets will be handled by _parse_filter
 1201                     for type, string, start, end, line in tokens:
 1202                         yield type, string, start, end, line
 1203                         if type == tokenize.OP and string == ']':
 1204                             break
 1205                 elif type == tokenize.OP and string in ALLOWED_OPS:
 1206                     if last_string:
 1207                         yield tokenize.NAME, last_string, last_start, last_end, last_line
 1208                         last_string = None
 1209                     yield type, string, start, end, line
 1210                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
 1211                     if not last_string:
 1212                         last_string = string
 1213                         last_start = start
 1214                         last_end = end
 1215                     else:
 1216                         last_string += string
 1217             if last_string:
 1218                 yield tokenize.NAME, last_string, last_start, last_end, last_line
 1219 
 1220         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
 1221             selectors = []
 1222             current_selector = None
 1223             for type, string, start, _, _ in tokens:
 1224                 # ENCODING is only defined in python 3.x
 1225                 if type == getattr(tokenize, 'ENCODING', None):
 1226                     continue
 1227                 elif type in [tokenize.NAME, tokenize.NUMBER]:
 1228                     current_selector = FormatSelector(SINGLE, string, [])
 1229                 elif type == tokenize.OP:
 1230                     if string == ')':
 1231                         if not inside_group:
 1232                             # ')' will be handled by the parentheses group
 1233                             tokens.restore_last_token()
 1234                         break
 1235                     elif inside_merge and string in ['/', ',']:
 1236                         tokens.restore_last_token()
 1237                         break
 1238                     elif inside_choice and string == ',':
 1239                         tokens.restore_last_token()
 1240                         break
 1241                     elif string == ',':
 1242                         if not current_selector:
 1243                             raise syntax_error('"," must follow a format selector', start)
 1244                         selectors.append(current_selector)
 1245                         current_selector = None
 1246                     elif string == '/':
 1247                         if not current_selector:
 1248                             raise syntax_error('"/" must follow a format selector', start)
 1249                         first_choice = current_selector
 1250                         second_choice = _parse_format_selection(tokens, inside_choice=True)
 1251                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
 1252                     elif string == '[':
 1253                         if not current_selector:
 1254                             current_selector = FormatSelector(SINGLE, 'best', [])
 1255                         format_filter = _parse_filter(tokens)
 1256                         current_selector.filters.append(format_filter)
 1257                     elif string == '(':
 1258                         if current_selector:
 1259                             raise syntax_error('Unexpected "("', start)
 1260                         group = _parse_format_selection(tokens, inside_group=True)
 1261                         current_selector = FormatSelector(GROUP, group, [])
 1262                     elif string == '+':
 1263                         if inside_merge:
 1264                             raise syntax_error('Unexpected "+"', start)
 1265                         video_selector = current_selector
 1266                         audio_selector = _parse_format_selection(tokens, inside_merge=True)
 1267                         if not video_selector or not audio_selector:
 1268                             raise syntax_error('"+" must be between two format selectors', start)
 1269                         current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
 1270                     else:
 1271                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
 1272                 elif type == tokenize.ENDMARKER:
 1273                     break
 1274             if current_selector:
 1275                 selectors.append(current_selector)
 1276             return selectors
 1277 
        def _build_selector_function(selector):
            # Compile a parsed selector tree (a FormatSelector node or a list
            # of them) into a function mapping a context dict
            # ({'formats': [...], 'incomplete_formats': bool}) to an iterable
            # of the selected format dicts.
            if isinstance(selector, list):
                # Top-level comma list: chain the results of each selector.
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        for format in f(ctx):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                # Parenthesized group: behaves exactly like its contents.
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                # 'a/b' fallback: the first alternative that yields any
                # format wins.
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                # A single name: 'all', 'best'/'worst', 'bestaudio'/...,
                # a known file extension, or an explicit format_id.
                format_spec = selector.selector

                def selector_function(ctx):

                    def best_worst(fmts, fmt_spec='best'):
                        # Pick the best/worst combined (audio+video) format.
                        # Relies on fmts being sorted worst-to-best.
                        format_idx = 0 if fmt_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in fmts
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            return audiovideo_formats[format_idx]
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) we will fallback to best/worst
                        # {video,audio}-only format
                        elif ctx['incomplete_formats']:
                            return fmts[format_idx]

                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    if format_spec == 'all':
                        pass
                    elif format_spec in ('best', 'worst', None):
                        formats = best_worst(formats, format_spec)
                    elif format_spec in ('bestaudio', 'worstaudio'):
                        # Audio-only formats carry vcodec == 'none'.
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        formats = audio_formats[:1] if format_spec == 'worstaudio' else audio_formats[-1:]
                    elif format_spec in ('bestvideo', 'worstvideo'):
                        # Video-only formats carry acodec == 'none'.
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        formats = video_formats[:1] if format_spec == 'worstvideo' else video_formats[-1:]
                    else:
                        # Either a file extension or a literal format_id;
                        # best_worst() then picks from the matches.
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        formats = best_worst(list(filter(filter_f, formats)))
                    # formats may now be a list, a single dict or None;
                    # variadic() normalizes it to an iterable.
                    for f in variadic(formats or []):
                        yield f
            elif selector.type == MERGE:
                # 'video+audio' merge: pair every video candidate with every
                # audio candidate.
                def _merge(formats_info):
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    # Formats must be opposite (video+audio)
                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                        self.report_error(
                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                            % (format_1, format_2))
                        return
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    # Synthesized format dict: video attributes come from the
                    # first format, audio attributes from the second.
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # deepcopy the context so the two sides can filter their
                    # format lists independently.
                    for pair in itertools.product(
                            video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
                        yield _merge(pair)

            # Wrap the selector with its [..] filters: each filter prunes the
            # format list before the selector runs.
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector
 1394 
        # Tokenize the format spec with Python's own tokenizer; a TokenError
        # indicates unbalanced brackets/parentheses in the user's spec.
        stream = io.BytesIO(format_spec.encode('utf-8'))
        try:
            tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
        except tokenize.TokenError:
            raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

        class TokenIterator(object):
            # Iterator over the token list with single-step pushback
            # (restore_last_token), as required by the recursive-descent
            # parser in _parse_format_selection.
            def __init__(self, tokens):
                self.tokens = tokens
                self.counter = 0

            def __iter__(self):
                return self

            def __next__(self):
                if self.counter >= len(self.tokens):
                    raise StopIteration()
                value = self.tokens[self.counter]
                self.counter += 1
                return value

            # Python 2 iterator protocol compatibility.
            next = __next__

            def restore_last_token(self):
                # Push the most recently consumed token back for re-reading.
                self.counter -= 1

        # Parse the token stream and compile it into a selector function.
        parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
        return _build_selector_function(parsed_selector)
 1423 
 1424     def _calc_headers(self, info_dict):
 1425         res = std_headers.copy()
 1426 
 1427         add_headers = info_dict.get('http_headers')
 1428         if add_headers:
 1429             res.update(add_headers)
 1430 
 1431         cookies = self._calc_cookies(info_dict)
 1432         if cookies:
 1433             res['Cookie'] = cookies
 1434 
 1435         if 'X-Forwarded-For' not in res:
 1436             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
 1437             if x_forwarded_for_ip:
 1438                 res['X-Forwarded-For'] = x_forwarded_for_ip
 1439 
 1440         return res
 1441 
 1442     def _calc_cookies(self, info_dict):
 1443         pr = sanitized_Request(info_dict['url'])
 1444         self.cookiejar.add_cookie_header(pr)
 1445         return pr.get_header('Cookie')
 1446 
    def process_video_result(self, info_dict, download=True):
        """Sanitize an extractor result of type 'video', select the
        requested format(s) and, when download is True, hand each selected
        format to process_info().

        info_dict is mutated in place and returned, updated with the last
        selected format (kept for backwards compatibility).

        Raises ExtractorError when mandatory fields are missing, no
        well-formed format exists, or the requested format is unavailable.
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        def report_force_conversion(field, field_not, conversion):
            # Warn that the extractor returned a field with the wrong type.
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string field to compat_str (with a warning).
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce every known numeric field to int (with a warning).
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            # Promote a single 'thumbnail' into the 'thumbnails' list.
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            # Sort ascending by preference/size so [-1] is the best one.
            thumbnails.sort(key=lambda t: (
                t.get('preference') if t.get('preference') is not None else -1,
                t.get('width') if t.get('width') is not None else -1,
                t.get('height') if t.get('height') is not None else -1,
                t.get('id') if t.get('id') is not None else '', t.get('url')))
            for i, t in enumerate(thumbnails):
                t['url'] = sanitize_url(t['url'])
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    # Fall back to the positional index as the thumbnail id.
                    t['id'] = '%d' % i

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            # Default to the best (last after sorting) thumbnail.
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive the %Y%m%d date fields from their timestamp counterparts
        # when the extractor supplied only the latter.
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = compat_str(upload_date.strftime('%Y%m%d'))
                except (ValueError, OverflowError, OSError):
                    pass

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle/caption URLs and fill in missing extensions.
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        def is_wellformed(f):
            # A format without a usable 'url' cannot be downloaded.
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                # Coerce a bytes URL to text.
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats or []))

        if not formats:
            raise ExtractorError('No video formats found!')

        # format_id -> list of formats sharing that id, used below to
        # disambiguate duplicates.
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                # Human-readable description used e.g. by --list-formats.
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = self._default_format_spec(info_dict, download=download)
            if self.params.get('verbose'):
                self._write_string('[debug] Default format spec: %s\n' % req_format)

        format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                # Merge the selected format into a copy of the video info and
                # process (download) it.
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
 1687 
 1688     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
 1689         """Select the requested subtitles and their format"""
 1690         available_subs = {}
 1691         if normal_subtitles and self.params.get('writesubtitles'):
 1692             available_subs.update(normal_subtitles)
 1693         if automatic_captions and self.params.get('writeautomaticsub'):
 1694             for lang, cap_info in automatic_captions.items():
 1695                 if lang not in available_subs:
 1696                     available_subs[lang] = cap_info
 1697 
 1698         if (not self.params.get('writesubtitles') and not
 1699                 self.params.get('writeautomaticsub') or not
 1700                 available_subs):
 1701             return None
 1702 
 1703         if self.params.get('allsubtitles', False):
 1704             requested_langs = available_subs.keys()
 1705         else:
 1706             if self.params.get('subtitleslangs', False):
 1707                 requested_langs = self.params.get('subtitleslangs')
 1708             elif 'en' in available_subs:
 1709                 requested_langs = ['en']
 1710             else:
 1711                 requested_langs = [list(available_subs.keys())[0]]
 1712 
 1713         formats_query = self.params.get('subtitlesformat', 'best')
 1714         formats_preference = formats_query.split('/') if formats_query else []
 1715         subs = {}
 1716         for lang in requested_langs:
 1717             formats = available_subs.get(lang)
 1718             if formats is None:
 1719                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
 1720                 continue
 1721             for ext in formats_preference:
 1722                 if ext == 'best':
 1723                     f = formats[-1]
 1724                     break
 1725                 matches = list(filter(lambda f: f['ext'] == ext, formats))
 1726                 if matches:
 1727                     f = matches[-1]
 1728                     break
 1729             else:
 1730                 f = formats[-1]
 1731                 self.report_warning(
 1732                     'No subtitle format found matching "%s" for language %s, '
 1733                     'using %s' % (formats_query, lang, f['ext']))
 1734             subs[lang] = f
 1735         return subs
 1736 
 1737     def __forced_printings(self, info_dict, filename, incomplete):
 1738         def print_mandatory(field):
 1739             if (self.params.get('force%s' % field, False)
 1740                     and (not incomplete or info_dict.get(field) is not None)):
 1741                 self.to_stdout(info_dict[field])
 1742 
 1743         def print_optional(field):
 1744             if (self.params.get('force%s' % field, False)
 1745                     and info_dict.get(field) is not None):
 1746                 self.to_stdout(info_dict[field])
 1747 
 1748         print_mandatory('title')
 1749         print_mandatory('id')
 1750         if self.params.get('forceurl', False) and not incomplete:
 1751             if info_dict.get('requested_formats') is not None:
 1752                 for f in info_dict['requested_formats']:
 1753                     self.to_stdout(f['url'] + f.get('play_path', ''))
 1754             else:
 1755                 # For RTMP URLs, also include the playpath
 1756                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
 1757         print_optional('thumbnail')
 1758         print_optional('description')
 1759         if self.params.get('forcefilename', False) and filename is not None:
 1760             self.to_stdout(filename)
 1761         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
 1762             self.to_stdout(formatSeconds(info_dict['duration']))
 1763         print_mandatory('format')
 1764         if self.params.get('forcejson', False):
 1765             self.to_stdout(json.dumps(info_dict))
 1766 
 1767     def process_info(self, info_dict):
 1768         """Process a single resolved IE result."""
 1769 
 1770         assert info_dict.get('_type', 'video') == 'video'
 1771 
 1772         max_downloads = int_or_none(self.params.get('max_downloads')) or float('inf')
 1773         if self._num_downloads >= max_downloads:
 1774             raise MaxDownloadsReached()
 1775 
 1776         # TODO: backward compatibility, to be removed
 1777         info_dict['fulltitle'] = info_dict['title']
 1778 
 1779         if 'format' not in info_dict:
 1780             info_dict['format'] = info_dict['ext']
 1781 
 1782         reason = self._match_entry(info_dict, incomplete=False)
 1783         if reason is not None:
 1784             self.to_screen('[download] ' + reason)
 1785             return
 1786 
 1787         self._num_downloads += 1
 1788 
 1789         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
 1790 
 1791         # Forced printings
 1792         self.__forced_printings(info_dict, filename, incomplete=False)
 1793 
 1794         # Do nothing else if in simulate mode
 1795         if self.params.get('simulate', False):
 1796             return
 1797 
 1798         if filename is None:
 1799             return
 1800 
 1801         def ensure_dir_exists(path):
 1802             try:
 1803                 dn = os.path.dirname(path)
 1804                 if dn and not os.path.exists(dn):
 1805                     os.makedirs(dn)
 1806                 return True
 1807             except (OSError, IOError) as err:
 1808                 if isinstance(err, OSError) and err.errno == errno.EEXIST:
 1809                     return True
 1810                 self.report_error('unable to create directory ' + error_to_compat_str(err))
 1811                 return False
 1812 
 1813         if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
 1814             return
 1815 
 1816         if self.params.get('writedescription', False):
 1817             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
 1818             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
 1819                 self.to_screen('[info] Video description is already present')
 1820             elif info_dict.get('description') is None:
 1821                 self.report_warning('There\'s no description to write.')
 1822             else:
 1823                 try:
 1824                     self.to_screen('[info] Writing video description to: ' + descfn)
 1825                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 1826                         descfile.write(info_dict['description'])
 1827                 except (OSError, IOError):
 1828                     self.report_error('Cannot write description file ' + descfn)
 1829                     return
 1830 
 1831         if self.params.get('writeannotations', False):
 1832             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
 1833             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
 1834                 self.to_screen('[info] Video annotations are already present')
 1835             elif not info_dict.get('annotations'):
 1836                 self.report_warning('There are no annotations to write.')
 1837             else:
 1838                 try:
 1839                     self.to_screen('[info] Writing video annotations to: ' + annofn)
 1840                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 1841                         annofile.write(info_dict['annotations'])
 1842                 except (KeyError, TypeError):
 1843                     self.report_warning('There are no annotations to write.')
 1844                 except (OSError, IOError):
 1845                     self.report_error('Cannot write annotations file: ' + annofn)
 1846                     return
 1847 
 1848         subtitles_are_requested = any([self.params.get('writesubtitles', False),
 1849                                        self.params.get('writeautomaticsub')])
 1850 
 1851         if subtitles_are_requested and info_dict.get('requested_subtitles'):
 1852             # subtitles download errors are already managed as troubles in relevant IE
 1853             # that way it will silently go on when used with unsupporting IE
 1854             subtitles = info_dict['requested_subtitles']
 1855             ie = self.get_info_extractor(info_dict['extractor_key'])
 1856             for sub_lang, sub_info in subtitles.items():
 1857                 sub_format = sub_info['ext']
 1858                 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
 1859                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
 1860                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
 1861                 else:
 1862                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
 1863                     if sub_info.get('data') is not None:
 1864                         try:
 1865                             # Use newline='' to prevent conversion of newline characters
 1866                             # See https://github.com/ytdl-org/youtube-dl/issues/10268
 1867                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
 1868                                 subfile.write(sub_info['data'])
 1869                         except (OSError, IOError):
 1870                             self.report_error('Cannot write subtitles file ' + sub_filename)
 1871                             return
 1872                     else:
 1873                         try:
 1874                             sub_data = ie._request_webpage(
 1875                                 sub_info['url'], info_dict['id'], note=False).read()
 1876                             with io.open(encodeFilename(sub_filename), 'wb') as subfile:
 1877                                 subfile.write(sub_data)
 1878                         except (ExtractorError, IOError, OSError, ValueError) as err:
 1879                             self.report_warning('Unable to download subtitle for "%s": %s' %
 1880                                                 (sub_lang, error_to_compat_str(err)))
 1881                             continue
 1882 
 1883         if self.params.get('writeinfojson', False):
 1884             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
 1885             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
 1886                 self.to_screen('[info] Video description metadata is already present')
 1887             else:
 1888                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
 1889                 try:
 1890                     write_json_file(self.filter_requested_info(info_dict), infofn)
 1891                 except (OSError, IOError):
 1892                     self.report_error('Cannot write metadata to JSON file ' + infofn)
 1893                     return
 1894 
 1895         self._write_thumbnails(info_dict, filename)
 1896 
 1897         if not self.params.get('skip_download', False):
 1898             try:
 1899                 def checked_get_suitable_downloader(info_dict, params):
 1900                     ed_args = params.get('external_downloader_args')
 1901                     dler = get_suitable_downloader(info_dict, params)
 1902                     if ed_args and not params.get('external_downloader_args'):
 1903                         # external_downloader_args was cleared because external_downloader was rejected
 1904                         self.report_warning('Requested external downloader cannot be used: '
 1905                                             'ignoring --external-downloader-args.')
 1906                     return dler
 1907 
 1908                 def dl(name, info):
 1909                     fd = checked_get_suitable_downloader(info, self.params)(self, self.params)
 1910                     for ph in self._progress_hooks:
 1911                         fd.add_progress_hook(ph)
 1912                     if self.params.get('verbose'):
 1913                         self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
 1914                     return fd.download(name, info)
 1915 
 1916                 if info_dict.get('requested_formats') is not None:
 1917                     downloaded = []
 1918                     success = True
 1919                     merger = FFmpegMergerPP(self)
 1920                     if not merger.available:
 1921                         postprocessors = []
 1922                         self.report_warning('You have requested multiple '
 1923                                             'formats but ffmpeg or avconv are not installed.'
 1924                                             ' The formats won\'t be merged.')
 1925                     else:
 1926                         postprocessors = [merger]
 1927 
 1928                     def compatible_formats(formats):
 1929                         video, audio = formats
 1930                         # Check extension
 1931                         video_ext, audio_ext = video.get('ext'), audio.get('ext')
 1932                         if video_ext and audio_ext:
 1933                             COMPATIBLE_EXTS = (
 1934                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
 1935                                 ('webm')
 1936                             )
 1937                             for exts in COMPATIBLE_EXTS:
 1938                                 if video_ext in exts and audio_ext in exts:
 1939                                     return True
 1940                         # TODO: Check acodec/vcodec
 1941                         return False
 1942 
 1943                     filename_real_ext = os.path.splitext(filename)[1][1:]
 1944                     filename_wo_ext = (
 1945                         os.path.splitext(filename)[0]
 1946                         if filename_real_ext == info_dict['ext']
 1947                         else filename)
 1948                     requested_formats = info_dict['requested_formats']
 1949                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
 1950                         info_dict['ext'] = 'mkv'
 1951                         self.report_warning(
 1952                             'Requested formats are incompatible for merge and will be merged into mkv.')
 1953                     # Ensure filename always has a correct extension for successful merge
 1954                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
 1955                     if os.path.exists(encodeFilename(filename)):
 1956                         self.to_screen(
 1957                             '[download] %s has already been downloaded and '
 1958                             'merged' % filename)
 1959                     else:
 1960                         for f in requested_formats:
 1961                             new_info = dict(info_dict)
 1962                             new_info.update(f)
 1963                             fname = prepend_extension(
 1964                                 self.prepare_filename(new_info),
 1965                                 'f%s' % f['format_id'], new_info['ext'])
 1966                             if not ensure_dir_exists(fname):
 1967                                 return
 1968                             downloaded.append(fname)
 1969                             partial_success = dl(fname, new_info)
 1970                             success = success and partial_success
 1971                         info_dict['__postprocessors'] = postprocessors
 1972                         info_dict['__files_to_merge'] = downloaded
 1973                 else:
 1974                     # Just a single file
 1975                     success = dl(filename, info_dict)
 1976             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 1977                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
 1978                 return
 1979             except (OSError, IOError) as err:
 1980                 raise UnavailableVideoError(err)
 1981             except (ContentTooShortError, ) as err:
 1982                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 1983                 return
 1984 
 1985             if success and filename != '-':
 1986                 # Fixup content
 1987                 fixup_policy = self.params.get('fixup')
 1988                 if fixup_policy is None:
 1989                     fixup_policy = 'detect_or_warn'
 1990 
 1991                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
 1992 
 1993                 stretched_ratio = info_dict.get('stretched_ratio')
 1994                 if stretched_ratio is not None and stretched_ratio != 1:
 1995                     if fixup_policy == 'warn':
 1996                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
 1997                             info_dict['id'], stretched_ratio))
 1998                     elif fixup_policy == 'detect_or_warn':
 1999                         stretched_pp = FFmpegFixupStretchedPP(self)
 2000                         if stretched_pp.available:
 2001                             info_dict.setdefault('__postprocessors', [])
 2002                             info_dict['__postprocessors'].append(stretched_pp)
 2003                         else:
 2004                             self.report_warning(
 2005                                 '%s: Non-uniform pixel ratio (%s). %s'
 2006                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
 2007                     else:
 2008                         assert fixup_policy in ('ignore', 'never')
 2009 
 2010                 if (info_dict.get('requested_formats') is None
 2011                         and info_dict.get('container') == 'm4a_dash'):
 2012                     if fixup_policy == 'warn':
 2013                         self.report_warning(
 2014                             '%s: writing DASH m4a. '
 2015                             'Only some players support this container.'
 2016                             % info_dict['id'])
 2017                     elif fixup_policy == 'detect_or_warn':
 2018                         fixup_pp = FFmpegFixupM4aPP(self)
 2019                         if fixup_pp.available:
 2020                             info_dict.setdefault('__postprocessors', [])
 2021                             info_dict['__postprocessors'].append(fixup_pp)
 2022                         else:
 2023                             self.report_warning(
 2024                                 '%s: writing DASH m4a. '
 2025                                 'Only some players support this container. %s'
 2026                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
 2027                     else:
 2028                         assert fixup_policy in ('ignore', 'never')
 2029 
 2030                 if (info_dict.get('protocol') == 'm3u8_native'
 2031                         or info_dict.get('protocol') == 'm3u8'
 2032                         and self.params.get('hls_prefer_native')):
 2033                     if fixup_policy == 'warn':
 2034                         self.report_warning('%s: malformed AAC bitstream detected.' % (
 2035                             info_dict['id']))
 2036                     elif fixup_policy == 'detect_or_warn':
 2037                         fixup_pp = FFmpegFixupM3u8PP(self)
 2038                         if fixup_pp.available:
 2039                             info_dict.setdefault('__postprocessors', [])
 2040                             info_dict['__postprocessors'].append(fixup_pp)
 2041                         else:
 2042                             self.report_warning(
 2043                                 '%s: malformed AAC bitstream detected. %s'
 2044                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
 2045                     else:
 2046                         assert fixup_policy in ('ignore', 'never')
 2047 
 2048                 try:
 2049                     self.post_process(filename, info_dict)
 2050                 except (PostProcessingError) as err:
 2051                     self.report_error('postprocessing: %s' % error_to_compat_str(err))
 2052                     return
 2053                 self.record_download_archive(info_dict)
 2054                 # avoid possible nugatory search for further items (PR #26638)
 2055                 if self._num_downloads >= max_downloads:
 2056                     raise MaxDownloadsReached()
 2057 
 2058     def download(self, url_list):
 2059         """Download a given list of URLs."""
 2060         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 2061         if (len(url_list) > 1
 2062                 and outtmpl != '-'
 2063                 and '%' not in outtmpl
 2064                 and self.params.get('max_downloads') != 1):
 2065             raise SameFileError(outtmpl)
 2066 
 2067         for url in url_list:
 2068             try:
 2069                 # It also downloads the videos
 2070                 res = self.extract_info(
 2071                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
 2072             except UnavailableVideoError:
 2073                 self.report_error('unable to download video')
 2074             except MaxDownloadsReached:
 2075                 self.to_screen('[info] Maximum number of downloaded files reached.')
 2076                 raise
 2077             else:
 2078                 if self.params.get('dump_single_json', False):
 2079                     self.to_stdout(json.dumps(res))
 2080 
 2081         return self._download_retcode
 2082 
 2083     def download_with_info_file(self, info_filename):
 2084         with contextlib.closing(fileinput.FileInput(
 2085                 [info_filename], mode='r',
 2086                 openhook=fileinput.hook_encoded('utf-8'))) as f:
 2087             # FileInput doesn't have a read method, we can't call json.load
 2088             info = self.filter_requested_info(json.loads('\n'.join(f)))
 2089         try:
 2090             self.process_ie_result(info, download=True)
 2091         except DownloadError:
 2092             webpage_url = info.get('webpage_url')
 2093             if webpage_url is not None:
 2094                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
 2095                 return self.download([webpage_url])
 2096             else:
 2097                 raise
 2098         return self._download_retcode
 2099 
 2100     @staticmethod
 2101     def filter_requested_info(info_dict):
 2102         return dict(
 2103             (k, v) for k, v in info_dict.items()
 2104             if k not in ['requested_formats', 'requested_subtitles'])
 2105 
 2106     def post_process(self, filename, ie_info):
 2107         """Run all the postprocessors on the given file."""
 2108         info = dict(ie_info)
 2109         info['filepath'] = filename
 2110         pps_chain = []
 2111         if ie_info.get('__postprocessors') is not None:
 2112             pps_chain.extend(ie_info['__postprocessors'])
 2113         pps_chain.extend(self._pps)
 2114         for pp in pps_chain:
 2115             files_to_delete = []
 2116             try:
 2117                 files_to_delete, info = pp.run(info)
 2118             except PostProcessingError as e:
 2119                 self.report_error(e.msg)
 2120             if files_to_delete and not self.params.get('keepvideo', False):
 2121                 for old_filename in files_to_delete:
 2122                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
 2123                     try:
 2124                         os.remove(encodeFilename(old_filename))
 2125                     except (IOError, OSError):
 2126                         self.report_warning('Unable to remove downloaded original file')
 2127 
 2128     def _make_archive_id(self, info_dict):
 2129         video_id = info_dict.get('id')
 2130         if not video_id:
 2131             return
 2132         # Future-proof against any change in case
 2133         # and backwards compatibility with prior versions
 2134         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
 2135         if extractor is None:
 2136             url = str_or_none(info_dict.get('url'))
 2137             if not url:
 2138                 return
 2139             # Try to find matching extractor for the URL and take its ie_key
 2140             for ie in self._ies:
 2141                 if ie.suitable(url):
 2142                     extractor = ie.ie_key()
 2143                     break
 2144             else:
 2145                 return
 2146         return extractor.lower() + ' ' + video_id
 2147 
 2148     def in_download_archive(self, info_dict):
 2149         fn = self.params.get('download_archive')
 2150         if fn is None:
 2151             return False
 2152 
 2153         vid_id = self._make_archive_id(info_dict)
 2154         if not vid_id:
 2155             return False  # Incomplete video information
 2156 
 2157         try:
 2158             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 2159                 for line in archive_file:
 2160                     if line.strip() == vid_id:
 2161                         return True
 2162         except IOError as ioe:
 2163             if ioe.errno != errno.ENOENT:
 2164                 raise
 2165         return False
 2166 
 2167     def record_download_archive(self, info_dict):
 2168         fn = self.params.get('download_archive')
 2169         if fn is None:
 2170             return
 2171         vid_id = self._make_archive_id(info_dict)
 2172         assert vid_id
 2173         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
 2174             archive_file.write(vid_id + '\n')
 2175 
 2176     @staticmethod
 2177     def format_resolution(format, default='unknown'):
 2178         if format.get('vcodec') == 'none':
 2179             return 'audio only'
 2180         if format.get('resolution') is not None:
 2181             return format['resolution']
 2182         if format.get('height') is not None:
 2183             if format.get('width') is not None:
 2184                 res = '%sx%s' % (format['width'], format['height'])
 2185             else:
 2186                 res = '%sp' % format['height']
 2187         elif format.get('width') is not None:
 2188             res = '%dx?' % format['width']
 2189         else:
 2190             res = default
 2191         return res
 2192 
    def _format_note(self, fdict):
        """Return a short human-readable description of a format dict.

        Concatenates whichever metadata is present (language, note,
        bitrates, container, codecs, fps, sample rate, filesize) into the
        'note' column shown by the format listing.  Separators vary per
        field on purpose (' ', ', ', or a glued '@'), so field order and
        the res-emptiness checks below are significant.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s] ' % fdict['language']
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None
                and fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            if fdict.get('vbr') is not None:
                # '@' glues the codec name onto the video bitrate added below
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # Both bitrates known but no video codec name
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            if res:
                res += ', '
            # '~' marks the size as an estimate
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
 2248 
 2249     def list_formats(self, info_dict):
 2250         formats = info_dict.get('formats', [info_dict])
 2251         table = [
 2252             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
 2253             for f in formats
 2254             if f.get('preference') is None or f['preference'] >= -1000]
 2255         if len(formats) > 1:
 2256             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
 2257 
 2258         header_line = ['format code', 'extension', 'resolution', 'note']
 2259         self.to_screen(
 2260             '[info] Available formats for %s:\n%s' %
 2261             (info_dict['id'], render_table(header_line, table)))
 2262 
 2263     def list_thumbnails(self, info_dict):
 2264         thumbnails = info_dict.get('thumbnails')
 2265         if not thumbnails:
 2266             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
 2267             return
 2268 
 2269         self.to_screen(
 2270             '[info] Thumbnails for %s:' % info_dict['id'])
 2271         self.to_screen(render_table(
 2272             ['ID', 'width', 'height', 'URL'],
 2273             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
 2274 
 2275     def list_subtitles(self, video_id, subtitles, name='subtitles'):
 2276         if not subtitles:
 2277             self.to_screen('%s has no %s' % (video_id, name))
 2278             return
 2279         self.to_screen(
 2280             'Available %s for %s:' % (name, video_id))
 2281         self.to_screen(render_table(
 2282             ['Language', 'formats'],
 2283             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
 2284                 for lang, formats in subtitles.items()]))
 2285 
 2286     def urlopen(self, req):
 2287         """ Start an HTTP download """
 2288         if isinstance(req, compat_basestring):
 2289             req = sanitized_Request(req)
 2290         return self._opener.open(req, timeout=self._socket_timeout)
 2291 
    def print_debug_header(self):
        """Write debugging information (versions, encodings, proxy map) to
        the debug output.  No-op unless the 'verbose' option is set."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled' + '\n')
        try:
            # Report the git revision when running from a checkout; any
            # failure (no git, not a repo) is deliberately ignored.
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = process_communicate_or_kill(sp)
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            try:
                # Python 2 only: clear the saved exception state
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # Append the interpreter's own version on PyPy, where it differs
            # from the emulated CPython version
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s) - %s\n' % (
            platform.python_version(), python_implementation(),
            platform_name()))

        # Versions of external helper programs (ffmpeg/avconv, rtmpdump, ...)
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the effective proxies from every handler that carries them
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # Opt-in only: contact yt-dl.org to show the public IP and check
            # whether a newer release is available
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
 2367 
    def _setup_opener(self):
        """Build the urllib opener used for all HTTP requests and store it
        as self._opener (also sets self._socket_timeout and self.cookiejar).

        The opener combines cookie handling, per-request proxies, the HTTPS
        handler, redirect handling, data: URL support and a disabled file://
        handler.  The handler order passed to build_opener is significant.
        """
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout is 600 seconds when not configured
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            # No cookie file configured: keep cookies in memory only
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            opts_cookiefile = expand_path(opts_cookiefile)
            self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load(ignore_discard=True, ignore_expires=True)

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            # An explicit empty --proxy disables proxying entirely
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # Fall back to the environment's proxy settings
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        redirect_handler = YoutubeDLRedirectHandler()
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
 2420 
 2421     def encode(self, s):
 2422         if isinstance(s, bytes):
 2423             return s  # Already encoded
 2424 
 2425         try:
 2426             return s.encode(self.get_encoding())
 2427         except UnicodeEncodeError as err:
 2428             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
 2429             raise
 2430 
 2431     def get_encoding(self):
 2432         encoding = self.params.get('encoding')
 2433         if encoding is None:
 2434             encoding = preferredencoding()
 2435         return encoding
 2436 
    def _write_thumbnails(self, info_dict, filename):
        """Download thumbnail image(s) next to the video file.

        With 'writethumbnail' only the last entry of info_dict['thumbnails']
        is fetched (presumably the preferred one — ordering is established
        elsewhere); with 'write_all_thumbnails' every entry is.  The path of
        each written file is stored back into t['filename'].  Download
        failures are reported as warnings, not errors.
        """
        if self.params.get('writethumbnail', False):
            thumbnails = info_dict.get('thumbnails')
            if thumbnails:
                thumbnails = [thumbnails[-1]]
        elif self.params.get('write_all_thumbnails', False):
            thumbnails = info_dict.get('thumbnails')
        else:
            # Neither option enabled: nothing to do
            return

        if not thumbnails:
            # No thumbnails present, so return immediately
            return

        for t in thumbnails:
            thumb_ext = determine_ext(t['url'], 'jpg')
            # Disambiguate filenames and messages only when writing several
            suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
            thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
            t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))

            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail %sis already present' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail %s...' %
                               (info_dict['extractor'], info_dict['id'], thumb_display_id))
                try:
                    uf = self.urlopen(t['url'])
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # Thumbnail failures are non-fatal: warn and continue
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (t['url'], error_to_compat_str(err)))

Generated by cgit