summaryrefslogtreecommitdiff
path: root/youtube_dl/YoutubeDL.py
blob: 8e85465962e633f034c88a3c13923c330752f189 (plain)
    1 #!/usr/bin/env python
    2 # coding: utf-8
    3 
    4 from __future__ import absolute_import, unicode_literals
    5 
    6 import collections
    7 import contextlib
    8 import copy
    9 import datetime
   10 import errno
   11 import fileinput
   12 import io
   13 import itertools
   14 import json
   15 import locale
   16 import operator
   17 import os
   18 import platform
   19 import re
   20 import shutil
   21 import subprocess
   22 import socket
   23 import sys
   24 import time
   25 import tokenize
   26 import traceback
   27 import random
   28 
   29 from string import ascii_letters
   30 
   31 from .compat import (
   32     compat_basestring,
   33     compat_cookiejar,
   34     compat_get_terminal_size,
   35     compat_http_client,
   36     compat_kwargs,
   37     compat_numeric_types,
   38     compat_os_name,
   39     compat_str,
   40     compat_tokenize_tokenize,
   41     compat_urllib_error,
   42     compat_urllib_request,
   43     compat_urllib_request_DataHandler,
   44 )
   45 from .utils import (
   46     age_restricted,
   47     args_to_str,
   48     ContentTooShortError,
   49     date_from_str,
   50     DateRange,
   51     DEFAULT_OUTTMPL,
   52     determine_ext,
   53     determine_protocol,
   54     DownloadError,
   55     encode_compat_str,
   56     encodeFilename,
   57     error_to_compat_str,
   58     expand_path,
   59     ExtractorError,
   60     format_bytes,
   61     formatSeconds,
   62     GeoRestrictedError,
   63     int_or_none,
   64     ISO3166Utils,
   65     locked_file,
   66     make_HTTPS_handler,
   67     MaxDownloadsReached,
   68     orderedSet,
   69     PagedList,
   70     parse_filesize,
   71     PerRequestProxyHandler,
   72     platform_name,
   73     PostProcessingError,
   74     preferredencoding,
   75     prepend_extension,
   76     process_communicate_or_kill,
   77     register_socks_protocols,
   78     render_table,
   79     replace_extension,
   80     SameFileError,
   81     sanitize_filename,
   82     sanitize_path,
   83     sanitize_url,
   84     sanitized_Request,
   85     std_headers,
   86     str_or_none,
   87     subtitles_filename,
   88     UnavailableVideoError,
   89     url_basename,
   90     version_tuple,
   91     write_json_file,
   92     write_string,
   93     YoutubeDLCookieJar,
   94     YoutubeDLCookieProcessor,
   95     YoutubeDLHandler,
   96     YoutubeDLRedirectHandler,
   97 )
   98 from .cache import Cache
   99 from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
  100 from .extractor.openload import PhantomJSwrapper
  101 from .downloader import get_suitable_downloader
  102 from .downloader.rtmp import rtmpdump_version
  103 from .postprocessor import (
  104     FFmpegFixupM3u8PP,
  105     FFmpegFixupM4aPP,
  106     FFmpegFixupStretchedPP,
  107     FFmpegMergerPP,
  108     FFmpegPostProcessor,
  109     get_postprocessor,
  110 )
  111 from .version import __version__
  112 
  113 if compat_os_name == 'nt':
  114     import ctypes
  115 
  116 
  117 class YoutubeDL(object):
  118     """YoutubeDL class.
  119 
  120     YoutubeDL objects are the ones responsible of downloading the
  121     actual video file and writing it to disk if the user has requested
  122     it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information (a task that InfoExtractors do),
    it has to pass the URL to one of them.
  126 
  127     For this, YoutubeDL objects have a method that allows
  128     InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
  130     finds that reports being able to handle it. The InfoExtractor extracts
  131     all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
  133     Downloader to download the video.
  134 
  135     YoutubeDL objects accept a lot of parameters. In order not to saturate
  136     the object constructor with arguments, it receives a dictionary of
  137     options instead. These options are available through the params
  138     attribute for the InfoExtractors to use. The YoutubeDL also
  139     registers itself as the downloader in charge for the InfoExtractors
  140     that are added to it, so this is a "mutual registration".
  141 
  142     Available options:
  143 
  144     username:          Username for authentication purposes.
  145     password:          Password for authentication purposes.
  146     videopassword:     Password for accessing a video.
  147     ap_mso:            Adobe Pass multiple-system operator identifier.
  148     ap_username:       Multiple-system operator account username.
  149     ap_password:       Multiple-system operator account password.
  150     usenetrc:          Use netrc for authentication instead.
  151     verbose:           Print additional info to stdout.
  152     quiet:             Do not print messages to stdout.
  153     no_warnings:       Do not print out anything for warnings.
  154     forceurl:          Force printing final URL.
  155     forcetitle:        Force printing title.
  156     forceid:           Force printing ID.
  157     forcethumbnail:    Force printing thumbnail URL.
  158     forcedescription:  Force printing description.
  159     forcefilename:     Force printing final filename.
  160     forceduration:     Force printing duration.
  161     forcejson:         Force printing info_dict as JSON.
  162     dump_single_json:  Force printing the info_dict of the whole playlist
  163                        (or video) as a single JSON line.
  164     simulate:          Do not download the video files.
  165     format:            Video format code. See options.py for more information.
  166     outtmpl:           Template for output names.
  167     outtmpl_na_placeholder: Placeholder for unavailable meta fields.
  168     restrictfilenames: Do not allow "&" and spaces in file names
  169     ignoreerrors:      Do not stop on download errors.
  170     force_generic_extractor: Force downloader to use the generic extractor
  171     nooverwrites:      Prevent overwriting files.
  172     playliststart:     Playlist item to start at.
  173     playlistend:       Playlist item to end at.
  174     playlist_items:    Specific indices of playlist to download.
  175     playlistreverse:   Download playlist items in reverse order.
  176     playlistrandom:    Download playlist items in random order.
  177     matchtitle:        Download only matching titles.
  178     rejecttitle:       Reject downloads for matching titles.
  179     logger:            Log messages to a logging.Logger instance.
  180     logtostderr:       Log messages to stderr instead of stdout.
  181     writedescription:  Write the video description to a .description file
  182     writeinfojson:     Write the video description to a .info.json file
  183     writeannotations:  Write the video annotations to a .annotations.xml file
  184     writethumbnail:    Write the thumbnail image to a file
  185     write_all_thumbnails:  Write all thumbnail formats to files
  186     writesubtitles:    Write the video subtitles to a file
  187     writeautomaticsub: Write the automatically generated subtitles to a file
  188     allsubtitles:      Downloads all the subtitles of the video
  189                        (requires writesubtitles or writeautomaticsub)
  190     listsubtitles:     Lists all available subtitles for the video
  191     subtitlesformat:   The format code for subtitles
  192     subtitleslangs:    List of languages of the subtitles to download
  193     keepvideo:         Keep the video file after post-processing
  194     daterange:         A DateRange object, download only if the upload_date is in the range.
  195     skip_download:     Skip the actual download of the video file
  196     cachedir:          Location of the cache files in the filesystem.
  197                        False to disable filesystem cache.
  198     noplaylist:        Download single video instead of a playlist if in doubt.
  199     age_limit:         An integer representing the user's age in years.
  200                        Unsuitable videos for the given age are skipped.
  201     min_views:         An integer representing the minimum view count the video
  202                        must have in order to not be skipped.
  203                        Videos without view count information are always
  204                        downloaded. None for no limit.
  205     max_views:         An integer representing the maximum view count.
  206                        Videos that are more popular than that are not
  207                        downloaded.
  208                        Videos without view count information are always
  209                        downloaded. None for no limit.
  210     download_archive:  File name of a file where all downloads are recorded.
  211                        Videos already present in the file are not downloaded
  212                        again.
  213     cookiefile:        File name where cookies should be read from and dumped to.
  214     nocheckcertificate:Do not verify SSL certificates
  215     prefer_insecure:   Use HTTP instead of HTTPS to retrieve information.
  216                        At the moment, this is only supported by YouTube.
  217     proxy:             URL of the proxy server to use
  218     geo_verification_proxy:  URL of the proxy to use for IP address verification
  219                        on geo-restricted sites.
  220     socket_timeout:    Time to wait for unresponsive hosts, in seconds
  221     bidi_workaround:   Work around buggy terminals without bidirectional text
                       support, using fribidi
  223     debug_printtraffic:Print out sent and received HTTP traffic
  224     include_ads:       Download ads as well
  225     default_search:    Prepend this string if an input url is not valid.
  226                        'auto' for elaborate guessing
  227     encoding:          Use this encoding instead of the system-specified.
  228     extract_flat:      Do not resolve URLs, return the immediate result.
  229                        Pass in 'in_playlist' to only show this behavior for
  230                        playlist items.
  231     postprocessors:    A list of dictionaries, each with an entry
  232                        * key:  The name of the postprocessor. See
  233                                youtube_dl/postprocessor/__init__.py for a list.
  234                        as well as any further keyword arguments for the
  235                        postprocessor.
  236     progress_hooks:    A list of functions that get called on download
  237                        progress, with a dictionary with the entries
  238                        * status: One of "downloading", "error", or "finished".
  239                                  Check this first and ignore unknown values.
  240 
  241                        If status is one of "downloading", or "finished", the
  242                        following properties may also be present:
  243                        * filename: The final filename (always present)
  244                        * tmpfilename: The filename we're currently writing to
  245                        * downloaded_bytes: Bytes on disk
  246                        * total_bytes: Size of the whole file, None if unknown
  247                        * total_bytes_estimate: Guess of the eventual file size,
  248                                                None if unavailable.
  249                        * elapsed: The number of seconds since download started.
  250                        * eta: The estimated time in seconds, None if unknown
  251                        * speed: The download speed in bytes/second, None if
  252                                 unknown
  253                        * fragment_index: The counter of the currently
  254                                          downloaded video fragment.
  255                        * fragment_count: The number of fragments (= individual
  256                                          files that will be merged)
  257 
  258                        Progress hooks are guaranteed to be called at least once
  259                        (with status "finished") if the download is successful.
  260     merge_output_format: Extension to use when merging formats.
  261     fixup:             Automatically correct known faults of the file.
  262                        One of:
  263                        - "never": do nothing
  264                        - "warn": only emit a warning
  265                        - "detect_or_warn": check whether we can do anything
  266                                            about it, warn otherwise (default)
  267     source_address:    Client-side IP address to bind to.
  268     call_home:         Boolean, true iff we are allowed to contact the
  269                        youtube-dl servers for debugging.
  270     sleep_interval:    Number of seconds to sleep before each download when
  271                        used alone or a lower bound of a range for randomized
  272                        sleep before each download (minimum possible number
  273                        of seconds to sleep) when used along with
  274                        max_sleep_interval.
  275     max_sleep_interval:Upper bound of a range for randomized sleep before each
  276                        download (maximum possible number of seconds to sleep).
  277                        Must only be used along with sleep_interval.
  278                        Actual sleep time will be a random float from range
  279                        [sleep_interval; max_sleep_interval].
  280     listformats:       Print an overview of available video formats and exit.
  281     list_thumbnails:   Print a table of all thumbnails and exit.
  282     match_filter:      A function that gets called with the info_dict of
  283                        every video.
  284                        If it returns a message, the video is ignored.
  285                        If it returns None, the video is downloaded.
  286                        match_filter_func in utils.py is one example for this.
  287     no_color:          Do not emit color codes in output.
  288     geo_bypass:        Bypass geographic restriction via faking X-Forwarded-For
  289                        HTTP header
  290     geo_bypass_country:
  291                        Two-letter ISO 3166-2 country code that will be used for
  292                        explicit geographic restriction bypassing via faking
  293                        X-Forwarded-For HTTP header
  294     geo_bypass_ip_block:
  295                        IP range in CIDR notation that will be used similarly to
  296                        geo_bypass_country
  297 
  298     The following options determine which downloader is picked:
  299     external_downloader: Executable of the external downloader to call.
  300                        None or unset for standard (built-in) downloader.
  301     hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
  302                        if True, otherwise use ffmpeg/avconv if False, otherwise
  303                        use downloader suggested by extractor if None.
  304 
  305     The following parameters are not used by YoutubeDL itself, they are used by
  306     the downloader (see youtube_dl/downloader/common.py):
  307     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
  308     noresizebuffer, retries, continuedl, noprogress, consoletitle,
  309     xattr_set_filesize, external_downloader_args, hls_use_mpegts,
  310     http_chunk_size.
  311 
  312     The following options are used by the post processors:
  313     prefer_ffmpeg:     If False, use avconv instead of ffmpeg if both are available,
  314                        otherwise prefer ffmpeg.
  315     ffmpeg_location:   Location of the ffmpeg/avconv binary; either the path
  316                        to the binary or its containing directory.
  317     postprocessor_args: A list of additional command-line arguments for the
  318                         postprocessor.
  319 
  320     The following options are used by the Youtube extractor:
  321     youtube_include_dash_manifest: If True (default), DASH manifests and related
  322                         data will be downloaded and processed by extractor.
  323                         You can reduce network I/O by disabling it if you don't
  324                         care about DASH.
  325     """
  326 
    # Info-dict fields that carry numeric values; presumably consulted when
    # filling the output template so numbers bypass the string sanitizer
    # (used later in the file) — TODO confirm against prepare_filename.
    _NUMERIC_FIELDS = set((
        'width', 'height', 'tbr', 'abr', 'asr', 'vbr', 'fps', 'filesize', 'filesize_approx',
        'timestamp', 'upload_year', 'upload_month', 'upload_day',
        'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count',
        'average_rating', 'comment_count', 'age_limit',
        'start_time', 'end_time',
        'chapter_number', 'season_number', 'episode_number',
        'track_number', 'disc_number', 'release_year',
        'playlist_index',
    ))

    # Class-level placeholders; the real per-instance values are assigned in
    # __init__ (params, _ies, _pps, _download_retcode, _num_downloads,
    # _screen_file).
    params = None  # the options dictionary described in the class docstring
    _ies = []  # registered InfoExtractors (classes or instances)
    _pps = []  # registered PostProcessor instances
    _download_retcode = None  # set to 0 in __init__, 1 after a handled error (see trouble)
    _num_downloads = None  # counter of downloads, used for %(autonumber)s
    _playlist_level = 0  # NOTE(review): appears to track nested-playlist depth — used later in the file
    _playlist_urls = set()  # NOTE(review): appears to record playlist URLs in flight — used later in the file
    _screen_file = None  # stream for normal (non-error) output; stdout or stderr
  346 
    def __init__(self, params=None, auto_init=True):
        """Create a FileDownloader object with the given options.

        params:    dict of options (see the class docstring); None means
                   all defaults.
        auto_init: when True, print the debug header and register the
                   default info extractors immediately.
        """
        if params is None:
            params = {}
        self._ies = []
        self._ies_instances = {}
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # 'logtostderr' routes normal output to stderr instead of stdout
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self._err_file = sys.stderr
        self.params = {
            # Default parameters
            'nocheckcertificate': False,
        }
        self.params.update(params)
        self.cache = Cache(self)

        def check_deprecated(param, option, suggestion):
            # Warn if a deprecated option was supplied; returns True if so.
            if self.params.get(param) is not None:
                self.report_warning(
                    '%s is deprecated. Use %s instead.' % (option, suggestion))
                return True
            return False

        if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
            if self.params.get('geo_verification_proxy') is None:
                # Honour the deprecated value so old configurations keep working
                self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']

        check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
        check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
        check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')

        if params.get('bidi_workaround', False):
            # Spawn an external bidi filter (bidiv or fribidi) attached to a
            # pty; _bidi_workaround() later pipes messages through it.
            try:
                import pty
                master, slave = pty.openpty()
                width = compat_get_terminal_size().columns
                if width is None:
                    width_args = []
                else:
                    width_args = ['-w', str(width)]
                sp_kwargs = dict(
                    stdin=subprocess.PIPE,
                    stdout=slave,
                    stderr=self._err_file)
                try:
                    self._output_process = subprocess.Popen(
                        ['bidiv'] + width_args, **sp_kwargs
                    )
                except OSError:
                    # bidiv not found; fall back to fribidi
                    self._output_process = subprocess.Popen(
                        ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
                self._output_channel = os.fdopen(master, 'rb')
            except OSError as ose:
                if ose.errno == errno.ENOENT:
                    self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that  fribidi  is an executable file in one of the directories in your $PATH.')
                else:
                    raise

        if (sys.platform != 'win32'
                and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
                and not params.get('restrictfilenames', False)):
            # Unicode filesystem API will throw errors (#1474, #13027)
            self.report_warning(
                'Assuming --restrict-filenames since file system encoding '
                'cannot encode all characters. '
                'Set the LC_ALL environment variable to fix this.')
            self.params['restrictfilenames'] = True

        if isinstance(params.get('outtmpl'), bytes):
            self.report_warning(
                'Parameter outtmpl is bytes, but should be a unicode string. '
                'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

        self._setup_opener()

        if auto_init:
            self.print_debug_header()
            self.add_default_info_extractors()

        # Instantiate configured postprocessors: each dict's 'key' selects the
        # class, the remaining entries become its keyword arguments.
        for pp_def_raw in self.params.get('postprocessors', []):
            pp_class = get_postprocessor(pp_def_raw['key'])
            pp_def = dict(pp_def_raw)
            del pp_def['key']
            pp = pp_class(self, **compat_kwargs(pp_def))
            self.add_post_processor(pp)

        for ph in self.params.get('progress_hooks', []):
            self.add_progress_hook(ph)

        register_socks_protocols()
  440 
  441     def warn_if_short_id(self, argv):
  442         # short YouTube ID starting with dash?
  443         idxs = [
  444             i for i, a in enumerate(argv)
  445             if re.match(r'^-[0-9A-Za-z_-]{10}$', a)]
  446         if idxs:
  447             correct_argv = (
  448                 ['youtube-dl']
  449                 + [a for i, a in enumerate(argv) if i not in idxs]
  450                 + ['--'] + [argv[i] for i in idxs]
  451             )
  452             self.report_warning(
  453                 'Long argument string detected. '
  454                 'Use -- to separate parameters and URLs, like this:\n%s\n' %
  455                 args_to_str(correct_argv))
  456 
  457     def add_info_extractor(self, ie):
  458         """Add an InfoExtractor object to the end of the list."""
  459         self._ies.append(ie)
  460         if not isinstance(ie, type):
  461             self._ies_instances[ie.ie_key()] = ie
  462             ie.set_downloader(self)
  463 
  464     def get_info_extractor(self, ie_key):
  465         """
  466         Get an instance of an IE with name ie_key, it will try to get one from
  467         the _ies list, if there's no instance it will create a new one and add
  468         it to the extractor list.
  469         """
  470         ie = self._ies_instances.get(ie_key)
  471         if ie is None:
  472             ie = get_info_extractor(ie_key)()
  473             self.add_info_extractor(ie)
  474         return ie
  475 
  476     def add_default_info_extractors(self):
  477         """
  478         Add the InfoExtractors returned by gen_extractors to the end of the list
  479         """
  480         for ie in gen_extractor_classes():
  481             self.add_info_extractor(ie)
  482 
  483     def add_post_processor(self, pp):
  484         """Add a PostProcessor object to the end of the chain."""
  485         self._pps.append(pp)
  486         pp.set_downloader(self)
  487 
  488     def add_progress_hook(self, ph):
  489         """Add the progress hook (currently only for the file downloader)"""
  490         self._progress_hooks.append(ph)
  491 
  492     def _bidi_workaround(self, message):
  493         if not hasattr(self, '_output_channel'):
  494             return message
  495 
  496         assert hasattr(self, '_output_process')
  497         assert isinstance(message, compat_str)
  498         line_count = message.count('\n') + 1
  499         self._output_process.stdin.write((message + '\n').encode('utf-8'))
  500         self._output_process.stdin.flush()
  501         res = ''.join(self._output_channel.readline().decode('utf-8')
  502                       for _ in range(line_count))
  503         return res[:-len('\n')]
  504 
  505     def to_screen(self, message, skip_eol=False):
  506         """Print message to stdout if not in quiet mode."""
  507         return self.to_stdout(message, skip_eol, check_quiet=True)
  508 
  509     def _write_string(self, s, out=None):
  510         write_string(s, out=out, encoding=self.params.get('encoding'))
  511 
  512     def to_stdout(self, message, skip_eol=False, check_quiet=False):
  513         """Print message to stdout if not in quiet mode."""
  514         if self.params.get('logger'):
  515             self.params['logger'].debug(message)
  516         elif not check_quiet or not self.params.get('quiet', False):
  517             message = self._bidi_workaround(message)
  518             terminator = ['\n', ''][skip_eol]
  519             output = message + terminator
  520 
  521             self._write_string(output, self._screen_file)
  522 
  523     def to_stderr(self, message):
  524         """Print message to stderr."""
  525         assert isinstance(message, compat_str)
  526         if self.params.get('logger'):
  527             self.params['logger'].error(message)
  528         else:
  529             message = self._bidi_workaround(message)
  530             output = message + '\n'
  531             self._write_string(output, self._err_file)
  532 
  533     def to_console_title(self, message):
  534         if not self.params.get('consoletitle', False):
  535             return
  536         if compat_os_name == 'nt':
  537             if ctypes.windll.kernel32.GetConsoleWindow():
  538                 # c_wchar_p() might not be necessary if `message` is
  539                 # already of type unicode()
  540                 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
  541         elif 'TERM' in os.environ:
  542             self._write_string('\033]0;%s\007' % message, self._screen_file)
  543 
  544     def save_console_title(self):
  545         if not self.params.get('consoletitle', False):
  546             return
  547         if self.params.get('simulate', False):
  548             return
  549         if compat_os_name != 'nt' and 'TERM' in os.environ:
  550             # Save the title on stack
  551             self._write_string('\033[22;0t', self._screen_file)
  552 
  553     def restore_console_title(self):
  554         if not self.params.get('consoletitle', False):
  555             return
  556         if self.params.get('simulate', False):
  557             return
  558         if compat_os_name != 'nt' and 'TERM' in os.environ:
  559             # Restore the title from stack
  560             self._write_string('\033[23;0t', self._screen_file)
  561 
  562     def __enter__(self):
  563         self.save_console_title()
  564         return self
  565 
  566     def __exit__(self, *args):
  567         self.restore_console_title()
  568 
  569         if self.params.get('cookiefile') is not None:
  570             self.cookiejar.save(ignore_discard=True, ignore_expires=True)
  571 
    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = ''
                    # Some exceptions carry the original error in an
                    # .exc_info attribute; show that traceback first.
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += encode_compat_str(traceback.format_exc())
                else:
                    # Not inside an except block: dump the current call stack
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = ''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the wrapped exc_info (if present) when re-raising
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        # Errors are being ignored: record failure in the exit code instead
        self._download_retcode = 1
  601 
  602     def report_warning(self, message):
  603         '''
  604         Print the message to stderr, it will be prefixed with 'WARNING:'
  605         If stderr is a tty file the 'WARNING:' will be colored
  606         '''
  607         if self.params.get('logger') is not None:
  608             self.params['logger'].warning(message)
  609         else:
  610             if self.params.get('no_warnings'):
  611                 return
  612             if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
  613                 _msg_header = '\033[0;33mWARNING:\033[0m'
  614             else:
  615                 _msg_header = 'WARNING:'
  616             warning_message = '%s %s' % (_msg_header, message)
  617             self.to_stderr(warning_message)
  618 
  619     def report_error(self, message, tb=None):
  620         '''
  621         Do the same as trouble, but prefixes the message with 'ERROR:', colored
  622         in red if stderr is a tty file.
  623         '''
  624         if not self.params.get('no_color') and self._err_file.isatty() and compat_os_name != 'nt':
  625             _msg_header = '\033[0;31mERROR:\033[0m'
  626         else:
  627             _msg_header = 'ERROR:'
  628         error_message = '%s %s' % (_msg_header, message)
  629         self.trouble(error_message, tb)
  630 
  631     def report_file_already_downloaded(self, file_name):
  632         """Report file has already been fully downloaded."""
  633         try:
  634             self.to_screen('[download] %s has already been downloaded' % file_name)
  635         except UnicodeEncodeError:
  636             self.to_screen('[download] The file has already been downloaded')
  637 
    def prepare_filename(self, info_dict):
        """Generate the output filename.

        Fills the 'outtmpl' %-style template from the fields of info_dict
        (after sanitizing the values for use in file names) and returns the
        resulting path. On a template/substitution error (ValueError) the
        error is reported via report_error and None is returned instead of
        raising.
        """
        try:
            # Work on a copy so the caller's info_dict is not polluted with
            # the synthetic template-only fields added below.
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
            # Synthesize a 'resolution' field from width/height when the
            # extractor did not provide one.
            if template_dict.get('resolution') is None:
                if template_dict.get('width') and template_dict.get('height'):
                    template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
                elif template_dict.get('height'):
                    template_dict['resolution'] = '%sp' % template_dict['height']
                elif template_dict.get('width'):
                    template_dict['resolution'] = '%dx?' % template_dict['width']

            # Sanitize every non-numeric scalar value; fields named 'id' or
            # ending in '_id' are sanitized with is_id=True. None values and
            # list/tuple/dict values are dropped from the template dict.
            sanitize = lambda k, v: sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id' or k.endswith('_id')))
            template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
                                 for k, v in template_dict.items()
                                 if v is not None and not isinstance(v, (list, tuple, dict)))
            # Any field referenced by the template but missing here resolves
            # to the configured NA placeholder ('NA' by default).
            template_dict = collections.defaultdict(lambda: self.params.get('outtmpl_na_placeholder', 'NA'), template_dict)

            outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)

            # For fields playlist_index and autonumber convert all occurrences
            # of %(field)s to %(field)0Nd for backward compatibility
            field_size_compat_map = {
                'playlist_index': len(str(template_dict['n_entries'])),
                'autonumber': autonumber_size,
            }
            FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
            mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
            if mobj:
                outtmpl = re.sub(
                    FIELD_SIZE_COMPAT_RE,
                    r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
                    outtmpl)

            # Missing numeric fields used together with integer presentation types
            # in format specification will break the argument substitution since
            # string NA placeholder is returned for missing fields. We will patch
            # output template for missing fields to meet string presentation type.
            for numeric_field in self._NUMERIC_FIELDS:
                if numeric_field not in template_dict:
                    # As of [1] format syntax is:
                    #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
                    # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
                    FORMAT_RE = r'''(?x)
                        (?<!%)
                        %
                        \({0}\)  # mapping key
                        (?:[#0\-+ ]+)?  # conversion flags (optional)
                        (?:\d+)?  # minimum field width (optional)
                        (?:\.\d+)?  # precision (optional)
                        [hlL]?  # length modifier (optional)
                        [diouxXeEfFgGcrs%]  # conversion type
                    '''
                    outtmpl = re.sub(
                        FORMAT_RE.format(numeric_field),
                        r'%({0})s'.format(numeric_field), outtmpl)

            # expand_path translates '%%' into '%' and '$$' into '$'
            # correspondingly that is not what we want since we need to keep
            # '%%' intact for template dict substitution step. Working around
            # with boundary-alike separator hack.
            sep = ''.join([random.choice(ascii_letters) for _ in range(32)])
            outtmpl = outtmpl.replace('%%', '%{0}%'.format(sep)).replace('$$', '${0}$'.format(sep))

            # outtmpl should be expand_path'ed before template dict substitution
            # because meta fields may contain env variables we don't want to
            # be expanded. For example, for outtmpl "%(title)s.%(ext)s" and
            # title "Hello $PATH", we don't want `$PATH` to be expanded.
            filename = expand_path(outtmpl).replace(sep, '') % template_dict

            # Temporary fix for #4787
            # 'Treat' all problem characters by passing filename through preferredencoding
            # to workaround encoding issues with subprocess on python2 @ Windows
            if sys.version_info < (3, 0) and sys.platform == 'win32':
                filename = encodeFilename(filename, True).decode(preferredencoding())
            return sanitize_path(filename)
        except ValueError as err:
            self.report_error('Error in output template: ' + error_to_compat_str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
            return None
  726 
  727     def _match_entry(self, info_dict, incomplete):
  728         """ Returns None iff the file should be downloaded """
  729 
  730         video_title = info_dict.get('title', info_dict.get('id', 'video'))
  731         if 'title' in info_dict:
  732             # This can happen when we're just evaluating the playlist
  733             title = info_dict['title']
  734             matchtitle = self.params.get('matchtitle', False)
  735             if matchtitle:
  736                 if not re.search(matchtitle, title, re.IGNORECASE):
  737                     return '"' + title + '" title did not match pattern "' + matchtitle + '"'
  738             rejecttitle = self.params.get('rejecttitle', False)
  739             if rejecttitle:
  740                 if re.search(rejecttitle, title, re.IGNORECASE):
  741                     return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
  742         date = info_dict.get('upload_date')
  743         if date is not None:
  744             dateRange = self.params.get('daterange', DateRange())
  745             if date not in dateRange:
  746                 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
  747         view_count = info_dict.get('view_count')
  748         if view_count is not None:
  749             min_views = self.params.get('min_views')
  750             if min_views is not None and view_count < min_views:
  751                 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
  752             max_views = self.params.get('max_views')
  753             if max_views is not None and view_count > max_views:
  754                 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
  755         if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
  756             return 'Skipping "%s" because it is age restricted' % video_title
  757         if self.in_download_archive(info_dict):
  758             return '%s has already been recorded in archive' % video_title
  759 
  760         if not incomplete:
  761             match_filter = self.params.get('match_filter')
  762             if match_filter is not None:
  763                 ret = match_filter(info_dict)
  764                 if ret is not None:
  765                     return ret
  766 
  767         return None
  768 
  769     @staticmethod
  770     def add_extra_info(info_dict, extra_info):
  771         '''Set the keys from extra_info in info dict if they are missing'''
  772         for key, value in extra_info.items():
  773             info_dict.setdefault(key, value)
  774 
  775     def extract_info(self, url, download=True, ie_key=None, extra_info={},
  776                      process=True, force_generic_extractor=False):
  777         """
  778         Return a list with a dictionary for each video extracted.
  779 
  780         Arguments:
  781         url -- URL to extract
  782 
  783         Keyword arguments:
  784         download -- whether to download videos during extraction
  785         ie_key -- extractor key hint
  786         extra_info -- dictionary containing the extra values to add to each result
  787         process -- whether to resolve all unresolved references (URLs, playlist items),
  788             must be True for download to work.
  789         force_generic_extractor -- force using the generic extractor
  790         """
  791 
  792         if not ie_key and force_generic_extractor:
  793             ie_key = 'Generic'
  794 
  795         if ie_key:
  796             ies = [self.get_info_extractor(ie_key)]
  797         else:
  798             ies = self._ies
  799 
  800         for ie in ies:
  801             if not ie.suitable(url):
  802                 continue
  803 
  804             ie = self.get_info_extractor(ie.ie_key())
  805             if not ie.working():
  806                 self.report_warning('The program functionality for this site has been marked as broken, '
  807                                     'and will probably not work.')
  808 
  809             return self.__extract_info(url, ie, download, extra_info, process)
  810         else:
  811             self.report_error('no suitable InfoExtractor for URL %s' % url)
  812 
  813     def __handle_extraction_exceptions(func):
  814         def wrapper(self, *args, **kwargs):
  815             try:
  816                 return func(self, *args, **kwargs)
  817             except GeoRestrictedError as e:
  818                 msg = e.msg
  819                 if e.countries:
  820                     msg += '\nThis video is available in %s.' % ', '.join(
  821                         map(ISO3166Utils.short2full, e.countries))
  822                 msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
  823                 self.report_error(msg)
  824             except ExtractorError as e:  # An error we somewhat expected
  825                 self.report_error(compat_str(e), e.format_traceback())
  826             except MaxDownloadsReached:
  827                 raise
  828             except Exception as e:
  829                 if self.params.get('ignoreerrors', False):
  830                     self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
  831                 else:
  832                     raise
  833         return wrapper
  834 
  835     @__handle_extraction_exceptions
  836     def __extract_info(self, url, ie, download, extra_info, process):
  837         ie_result = ie.extract(url)
  838         if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
  839             return
  840         if isinstance(ie_result, list):
  841             # Backwards compatibility: old IE result format
  842             ie_result = {
  843                 '_type': 'compat_list',
  844                 'entries': ie_result,
  845             }
  846         self.add_default_extra_info(ie_result, ie, url)
  847         if process:
  848             return self.process_ie_result(ie_result, download, extra_info)
  849         else:
  850             return ie_result
  851 
  852     def add_default_extra_info(self, ie_result, ie, url):
  853         self.add_extra_info(ie_result, {
  854             'extractor': ie.IE_NAME,
  855             'webpage_url': url,
  856             'webpage_url_basename': url_basename(url),
  857             'extractor_key': ie.ie_key(),
  858         })
  859 
  860     def process_ie_result(self, ie_result, download=True, extra_info={}):
  861         """
  862         Take the result of the ie(may be modified) and resolve all unresolved
  863         references (URLs, playlist items).
  864 
  865         It will also download the videos if 'download'.
  866         Returns the resolved ie_result.
  867         """
  868         result_type = ie_result.get('_type', 'video')
  869 
  870         if result_type in ('url', 'url_transparent'):
  871             ie_result['url'] = sanitize_url(ie_result['url'])
  872             extract_flat = self.params.get('extract_flat', False)
  873             if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
  874                     or extract_flat is True):
  875                 self.__forced_printings(
  876                     ie_result, self.prepare_filename(ie_result),
  877                     incomplete=True)
  878                 return ie_result
  879 
  880         if result_type == 'video':
  881             self.add_extra_info(ie_result, extra_info)
  882             return self.process_video_result(ie_result, download=download)
  883         elif result_type == 'url':
  884             # We have to add extra_info to the results because it may be
  885             # contained in a playlist
  886             return self.extract_info(ie_result['url'],
  887                                      download,
  888                                      ie_key=ie_result.get('ie_key'),
  889                                      extra_info=extra_info)
  890         elif result_type == 'url_transparent':
  891             # Use the information from the embedding page
  892             info = self.extract_info(
  893                 ie_result['url'], ie_key=ie_result.get('ie_key'),
  894                 extra_info=extra_info, download=False, process=False)
  895 
  896             # extract_info may return None when ignoreerrors is enabled and
  897             # extraction failed with an error, don't crash and return early
  898             # in this case
  899             if not info:
  900                 return info
  901 
  902             force_properties = dict(
  903                 (k, v) for k, v in ie_result.items() if v is not None)
  904             for f in ('_type', 'url', 'id', 'extractor', 'extractor_key', 'ie_key'):
  905                 if f in force_properties:
  906                     del force_properties[f]
  907             new_result = info.copy()
  908             new_result.update(force_properties)
  909 
  910             # Extracted info may not be a video result (i.e.
  911             # info.get('_type', 'video') != video) but rather an url or
  912             # url_transparent. In such cases outer metadata (from ie_result)
  913             # should be propagated to inner one (info). For this to happen
  914             # _type of info should be overridden with url_transparent. This
  915             # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163.
  916             if new_result.get('_type') == 'url':
  917                 new_result['_type'] = 'url_transparent'
  918 
  919             return self.process_ie_result(
  920                 new_result, download=download, extra_info=extra_info)
  921         elif result_type in ('playlist', 'multi_video'):
  922             # Protect from infinite recursion due to recursively nested playlists
  923             # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
  924             webpage_url = ie_result['webpage_url']
  925             if webpage_url in self._playlist_urls:
  926                 self.to_screen(
  927                     '[download] Skipping already downloaded playlist: %s'
  928                     % ie_result.get('title') or ie_result.get('id'))
  929                 return
  930 
  931             self._playlist_level += 1
  932             self._playlist_urls.add(webpage_url)
  933             try:
  934                 return self.__process_playlist(ie_result, download)
  935             finally:
  936                 self._playlist_level -= 1
  937                 if not self._playlist_level:
  938                     self._playlist_urls.clear()
  939         elif result_type == 'compat_list':
  940             self.report_warning(
  941                 'Extractor %s returned a compat_list result. '
  942                 'It needs to be updated.' % ie_result.get('extractor'))
  943 
  944             def _fixup(r):
  945                 self.add_extra_info(
  946                     r,
  947                     {
  948                         'extractor': ie_result['extractor'],
  949                         'webpage_url': ie_result['webpage_url'],
  950                         'webpage_url_basename': url_basename(ie_result['webpage_url']),
  951                         'extractor_key': ie_result['extractor_key'],
  952                     }
  953                 )
  954                 return r
  955             ie_result['entries'] = [
  956                 self.process_ie_result(_fixup(r), download, extra_info)
  957                 for r in ie_result['entries']
  958             ]
  959             return ie_result
  960         else:
  961             raise Exception('Invalid result type: %s' % result_type)
  962 
    def __process_playlist(self, ie_result, download):
        """Resolve (and optionally download) every entry of a playlist result.

        Applies the playliststart/playlistend/playlist_items selection, then
        the optional playlistreverse/playlistrandom reordering, annotates
        every entry with playlist-level metadata and processes it. Returns
        ie_result with 'entries' replaced by the processed results.
        """
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')

        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        # playliststart is 1-based in params; converted to a 0-based slice
        # start here.
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        # 'playlist_items' is a comma-separated list of 1-based indices and
        # inclusive ranges, e.g. '1-3,7'. When given it overrides
        # playliststart/playlistend.
        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(format):
                for string_segment in format.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))

        ie_entries = ie_result['entries']

        # Pick the requested 1-based indices from a concrete list, silently
        # dropping indices that fall outside the list.
        def make_playlistitems_entries(list_ie_entries):
            num_entries = len(list_ie_entries)
            return [
                list_ie_entries[i - 1] for i in playlistitems
                if -num_entries <= i - 1 < num_entries]

        def report_download(num_entries):
            self.to_screen(
                '[%s] playlist %s: Downloading %d videos' %
                (ie_result['extractor'], playlist, num_entries))

        # The entries may come as a plain list, a lazily-fetched PagedList,
        # or a generic iterable/generator; each needs its own slicing logic.
        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = make_playlistitems_entries(ie_entries)
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            # Fetch only the requested slices from the paged source.
            if playlistitems:
                entries = []
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(
                        item - 1, item
                    ))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            report_download(n_entries)
        else:  # iterable
            # Materialize only as much of the iterable as is needed.
            if playlistitems:
                entries = make_playlistitems_entries(list(itertools.islice(
                    ie_entries, 0, max(playlistitems))))
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            report_download(n_entries)

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        if self.params.get('playlistrandom', False):
            random.shuffle(entries)

        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            # This __x_forwarded_for_ip thing is a bit ugly but requires
            # minimal changes
            if x_forwarded_for:
                entry['__x_forwarded_for_ip'] = x_forwarded_for
            # Playlist-level metadata propagated into every entry;
            # playlist_index is the entry's 1-based index in the full
            # playlist, not its position in the selected subset.
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_uploader': ie_result.get('uploader'),
                'playlist_uploader_id': ie_result.get('uploader_id'),
                'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry, incomplete=True)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.__process_iterable_entry(entry, download, extra)
            # TODO: skip failed (empty) entries?
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
 1074 
 1075     @__handle_extraction_exceptions
 1076     def __process_iterable_entry(self, entry, download, extra_info):
 1077         return self.process_ie_result(
 1078             entry, download=download, extra_info=extra_info)
 1079 
 1080     def _build_format_filter(self, filter_spec):
 1081         " Returns a function to filter the formats according to the filter_spec "
 1082 
 1083         OPERATORS = {
 1084             '<': operator.lt,
 1085             '<=': operator.le,
 1086             '>': operator.gt,
 1087             '>=': operator.ge,
 1088             '=': operator.eq,
 1089             '!=': operator.ne,
 1090         }
 1091         operator_rex = re.compile(r'''(?x)\s*
 1092             (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
 1093             \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 1094             (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
 1095             $
 1096             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
 1097         m = operator_rex.search(filter_spec)
 1098         if m:
 1099             try:
 1100                 comparison_value = int(m.group('value'))
 1101             except ValueError:
 1102                 comparison_value = parse_filesize(m.group('value'))
 1103                 if comparison_value is None:
 1104                     comparison_value = parse_filesize(m.group('value') + 'B')
 1105                 if comparison_value is None:
 1106                     raise ValueError(
 1107                         'Invalid value %r in format specification %r' % (
 1108                             m.group('value'), filter_spec))
 1109             op = OPERATORS[m.group('op')]
 1110 
 1111         if not m:
 1112             STR_OPERATORS = {
 1113                 '=': operator.eq,
 1114                 '^=': lambda attr, value: attr.startswith(value),
 1115                 '$=': lambda attr, value: attr.endswith(value),
 1116                 '*=': lambda attr, value: value in attr,
 1117             }
 1118             str_operator_rex = re.compile(r'''(?x)
 1119                 \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id|language)
 1120                 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
 1121                 \s*(?P<value>[a-zA-Z0-9._-]+)
 1122                 \s*$
 1123                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
 1124             m = str_operator_rex.search(filter_spec)
 1125             if m:
 1126                 comparison_value = m.group('value')
 1127                 str_op = STR_OPERATORS[m.group('op')]
 1128                 if m.group('negation'):
 1129                     op = lambda attr, value: not str_op(attr, value)
 1130                 else:
 1131                     op = str_op
 1132 
 1133         if not m:
 1134             raise ValueError('Invalid filter specification %r' % filter_spec)
 1135 
 1136         def _filter(f):
 1137             actual_value = f.get(m.group('key'))
 1138             if actual_value is None:
 1139                 return m.group('none_inclusive')
 1140             return op(actual_value, comparison_value)
 1141         return _filter
 1142 
 1143     def _default_format_spec(self, info_dict, download=True):
 1144 
 1145         def can_merge():
 1146             merger = FFmpegMergerPP(self)
 1147             return merger.available and merger.can_merge()
 1148 
 1149         def prefer_best():
 1150             if self.params.get('simulate', False):
 1151                 return False
 1152             if not download:
 1153                 return False
 1154             if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
 1155                 return True
 1156             if info_dict.get('is_live'):
 1157                 return True
 1158             if not can_merge():
 1159                 return True
 1160             return False
 1161 
 1162         req_format_list = ['bestvideo+bestaudio', 'best']
 1163         if prefer_best():
 1164             req_format_list.reverse()
 1165         return '/'.join(req_format_list)
 1166 
 1167     def build_format_selector(self, format_spec):
 1168         def syntax_error(note, start):
 1169             message = (
 1170                 'Invalid format specification: '
 1171                 '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
 1172             return SyntaxError(message)
 1173 
 1174         PICKFIRST = 'PICKFIRST'
 1175         MERGE = 'MERGE'
 1176         SINGLE = 'SINGLE'
 1177         GROUP = 'GROUP'
 1178         FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
 1179 
 1180         def _parse_filter(tokens):
 1181             filter_parts = []
 1182             for type, string, start, _, _ in tokens:
 1183                 if type == tokenize.OP and string == ']':
 1184                     return ''.join(filter_parts)
 1185                 else:
 1186                     filter_parts.append(string)
 1187 
 1188         def _remove_unused_ops(tokens):
 1189             # Remove operators that we don't use and join them with the surrounding strings
 1190             # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
 1191             ALLOWED_OPS = ('/', '+', ',', '(', ')')
 1192             last_string, last_start, last_end, last_line = None, None, None, None
 1193             for type, string, start, end, line in tokens:
 1194                 if type == tokenize.OP and string == '[':
 1195                     if last_string:
 1196                         yield tokenize.NAME, last_string, last_start, last_end, last_line
 1197                         last_string = None
 1198                     yield type, string, start, end, line
 1199                     # everything inside brackets will be handled by _parse_filter
 1200                     for type, string, start, end, line in tokens:
 1201                         yield type, string, start, end, line
 1202                         if type == tokenize.OP and string == ']':
 1203                             break
 1204                 elif type == tokenize.OP and string in ALLOWED_OPS:
 1205                     if last_string:
 1206                         yield tokenize.NAME, last_string, last_start, last_end, last_line
 1207                         last_string = None
 1208                     yield type, string, start, end, line
 1209                 elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
 1210                     if not last_string:
 1211                         last_string = string
 1212                         last_start = start
 1213                         last_end = end
 1214                     else:
 1215                         last_string += string
 1216             if last_string:
 1217                 yield tokenize.NAME, last_string, last_start, last_end, last_line
 1218 
 1219         def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
 1220             selectors = []
 1221             current_selector = None
 1222             for type, string, start, _, _ in tokens:
 1223                 # ENCODING is only defined in python 3.x
 1224                 if type == getattr(tokenize, 'ENCODING', None):
 1225                     continue
 1226                 elif type in [tokenize.NAME, tokenize.NUMBER]:
 1227                     current_selector = FormatSelector(SINGLE, string, [])
 1228                 elif type == tokenize.OP:
 1229                     if string == ')':
 1230                         if not inside_group:
 1231                             # ')' will be handled by the parentheses group
 1232                             tokens.restore_last_token()
 1233                         break
 1234                     elif inside_merge and string in ['/', ',']:
 1235                         tokens.restore_last_token()
 1236                         break
 1237                     elif inside_choice and string == ',':
 1238                         tokens.restore_last_token()
 1239                         break
 1240                     elif string == ',':
 1241                         if not current_selector:
 1242                             raise syntax_error('"," must follow a format selector', start)
 1243                         selectors.append(current_selector)
 1244                         current_selector = None
 1245                     elif string == '/':
 1246                         if not current_selector:
 1247                             raise syntax_error('"/" must follow a format selector', start)
 1248                         first_choice = current_selector
 1249                         second_choice = _parse_format_selection(tokens, inside_choice=True)
 1250                         current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
 1251                     elif string == '[':
 1252                         if not current_selector:
 1253                             current_selector = FormatSelector(SINGLE, 'best', [])
 1254                         format_filter = _parse_filter(tokens)
 1255                         current_selector.filters.append(format_filter)
 1256                     elif string == '(':
 1257                         if current_selector:
 1258                             raise syntax_error('Unexpected "("', start)
 1259                         group = _parse_format_selection(tokens, inside_group=True)
 1260                         current_selector = FormatSelector(GROUP, group, [])
 1261                     elif string == '+':
 1262                         if inside_merge:
 1263                             raise syntax_error('Unexpected "+"', start)
 1264                         video_selector = current_selector
 1265                         audio_selector = _parse_format_selection(tokens, inside_merge=True)
 1266                         if not video_selector or not audio_selector:
 1267                             raise syntax_error('"+" must be between two format selectors', start)
 1268                         current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
 1269                     else:
 1270                         raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
 1271                 elif type == tokenize.ENDMARKER:
 1272                     break
 1273             if current_selector:
 1274                 selectors.append(current_selector)
 1275             return selectors
 1276 
        def _build_selector_function(selector):
            # Recursively compile a parsed selector tree (or a list of
            # ','-separated alternatives) into a function mapping a context
            # dict ({'formats': [...], 'incomplete_formats': bool}) to an
            # iterable of the selected format dicts.
            if isinstance(selector, list):
                # List of selectors: yield whatever each one selects, in order.
                fs = [_build_selector_function(s) for s in selector]

                def selector_function(ctx):
                    for f in fs:
                        for format in f(ctx):
                            yield format
                return selector_function
            elif selector.type == GROUP:
                # Parenthesized group: delegate to the inner selector.
                selector_function = _build_selector_function(selector.selector)
            elif selector.type == PICKFIRST:
                # '/'-separated alternatives: the first selector that yields
                # any formats wins.
                fs = [_build_selector_function(s) for s in selector.selector]

                def selector_function(ctx):
                    for f in fs:
                        picked_formats = list(f(ctx))
                        if picked_formats:
                            return picked_formats
                    return []
            elif selector.type == SINGLE:
                format_spec = selector.selector

                def selector_function(ctx):
                    formats = list(ctx['formats'])
                    if not formats:
                        return
                    if format_spec == 'all':
                        for f in formats:
                            yield f
                    elif format_spec in ['best', 'worst', None]:
                        # Index -1 is treated as 'best', 0 as 'worst'
                        # (formats list is expected worst-first here).
                        format_idx = 0 if format_spec == 'worst' else -1
                        audiovideo_formats = [
                            f for f in formats
                            if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                        if audiovideo_formats:
                            yield audiovideo_formats[format_idx]
                        # for extractors with incomplete formats (audio only (soundcloud)
                        # or video only (imgur)) we will fallback to best/worst
                        # {video,audio}-only format
                        elif ctx['incomplete_formats']:
                            yield formats[format_idx]
                    elif format_spec == 'bestaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[-1]
                    elif format_spec == 'worstaudio':
                        audio_formats = [
                            f for f in formats
                            if f.get('vcodec') == 'none']
                        if audio_formats:
                            yield audio_formats[0]
                    elif format_spec == 'bestvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[-1]
                    elif format_spec == 'worstvideo':
                        video_formats = [
                            f for f in formats
                            if f.get('acodec') == 'none']
                        if video_formats:
                            yield video_formats[0]
                    else:
                        # Anything else is either a known extension or a
                        # literal format_id; yield the last (best) match.
                        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                        if format_spec in extensions:
                            filter_f = lambda f: f['ext'] == format_spec
                        else:
                            filter_f = lambda f: f['format_id'] == format_spec
                        matches = list(filter(filter_f, formats))
                        if matches:
                            yield matches[-1]
            elif selector.type == MERGE:
                def _merge(formats_info):
                    # Combine a (video, audio) format pair into one synthetic
                    # format dict; reports an error and returns None when the
                    # pair is not video+audio in that order.
                    format_1, format_2 = [f['format_id'] for f in formats_info]
                    # The first format must contain the video and the
                    # second the audio
                    if formats_info[0].get('vcodec') == 'none':
                        self.report_error('The first format must '
                                          'contain the video, try using '
                                          '"-f %s+%s"' % (format_2, format_1))
                        return
                    # Formats must be opposite (video+audio)
                    if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                        self.report_error(
                            'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                            % (format_1, format_2))
                        return
                    output_ext = (
                        formats_info[0]['ext']
                        if self.params.get('merge_output_format') is None
                        else self.params['merge_output_format'])
                    # Video-related attributes come from the first format,
                    # audio-related ones from the second.
                    return {
                        'requested_formats': formats_info,
                        'format': '%s+%s' % (formats_info[0].get('format'),
                                             formats_info[1].get('format')),
                        'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                                formats_info[1].get('format_id')),
                        'width': formats_info[0].get('width'),
                        'height': formats_info[0].get('height'),
                        'resolution': formats_info[0].get('resolution'),
                        'fps': formats_info[0].get('fps'),
                        'vcodec': formats_info[0].get('vcodec'),
                        'vbr': formats_info[0].get('vbr'),
                        'stretched_ratio': formats_info[0].get('stretched_ratio'),
                        'acodec': formats_info[1].get('acodec'),
                        'abr': formats_info[1].get('abr'),
                        'ext': output_ext,
                    }
                video_selector, audio_selector = map(_build_selector_function, selector.selector)

                def selector_function(ctx):
                    # Cartesian product of video and audio candidates; deep
                    # copies keep each sub-selector's view of the context
                    # independent.
                    for pair in itertools.product(
                            video_selector(copy.deepcopy(ctx)), audio_selector(copy.deepcopy(ctx))):
                        yield _merge(pair)

            # Apply any attached '[...]' filters to a copy of the context
            # before running the selector built above.
            filters = [self._build_format_filter(f) for f in selector.filters]

            def final_selector(ctx):
                ctx_copy = copy.deepcopy(ctx)
                for _filter in filters:
                    ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats']))
                return selector_function(ctx_copy)
            return final_selector
 1404 
 1405         stream = io.BytesIO(format_spec.encode('utf-8'))
 1406         try:
 1407             tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
 1408         except tokenize.TokenError:
 1409             raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))
 1410 
 1411         class TokenIterator(object):
 1412             def __init__(self, tokens):
 1413                 self.tokens = tokens
 1414                 self.counter = 0
 1415 
 1416             def __iter__(self):
 1417                 return self
 1418 
 1419             def __next__(self):
 1420                 if self.counter >= len(self.tokens):
 1421                     raise StopIteration()
 1422                 value = self.tokens[self.counter]
 1423                 self.counter += 1
 1424                 return value
 1425 
 1426             next = __next__
 1427 
 1428             def restore_last_token(self):
 1429                 self.counter -= 1
 1430 
 1431         parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
 1432         return _build_selector_function(parsed_selector)
 1433 
 1434     def _calc_headers(self, info_dict):
 1435         res = std_headers.copy()
 1436 
 1437         add_headers = info_dict.get('http_headers')
 1438         if add_headers:
 1439             res.update(add_headers)
 1440 
 1441         cookies = self._calc_cookies(info_dict)
 1442         if cookies:
 1443             res['Cookie'] = cookies
 1444 
 1445         if 'X-Forwarded-For' not in res:
 1446             x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip')
 1447             if x_forwarded_for_ip:
 1448                 res['X-Forwarded-For'] = x_forwarded_for_ip
 1449 
 1450         return res
 1451 
 1452     def _calc_cookies(self, info_dict):
 1453         pr = sanitized_Request(info_dict['url'])
 1454         self.cookiejar.add_cookie_header(pr)
 1455         return pr.get_header('Cookie')
 1456 
    def process_video_result(self, info_dict, download=True):
        """Sanitize a single 'video' extractor result, select the requested
        format(s) and, when download is True, hand each one to process_info.

        Returns info_dict updated with the last selected format (kept for
        backwards compatibility).  Returns early (None) for the pure listing
        modes (list_thumbnails / listsubtitles / listformats).
        """
        assert info_dict.get('_type', 'video') == 'video'

        if 'id' not in info_dict:
            raise ExtractorError('Missing "id" field in extractor result')
        if 'title' not in info_dict:
            raise ExtractorError('Missing "title" field in extractor result')

        def report_force_conversion(field, field_not, conversion):
            # Warn that the extractor produced a wrongly-typed field value.
            self.report_warning(
                '"%s" field is not %s - forcing %s conversion, there is an error in extractor'
                % (field, field_not, conversion))

        def sanitize_string_field(info, string_field):
            # Coerce a non-string field to str (with a warning).
            field = info.get(string_field)
            if field is None or isinstance(field, compat_str):
                return
            report_force_conversion(string_field, 'a string', 'string')
            info[string_field] = compat_str(field)

        def sanitize_numeric_fields(info):
            # Coerce every known numeric field to int (with a warning).
            for numeric_field in self._NUMERIC_FIELDS:
                field = info.get(numeric_field)
                if field is None or isinstance(field, compat_numeric_types):
                    continue
                report_force_conversion(numeric_field, 'numeric', 'int')
                info[numeric_field] = int_or_none(field)

        sanitize_string_field(info_dict, 'id')
        sanitize_numeric_fields(info_dict)

        if 'playlist' not in info_dict:
            # It isn't part of a playlist
            info_dict['playlist'] = None
            info_dict['playlist_index'] = None

        thumbnails = info_dict.get('thumbnails')
        if thumbnails is None:
            # Promote a lone 'thumbnail' field to a one-element list.
            thumbnail = info_dict.get('thumbnail')
            if thumbnail:
                info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
        if thumbnails:
            # Sort ascending by preference/size; missing values sort first.
            thumbnails.sort(key=lambda t: (
                t.get('preference') if t.get('preference') is not None else -1,
                t.get('width') if t.get('width') is not None else -1,
                t.get('height') if t.get('height') is not None else -1,
                t.get('id') if t.get('id') is not None else '', t.get('url')))
            for i, t in enumerate(thumbnails):
                t['url'] = sanitize_url(t['url'])
                if t.get('width') and t.get('height'):
                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                if t.get('id') is None:
                    # Fall back to the list position as the thumbnail id.
                    t['id'] = '%d' % i

        if self.params.get('list_thumbnails'):
            self.list_thumbnails(info_dict)
            return

        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnail'] = sanitize_url(thumbnail)
        elif thumbnails:
            # Last entry is the preferred one after the sort above.
            info_dict['thumbnail'] = thumbnails[-1]['url']

        if 'display_id' not in info_dict and 'id' in info_dict:
            info_dict['display_id'] = info_dict['id']

        # Derive *_date fields (YYYYMMDD) from their timestamp counterparts.
        for ts_key, date_key in (
                ('timestamp', 'upload_date'),
                ('release_timestamp', 'release_date'),
        ):
            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
                # see http://bugs.python.org/issue1646728)
                try:
                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
                    info_dict[date_key] = compat_str(upload_date.strftime('%Y%m%d'))
                except (ValueError, OverflowError, OSError):
                    pass

        # Auto generate title fields corresponding to the *_number fields when missing
        # in order to always have clean titles. This is very common for TV series.
        for field in ('chapter', 'season', 'episode'):
            if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
                info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

        # Sanitize subtitle/caption URLs and fill in missing extensions.
        for cc_kind in ('subtitles', 'automatic_captions'):
            cc = info_dict.get(cc_kind)
            if cc:
                for _, subtitle in cc.items():
                    for subtitle_format in subtitle:
                        if subtitle_format.get('url'):
                            subtitle_format['url'] = sanitize_url(subtitle_format['url'])
                        if subtitle_format.get('ext') is None:
                            subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

        automatic_captions = info_dict.get('automatic_captions')
        subtitles = info_dict.get('subtitles')

        if self.params.get('listsubtitles', False):
            if 'automatic_captions' in info_dict:
                self.list_subtitles(
                    info_dict['id'], automatic_captions, 'automatic captions')
            self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
            return

        info_dict['requested_subtitles'] = self.process_subtitles(
            info_dict['id'], subtitles, automatic_captions)

        # We now pick which formats have to be downloaded
        if info_dict.get('formats') is None:
            # There's only one format available
            formats = [info_dict]
        else:
            formats = info_dict['formats']

        def is_wellformed(f):
            # A format without a usable 'url' cannot be downloaded.
            url = f.get('url')
            if not url:
                self.report_warning(
                    '"url" field is missing or empty - skipping format, '
                    'there is an error in extractor')
                return False
            if isinstance(url, bytes):
                sanitize_string_field(f, 'url')
            return True

        # Filter out malformed formats for better extraction robustness
        formats = list(filter(is_wellformed, formats or []))

        if not formats:
            raise ExtractorError('No video formats found!')

        # Maps each format_id to the formats carrying it, to detect clashes.
        formats_dict = {}

        # We check that all the formats have the format and format_id fields
        for i, format in enumerate(formats):
            sanitize_string_field(format, 'format_id')
            sanitize_numeric_fields(format)
            format['url'] = sanitize_url(format['url'])
            if not format.get('format_id'):
                format['format_id'] = compat_str(i)
            else:
                # Sanitize format_id from characters used in format selector expression
                format['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', format['format_id'])
            format_id = format['format_id']
            if format_id not in formats_dict:
                formats_dict[format_id] = []
            formats_dict[format_id].append(format)

        # Make sure all formats have unique format_id
        for format_id, ambiguous_formats in formats_dict.items():
            if len(ambiguous_formats) > 1:
                for i, format in enumerate(ambiguous_formats):
                    format['format_id'] = '%s-%d' % (format_id, i)

        for i, format in enumerate(formats):
            if format.get('format') is None:
                format['format'] = '{id} - {res}{note}'.format(
                    id=format['format_id'],
                    res=self.format_resolution(format),
                    note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
                )
            # Automatically determine file extension if missing
            if format.get('ext') is None:
                format['ext'] = determine_ext(format['url']).lower()
            # Automatically determine protocol if missing (useful for format
            # selection purposes)
            if format.get('protocol') is None:
                format['protocol'] = determine_protocol(format)
            # Add HTTP headers, so that external programs can use them from the
            # json output
            full_format_info = info_dict.copy()
            full_format_info.update(format)
            format['http_headers'] = self._calc_headers(full_format_info)
        # Remove private housekeeping stuff
        if '__x_forwarded_for_ip' in info_dict:
            del info_dict['__x_forwarded_for_ip']

        # TODO Central sorting goes here

        if formats[0] is not info_dict:
            # only set the 'formats' fields if the original info_dict list them
            # otherwise we end up with a circular reference, the first (and unique)
            # element in the 'formats' field in info_dict is info_dict itself,
            # which can't be exported to json
            info_dict['formats'] = formats
        if self.params.get('listformats'):
            self.list_formats(info_dict)
            return

        req_format = self.params.get('format')
        if req_format is None:
            req_format = self._default_format_spec(info_dict, download=download)
            if self.params.get('verbose'):
                self._write_string('[debug] Default format spec: %s\n' % req_format)

        format_selector = self.build_format_selector(req_format)

        # While in format selection we may need to have an access to the original
        # format set in order to calculate some metrics or do some processing.
        # For now we need to be able to guess whether original formats provided
        # by extractor are incomplete or not (i.e. whether extractor provides only
        # video-only or audio-only formats) for proper formats selection for
        # extractors with such incomplete formats (see
        # https://github.com/ytdl-org/youtube-dl/pull/5556).
        # Since formats may be filtered during format selection and may not match
        # the original formats the results may be incorrect. Thus original formats
        # or pre-calculated metrics should be passed to format selection routines
        # as well.
        # We will pass a context object containing all necessary additional data
        # instead of just formats.
        # This fixes incorrect format selection issue (see
        # https://github.com/ytdl-org/youtube-dl/issues/10083).
        incomplete_formats = (
            # All formats are video-only or
            all(f.get('vcodec') != 'none' and f.get('acodec') == 'none' for f in formats)
            # all formats are audio-only
            or all(f.get('vcodec') == 'none' and f.get('acodec') != 'none' for f in formats))

        ctx = {
            'formats': formats,
            'incomplete_formats': incomplete_formats,
        }

        formats_to_download = list(format_selector(ctx))
        if not formats_to_download:
            raise ExtractorError('requested format not available',
                                 expected=True)

        if download:
            if len(formats_to_download) > 1:
                self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
            for format in formats_to_download:
                # Each selected format gets its own merged copy of the
                # metadata and goes through the full download pipeline.
                new_info = dict(info_dict)
                new_info.update(format)
                self.process_info(new_info)
        # We update the info dict with the best quality format (backwards compatibility)
        info_dict.update(formats_to_download[-1])
        return info_dict
 1697 
 1698     def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
 1699         """Select the requested subtitles and their format"""
 1700         available_subs = {}
 1701         if normal_subtitles and self.params.get('writesubtitles'):
 1702             available_subs.update(normal_subtitles)
 1703         if automatic_captions and self.params.get('writeautomaticsub'):
 1704             for lang, cap_info in automatic_captions.items():
 1705                 if lang not in available_subs:
 1706                     available_subs[lang] = cap_info
 1707 
 1708         if (not self.params.get('writesubtitles') and not
 1709                 self.params.get('writeautomaticsub') or not
 1710                 available_subs):
 1711             return None
 1712 
 1713         if self.params.get('allsubtitles', False):
 1714             requested_langs = available_subs.keys()
 1715         else:
 1716             if self.params.get('subtitleslangs', False):
 1717                 requested_langs = self.params.get('subtitleslangs')
 1718             elif 'en' in available_subs:
 1719                 requested_langs = ['en']
 1720             else:
 1721                 requested_langs = [list(available_subs.keys())[0]]
 1722 
 1723         formats_query = self.params.get('subtitlesformat', 'best')
 1724         formats_preference = formats_query.split('/') if formats_query else []
 1725         subs = {}
 1726         for lang in requested_langs:
 1727             formats = available_subs.get(lang)
 1728             if formats is None:
 1729                 self.report_warning('%s subtitles not available for %s' % (lang, video_id))
 1730                 continue
 1731             for ext in formats_preference:
 1732                 if ext == 'best':
 1733                     f = formats[-1]
 1734                     break
 1735                 matches = list(filter(lambda f: f['ext'] == ext, formats))
 1736                 if matches:
 1737                     f = matches[-1]
 1738                     break
 1739             else:
 1740                 f = formats[-1]
 1741                 self.report_warning(
 1742                     'No subtitle format found matching "%s" for language %s, '
 1743                     'using %s' % (formats_query, lang, f['ext']))
 1744             subs[lang] = f
 1745         return subs
 1746 
 1747     def __forced_printings(self, info_dict, filename, incomplete):
 1748         def print_mandatory(field):
 1749             if (self.params.get('force%s' % field, False)
 1750                     and (not incomplete or info_dict.get(field) is not None)):
 1751                 self.to_stdout(info_dict[field])
 1752 
 1753         def print_optional(field):
 1754             if (self.params.get('force%s' % field, False)
 1755                     and info_dict.get(field) is not None):
 1756                 self.to_stdout(info_dict[field])
 1757 
 1758         print_mandatory('title')
 1759         print_mandatory('id')
 1760         if self.params.get('forceurl', False) and not incomplete:
 1761             if info_dict.get('requested_formats') is not None:
 1762                 for f in info_dict['requested_formats']:
 1763                     self.to_stdout(f['url'] + f.get('play_path', ''))
 1764             else:
 1765                 # For RTMP URLs, also include the playpath
 1766                 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
 1767         print_optional('thumbnail')
 1768         print_optional('description')
 1769         if self.params.get('forcefilename', False) and filename is not None:
 1770             self.to_stdout(filename)
 1771         if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
 1772             self.to_stdout(formatSeconds(info_dict['duration']))
 1773         print_mandatory('format')
 1774         if self.params.get('forcejson', False):
 1775             self.to_stdout(json.dumps(info_dict))
 1776 
 1777     def process_info(self, info_dict):
 1778         """Process a single resolved IE result."""
 1779 
 1780         assert info_dict.get('_type', 'video') == 'video'
 1781 
 1782         max_downloads = int_or_none(self.params.get('max_downloads')) or float('inf')
 1783         if self._num_downloads >= max_downloads:
 1784             raise MaxDownloadsReached()
 1785 
 1786         # TODO: backward compatibility, to be removed
 1787         info_dict['fulltitle'] = info_dict['title']
 1788 
 1789         if 'format' not in info_dict:
 1790             info_dict['format'] = info_dict['ext']
 1791 
 1792         reason = self._match_entry(info_dict, incomplete=False)
 1793         if reason is not None:
 1794             self.to_screen('[download] ' + reason)
 1795             return
 1796 
 1797         self._num_downloads += 1
 1798 
 1799         info_dict['_filename'] = filename = self.prepare_filename(info_dict)
 1800 
 1801         # Forced printings
 1802         self.__forced_printings(info_dict, filename, incomplete=False)
 1803 
 1804         # Do nothing else if in simulate mode
 1805         if self.params.get('simulate', False):
 1806             return
 1807 
 1808         if filename is None:
 1809             return
 1810 
 1811         def ensure_dir_exists(path):
 1812             try:
 1813                 dn = os.path.dirname(path)
 1814                 if dn and not os.path.exists(dn):
 1815                     os.makedirs(dn)
 1816                 return True
 1817             except (OSError, IOError) as err:
 1818                 if isinstance(err, OSError) and err.errno == errno.EEXIST:
 1819                     return True
 1820                 self.report_error('unable to create directory ' + error_to_compat_str(err))
 1821                 return False
 1822 
 1823         if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
 1824             return
 1825 
 1826         if self.params.get('writedescription', False):
 1827             descfn = replace_extension(filename, 'description', info_dict.get('ext'))
 1828             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
 1829                 self.to_screen('[info] Video description is already present')
 1830             elif info_dict.get('description') is None:
 1831                 self.report_warning('There\'s no description to write.')
 1832             else:
 1833                 try:
 1834                     self.to_screen('[info] Writing video description to: ' + descfn)
 1835                     with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
 1836                         descfile.write(info_dict['description'])
 1837                 except (OSError, IOError):
 1838                     self.report_error('Cannot write description file ' + descfn)
 1839                     return
 1840 
 1841         if self.params.get('writeannotations', False):
 1842             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
 1843             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
 1844                 self.to_screen('[info] Video annotations are already present')
 1845             elif not info_dict.get('annotations'):
 1846                 self.report_warning('There are no annotations to write.')
 1847             else:
 1848                 try:
 1849                     self.to_screen('[info] Writing video annotations to: ' + annofn)
 1850                     with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
 1851                         annofile.write(info_dict['annotations'])
 1852                 except (KeyError, TypeError):
 1853                     self.report_warning('There are no annotations to write.')
 1854                 except (OSError, IOError):
 1855                     self.report_error('Cannot write annotations file: ' + annofn)
 1856                     return
 1857 
 1858         subtitles_are_requested = any([self.params.get('writesubtitles', False),
 1859                                        self.params.get('writeautomaticsub')])
 1860 
 1861         if subtitles_are_requested and info_dict.get('requested_subtitles'):
 1862             # subtitles download errors are already managed as troubles in relevant IE
 1863             # that way it will silently go on when used with unsupporting IE
 1864             subtitles = info_dict['requested_subtitles']
 1865             ie = self.get_info_extractor(info_dict['extractor_key'])
 1866             for sub_lang, sub_info in subtitles.items():
 1867                 sub_format = sub_info['ext']
 1868                 sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
 1869                 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
 1870                     self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
 1871                 else:
 1872                     self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
 1873                     if sub_info.get('data') is not None:
 1874                         try:
 1875                             # Use newline='' to prevent conversion of newline characters
 1876                             # See https://github.com/ytdl-org/youtube-dl/issues/10268
 1877                             with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
 1878                                 subfile.write(sub_info['data'])
 1879                         except (OSError, IOError):
 1880                             self.report_error('Cannot write subtitles file ' + sub_filename)
 1881                             return
 1882                     else:
 1883                         try:
 1884                             sub_data = ie._request_webpage(
 1885                                 sub_info['url'], info_dict['id'], note=False).read()
 1886                             with io.open(encodeFilename(sub_filename), 'wb') as subfile:
 1887                                 subfile.write(sub_data)
 1888                         except (ExtractorError, IOError, OSError, ValueError) as err:
 1889                             self.report_warning('Unable to download subtitle for "%s": %s' %
 1890                                                 (sub_lang, error_to_compat_str(err)))
 1891                             continue
 1892 
 1893         if self.params.get('writeinfojson', False):
 1894             infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
 1895             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
 1896                 self.to_screen('[info] Video description metadata is already present')
 1897             else:
 1898                 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
 1899                 try:
 1900                     write_json_file(self.filter_requested_info(info_dict), infofn)
 1901                 except (OSError, IOError):
 1902                     self.report_error('Cannot write metadata to JSON file ' + infofn)
 1903                     return
 1904 
 1905         self._write_thumbnails(info_dict, filename)
 1906 
 1907         if not self.params.get('skip_download', False):
 1908             try:
 1909                 def checked_get_suitable_downloader(info_dict, params):
 1910                     ed_args = params.get('external_downloader_args')
 1911                     dler = get_suitable_downloader(info_dict, params)
 1912                     if ed_args and not params.get('external_downloader_args'):
 1913                         # external_downloader_args was cleared because external_downloader was rejected
 1914                         self.report_warning('Requested external downloader cannot be used: '
 1915                                             'ignoring --external-downloader-args.')
 1916                     return dler
 1917 
 1918                 def dl(name, info):
 1919                     fd = checked_get_suitable_downloader(info, self.params)(self, self.params)
 1920                     for ph in self._progress_hooks:
 1921                         fd.add_progress_hook(ph)
 1922                     if self.params.get('verbose'):
 1923                         self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
 1924                     return fd.download(name, info)
 1925 
 1926                 if info_dict.get('requested_formats') is not None:
 1927                     downloaded = []
 1928                     success = True
 1929                     merger = FFmpegMergerPP(self)
 1930                     if not merger.available:
 1931                         postprocessors = []
 1932                         self.report_warning('You have requested multiple '
 1933                                             'formats but ffmpeg or avconv are not installed.'
 1934                                             ' The formats won\'t be merged.')
 1935                     else:
 1936                         postprocessors = [merger]
 1937 
 1938                     def compatible_formats(formats):
 1939                         video, audio = formats
 1940                         # Check extension
 1941                         video_ext, audio_ext = video.get('ext'), audio.get('ext')
 1942                         if video_ext and audio_ext:
 1943                             COMPATIBLE_EXTS = (
 1944                                 ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma'),
 1945                                 ('webm')
 1946                             )
 1947                             for exts in COMPATIBLE_EXTS:
 1948                                 if video_ext in exts and audio_ext in exts:
 1949                                     return True
 1950                         # TODO: Check acodec/vcodec
 1951                         return False
 1952 
 1953                     filename_real_ext = os.path.splitext(filename)[1][1:]
 1954                     filename_wo_ext = (
 1955                         os.path.splitext(filename)[0]
 1956                         if filename_real_ext == info_dict['ext']
 1957                         else filename)
 1958                     requested_formats = info_dict['requested_formats']
 1959                     if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
 1960                         info_dict['ext'] = 'mkv'
 1961                         self.report_warning(
 1962                             'Requested formats are incompatible for merge and will be merged into mkv.')
 1963                     # Ensure filename always has a correct extension for successful merge
 1964                     filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
 1965                     if os.path.exists(encodeFilename(filename)):
 1966                         self.to_screen(
 1967                             '[download] %s has already been downloaded and '
 1968                             'merged' % filename)
 1969                     else:
 1970                         for f in requested_formats:
 1971                             new_info = dict(info_dict)
 1972                             new_info.update(f)
 1973                             fname = prepend_extension(
 1974                                 self.prepare_filename(new_info),
 1975                                 'f%s' % f['format_id'], new_info['ext'])
 1976                             if not ensure_dir_exists(fname):
 1977                                 return
 1978                             downloaded.append(fname)
 1979                             partial_success = dl(fname, new_info)
 1980                             success = success and partial_success
 1981                         info_dict['__postprocessors'] = postprocessors
 1982                         info_dict['__files_to_merge'] = downloaded
 1983                 else:
 1984                     # Just a single file
 1985                     success = dl(filename, info_dict)
 1986             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 1987                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
 1988                 return
 1989             except (OSError, IOError) as err:
 1990                 raise UnavailableVideoError(err)
 1991             except (ContentTooShortError, ) as err:
 1992                 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 1993                 return
 1994 
 1995             if success and filename != '-':
 1996                 # Fixup content
 1997                 fixup_policy = self.params.get('fixup')
 1998                 if fixup_policy is None:
 1999                     fixup_policy = 'detect_or_warn'
 2000 
 2001                 INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
 2002 
 2003                 stretched_ratio = info_dict.get('stretched_ratio')
 2004                 if stretched_ratio is not None and stretched_ratio != 1:
 2005                     if fixup_policy == 'warn':
 2006                         self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
 2007                             info_dict['id'], stretched_ratio))
 2008                     elif fixup_policy == 'detect_or_warn':
 2009                         stretched_pp = FFmpegFixupStretchedPP(self)
 2010                         if stretched_pp.available:
 2011                             info_dict.setdefault('__postprocessors', [])
 2012                             info_dict['__postprocessors'].append(stretched_pp)
 2013                         else:
 2014                             self.report_warning(
 2015                                 '%s: Non-uniform pixel ratio (%s). %s'
 2016                                 % (info_dict['id'], stretched_ratio, INSTALL_FFMPEG_MESSAGE))
 2017                     else:
 2018                         assert fixup_policy in ('ignore', 'never')
 2019 
 2020                 if (info_dict.get('requested_formats') is None
 2021                         and info_dict.get('container') == 'm4a_dash'):
 2022                     if fixup_policy == 'warn':
 2023                         self.report_warning(
 2024                             '%s: writing DASH m4a. '
 2025                             'Only some players support this container.'
 2026                             % info_dict['id'])
 2027                     elif fixup_policy == 'detect_or_warn':
 2028                         fixup_pp = FFmpegFixupM4aPP(self)
 2029                         if fixup_pp.available:
 2030                             info_dict.setdefault('__postprocessors', [])
 2031                             info_dict['__postprocessors'].append(fixup_pp)
 2032                         else:
 2033                             self.report_warning(
 2034                                 '%s: writing DASH m4a. '
 2035                                 'Only some players support this container. %s'
 2036                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
 2037                     else:
 2038                         assert fixup_policy in ('ignore', 'never')
 2039 
 2040                 if (info_dict.get('protocol') == 'm3u8_native'
 2041                         or info_dict.get('protocol') == 'm3u8'
 2042                         and self.params.get('hls_prefer_native')):
 2043                     if fixup_policy == 'warn':
 2044                         self.report_warning('%s: malformed AAC bitstream detected.' % (
 2045                             info_dict['id']))
 2046                     elif fixup_policy == 'detect_or_warn':
 2047                         fixup_pp = FFmpegFixupM3u8PP(self)
 2048                         if fixup_pp.available:
 2049                             info_dict.setdefault('__postprocessors', [])
 2050                             info_dict['__postprocessors'].append(fixup_pp)
 2051                         else:
 2052                             self.report_warning(
 2053                                 '%s: malformed AAC bitstream detected. %s'
 2054                                 % (info_dict['id'], INSTALL_FFMPEG_MESSAGE))
 2055                     else:
 2056                         assert fixup_policy in ('ignore', 'never')
 2057 
 2058                 try:
 2059                     self.post_process(filename, info_dict)
 2060                 except (PostProcessingError) as err:
 2061                     self.report_error('postprocessing: %s' % error_to_compat_str(err))
 2062                     return
 2063                 self.record_download_archive(info_dict)
 2064                 # avoid possible nugatory search for further items (PR #26638)
 2065                 if self._num_downloads >= max_downloads:
 2066                     raise MaxDownloadsReached()
 2067 
 2068     def download(self, url_list):
 2069         """Download a given list of URLs."""
 2070         outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
 2071         if (len(url_list) > 1
 2072                 and outtmpl != '-'
 2073                 and '%' not in outtmpl
 2074                 and self.params.get('max_downloads') != 1):
 2075             raise SameFileError(outtmpl)
 2076 
 2077         for url in url_list:
 2078             try:
 2079                 # It also downloads the videos
 2080                 res = self.extract_info(
 2081                     url, force_generic_extractor=self.params.get('force_generic_extractor', False))
 2082             except UnavailableVideoError:
 2083                 self.report_error('unable to download video')
 2084             except MaxDownloadsReached:
 2085                 self.to_screen('[info] Maximum number of downloaded files reached.')
 2086                 raise
 2087             else:
 2088                 if self.params.get('dump_single_json', False):
 2089                     self.to_stdout(json.dumps(res))
 2090 
 2091         return self._download_retcode
 2092 
 2093     def download_with_info_file(self, info_filename):
 2094         with contextlib.closing(fileinput.FileInput(
 2095                 [info_filename], mode='r',
 2096                 openhook=fileinput.hook_encoded('utf-8'))) as f:
 2097             # FileInput doesn't have a read method, we can't call json.load
 2098             info = self.filter_requested_info(json.loads('\n'.join(f)))
 2099         try:
 2100             self.process_ie_result(info, download=True)
 2101         except DownloadError:
 2102             webpage_url = info.get('webpage_url')
 2103             if webpage_url is not None:
 2104                 self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
 2105                 return self.download([webpage_url])
 2106             else:
 2107                 raise
 2108         return self._download_retcode
 2109 
 2110     @staticmethod
 2111     def filter_requested_info(info_dict):
 2112         return dict(
 2113             (k, v) for k, v in info_dict.items()
 2114             if k not in ['requested_formats', 'requested_subtitles'])
 2115 
 2116     def post_process(self, filename, ie_info):
 2117         """Run all the postprocessors on the given file."""
 2118         info = dict(ie_info)
 2119         info['filepath'] = filename
 2120         pps_chain = []
 2121         if ie_info.get('__postprocessors') is not None:
 2122             pps_chain.extend(ie_info['__postprocessors'])
 2123         pps_chain.extend(self._pps)
 2124         for pp in pps_chain:
 2125             files_to_delete = []
 2126             try:
 2127                 files_to_delete, info = pp.run(info)
 2128             except PostProcessingError as e:
 2129                 self.report_error(e.msg)
 2130             if files_to_delete and not self.params.get('keepvideo', False):
 2131                 for old_filename in files_to_delete:
 2132                     self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
 2133                     try:
 2134                         os.remove(encodeFilename(old_filename))
 2135                     except (IOError, OSError):
 2136                         self.report_warning('Unable to remove downloaded original file')
 2137 
 2138     def _make_archive_id(self, info_dict):
 2139         video_id = info_dict.get('id')
 2140         if not video_id:
 2141             return
 2142         # Future-proof against any change in case
 2143         # and backwards compatibility with prior versions
 2144         extractor = info_dict.get('extractor_key') or info_dict.get('ie_key')  # key in a playlist
 2145         if extractor is None:
 2146             url = str_or_none(info_dict.get('url'))
 2147             if not url:
 2148                 return
 2149             # Try to find matching extractor for the URL and take its ie_key
 2150             for ie in self._ies:
 2151                 if ie.suitable(url):
 2152                     extractor = ie.ie_key()
 2153                     break
 2154             else:
 2155                 return
 2156         return extractor.lower() + ' ' + video_id
 2157 
 2158     def in_download_archive(self, info_dict):
 2159         fn = self.params.get('download_archive')
 2160         if fn is None:
 2161             return False
 2162 
 2163         vid_id = self._make_archive_id(info_dict)
 2164         if not vid_id:
 2165             return False  # Incomplete video information
 2166 
 2167         try:
 2168             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
 2169                 for line in archive_file:
 2170                     if line.strip() == vid_id:
 2171                         return True
 2172         except IOError as ioe:
 2173             if ioe.errno != errno.ENOENT:
 2174                 raise
 2175         return False
 2176 
 2177     def record_download_archive(self, info_dict):
 2178         fn = self.params.get('download_archive')
 2179         if fn is None:
 2180             return
 2181         vid_id = self._make_archive_id(info_dict)
 2182         assert vid_id
 2183         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
 2184             archive_file.write(vid_id + '\n')
 2185 
 2186     @staticmethod
 2187     def format_resolution(format, default='unknown'):
 2188         if format.get('vcodec') == 'none':
 2189             return 'audio only'
 2190         if format.get('resolution') is not None:
 2191             return format['resolution']
 2192         if format.get('height') is not None:
 2193             if format.get('width') is not None:
 2194                 res = '%sx%s' % (format['width'], format['height'])
 2195             else:
 2196                 res = '%sp' % format['height']
 2197         elif format.get('width') is not None:
 2198             res = '%dx?' % format['width']
 2199         else:
 2200             res = default
 2201         return res
 2202 
    def _format_note(self, fdict):
        """Build the free-form 'note' column for one row of the format table.

        Concatenates whatever metadata is present in the format dict
        (language, bitrates, codecs, fps, sample rate, filesize, ...) into a
        single descriptive string. Field widths (e.g. '%4dk', '%-5s') keep
        the columns of successive rows visually aligned.
        """
        res = ''
        if fdict.get('ext') in ['f4f', 'f4m']:
            res += '(unsupported) '
        if fdict.get('language'):
            if res:
                res += ' '
            res += '[%s] ' % fdict['language']
        if fdict.get('format_note') is not None:
            res += fdict['format_note'] + ' '
        if fdict.get('tbr') is not None:
            res += '%4dk ' % fdict['tbr']
        if fdict.get('container') is not None:
            if res:
                res += ', '
            res += '%s container' % fdict['container']
        if (fdict.get('vcodec') is not None
                and fdict.get('vcodec') != 'none'):
            if res:
                res += ', '
            res += fdict['vcodec']
            # '@' glues the codec name to the video bitrate appended below
            if fdict.get('vbr') is not None:
                res += '@'
        elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
            # Video bitrate known but codec unknown
            res += 'video@'
        if fdict.get('vbr') is not None:
            res += '%4dk' % fdict['vbr']
        if fdict.get('fps') is not None:
            if res:
                res += ', '
            res += '%sfps' % fdict['fps']
        if fdict.get('acodec') is not None:
            if res:
                res += ', '
            if fdict['acodec'] == 'none':
                res += 'video only'
            else:
                res += '%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            # Audio bitrate known but codec unknown
            if res:
                res += ', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += '@%3dk' % fdict['abr']
        if fdict.get('asr') is not None:
            res += ' (%5dHz)' % fdict['asr']
        if fdict.get('filesize') is not None:
            if res:
                res += ', '
            res += format_bytes(fdict['filesize'])
        elif fdict.get('filesize_approx') is not None:
            # '~' marks the size as an estimate
            if res:
                res += ', '
            res += '~' + format_bytes(fdict['filesize_approx'])
        return res
 2258 
 2259     def list_formats(self, info_dict):
 2260         formats = info_dict.get('formats', [info_dict])
 2261         table = [
 2262             [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
 2263             for f in formats
 2264             if f.get('preference') is None or f['preference'] >= -1000]
 2265         if len(formats) > 1:
 2266             table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
 2267 
 2268         header_line = ['format code', 'extension', 'resolution', 'note']
 2269         self.to_screen(
 2270             '[info] Available formats for %s:\n%s' %
 2271             (info_dict['id'], render_table(header_line, table)))
 2272 
 2273     def list_thumbnails(self, info_dict):
 2274         thumbnails = info_dict.get('thumbnails')
 2275         if not thumbnails:
 2276             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
 2277             return
 2278 
 2279         self.to_screen(
 2280             '[info] Thumbnails for %s:' % info_dict['id'])
 2281         self.to_screen(render_table(
 2282             ['ID', 'width', 'height', 'URL'],
 2283             [[t['id'], t.get('width', 'unknown'), t.get('height', 'unknown'), t['url']] for t in thumbnails]))
 2284 
 2285     def list_subtitles(self, video_id, subtitles, name='subtitles'):
 2286         if not subtitles:
 2287             self.to_screen('%s has no %s' % (video_id, name))
 2288             return
 2289         self.to_screen(
 2290             'Available %s for %s:' % (name, video_id))
 2291         self.to_screen(render_table(
 2292             ['Language', 'formats'],
 2293             [[lang, ', '.join(f['ext'] for f in reversed(formats))]
 2294                 for lang, formats in subtitles.items()]))
 2295 
 2296     def urlopen(self, req):
 2297         """ Start an HTTP download """
 2298         if isinstance(req, compat_basestring):
 2299             req = sanitized_Request(req)
 2300         return self._opener.open(req, timeout=self._socket_timeout)
 2301 
    def print_debug_header(self):
        """Write diagnostic information (versions, encodings, proxy map) to
        the debug output; no-op unless the 'verbose' option is set."""
        if not self.params.get('verbose'):
            return

        if type('') is not compat_str:
            # Python 2.6 on SLES11 SP1 (https://github.com/ytdl-org/youtube-dl/issues/3326)
            self.report_warning(
                'Your Python is broken! Update to a newer and supported version')

        stdout_encoding = getattr(
            sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
        encoding_str = (
            '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
                locale.getpreferredencoding(),
                sys.getfilesystemencoding(),
                stdout_encoding,
                self.get_encoding()))
        write_string(encoding_str, encoding=None)

        self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
        if _LAZY_LOADER:
            self._write_string('[debug] Lazy loading extractors enabled' + '\n')
        # Best effort: report the git revision when running from a checkout;
        # any failure (no git, not a checkout) is silently ignored
        try:
            sp = subprocess.Popen(
                ['git', 'rev-parse', '--short', 'HEAD'],
                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                cwd=os.path.dirname(os.path.abspath(__file__)))
            out, err = process_communicate_or_kill(sp)
            out = out.decode().strip()
            if re.match('[0-9a-f]+', out):
                self._write_string('[debug] Git HEAD: ' + out + '\n')
        except Exception:
            # sys.exc_clear exists only on Python 2; ignore its absence
            try:
                sys.exc_clear()
            except Exception:
                pass

        def python_implementation():
            # Append the interpreter version triple when running under PyPy
            impl_name = platform.python_implementation()
            if impl_name == 'PyPy' and hasattr(sys, 'pypy_version_info'):
                return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
            return impl_name

        self._write_string('[debug] Python version %s (%s) - %s\n' % (
            platform.python_version(), python_implementation(),
            platform_name()))

        # Versions of the external programs youtube-dl may invoke
        exe_versions = FFmpegPostProcessor.get_versions(self)
        exe_versions['rtmpdump'] = rtmpdump_version()
        exe_versions['phantomjs'] = PhantomJSwrapper._version()
        exe_str = ', '.join(
            '%s %s' % (exe, v)
            for exe, v in sorted(exe_versions.items())
            if v
        )
        if not exe_str:
            exe_str = 'none'
        self._write_string('[debug] exe versions: %s\n' % exe_str)

        # Collect the proxies configured on any of the opener's handlers
        proxy_map = {}
        for handler in self._opener.handlers:
            if hasattr(handler, 'proxies'):
                proxy_map.update(handler.proxies)
        self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

        if self.params.get('call_home', False):
            # Opt-in: report the public IP and check for a newer release
            ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
            self._write_string('[debug] Public IP address: %s\n' % ipaddr)
            latest_version = self.urlopen(
                'https://yt-dl.org/latest/version').read().decode('utf-8')
            if version_tuple(latest_version) > version_tuple(__version__):
                self.report_warning(
                    'You are using an outdated version (newest version: %s)! '
                    'See https://yt-dl.org/update if you need help updating.' %
                    latest_version)
 2377 
    def _setup_opener(self):
        """Build the urllib opener (cookies, proxies, HTTPS, redirects, data
        URLs) used for all HTTP traffic and store it on self._opener."""
        timeout_val = self.params.get('socket_timeout')
        # Default socket timeout: 600 seconds
        self._socket_timeout = 600 if timeout_val is None else float(timeout_val)

        opts_cookiefile = self.params.get('cookiefile')
        opts_proxy = self.params.get('proxy')

        if opts_cookiefile is None:
            self.cookiejar = compat_cookiejar.CookieJar()
        else:
            opts_cookiefile = expand_path(opts_cookiefile)
            self.cookiejar = YoutubeDLCookieJar(opts_cookiefile)
            # Load existing cookies only when the file is readable
            if os.access(opts_cookiefile, os.R_OK):
                self.cookiejar.load(ignore_discard=True, ignore_expires=True)

        cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
        if opts_proxy is not None:
            # An explicitly empty --proxy disables proxying entirely
            if opts_proxy == '':
                proxies = {}
            else:
                proxies = {'http': opts_proxy, 'https': opts_proxy}
        else:
            # Fall back to the environment's proxy configuration
            proxies = compat_urllib_request.getproxies()
            # Set HTTPS proxy to HTTP one if given (https://github.com/ytdl-org/youtube-dl/issues/805)
            if 'http' in proxies and 'https' not in proxies:
                proxies['https'] = proxies['http']
        proxy_handler = PerRequestProxyHandler(proxies)

        debuglevel = 1 if self.params.get('debug_printtraffic') else 0
        https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
        ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
        redirect_handler = YoutubeDLRedirectHandler()
        data_handler = compat_urllib_request_DataHandler()

        # When passing our own FileHandler instance, build_opener won't add the
        # default FileHandler and allows us to disable the file protocol, which
        # can be used for malicious purposes (see
        # https://github.com/ytdl-org/youtube-dl/issues/8227)
        file_handler = compat_urllib_request.FileHandler()

        def file_open(*args, **kwargs):
            raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
        file_handler.file_open = file_open

        opener = compat_urllib_request.build_opener(
            proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler)

        # Delete the default user-agent header, which would otherwise apply in
        # cases where our custom HTTP handler doesn't come into play
        # (See https://github.com/ytdl-org/youtube-dl/issues/1309 for details)
        opener.addheaders = []
        self._opener = opener
 2430 
 2431     def encode(self, s):
 2432         if isinstance(s, bytes):
 2433             return s  # Already encoded
 2434 
 2435         try:
 2436             return s.encode(self.get_encoding())
 2437         except UnicodeEncodeError as err:
 2438             err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
 2439             raise
 2440 
 2441     def get_encoding(self):
 2442         encoding = self.params.get('encoding')
 2443         if encoding is None:
 2444             encoding = preferredencoding()
 2445         return encoding
 2446 
 2447     def _write_thumbnails(self, info_dict, filename):
 2448         if self.params.get('writethumbnail', False):
 2449             thumbnails = info_dict.get('thumbnails')
 2450             if thumbnails:
 2451                 thumbnails = [thumbnails[-1]]
 2452         elif self.params.get('write_all_thumbnails', False):
 2453             thumbnails = info_dict.get('thumbnails')
 2454         else:
 2455             return
 2456 
 2457         if not thumbnails:
 2458             # No thumbnails present, so return immediately
 2459             return
 2460 
 2461         for t in thumbnails:
 2462             thumb_ext = determine_ext(t['url'], 'jpg')
 2463             suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
 2464             thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
 2465             t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
 2466 
 2467             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
 2468                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
 2469                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
 2470             else:
 2471                 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
 2472                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
 2473                 try:
 2474                     uf = self.urlopen(t['url'])
 2475                     with open(encodeFilename(thumb_filename), 'wb') as thumbf:
 2476                         shutil.copyfileobj(uf, thumbf)
 2477                     self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
 2478                                    (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
 2479                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 2480                     self.report_warning('Unable to download thumbnail "%s": %s' %
 2481                                         (t['url'], error_to_compat_str(err)))

Generated by cgit