youtube_dl/YoutubeDL.py



    1 #!/usr/bin/env python
    2 # -*- coding: utf-8 -*-
    3 
    4 from __future__ import absolute_import
    5 
    6 import errno
    7 import io
    8 import json
    9 import os
   10 import platform
   11 import re
   12 import shutil
   13 import subprocess
   14 import socket
   15 import sys
   16 import time
   17 import traceback
   18 
   19 if os.name == 'nt':
   20     import ctypes
   21 
   22 from .utils import (
   23     compat_cookiejar,
   24     compat_http_client,
   25     compat_print,
   26     compat_str,
   27     compat_urllib_error,
   28     compat_urllib_request,
   29     ContentTooShortError,
   30     date_from_str,
   31     DateRange,
   32     determine_ext,
   33     DownloadError,
   34     encodeFilename,
   35     ExtractorError,
   36     format_bytes,
   37     locked_file,
   38     make_HTTPS_handler,
   39     MaxDownloadsReached,
   40     PostProcessingError,
   41     platform_name,
   42     preferredencoding,
   43     SameFileError,
   44     sanitize_filename,
   45     subtitles_filename,
   46     takewhile_inclusive,
   47     UnavailableVideoError,
   48     write_json_file,
   49     write_string,
   50     YoutubeDLHandler,
   51 )
   52 from .extractor import get_info_extractor, gen_extractors
   53 from .FileDownloader import FileDownloader
   54 from .version import __version__
   55 
   56 
   57 class YoutubeDL(object):
   58     """YoutubeDL class.
   59 
   60     YoutubeDL objects are the ones responsible for downloading the
   61     actual video file and writing it to disk if the user has requested
   62     it, among some other tasks. In most cases there should be one per
   63     program. As, given a video URL, the downloader doesn't know how to
   64     extract all the needed information, task that InfoExtractors do, it
   65     has to pass the URL to one of them.
   66 
   67     For this, YoutubeDL objects have a method that allows
   68     InfoExtractors to be registered in a given order. When it is passed
   69     a URL, the YoutubeDL object hands it to the first InfoExtractor it
   70     finds that reports being able to handle it. The InfoExtractor extracts
   71     all the information about the video or videos the URL refers to, and
   72     YoutubeDL processes the extracted information, possibly using a File
   73     Downloader to download the video.
   74 
   75     YoutubeDL objects accept a lot of parameters. In order not to saturate
   76     the object constructor with arguments, it receives a dictionary of
   77     options instead. These options are available through the params
   78     attribute for the InfoExtractors to use. The YoutubeDL also
   79     registers itself as the downloader in charge for the InfoExtractors
   80     that are added to it, so this is a "mutual registration".
   81 
   82     Available options:
   83 
   84     username:          Username for authentication purposes.
   85     password:          Password for authentication purposes.
   86     videopassword:     Password for accessing a video.
   87     usenetrc:          Use netrc for authentication instead.
   88     verbose:           Print additional info to stdout.
   89     quiet:             Do not print messages to stdout.
   90     forceurl:          Force printing final URL.
   91     forcetitle:        Force printing title.
   92     forceid:           Force printing ID.
   93     forcethumbnail:    Force printing thumbnail URL.
   94     forcedescription:  Force printing description.
   95     forcefilename:     Force printing final filename.
   96     forcejson:         Force printing info_dict as JSON.
   97     simulate:          Do not download the video files.
   98     format:            Video format code.
   99     format_limit:      Highest quality format to try.
  100     outtmpl:           Template for output names.
  101     restrictfilenames: Do not allow "&" and spaces in file names
  102     ignoreerrors:      Do not stop on download errors.
  103     nooverwrites:      Prevent overwriting files.
  104     playliststart:     Playlist item to start at.
  105     playlistend:       Playlist item to end at.
  106     matchtitle:        Download only matching titles.
  107     rejecttitle:       Reject downloads for matching titles.
  108     logger:            Log messages to a logging.Logger instance.
  109     logtostderr:       Log messages to stderr instead of stdout.
  110     writedescription:  Write the video description to a .description file
  111     writeinfojson:     Write the video description to a .info.json file
  112     writeannotations:  Write the video annotations to a .annotations.xml file
  113     writethumbnail:    Write the thumbnail image to a file
  114     writesubtitles:    Write the video subtitles to a file
  115     writeautomaticsub: Write the automatic subtitles to a file
  116     allsubtitles:      Downloads all the subtitles of the video
  117                        (requires writesubtitles or writeautomaticsub)
  118     listsubtitles:     Lists all available subtitles for the video
  119     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
  120     subtitleslangs:    List of languages of the subtitles to download
  121     keepvideo:         Keep the video file after post-processing
  122     daterange:         A DateRange object, download only if the upload_date is in the range.
  123     skip_download:     Skip the actual download of the video file
  124     cachedir:          Location of the cache files in the filesystem.
  125                        None to disable filesystem cache.
  126     noplaylist:        Download single video instead of a playlist if in doubt.
  127     age_limit:         An integer representing the user's age in years.
  128                        Unsuitable videos for the given age are skipped.
  129     downloadarchive:   File name of a file where all downloads are recorded.
  130                        Videos already present in the file are not downloaded
  131                        again.
  132     cookiefile:        File name where cookies should be read from and dumped to.
  133     nocheckcertificate:Do not verify SSL certificates
  134     proxy:             URL of the proxy server to use
  135 
  136     The following parameters are not used by YoutubeDL itself, they are used by
  137     the FileDownloader:
  138     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
  139     noresizebuffer, retries, continuedl, noprogress, consoletitle
  140     """
  141 
  142     params = None
  143     _ies = []
  144     _pps = []
  145     _download_retcode = None
  146     _num_downloads = None
  147     _screen_file = None
  148 
  149     def __init__(self, params):
  150         """Create a FileDownloader object with the given options."""
  151         self._ies = []
  152         self._ies_instances = {}
  153         self._pps = []
  154         self._progress_hooks = []
  155         self._download_retcode = 0
  156         self._num_downloads = 0
  157         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
  158 
  159         if (sys.version_info >= (3,) and sys.platform != 'win32' and
  160                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
  161                 and not params['restrictfilenames']):
  162             # On Python 3, the Unicode filesystem API will throw errors (#1474)
  163             self.report_warning(
  164                 u'Assuming --restrict-filenames since file system encoding '
  165                 u'cannot encode all charactes. '
  166                 u'Set the LC_ALL environment variable to fix this.')
  167             params['restrictfilenames'] = True
  168 
  169         self.params = params
  170         self.fd = FileDownloader(self, self.params)
  171 
  172         if '%(stitle)s' in self.params['outtmpl']:
  173             self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
  174 
  175         self._setup_opener()
  176 
  177     def add_info_extractor(self, ie):
  178         """Add an InfoExtractor object to the end of the list."""
  179         self._ies.append(ie)
  180         self._ies_instances[ie.ie_key()] = ie
  181         ie.set_downloader(self)
  182 
  183     def get_info_extractor(self, ie_key):
  184         """
  185         Get an instance of an IE with name ie_key, it will try to get one from
  186         the _ies list, if there's no instance it will create a new one and add
  187         it to the extractor list.
  188         """
  189         ie = self._ies_instances.get(ie_key)
  190         if ie is None:
  191             ie = get_info_extractor(ie_key)()
  192             self.add_info_extractor(ie)
  193         return ie
  194 
  195     def add_default_info_extractors(self):
  196         """
  197         Add the InfoExtractors returned by gen_extractors to the end of the list
  198         """
  199         for ie in gen_extractors():
  200             self.add_info_extractor(ie)
  201 
  202     def add_post_processor(self, pp):
  203         """Add a PostProcessor object to the end of the chain."""
  204         self._pps.append(pp)
  205         pp.set_downloader(self)
  206 
  207     def to_screen(self, message, skip_eol=False):
  208         """Print message to stdout if not in quiet mode."""
  209         if self.params.get('logger'):
  210             self.params['logger'].debug(message)
  211         elif not self.params.get('quiet', False):
  212             terminator = [u'\n', u''][skip_eol]
  213             output = message + terminator
  214             write_string(output, self._screen_file)
  215 
  216     def to_stderr(self, message):
  217         """Print message to stderr."""
  218         assert type(message) == type(u'')
  219         if self.params.get('logger'):
  220             self.params['logger'].error(message)
  221         else:
  222             output = message + u'\n'
  223             if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
  224                 output = output.encode(preferredencoding())
  225             sys.stderr.write(output)
  226 
  227     def to_console_title(self, message):
  228         if not self.params.get('consoletitle', False):
  229             return
  230         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
  231             # c_wchar_p() might not be necessary if `message` is
  232             # already of type unicode()
  233             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
  234         elif 'TERM' in os.environ:
  235             write_string(u'\033]0;%s\007' % message, self._screen_file)
  236 
  237     def save_console_title(self):
  238         if not self.params.get('consoletitle', False):
  239             return
  240         if 'TERM' in os.environ:
  241             # Save the title on stack
  242             write_string(u'\033[22;0t', self._screen_file)
  243 
  244     def restore_console_title(self):
  245         if not self.params.get('consoletitle', False):
  246             return
  247         if 'TERM' in os.environ:
  248             # Restore the title from stack
  249             write_string(u'\033[23;0t', self._screen_file)
  250 
  251     def __enter__(self):
  252         self.save_console_title()
  253         return self
  254 
  255     def __exit__(self, *args):
  256         self.restore_console_title()
  257     
  258         if self.params.get('cookiefile') is not None:
  259             self.cookiejar.save()
  260 
  261     def fixed_template(self):
  262         """Checks if the output template is fixed."""
  263         return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
  264 
  265     def trouble(self, message=None, tb=None):
  266         """Determine action to take when a download problem appears.
  267 
  268         Depending on if the downloader has been configured to ignore
  269         download errors or not, this method may throw an exception or
  270         not when errors are found, after printing the message.
  271 
  272         tb, if given, is additional traceback information.
  273         """
  274         if message is not None:
  275             self.to_stderr(message)
  276         if self.params.get('verbose'):
  277             if tb is None:
  278                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
  279                     tb = u''
  280                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
  281                         tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
  282                     tb += compat_str(traceback.format_exc())
  283                 else:
  284                     tb_data = traceback.format_list(traceback.extract_stack())
  285                     tb = u''.join(tb_data)
  286             self.to_stderr(tb)
  287         if not self.params.get('ignoreerrors', False):
  288             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
  289                 exc_info = sys.exc_info()[1].exc_info
  290             else:
  291                 exc_info = sys.exc_info()
  292             raise DownloadError(message, exc_info)
  293         self._download_retcode = 1
  294 
  295     def report_warning(self, message):
  296         '''
  297         Print the message to stderr, it will be prefixed with 'WARNING:'
  298         If stderr is a tty file the 'WARNING:' will be colored
  299         '''
  300         if sys.stderr.isatty() and os.name != 'nt':
  301             _msg_header = u'\033[0;33mWARNING:\033[0m'
  302         else:
  303             _msg_header = u'WARNING:'
  304         warning_message = u'%s %s' % (_msg_header, message)
  305         self.to_stderr(warning_message)
  306 
  307     def report_error(self, message, tb=None):
  308         '''
  309         Do the same as trouble, but prefixes the message with 'ERROR:', colored
  310         in red if stderr is a tty file.
  311         '''
  312         if sys.stderr.isatty() and os.name != 'nt':
  313             _msg_header = u'\033[0;31mERROR:\033[0m'
  314         else:
  315             _msg_header = u'ERROR:'
  316         error_message = u'%s %s' % (_msg_header, message)
  317         self.trouble(error_message, tb)
  318 
  319     def report_writedescription(self, descfn):
  320         """ Report that the description file is being written """
  321         self.to_screen(u'[info] Writing video description to: ' + descfn)
  322 
  323     def report_writesubtitles(self, sub_filename):
  324         """ Report that the subtitles file is being written """
  325         self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
  326 
  327     def report_writeinfojson(self, infofn):
  328         """ Report that the metadata file has been written """
  329         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
  330 
  331     def report_writeannotations(self, annofn):
  332         """ Report that the annotations file has been written. """
  333         self.to_screen(u'[info] Writing video annotations to: ' + annofn)
  334 
  335     def report_file_already_downloaded(self, file_name):
  336         """Report file has already been fully downloaded."""
  337         try:
  338             self.to_screen(u'[download] %s has already been downloaded' % file_name)
  339         except UnicodeEncodeError:
  340             self.to_screen(u'[download] The file has already been downloaded')
  341 
  342     def increment_downloads(self):
  343         """Increment the ordinal that assigns a number to each file."""
  344         self._num_downloads += 1
  345 
  346     def prepare_filename(self, info_dict):
  347         """Generate the output filename."""
  348         try:
  349             template_dict = dict(info_dict)
  350 
  351             template_dict['epoch'] = int(time.time())
  352             autonumber_size = self.params.get('autonumber_size')
  353             if autonumber_size is None:
  354                 autonumber_size = 5
  355             autonumber_templ = u'%0' + str(autonumber_size) + u'd'
  356             template_dict['autonumber'] = autonumber_templ % self._num_downloads
  357             if template_dict.get('playlist_index') is not None:
  358                 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
  359 
  360             sanitize = lambda k, v: sanitize_filename(
  361                 u'NA' if v is None else compat_str(v),
  362                 restricted=self.params.get('restrictfilenames'),
  363                 is_id=(k == u'id'))
  364             template_dict = dict((k, sanitize(k, v))
  365                                  for k, v in template_dict.items())
  366 
  367             tmpl = os.path.expanduser(self.params['outtmpl'])
  368             filename = tmpl % template_dict
  369             return filename
  370         except KeyError as err:
  371             self.report_error(u'Erroneous output template')
  372             return None
  373         except ValueError as err:
  374             self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
  375             return None
  376 
  377     def _match_entry(self, info_dict):
  378         """ Returns None iff the file should be downloaded """
  379 
  380         if 'title' in info_dict:
  381             # This can happen when we're just evaluating the playlist
  382             title = info_dict['title']
  383             matchtitle = self.params.get('matchtitle', False)
  384             if matchtitle:
  385                 if not re.search(matchtitle, title, re.IGNORECASE):
  386                     return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
  387             rejecttitle = self.params.get('rejecttitle', False)
  388             if rejecttitle:
  389                 if re.search(rejecttitle, title, re.IGNORECASE):
  390                     return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
  391         date = info_dict.get('upload_date', None)
  392         if date is not None:
  393             dateRange = self.params.get('daterange', DateRange())
  394             if date not in dateRange:
  395                 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
  396         age_limit = self.params.get('age_limit')
  397         if age_limit is not None:
  398             if age_limit < info_dict.get('age_limit', 0):
  399                 return u'Skipping "' + title + '" because it is age restricted'
  400         if self.in_download_archive(info_dict):
  401             return (u'%s has already been recorded in archive'
  402                     % info_dict.get('title', info_dict.get('id', u'video')))
  403         return None
  404 
  405     @staticmethod
  406     def add_extra_info(info_dict, extra_info):
  407         '''Set the keys from extra_info in info dict if they are missing'''
  408         for key, value in extra_info.items():
  409             info_dict.setdefault(key, value)
  410 
  411     def extract_info(self, url, download=True, ie_key=None, extra_info={}):
  412         '''
  413         Returns a list with a dictionary for each video we find.
  414         If 'download', also downloads the videos.
  415         extra_info is a dict containing the extra values to add to each result
  416          '''
  417 
  418         if ie_key:
  419             ies = [self.get_info_extractor(ie_key)]
  420         else:
  421             ies = self._ies
  422 
  423         for ie in ies:
  424             if not ie.suitable(url):
  425                 continue
  426 
  427             if not ie.working():
  428                 self.report_warning(u'The program functionality for this site has been marked as broken, '
  429                                     u'and will probably not work.')
  430 
  431             try:
  432                 ie_result = ie.extract(url)
  433                 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
  434                     break
  435                 if isinstance(ie_result, list):
  436                     # Backwards compatibility: old IE result format
  437                     ie_result = {
  438                         '_type': 'compat_list',
  439                         'entries': ie_result,
  440                     }
  441                 self.add_extra_info(ie_result,
  442                     {
  443                         'extractor': ie.IE_NAME,
  444                         'webpage_url': url,
  445                         'extractor_key': ie.ie_key(),
  446                     })
  447                 return self.process_ie_result(ie_result, download, extra_info)
  448             except ExtractorError as de: # An error we somewhat expected
  449                 self.report_error(compat_str(de), de.format_traceback())
  450                 break
  451             except Exception as e:
  452                 if self.params.get('ignoreerrors', False):
  453                     self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
  454                     break
  455                 else:
  456                     raise
  457         else:
  458             self.report_error(u'no suitable InfoExtractor: %s' % url)
  459 
  460     def process_ie_result(self, ie_result, download=True, extra_info={}):
  461         """
  462         Take the result of the ie(may be modified) and resolve all unresolved
  463         references (URLs, playlist items).
  464 
  465         It will also download the videos if 'download'.
  466         Returns the resolved ie_result.
  467         """
  468 
  469         result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
  470         if result_type == 'video':
  471             self.add_extra_info(ie_result, extra_info)
  472             return self.process_video_result(ie_result, download=download)
  473         elif result_type == 'url':
  474             # We have to add extra_info to the results because it may be
  475             # contained in a playlist
  476             return self.extract_info(ie_result['url'],
  477                                      download,
  478                                      ie_key=ie_result.get('ie_key'),
  479                                      extra_info=extra_info)
  480         elif result_type == 'playlist':
  481 
  482             # We process each entry in the playlist
  483             playlist = ie_result.get('title', None) or ie_result.get('id', None)
  484             self.to_screen(u'[download] Downloading playlist: %s' % playlist)
  485 
  486             playlist_results = []
  487 
  488             n_all_entries = len(ie_result['entries'])
  489             playliststart = self.params.get('playliststart', 1) - 1
  490             playlistend = self.params.get('playlistend', -1)
  491 
  492             if playlistend == -1:
  493                 entries = ie_result['entries'][playliststart:]
  494             else:
  495                 entries = ie_result['entries'][playliststart:playlistend]
  496 
  497             n_entries = len(entries)
  498 
  499             self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
  500                 (ie_result['extractor'], playlist, n_all_entries, n_entries))
  501 
  502             for i, entry in enumerate(entries, 1):
  503                 self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
  504                 extra = {
  505                     'playlist': playlist,
  506                     'playlist_index': i + playliststart,
  507                     'extractor': ie_result['extractor'],
  508                     'webpage_url': ie_result['webpage_url'],
  509                     'extractor_key': ie_result['extractor_key'],
  510                 }
  511 
  512                 reason = self._match_entry(entry)
  513                 if reason is not None:
  514                     self.to_screen(u'[download] ' + reason)
  515                     continue
  516 
  517                 entry_result = self.process_ie_result(entry,
  518                                                       download=download,
  519                                                       extra_info=extra)
  520                 playlist_results.append(entry_result)
  521             ie_result['entries'] = playlist_results
  522             return ie_result
  523         elif result_type == 'compat_list':
  524             def _fixup(r):
  525                 self.add_extra_info(r,
  526                     {
  527                         'extractor': ie_result['extractor'],
  528                         'webpage_url': ie_result['webpage_url'],
  529                         'extractor_key': ie_result['extractor_key'],
  530                     })
  531                 return r
  532             ie_result['entries'] = [
  533                 self.process_ie_result(_fixup(r), download, extra_info)
  534                 for r in ie_result['entries']
  535             ]
  536             return ie_result
  537         else:
  538             raise Exception('Invalid result type: %s' % result_type)
  539 
  540     def select_format(self, format_spec, available_formats):
  541         if format_spec == 'best' or format_spec is None:
  542             return available_formats[-1]
  543         elif format_spec == 'worst':
  544             return available_formats[0]
  545         else:
  546             extensions = [u'mp4', u'flv', u'webm', u'3gp']
  547             if format_spec in extensions:
  548                 filter_f = lambda f: f['ext'] == format_spec
  549             else:
  550                 filter_f = lambda f: f['format_id'] == format_spec
  551             matches = list(filter(filter_f, available_formats))
  552             if matches:
  553                 return matches[-1]
  554         return None
  555 
  556     def process_video_result(self, info_dict, download=True):
  557         assert info_dict.get('_type', 'video') == 'video'
  558 
  559         if 'playlist' not in info_dict:
  560             # It isn't part of a playlist
  561             info_dict['playlist'] = None
  562             info_dict['playlist_index'] = None
  563 
  564         # This extractors handle format selection themselves
  565         if info_dict['extractor'] in [u'youtube', u'Youku']:
  566             if download:
  567                 self.process_info(info_dict)
  568             return info_dict
  569 
  570         # We now pick which formats have to be downloaded
  571         if info_dict.get('formats') is None:
  572             # There's only one format available
  573             formats = [info_dict]
  574         else:
  575             formats = info_dict['formats']
  576 
  577         # We check that all the formats have the format and format_id fields
  578         for (i, format) in enumerate(formats):
  579             if format.get('format_id') is None:
  580                 format['format_id'] = compat_str(i)
  581             if format.get('format') is None:
  582                 format['format'] = u'{id} - {res}{note}'.format(
  583                     id=format['format_id'],
  584                     res=self.format_resolution(format),
  585                     note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
  586                 )
  587             # Automatically determine file extension if missing
  588             if 'ext' not in format:
  589                 format['ext'] = determine_ext(format['url'])
  590 
  591         if self.params.get('listformats', None):
  592             self.list_formats(info_dict)
  593             return
  594 
  595         format_limit = self.params.get('format_limit', None)
  596         if format_limit:
  597             formats = list(takewhile_inclusive(
  598                 lambda f: f['format_id'] != format_limit, formats
  599             ))
  600         if self.params.get('prefer_free_formats'):
  601             def _free_formats_key(f):
  602                 try:
  603                     ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
  604                 except ValueError:
  605                     ext_ord = -1
  606                 # We only compare the extension if they have the same height and width
  607                 return (f.get('height'), f.get('width'), ext_ord)
  608             formats = sorted(formats, key=_free_formats_key)
  609 
  610         req_format = self.params.get('format', 'best')
  611         if req_format is None:
  612             req_format = 'best'
  613         formats_to_download = []
  614         # The -1 is for supporting YoutubeIE
  615         if req_format in ('-1', 'all'):
  616             formats_to_download = formats
  617         else:
  618             # We can accept formats requestd in the format: 34/5/best, we pick
  619             # the first that is available, starting from left
  620             req_formats = req_format.split('/')
  621             for rf in req_formats:
  622                 selected_format = self.select_format(rf, formats)
  623                 if selected_format is not None:
  624                     formats_to_download = [selected_format]
  625                     break
  626         if not formats_to_download:
  627             raise ExtractorError(u'requested format not available',
  628                                  expected=True)
  629 
  630         if download:
  631             if len(formats_to_download) > 1:
  632                 self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
  633             for format in formats_to_download:
  634                 new_info = dict(info_dict)
  635                 new_info.update(format)
  636                 self.process_info(new_info)
  637         # We update the info dict with the best quality format (backwards compatibility)
  638         info_dict.update(formats_to_download[-1])
  639         return info_dict
  640 
  641     def process_info(self, info_dict):
  642         """Process a single resolved IE result."""
  643 
  644         assert info_dict.get('_type', 'video') == 'video'
  645         #We increment the download the download count here to match the previous behaviour.
  646         self.increment_downloads()
  647 
  648         info_dict['fulltitle'] = info_dict['title']
  649         if len(info_dict['title']) > 200:
  650             info_dict['title'] = info_dict['title'][:197] + u'...'
  651 
  652         # Keep for backwards compatibility
  653         info_dict['stitle'] = info_dict['title']
  654 
  655         if not 'format' in info_dict:
  656             info_dict['format'] = info_dict['ext']
  657 
  658         reason = self._match_entry(info_dict)
  659         if reason is not None:
  660             self.to_screen(u'[download] ' + reason)
  661             return
  662 
  663         max_downloads = self.params.get('max_downloads')
  664         if max_downloads is not None:
  665             if self._num_downloads > int(max_downloads):
  666                 raise MaxDownloadsReached()
  667 
  668         filename = self.prepare_filename(info_dict)
  669 
  670         # Forced printings
  671         if self.params.get('forcetitle', False):
  672             compat_print(info_dict['fulltitle'])
  673         if self.params.get('forceid', False):
  674             compat_print(info_dict['id'])
  675         if self.params.get('forceurl', False):
  676             # For RTMP URLs, also include the playpath
  677             compat_print(info_dict['url'] + info_dict.get('play_path', u''))
  678         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
  679             compat_print(info_dict['thumbnail'])
  680         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
  681             compat_print(info_dict['description'])
  682         if self.params.get('forcefilename', False) and filename is not None:
  683             compat_print(filename)
  684         if self.params.get('forceformat', False):
  685             compat_print(info_dict['format'])
  686         if self.params.get('forcejson', False):
  687             compat_print(json.dumps(info_dict))
  688 
  689         # Do nothing else if in simulate mode
  690         if self.params.get('simulate', False):
  691             return
  692 
  693         if filename is None:
  694             return
  695 
  696         try:
  697             dn = os.path.dirname(encodeFilename(filename))
  698             if dn != '' and not os.path.exists(dn):
  699                 os.makedirs(dn)
  700         except (OSError, IOError) as err:
  701             self.report_error(u'unable to create directory ' + compat_str(err))
  702             return
  703 
  704         if self.params.get('writedescription', False):
  705             try:
  706                 descfn = filename + u'.description'
  707                 self.report_writedescription(descfn)
  708                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
  709                     descfile.write(info_dict['description'])
  710             except (KeyError, TypeError):
  711                 self.report_warning(u'There\'s no description to write.')
  712             except (OSError, IOError):
  713                 self.report_error(u'Cannot write description file ' + descfn)
  714                 return
  715 
  716         if self.params.get('writeannotations', False):
  717             try:
  718                 annofn = filename + u'.annotations.xml'
  719                 self.report_writeannotations(annofn)
  720                 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
  721                     annofile.write(info_dict['annotations'])
  722             except (KeyError, TypeError):
  723                 self.report_warning(u'There are no annotations to write.')
  724             except (OSError, IOError):
  725                 self.report_error(u'Cannot write annotations file: ' + annofn)
  726                 return
  727 
  728         subtitles_are_requested = any([self.params.get('writesubtitles', False),
  729                                        self.params.get('writeautomaticsub')])
  730 
  731         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
  732             # subtitles download errors are already managed as troubles in relevant IE
  733             # that way it will silently go on when used with unsupporting IE
  734             subtitles = info_dict['subtitles']
  735             sub_format = self.params.get('subtitlesformat', 'srt')
  736             for sub_lang in subtitles.keys():
  737                 sub = subtitles[sub_lang]
  738                 if sub is None:
  739                     continue
  740                 try:
  741                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
  742                     self.report_writesubtitles(sub_filename)
  743                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
  744                             subfile.write(sub)
  745                 except (OSError, IOError):
  746                     self.report_error(u'Cannot write subtitles file ' + descfn)
  747                     return
  748 
  749         if self.params.get('writeinfojson', False):
  750             infofn = os.path.splitext(filename)[0] + u'.info.json'
  751             self.report_writeinfojson(infofn)
  752             try:
  753                 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
  754                 write_json_file(json_info_dict, encodeFilename(infofn))
  755             except (OSError, IOError):
  756                 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
  757                 return
  758 
  759         if self.params.get('writethumbnail', False):
  760             if info_dict.get('thumbnail') is not None:
  761                 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
  762                 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
  763                 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
  764                                (info_dict['extractor'], info_dict['id']))
  765                 try:
  766                     uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
  767                     with open(thumb_filename, 'wb') as thumbf:
  768                         shutil.copyfileobj(uf, thumbf)
  769                     self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
  770                         (info_dict['extractor'], info_dict['id'], thumb_filename))
  771                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
  772                     self.report_warning(u'Unable to download thumbnail "%s": %s' %
  773                         (info_dict['thumbnail'], compat_str(err)))
  774 
  775         if not self.params.get('skip_download', False):
  776             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
  777                 success = True
  778             else:
  779                 try:
  780                     success = self.fd._do_download(filename, info_dict)
  781                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
  782                     self.report_error(u'unable to download video data: %s' % str(err))
  783                     return
  784                 except (OSError, IOError) as err:
  785                     raise UnavailableVideoError(err)
  786                 except (ContentTooShortError, ) as err:
  787                     self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
  788                     return
  789 
  790             if success:
  791                 try:
  792                     self.post_process(filename, info_dict)
  793                 except (PostProcessingError) as err:
  794                     self.report_error(u'postprocessing: %s' % str(err))
  795                     return
  796 
  797         self.record_download_archive(info_dict)
  798 
  799     def download(self, url_list):
  800         """Download a given list of URLs."""
  801         if len(url_list) > 1 and self.fixed_template():
  802             raise SameFileError(self.params['outtmpl'])
  803 
  804         for url in url_list:
  805             try:
  806                 #It also downloads the videos
  807                 self.extract_info(url)
  808             except UnavailableVideoError:
  809                 self.report_error(u'unable to download video')
  810             except MaxDownloadsReached:
  811                 self.to_screen(u'[info] Maximum number of downloaded files reached.')
  812                 raise
  813 
  814         return self._download_retcode
  815 
  816     def post_process(self, filename, ie_info):
  817         """Run all the postprocessors on the given file."""
  818         info = dict(ie_info)
  819         info['filepath'] = filename
  820         keep_video = None
  821         for pp in self._pps:
  822             try:
  823                 keep_video_wish, new_info = pp.run(info)
  824                 if keep_video_wish is not None:
  825                     if keep_video_wish:
  826                         keep_video = keep_video_wish
  827                     elif keep_video is None:
  828                         # No clear decision yet, let IE decide
  829                         keep_video = keep_video_wish
  830             except PostProcessingError as e:
  831                 self.report_error(e.msg)
  832         if keep_video is False and not self.params.get('keepvideo', False):
  833             try:
  834                 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
  835                 os.remove(encodeFilename(filename))
  836             except (IOError, OSError):
  837                 self.report_warning(u'Unable to remove downloaded video file')
  838 
  839     def in_download_archive(self, info_dict):
  840         fn = self.params.get('download_archive')
  841         if fn is None:
  842             return False
  843         extractor = info_dict.get('extractor_id')
  844         if extractor is None:
  845             if 'id' in info_dict:
  846                 extractor = info_dict.get('ie_key')  # key in a playlist
  847         if extractor is None:
  848             return False  # Incomplete video information
  849         # Future-proof against any change in case
  850         # and backwards compatibility with prior versions
  851         extractor = extractor.lower()
  852         vid_id = extractor + u' ' + info_dict['id']
  853         try:
  854             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
  855                 for line in archive_file:
  856                     if line.strip() == vid_id:
  857                         return True
  858         except IOError as ioe:
  859             if ioe.errno != errno.ENOENT:
  860                 raise
  861         return False
  862 
  863     def record_download_archive(self, info_dict):
  864         fn = self.params.get('download_archive')
  865         if fn is None:
  866             return
  867         vid_id = info_dict['extractor'] + u' ' + info_dict['id']
  868         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
  869             archive_file.write(vid_id + u'\n')
  870 
  871     @staticmethod
  872     def format_resolution(format, default='unknown'):
  873         if format.get('_resolution') is not None:
  874             return format['_resolution']
  875         if format.get('height') is not None:
  876             if format.get('width') is not None:
  877                 res = u'%sx%s' % (format['width'], format['height'])
  878             else:
  879                 res = u'%sp' % format['height']
  880         else:
  881             res = default
  882         return res
  883 
  884     def list_formats(self, info_dict):
  885         def format_note(fdict):
  886             res = u''
  887             if fdict.get('format_note') is not None:
  888                 res += fdict['format_note'] + u' '
  889             if fdict.get('vcodec') is not None:
  890                 res += u'%-5s' % fdict['vcodec']
  891             elif fdict.get('vbr') is not None:
  892                 res += u'video'
  893             if fdict.get('vbr') is not None:
  894                 res += u'@%4dk' % fdict['vbr']
  895             if fdict.get('acodec') is not None:
  896                 if res:
  897                     res += u', '
  898                 res += u'%-5s' % fdict['acodec']
  899             elif fdict.get('abr') is not None:
  900                 if res:
  901                     res += u', '
  902                 res += 'audio'
  903             if fdict.get('abr') is not None:
  904                 res += u'@%3dk' % fdict['abr']
  905             if fdict.get('filesize') is not None:
  906                 if res:
  907                     res += u', '
  908                 res += format_bytes(fdict['filesize'])
  909             return res
  910 
  911         def line(format, idlen=20):
  912             return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
  913                 format['format_id'],
  914                 format['ext'],
  915                 self.format_resolution(format),
  916                 format_note(format),
  917             ))
  918 
  919         formats = info_dict.get('formats', [info_dict])
  920         idlen = max(len(u'format code'),
  921                     max(len(f['format_id']) for f in formats))
  922         formats_s = [line(f, idlen) for f in formats]
  923         if len(formats) > 1:
  924             formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
  925             formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
  926 
  927         header_line = line({
  928             'format_id': u'format code', 'ext': u'extension',
  929             '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
  930         self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
  931                        (info_dict['id'], header_line, u"\n".join(formats_s)))
  932 
  933     def urlopen(self, req):
  934         """ Start an HTTP download """
  935         return self._opener.open(req)
  936 
  937     def print_debug_header(self):
  938         if not self.params.get('verbose'):
  939             return
  940         write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
  941         try:
  942             sp = subprocess.Popen(
  943                 ['git', 'rev-parse', '--short', 'HEAD'],
  944                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
  945                 cwd=os.path.dirname(os.path.abspath(__file__)))
  946             out, err = sp.communicate()
  947             out = out.decode().strip()
  948             if re.match('[0-9a-f]+', out):
  949                 write_string(u'[debug] Git HEAD: ' + out + u'\n')
  950         except:
  951             try:
  952                 sys.exc_clear()
  953             except:
  954                 pass
  955         write_string(u'[debug] Python version %s - %s' %
  956                      (platform.python_version(), platform_name()) + u'\n')
  957 
  958         proxy_map = {}
  959         for handler in self._opener.handlers:
  960             if hasattr(handler, 'proxies'):
  961                 proxy_map.update(handler.proxies)
  962         write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
  963 
  964     def _setup_opener(self, timeout=300):
  965         opts_cookiefile = self.params.get('cookiefile')
  966         opts_proxy = self.params.get('proxy')
  967 
  968         if opts_cookiefile is None:
  969             self.cookiejar = compat_cookiejar.CookieJar()
  970         else:
  971             self.cookiejar = compat_cookiejar.MozillaCookieJar(
  972                 opts_cookiefile)
  973             if os.access(opts_cookiefile, os.R_OK):
  974                 self.cookiejar.load()
  975 
  976         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
  977             self.cookiejar)
  978         if opts_proxy is not None:
  979             if opts_proxy == '':
  980                 proxies = {}
  981             else:
  982                 proxies = {'http': opts_proxy, 'https': opts_proxy}
  983         else:
  984             proxies = compat_urllib_request.getproxies()
  985             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
  986             if 'http' in proxies and 'https' not in proxies:
  987                 proxies['https'] = proxies['http']
  988         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
  989         https_handler = make_HTTPS_handler(
  990             self.params.get('nocheckcertificate', False))
  991         opener = compat_urllib_request.build_opener(
  992             https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
  993         # Delete the default user-agent header, which would otherwise apply in
  994         # cases where our custom HTTP handler doesn't come into play
  995         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
  996         opener.addheaders = []
  997         self._opener = opener
  998 
  999         # TODO remove this global modification
 1000         compat_urllib_request.install_opener(opener)
 1001         socket.setdefaulttimeout(timeout)