summaryrefslogtreecommitdiff
path: root/youtube_dl/YoutubeDL.py
blob: e86e8a0907d39c061fb5f8a1def6487182f05056 (plain)
    1 #!/usr/bin/env python
    2 # -*- coding: utf-8 -*-
    3 
    4 from __future__ import absolute_import
    5 
    6 import errno
    7 import io
    8 import json
    9 import os
   10 import platform
   11 import re
   12 import shutil
   13 import subprocess
   14 import socket
   15 import sys
   16 import time
   17 import traceback
   18 
   19 if os.name == 'nt':
   20     import ctypes
   21 
   22 from .utils import (
   23     compat_cookiejar,
   24     compat_http_client,
   25     compat_print,
   26     compat_str,
   27     compat_urllib_error,
   28     compat_urllib_request,
   29     ContentTooShortError,
   30     date_from_str,
   31     DateRange,
   32     determine_ext,
   33     DownloadError,
   34     encodeFilename,
   35     ExtractorError,
   36     format_bytes,
   37     locked_file,
   38     make_HTTPS_handler,
   39     MaxDownloadsReached,
   40     PostProcessingError,
   41     platform_name,
   42     preferredencoding,
   43     SameFileError,
   44     sanitize_filename,
   45     subtitles_filename,
   46     takewhile_inclusive,
   47     UnavailableVideoError,
   48     write_json_file,
   49     write_string,
   50     YoutubeDLHandler,
   51 )
   52 from .extractor import get_info_extractor, gen_extractors
   53 from .FileDownloader import FileDownloader
   54 from .version import __version__
   55 
   56 
   57 class YoutubeDL(object):
   58     """YoutubeDL class.
   59 
   60     YoutubeDL objects are the ones responsible for downloading the
   61     actual video file and writing it to disk if the user has requested
   62     it, among some other tasks. In most cases there should be one per
   63     program. As, given a video URL, the downloader doesn't know how to
   64     extract all the needed information, task that InfoExtractors do, it
   65     has to pass the URL to one of them.
   66 
   67     For this, YoutubeDL objects have a method that allows
   68     InfoExtractors to be registered in a given order. When it is passed
   69     a URL, the YoutubeDL object handles it to the first InfoExtractor it
   70     finds that reports being able to handle it. The InfoExtractor extracts
   71     all the information about the video or videos the URL refers to, and
   72     YoutubeDL process the extracted information, possibly using a File
   73     Downloader to download the video.
   74 
   75     YoutubeDL objects accept a lot of parameters. In order not to saturate
   76     the object constructor with arguments, it receives a dictionary of
   77     options instead. These options are available through the params
   78     attribute for the InfoExtractors to use. The YoutubeDL also
   79     registers itself as the downloader in charge for the InfoExtractors
   80     that are added to it, so this is a "mutual registration".
   81 
   82     Available options:
   83 
   84     username:          Username for authentication purposes.
   85     password:          Password for authentication purposes.
   86     videopassword:     Password for accessing a video.
   87     usenetrc:          Use netrc for authentication instead.
   88     verbose:           Print additional info to stdout.
   89     quiet:             Do not print messages to stdout.
   90     forceurl:          Force printing final URL.
   91     forcetitle:        Force printing title.
   92     forceid:           Force printing ID.
   93     forcethumbnail:    Force printing thumbnail URL.
   94     forcedescription:  Force printing description.
   95     forcefilename:     Force printing final filename.
   96     forcejson:         Force printing info_dict as JSON.
   97     simulate:          Do not download the video files.
   98     format:            Video format code.
   99     format_limit:      Highest quality format to try.
  100     outtmpl:           Template for output names.
  101     restrictfilenames: Do not allow "&" and spaces in file names
  102     ignoreerrors:      Do not stop on download errors.
  103     nooverwrites:      Prevent overwriting files.
  104     playliststart:     Playlist item to start at.
  105     playlistend:       Playlist item to end at.
  106     matchtitle:        Download only matching titles.
  107     rejecttitle:       Reject downloads for matching titles.
  108     logger:            Log messages to a logging.Logger instance.
  109     logtostderr:       Log messages to stderr instead of stdout.
  110     writedescription:  Write the video description to a .description file
  111     writeinfojson:     Write the video description to a .info.json file
  112     writeannotations:  Write the video annotations to a .annotations.xml file
  113     writethumbnail:    Write the thumbnail image to a file
  114     writesubtitles:    Write the video subtitles to a file
  115     writeautomaticsub: Write the automatic subtitles to a file
  116     allsubtitles:      Downloads all the subtitles of the video
  117                        (requires writesubtitles or writeautomaticsub)
  118     listsubtitles:     Lists all available subtitles for the video
  119     subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
  120     subtitleslangs:    List of languages of the subtitles to download
  121     keepvideo:         Keep the video file after post-processing
  122     daterange:         A DateRange object, download only if the upload_date is in the range.
  123     skip_download:     Skip the actual download of the video file
  124     cachedir:          Location of the cache files in the filesystem.
  125                        None to disable filesystem cache.
  126     noplaylist:        Download single video instead of a playlist if in doubt.
  127     age_limit:         An integer representing the user's age in years.
  128                        Unsuitable videos for the given age are skipped.
  129     download_archive:   File name of a file where all downloads are recorded.
  130                        Videos already present in the file are not downloaded
  131                        again.
  132     cookiefile:        File name where cookies should be read from and dumped to.
  133     nocheckcertificate:Do not verify SSL certificates
  134     proxy:             URL of the proxy server to use
  135 
  136     The following parameters are not used by YoutubeDL itself, they are used by
  137     the FileDownloader:
  138     nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
  139     noresizebuffer, retries, continuedl, noprogress, consoletitle
  140     """
  141 
  142     params = None
  143     _ies = []
  144     _pps = []
  145     _download_retcode = None
  146     _num_downloads = None
  147     _screen_file = None
  148 
  149     def __init__(self, params={}):
  150         """Create a FileDownloader object with the given options."""
  151         self._ies = []
  152         self._ies_instances = {}
  153         self._pps = []
  154         self._progress_hooks = []
  155         self._download_retcode = 0
  156         self._num_downloads = 0
  157         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
  158 
  159         if (sys.version_info >= (3,) and sys.platform != 'win32' and
  160                 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
  161                 and not params['restrictfilenames']):
  162             # On Python 3, the Unicode filesystem API will throw errors (#1474)
  163             self.report_warning(
  164                 u'Assuming --restrict-filenames since file system encoding '
  165                 u'cannot encode all charactes. '
  166                 u'Set the LC_ALL environment variable to fix this.')
  167             params['restrictfilenames'] = True
  168 
  169         self.params = params
  170         self.fd = FileDownloader(self, self.params)
  171 
  172         if '%(stitle)s' in self.params.get('outtmpl', ''):
  173             self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
  174 
  175         self._setup_opener()
  176 
  177     def add_info_extractor(self, ie):
  178         """Add an InfoExtractor object to the end of the list."""
  179         self._ies.append(ie)
  180         self._ies_instances[ie.ie_key()] = ie
  181         ie.set_downloader(self)
  182 
  183     def get_info_extractor(self, ie_key):
  184         """
  185         Get an instance of an IE with name ie_key, it will try to get one from
  186         the _ies list, if there's no instance it will create a new one and add
  187         it to the extractor list.
  188         """
  189         ie = self._ies_instances.get(ie_key)
  190         if ie is None:
  191             ie = get_info_extractor(ie_key)()
  192             self.add_info_extractor(ie)
  193         return ie
  194 
  195     def add_default_info_extractors(self):
  196         """
  197         Add the InfoExtractors returned by gen_extractors to the end of the list
  198         """
  199         for ie in gen_extractors():
  200             self.add_info_extractor(ie)
  201 
  202     def add_post_processor(self, pp):
  203         """Add a PostProcessor object to the end of the chain."""
  204         self._pps.append(pp)
  205         pp.set_downloader(self)
  206 
  207     def to_screen(self, message, skip_eol=False):
  208         """Print message to stdout if not in quiet mode."""
  209         if self.params.get('logger'):
  210             self.params['logger'].debug(message)
  211         elif not self.params.get('quiet', False):
  212             terminator = [u'\n', u''][skip_eol]
  213             output = message + terminator
  214             write_string(output, self._screen_file)
  215 
  216     def to_stderr(self, message):
  217         """Print message to stderr."""
  218         assert type(message) == type(u'')
  219         if self.params.get('logger'):
  220             self.params['logger'].error(message)
  221         else:
  222             output = message + u'\n'
  223             if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
  224                 output = output.encode(preferredencoding())
  225             sys.stderr.write(output)
  226 
  227     def to_console_title(self, message):
  228         if not self.params.get('consoletitle', False):
  229             return
  230         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
  231             # c_wchar_p() might not be necessary if `message` is
  232             # already of type unicode()
  233             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
  234         elif 'TERM' in os.environ:
  235             write_string(u'\033]0;%s\007' % message, self._screen_file)
  236 
  237     def save_console_title(self):
  238         if not self.params.get('consoletitle', False):
  239             return
  240         if 'TERM' in os.environ:
  241             # Save the title on stack
  242             write_string(u'\033[22;0t', self._screen_file)
  243 
  244     def restore_console_title(self):
  245         if not self.params.get('consoletitle', False):
  246             return
  247         if 'TERM' in os.environ:
  248             # Restore the title from stack
  249             write_string(u'\033[23;0t', self._screen_file)
  250 
    def __enter__(self):
        """Enter the context: save the console title and return this object."""
        self.save_console_title()
        return self
  254 
  255     def __exit__(self, *args):
  256         self.restore_console_title()
  257     
  258         if self.params.get('cookiefile') is not None:
  259             self.cookiejar.save()
  260 
  261     def trouble(self, message=None, tb=None):
  262         """Determine action to take when a download problem appears.
  263 
  264         Depending on if the downloader has been configured to ignore
  265         download errors or not, this method may throw an exception or
  266         not when errors are found, after printing the message.
  267 
  268         tb, if given, is additional traceback information.
  269         """
  270         if message is not None:
  271             self.to_stderr(message)
  272         if self.params.get('verbose'):
  273             if tb is None:
  274                 if sys.exc_info()[0]:  # if .trouble has been called from an except block
  275                     tb = u''
  276                     if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
  277                         tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
  278                     tb += compat_str(traceback.format_exc())
  279                 else:
  280                     tb_data = traceback.format_list(traceback.extract_stack())
  281                     tb = u''.join(tb_data)
  282             self.to_stderr(tb)
  283         if not self.params.get('ignoreerrors', False):
  284             if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
  285                 exc_info = sys.exc_info()[1].exc_info
  286             else:
  287                 exc_info = sys.exc_info()
  288             raise DownloadError(message, exc_info)
  289         self._download_retcode = 1
  290 
  291     def report_warning(self, message):
  292         '''
  293         Print the message to stderr, it will be prefixed with 'WARNING:'
  294         If stderr is a tty file the 'WARNING:' will be colored
  295         '''
  296         if sys.stderr.isatty() and os.name != 'nt':
  297             _msg_header = u'\033[0;33mWARNING:\033[0m'
  298         else:
  299             _msg_header = u'WARNING:'
  300         warning_message = u'%s %s' % (_msg_header, message)
  301         self.to_stderr(warning_message)
  302 
  303     def report_error(self, message, tb=None):
  304         '''
  305         Do the same as trouble, but prefixes the message with 'ERROR:', colored
  306         in red if stderr is a tty file.
  307         '''
  308         if sys.stderr.isatty() and os.name != 'nt':
  309             _msg_header = u'\033[0;31mERROR:\033[0m'
  310         else:
  311             _msg_header = u'ERROR:'
  312         error_message = u'%s %s' % (_msg_header, message)
  313         self.trouble(error_message, tb)
  314 
  315     def report_writedescription(self, descfn):
  316         """ Report that the description file is being written """
  317         self.to_screen(u'[info] Writing video description to: ' + descfn)
  318 
  319     def report_writesubtitles(self, sub_filename):
  320         """ Report that the subtitles file is being written """
  321         self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
  322 
  323     def report_writeinfojson(self, infofn):
  324         """ Report that the metadata file has been written """
  325         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
  326 
  327     def report_writeannotations(self, annofn):
  328         """ Report that the annotations file has been written. """
  329         self.to_screen(u'[info] Writing video annotations to: ' + annofn)
  330 
  331     def report_file_already_downloaded(self, file_name):
  332         """Report file has already been fully downloaded."""
  333         try:
  334             self.to_screen(u'[download] %s has already been downloaded' % file_name)
  335         except UnicodeEncodeError:
  336             self.to_screen(u'[download] The file has already been downloaded')
  337 
  338     def increment_downloads(self):
  339         """Increment the ordinal that assigns a number to each file."""
  340         self._num_downloads += 1
  341 
  342     def prepare_filename(self, info_dict):
  343         """Generate the output filename."""
  344         try:
  345             template_dict = dict(info_dict)
  346 
  347             template_dict['epoch'] = int(time.time())
  348             autonumber_size = self.params.get('autonumber_size')
  349             if autonumber_size is None:
  350                 autonumber_size = 5
  351             autonumber_templ = u'%0' + str(autonumber_size) + u'd'
  352             template_dict['autonumber'] = autonumber_templ % self._num_downloads
  353             if template_dict.get('playlist_index') is not None:
  354                 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
  355 
  356             sanitize = lambda k, v: sanitize_filename(
  357                 u'NA' if v is None else compat_str(v),
  358                 restricted=self.params.get('restrictfilenames'),
  359                 is_id=(k == u'id'))
  360             template_dict = dict((k, sanitize(k, v))
  361                                  for k, v in template_dict.items())
  362 
  363             tmpl = os.path.expanduser(self.params['outtmpl'])
  364             filename = tmpl % template_dict
  365             return filename
  366         except KeyError as err:
  367             self.report_error(u'Erroneous output template')
  368             return None
  369         except ValueError as err:
  370             self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
  371             return None
  372 
  373     def _match_entry(self, info_dict):
  374         """ Returns None iff the file should be downloaded """
  375 
  376         if 'title' in info_dict:
  377             # This can happen when we're just evaluating the playlist
  378             title = info_dict['title']
  379             matchtitle = self.params.get('matchtitle', False)
  380             if matchtitle:
  381                 if not re.search(matchtitle, title, re.IGNORECASE):
  382                     return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
  383             rejecttitle = self.params.get('rejecttitle', False)
  384             if rejecttitle:
  385                 if re.search(rejecttitle, title, re.IGNORECASE):
  386                     return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
  387         date = info_dict.get('upload_date', None)
  388         if date is not None:
  389             dateRange = self.params.get('daterange', DateRange())
  390             if date not in dateRange:
  391                 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
  392         age_limit = self.params.get('age_limit')
  393         if age_limit is not None:
  394             if age_limit < info_dict.get('age_limit', 0):
  395                 return u'Skipping "' + title + '" because it is age restricted'
  396         if self.in_download_archive(info_dict):
  397             return (u'%s has already been recorded in archive'
  398                     % info_dict.get('title', info_dict.get('id', u'video')))
  399         return None
  400 
  401     @staticmethod
  402     def add_extra_info(info_dict, extra_info):
  403         '''Set the keys from extra_info in info dict if they are missing'''
  404         for key, value in extra_info.items():
  405             info_dict.setdefault(key, value)
  406 
    def extract_info(self, url, download=True, ie_key=None, extra_info={}):
        '''
        Extract the information of url with the first suitable InfoExtractor.

        If ie_key is given only that extractor is tried, otherwise the
        registered ones are tried in order.  The extraction result is passed
        to process_ie_result (together with download and extra_info) and
        whatever that returns is returned.
        extra_info is a dict containing the extra values to add to each result

        Nothing (None) is returned when no extractor is suitable, when the
        extractor itself returns None, or when an error has been reported
        without being raised.
         '''

        if ie_key:
            ies = [self.get_info_extractor(ie_key)]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning(u'The program functionality for this site has been marked as broken, '
                                    u'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                # Record which extractor produced the result and for which URL
                # (only keys missing from the result are set)
                self.add_extra_info(ie_result,
                    {
                        'extractor': ie.IE_NAME,
                        'webpage_url': url,
                        'extractor_key': ie.ie_key(),
                    })
                return self.process_ie_result(ie_result, download, extra_info)
            except ExtractorError as de: # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
                break
            except Exception as e:
                # Unexpected errors are only reported when errors are ignored,
                # otherwise they propagate unchanged
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # The loop finished without break: no extractor accepted the URL
            self.report_error(u'no suitable InfoExtractor: %s' % url)
  455 
  456     def process_ie_result(self, ie_result, download=True, extra_info={}):
  457         """
  458         Take the result of the ie(may be modified) and resolve all unresolved
  459         references (URLs, playlist items).
  460 
  461         It will also download the videos if 'download'.
  462         Returns the resolved ie_result.
  463         """
  464 
  465         result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
  466         if result_type == 'video':
  467             self.add_extra_info(ie_result, extra_info)
  468             return self.process_video_result(ie_result, download=download)
  469         elif result_type == 'url':
  470             # We have to add extra_info to the results because it may be
  471             # contained in a playlist
  472             return self.extract_info(ie_result['url'],
  473                                      download,
  474                                      ie_key=ie_result.get('ie_key'),
  475                                      extra_info=extra_info)
  476         elif result_type == 'playlist':
  477 
  478             # We process each entry in the playlist
  479             playlist = ie_result.get('title', None) or ie_result.get('id', None)
  480             self.to_screen(u'[download] Downloading playlist: %s' % playlist)
  481 
  482             playlist_results = []
  483 
  484             n_all_entries = len(ie_result['entries'])
  485             playliststart = self.params.get('playliststart', 1) - 1
  486             playlistend = self.params.get('playlistend', -1)
  487 
  488             if playlistend == -1:
  489                 entries = ie_result['entries'][playliststart:]
  490             else:
  491                 entries = ie_result['entries'][playliststart:playlistend]
  492 
  493             n_entries = len(entries)
  494 
  495             self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
  496                 (ie_result['extractor'], playlist, n_all_entries, n_entries))
  497 
  498             for i, entry in enumerate(entries, 1):
  499                 self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
  500                 extra = {
  501                     'playlist': playlist,
  502                     'playlist_index': i + playliststart,
  503                     'extractor': ie_result['extractor'],
  504                     'webpage_url': ie_result['webpage_url'],
  505                     'extractor_key': ie_result['extractor_key'],
  506                 }
  507 
  508                 reason = self._match_entry(entry)
  509                 if reason is not None:
  510                     self.to_screen(u'[download] ' + reason)
  511                     continue
  512 
  513                 entry_result = self.process_ie_result(entry,
  514                                                       download=download,
  515                                                       extra_info=extra)
  516                 playlist_results.append(entry_result)
  517             ie_result['entries'] = playlist_results
  518             return ie_result
  519         elif result_type == 'compat_list':
  520             def _fixup(r):
  521                 self.add_extra_info(r,
  522                     {
  523                         'extractor': ie_result['extractor'],
  524                         'webpage_url': ie_result['webpage_url'],
  525                         'extractor_key': ie_result['extractor_key'],
  526                     })
  527                 return r
  528             ie_result['entries'] = [
  529                 self.process_ie_result(_fixup(r), download, extra_info)
  530                 for r in ie_result['entries']
  531             ]
  532             return ie_result
  533         else:
  534             raise Exception('Invalid result type: %s' % result_type)
  535 
  536     def select_format(self, format_spec, available_formats):
  537         if format_spec == 'best' or format_spec is None:
  538             return available_formats[-1]
  539         elif format_spec == 'worst':
  540             return available_formats[0]
  541         else:
  542             extensions = [u'mp4', u'flv', u'webm', u'3gp']
  543             if format_spec in extensions:
  544                 filter_f = lambda f: f['ext'] == format_spec
  545             else:
  546                 filter_f = lambda f: f['format_id'] == format_spec
  547             matches = list(filter(filter_f, available_formats))
  548             if matches:
  549                 return matches[-1]
  550         return None
  551 
  552     def process_video_result(self, info_dict, download=True):
  553         assert info_dict.get('_type', 'video') == 'video'
  554 
  555         if 'playlist' not in info_dict:
  556             # It isn't part of a playlist
  557             info_dict['playlist'] = None
  558             info_dict['playlist_index'] = None
  559 
  560         # This extractors handle format selection themselves
  561         if info_dict['extractor'] in [u'youtube', u'Youku']:
  562             if download:
  563                 self.process_info(info_dict)
  564             return info_dict
  565 
  566         # We now pick which formats have to be downloaded
  567         if info_dict.get('formats') is None:
  568             # There's only one format available
  569             formats = [info_dict]
  570         else:
  571             formats = info_dict['formats']
  572 
  573         # We check that all the formats have the format and format_id fields
  574         for (i, format) in enumerate(formats):
  575             if format.get('format_id') is None:
  576                 format['format_id'] = compat_str(i)
  577             if format.get('format') is None:
  578                 format['format'] = u'{id} - {res}{note}'.format(
  579                     id=format['format_id'],
  580                     res=self.format_resolution(format),
  581                     note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
  582                 )
  583             # Automatically determine file extension if missing
  584             if 'ext' not in format:
  585                 format['ext'] = determine_ext(format['url'])
  586 
  587         if self.params.get('listformats', None):
  588             self.list_formats(info_dict)
  589             return
  590 
  591         format_limit = self.params.get('format_limit', None)
  592         if format_limit:
  593             formats = list(takewhile_inclusive(
  594                 lambda f: f['format_id'] != format_limit, formats
  595             ))
  596         if self.params.get('prefer_free_formats'):
  597             def _free_formats_key(f):
  598                 try:
  599                     ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
  600                 except ValueError:
  601                     ext_ord = -1
  602                 # We only compare the extension if they have the same height and width
  603                 return (f.get('height'), f.get('width'), ext_ord)
  604             formats = sorted(formats, key=_free_formats_key)
  605 
  606         req_format = self.params.get('format', 'best')
  607         if req_format is None:
  608             req_format = 'best'
  609         formats_to_download = []
  610         # The -1 is for supporting YoutubeIE
  611         if req_format in ('-1', 'all'):
  612             formats_to_download = formats
  613         else:
  614             # We can accept formats requestd in the format: 34/5/best, we pick
  615             # the first that is available, starting from left
  616             req_formats = req_format.split('/')
  617             for rf in req_formats:
  618                 selected_format = self.select_format(rf, formats)
  619                 if selected_format is not None:
  620                     formats_to_download = [selected_format]
  621                     break
  622         if not formats_to_download:
  623             raise ExtractorError(u'requested format not available',
  624                                  expected=True)
  625 
  626         if download:
  627             if len(formats_to_download) > 1:
  628                 self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
  629             for format in formats_to_download:
  630                 new_info = dict(info_dict)
  631                 new_info.update(format)
  632                 self.process_info(new_info)
  633         # We update the info dict with the best quality format (backwards compatibility)
  634         info_dict.update(formats_to_download[-1])
  635         return info_dict
  636 
  637     def process_info(self, info_dict):
  638         """Process a single resolved IE result."""
  639 
  640         assert info_dict.get('_type', 'video') == 'video'
  641         #We increment the download the download count here to match the previous behaviour.
  642         self.increment_downloads()
  643 
  644         info_dict['fulltitle'] = info_dict['title']
  645         if len(info_dict['title']) > 200:
  646             info_dict['title'] = info_dict['title'][:197] + u'...'
  647 
  648         # Keep for backwards compatibility
  649         info_dict['stitle'] = info_dict['title']
  650 
  651         if not 'format' in info_dict:
  652             info_dict['format'] = info_dict['ext']
  653 
  654         reason = self._match_entry(info_dict)
  655         if reason is not None:
  656             self.to_screen(u'[download] ' + reason)
  657             return
  658 
  659         max_downloads = self.params.get('max_downloads')
  660         if max_downloads is not None:
  661             if self._num_downloads > int(max_downloads):
  662                 raise MaxDownloadsReached()
  663 
  664         filename = self.prepare_filename(info_dict)
  665 
  666         # Forced printings
  667         if self.params.get('forcetitle', False):
  668             compat_print(info_dict['fulltitle'])
  669         if self.params.get('forceid', False):
  670             compat_print(info_dict['id'])
  671         if self.params.get('forceurl', False):
  672             # For RTMP URLs, also include the playpath
  673             compat_print(info_dict['url'] + info_dict.get('play_path', u''))
  674         if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
  675             compat_print(info_dict['thumbnail'])
  676         if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
  677             compat_print(info_dict['description'])
  678         if self.params.get('forcefilename', False) and filename is not None:
  679             compat_print(filename)
  680         if self.params.get('forceformat', False):
  681             compat_print(info_dict['format'])
  682         if self.params.get('forcejson', False):
  683             compat_print(json.dumps(info_dict))
  684 
  685         # Do nothing else if in simulate mode
  686         if self.params.get('simulate', False):
  687             return
  688 
  689         if filename is None:
  690             return
  691 
  692         try:
  693             dn = os.path.dirname(encodeFilename(filename))
  694             if dn != '' and not os.path.exists(dn):
  695                 os.makedirs(dn)
  696         except (OSError, IOError) as err:
  697             self.report_error(u'unable to create directory ' + compat_str(err))
  698             return
  699 
  700         if self.params.get('writedescription', False):
  701             try:
  702                 descfn = filename + u'.description'
  703                 self.report_writedescription(descfn)
  704                 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
  705                     descfile.write(info_dict['description'])
  706             except (KeyError, TypeError):
  707                 self.report_warning(u'There\'s no description to write.')
  708             except (OSError, IOError):
  709                 self.report_error(u'Cannot write description file ' + descfn)
  710                 return
  711 
  712         if self.params.get('writeannotations', False):
  713             try:
  714                 annofn = filename + u'.annotations.xml'
  715                 self.report_writeannotations(annofn)
  716                 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
  717                     annofile.write(info_dict['annotations'])
  718             except (KeyError, TypeError):
  719                 self.report_warning(u'There are no annotations to write.')
  720             except (OSError, IOError):
  721                 self.report_error(u'Cannot write annotations file: ' + annofn)
  722                 return
  723 
  724         subtitles_are_requested = any([self.params.get('writesubtitles', False),
  725                                        self.params.get('writeautomaticsub')])
  726 
  727         if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
  728             # subtitles download errors are already managed as troubles in relevant IE
  729             # that way it will silently go on when used with unsupporting IE
  730             subtitles = info_dict['subtitles']
  731             sub_format = self.params.get('subtitlesformat', 'srt')
  732             for sub_lang in subtitles.keys():
  733                 sub = subtitles[sub_lang]
  734                 if sub is None:
  735                     continue
  736                 try:
  737                     sub_filename = subtitles_filename(filename, sub_lang, sub_format)
  738                     self.report_writesubtitles(sub_filename)
  739                     with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
  740                             subfile.write(sub)
  741                 except (OSError, IOError):
  742                     self.report_error(u'Cannot write subtitles file ' + descfn)
  743                     return
  744 
  745         if self.params.get('writeinfojson', False):
  746             infofn = os.path.splitext(filename)[0] + u'.info.json'
  747             self.report_writeinfojson(infofn)
  748             try:
  749                 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
  750                 write_json_file(json_info_dict, encodeFilename(infofn))
  751             except (OSError, IOError):
  752                 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
  753                 return
  754 
  755         if self.params.get('writethumbnail', False):
  756             if info_dict.get('thumbnail') is not None:
  757                 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
  758                 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
  759                 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
  760                                (info_dict['extractor'], info_dict['id']))
  761                 try:
  762                     uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
  763                     with open(thumb_filename, 'wb') as thumbf:
  764                         shutil.copyfileobj(uf, thumbf)
  765                     self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
  766                         (info_dict['extractor'], info_dict['id'], thumb_filename))
  767                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
  768                     self.report_warning(u'Unable to download thumbnail "%s": %s' %
  769                         (info_dict['thumbnail'], compat_str(err)))
  770 
  771         if not self.params.get('skip_download', False):
  772             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
  773                 success = True
  774             else:
  775                 try:
  776                     success = self.fd._do_download(filename, info_dict)
  777                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
  778                     self.report_error(u'unable to download video data: %s' % str(err))
  779                     return
  780                 except (OSError, IOError) as err:
  781                     raise UnavailableVideoError(err)
  782                 except (ContentTooShortError, ) as err:
  783                     self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
  784                     return
  785 
  786             if success:
  787                 try:
  788                     self.post_process(filename, info_dict)
  789                 except (PostProcessingError) as err:
  790                     self.report_error(u'postprocessing: %s' % str(err))
  791                     return
  792 
  793         self.record_download_archive(info_dict)
  794 
  795     def download(self, url_list):
  796         """Download a given list of URLs."""
  797         if (len(url_list) > 1 and
  798                 '%' not in self.params['outtmpl']
  799                 and self.params.get('max_downloads') != 1):
  800             raise SameFileError(self.params['outtmpl'])
  801 
  802         for url in url_list:
  803             try:
  804                 #It also downloads the videos
  805                 self.extract_info(url)
  806             except UnavailableVideoError:
  807                 self.report_error(u'unable to download video')
  808             except MaxDownloadsReached:
  809                 self.to_screen(u'[info] Maximum number of downloaded files reached.')
  810                 raise
  811 
  812         return self._download_retcode
  813 
  814     def post_process(self, filename, ie_info):
  815         """Run all the postprocessors on the given file."""
  816         info = dict(ie_info)
  817         info['filepath'] = filename
  818         keep_video = None
  819         for pp in self._pps:
  820             try:
  821                 keep_video_wish, new_info = pp.run(info)
  822                 if keep_video_wish is not None:
  823                     if keep_video_wish:
  824                         keep_video = keep_video_wish
  825                     elif keep_video is None:
  826                         # No clear decision yet, let IE decide
  827                         keep_video = keep_video_wish
  828             except PostProcessingError as e:
  829                 self.report_error(e.msg)
  830         if keep_video is False and not self.params.get('keepvideo', False):
  831             try:
  832                 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
  833                 os.remove(encodeFilename(filename))
  834             except (IOError, OSError):
  835                 self.report_warning(u'Unable to remove downloaded video file')
  836 
  837     def _make_archive_id(self, info_dict):
  838         # Future-proof against any change in case
  839         # and backwards compatibility with prior versions
  840         extractor = info_dict.get('extractor_key')
  841         if extractor is None:
  842             if 'id' in info_dict:
  843                 extractor = info_dict.get('ie_key')  # key in a playlist
  844         if extractor is None:
  845             return None  # Incomplete video information
  846         return extractor.lower() + u' ' + info_dict['id']
  847 
  848     def in_download_archive(self, info_dict):
  849         fn = self.params.get('download_archive')
  850         if fn is None:
  851             return False
  852 
  853         vid_id = self._make_archive_id(info_dict)
  854         if vid_id is None:
  855             return False  # Incomplete video information
  856 
  857         try:
  858             with locked_file(fn, 'r', encoding='utf-8') as archive_file:
  859                 for line in archive_file:
  860                     if line.strip() == vid_id:
  861                         return True
  862         except IOError as ioe:
  863             if ioe.errno != errno.ENOENT:
  864                 raise
  865         return False
  866 
  867     def record_download_archive(self, info_dict):
  868         fn = self.params.get('download_archive')
  869         if fn is None:
  870             return
  871         vid_id = self._make_archive_id(info_dict)
  872         assert vid_id
  873         with locked_file(fn, 'a', encoding='utf-8') as archive_file:
  874             archive_file.write(vid_id + u'\n')
  875 
  876     @staticmethod
  877     def format_resolution(format, default='unknown'):
  878         if format.get('vcodec') == 'none':
  879             return 'audio only'
  880         if format.get('_resolution') is not None:
  881             return format['_resolution']
  882         if format.get('height') is not None:
  883             if format.get('width') is not None:
  884                 res = u'%sx%s' % (format['width'], format['height'])
  885             else:
  886                 res = u'%sp' % format['height']
  887         else:
  888             res = default
  889         return res
  890 
  891     def list_formats(self, info_dict):
  892         def format_note(fdict):
  893             res = u''
  894             if fdict.get('format_note') is not None:
  895                 res += fdict['format_note'] + u' '
  896             if (fdict.get('vcodec') is not None and
  897                     fdict.get('vcodec') != 'none'):
  898                 res += u'%-5s' % fdict['vcodec']
  899             elif fdict.get('vbr') is not None:
  900                 res += u'video'
  901             if fdict.get('vbr') is not None:
  902                 res += u'@%4dk' % fdict['vbr']
  903             if fdict.get('acodec') is not None:
  904                 if res:
  905                     res += u', '
  906                 res += u'%-5s' % fdict['acodec']
  907             elif fdict.get('abr') is not None:
  908                 if res:
  909                     res += u', '
  910                 res += 'audio'
  911             if fdict.get('abr') is not None:
  912                 res += u'@%3dk' % fdict['abr']
  913             if fdict.get('filesize') is not None:
  914                 if res:
  915                     res += u', '
  916                 res += format_bytes(fdict['filesize'])
  917             return res
  918 
  919         def line(format, idlen=20):
  920             return ((u'%-' + compat_str(idlen + 1) + u's%-10s%-12s%s') % (
  921                 format['format_id'],
  922                 format['ext'],
  923                 self.format_resolution(format),
  924                 format_note(format),
  925             ))
  926 
  927         formats = info_dict.get('formats', [info_dict])
  928         idlen = max(len(u'format code'),
  929                     max(len(f['format_id']) for f in formats))
  930         formats_s = [line(f, idlen) for f in formats]
  931         if len(formats) > 1:
  932             formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
  933             formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
  934 
  935         header_line = line({
  936             'format_id': u'format code', 'ext': u'extension',
  937             '_resolution': u'resolution', 'format_note': u'note'}, idlen=idlen)
  938         self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
  939                        (info_dict['id'], header_line, u"\n".join(formats_s)))
  940 
  941     def urlopen(self, req):
  942         """ Start an HTTP download """
  943         return self._opener.open(req)
  944 
  945     def print_debug_header(self):
  946         if not self.params.get('verbose'):
  947             return
  948         write_string(u'[debug] youtube-dl version ' + __version__ + u'\n')
  949         try:
  950             sp = subprocess.Popen(
  951                 ['git', 'rev-parse', '--short', 'HEAD'],
  952                 stdout=subprocess.PIPE, stderr=subprocess.PIPE,
  953                 cwd=os.path.dirname(os.path.abspath(__file__)))
  954             out, err = sp.communicate()
  955             out = out.decode().strip()
  956             if re.match('[0-9a-f]+', out):
  957                 write_string(u'[debug] Git HEAD: ' + out + u'\n')
  958         except:
  959             try:
  960                 sys.exc_clear()
  961             except:
  962                 pass
  963         write_string(u'[debug] Python version %s - %s' %
  964                      (platform.python_version(), platform_name()) + u'\n')
  965 
  966         proxy_map = {}
  967         for handler in self._opener.handlers:
  968             if hasattr(handler, 'proxies'):
  969                 proxy_map.update(handler.proxies)
  970         write_string(u'[debug] Proxy map: ' + compat_str(proxy_map) + u'\n')
  971 
  972     def _setup_opener(self, timeout=20):
  973         opts_cookiefile = self.params.get('cookiefile')
  974         opts_proxy = self.params.get('proxy')
  975 
  976         if opts_cookiefile is None:
  977             self.cookiejar = compat_cookiejar.CookieJar()
  978         else:
  979             self.cookiejar = compat_cookiejar.MozillaCookieJar(
  980                 opts_cookiefile)
  981             if os.access(opts_cookiefile, os.R_OK):
  982                 self.cookiejar.load()
  983 
  984         cookie_processor = compat_urllib_request.HTTPCookieProcessor(
  985             self.cookiejar)
  986         if opts_proxy is not None:
  987             if opts_proxy == '':
  988                 proxies = {}
  989             else:
  990                 proxies = {'http': opts_proxy, 'https': opts_proxy}
  991         else:
  992             proxies = compat_urllib_request.getproxies()
  993             # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
  994             if 'http' in proxies and 'https' not in proxies:
  995                 proxies['https'] = proxies['http']
  996         proxy_handler = compat_urllib_request.ProxyHandler(proxies)
  997         https_handler = make_HTTPS_handler(
  998             self.params.get('nocheckcertificate', False))
  999         opener = compat_urllib_request.build_opener(
 1000             https_handler, proxy_handler, cookie_processor, YoutubeDLHandler())
 1001         # Delete the default user-agent header, which would otherwise apply in
 1002         # cases where our custom HTTP handler doesn't come into play
 1003         # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
 1004         opener.addheaders = []
 1005         self._opener = opener
 1006 
 1007         # TODO remove this global modification
 1008         compat_urllib_request.install_opener(opener)
 1009         socket.setdefaulttimeout(timeout)

Generated by cgit