summary | refs | log | tree | commit | diff
path: root/youtube_dl/downloader/common.py
blob: 75b8166c514485bad26fa87a90ca08ab330d654f (plain)
    1 from __future__ import division, unicode_literals
    2 
    3 import os
    4 import re
    5 import sys
    6 import time
    7 import random
    8 
    9 from ..compat import compat_os_name
   10 from ..utils import (
   11     decodeArgument,
   12     encodeFilename,
   13     error_to_compat_str,
   14     format_bytes,
   15     shell_quote,
   16     timeconvert,
   17 )
   18 
   19 
   20 class FileDownloader(object):
   21     """File Downloader class.
   22 
   23     File downloader objects are the ones responsible of downloading the
   24     actual video file and writing it to disk.
   25 
   26     File downloaders accept a lot of parameters. In order not to saturate
   27     the object constructor with arguments, it receives a dictionary of
   28     options instead.
   29 
   30     Available options:
   31 
   32     verbose:            Print additional info to stdout.
   33     quiet:              Do not print messages to stdout.
   34     ratelimit:          Download speed limit, in bytes/sec.
   35     retries:            Number of times to retry for HTTP error 5xx
   36     buffersize:         Size of download buffer in bytes.
   37     noresizebuffer:     Do not automatically resize the download buffer.
   38     continuedl:         Try to continue downloads if possible.
   39     noprogress:         Do not print the progress bar.
   40     logtostderr:        Log messages to stderr instead of stdout.
   41     consoletitle:       Display progress in console window's titlebar.
   42     nopart:             Do not use temporary .part files.
   43     updatetime:         Use the Last-modified header to set output file timestamps.
   44     test:               Download only first bytes to test the downloader.
   45     min_filesize:       Skip files smaller than this size
   46     max_filesize:       Skip files larger than this size
   47     xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
   48                         (experimental)
   49     external_downloader_args:  A list of additional command-line arguments for the
   50                         external downloader.
   51     hls_use_mpegts:     Use the mpegts container for HLS videos.
   52 
   53     Subclasses of this one must re-define the real_download method.
   54     """
   55 
   56     _TEST_FILE_SIZE = 10241
   57     params = None
   58 
   59     def __init__(self, ydl, params):
   60         """Create a FileDownloader object with the given options."""
   61         self.ydl = ydl
   62         self._progress_hooks = []
   63         self.params = params
   64         self.add_progress_hook(self.report_progress)
   65 
   66     @staticmethod
   67     def format_seconds(seconds):
   68         (mins, secs) = divmod(seconds, 60)
   69         (hours, mins) = divmod(mins, 60)
   70         if hours > 99:
   71             return '--:--:--'
   72         if hours == 0:
   73             return '%02d:%02d' % (mins, secs)
   74         else:
   75             return '%02d:%02d:%02d' % (hours, mins, secs)
   76 
   77     @staticmethod
   78     def calc_percent(byte_counter, data_len):
   79         if data_len is None:
   80             return None
   81         return float(byte_counter) / float(data_len) * 100.0
   82 
   83     @staticmethod
   84     def format_percent(percent):
   85         if percent is None:
   86             return '---.-%'
   87         return '%6s' % ('%3.1f%%' % percent)
   88 
   89     @staticmethod
   90     def calc_eta(start, now, total, current):
   91         if total is None:
   92             return None
   93         if now is None:
   94             now = time.time()
   95         dif = now - start
   96         if current == 0 or dif < 0.001:  # One millisecond
   97             return None
   98         rate = float(current) / dif
   99         return int((float(total) - float(current)) / rate)
  100 
  101     @staticmethod
  102     def format_eta(eta):
  103         if eta is None:
  104             return '--:--'
  105         return FileDownloader.format_seconds(eta)
  106 
  107     @staticmethod
  108     def calc_speed(start, now, bytes):
  109         dif = now - start
  110         if bytes == 0 or dif < 0.001:  # One millisecond
  111             return None
  112         return float(bytes) / dif
  113 
  114     @staticmethod
  115     def format_speed(speed):
  116         if speed is None:
  117             return '%10s' % '---b/s'
  118         return '%10s' % ('%s/s' % format_bytes(speed))
  119 
  120     @staticmethod
  121     def format_retries(retries):
  122         return 'inf' if retries == float('inf') else '%.0f' % retries
  123 
  124     @staticmethod
  125     def best_block_size(elapsed_time, bytes):
  126         new_min = max(bytes / 2.0, 1.0)
  127         new_max = min(max(bytes * 2.0, 1.0), 4194304)  # Do not surpass 4 MB
  128         if elapsed_time < 0.001:
  129             return int(new_max)
  130         rate = bytes / elapsed_time
  131         if rate > new_max:
  132             return int(new_max)
  133         if rate < new_min:
  134             return int(new_min)
  135         return int(rate)
  136 
  137     @staticmethod
  138     def parse_bytes(bytestr):
  139         """Parse a string indicating a byte quantity into an integer."""
  140         matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
  141         if matchobj is None:
  142             return None
  143         number = float(matchobj.group(1))
  144         multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
  145         return int(round(number * multiplier))
  146 
  147     def to_screen(self, *args, **kargs):
  148         self.ydl.to_screen(*args, **kargs)
  149 
  150     def to_stderr(self, message):
  151         self.ydl.to_screen(message)
  152 
  153     def to_console_title(self, message):
  154         self.ydl.to_console_title(message)
  155 
  156     def trouble(self, *args, **kargs):
  157         self.ydl.trouble(*args, **kargs)
  158 
  159     def report_warning(self, *args, **kargs):
  160         self.ydl.report_warning(*args, **kargs)
  161 
  162     def report_error(self, *args, **kargs):
  163         self.ydl.report_error(*args, **kargs)
  164 
  165     def slow_down(self, start_time, now, byte_counter):
  166         """Sleep if the download speed is over the rate limit."""
  167         rate_limit = self.params.get('ratelimit')
  168         if rate_limit is None or byte_counter == 0:
  169             return
  170         if now is None:
  171             now = time.time()
  172         elapsed = now - start_time
  173         if elapsed <= 0.0:
  174             return
  175         speed = float(byte_counter) / elapsed
  176         if speed > rate_limit:
  177             time.sleep(max((byte_counter // rate_limit) - elapsed, 0))
  178 
  179     def temp_name(self, filename):
  180         """Returns a temporary filename for the given filename."""
  181         if self.params.get('nopart', False) or filename == '-' or \
  182                 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
  183             return filename
  184         return filename + '.part'
  185 
  186     def undo_temp_name(self, filename):
  187         if filename.endswith('.part'):
  188             return filename[:-len('.part')]
  189         return filename
  190 
  191     def ytdl_filename(self, filename):
  192         return filename + '.ytdl'
  193 
  194     def try_rename(self, old_filename, new_filename):
  195         try:
  196             if old_filename == new_filename:
  197                 return
  198             os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
  199         except (IOError, OSError) as err:
  200             self.report_error('unable to rename file: %s' % error_to_compat_str(err))
  201 
  202     def try_utime(self, filename, last_modified_hdr):
  203         """Try to set the last-modified time of the given file."""
  204         if last_modified_hdr is None:
  205             return
  206         if not os.path.isfile(encodeFilename(filename)):
  207             return
  208         timestr = last_modified_hdr
  209         if timestr is None:
  210             return
  211         filetime = timeconvert(timestr)
  212         if filetime is None:
  213             return filetime
  214         # Ignore obviously invalid dates
  215         if filetime == 0:
  216             return
  217         try:
  218             os.utime(filename, (time.time(), filetime))
  219         except Exception:
  220             pass
  221         return filetime
  222 
  223     def report_destination(self, filename):
  224         """Report destination filename."""
  225         self.to_screen('[download] Destination: ' + filename)
  226 
  227     def _report_progress_status(self, msg, is_last_line=False):
  228         fullmsg = '[download] ' + msg
  229         if self.params.get('progress_with_newline', False):
  230             self.to_screen(fullmsg)
  231         else:
  232             if compat_os_name == 'nt':
  233                 prev_len = getattr(self, '_report_progress_prev_line_length',
  234                                    0)
  235                 if prev_len > len(fullmsg):
  236                     fullmsg += ' ' * (prev_len - len(fullmsg))
  237                 self._report_progress_prev_line_length = len(fullmsg)
  238                 clear_line = '\r'
  239             else:
  240                 clear_line = ('\r\x1b[K' if sys.stderr.isatty() else '\r')
  241             self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
  242         self.to_console_title('youtube-dl ' + msg)
  243 
  244     def report_progress(self, s):
  245         if s['status'] == 'finished':
  246             if self.params.get('noprogress', False):
  247                 self.to_screen('[download] Download completed')
  248             else:
  249                 s['_total_bytes_str'] = format_bytes(s['total_bytes'])
  250                 if s.get('elapsed') is not None:
  251                     s['_elapsed_str'] = self.format_seconds(s['elapsed'])
  252                     msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s'
  253                 else:
  254                     msg_template = '100%% of %(_total_bytes_str)s'
  255                 self._report_progress_status(
  256                     msg_template % s, is_last_line=True)
  257 
  258         if self.params.get('noprogress'):
  259             return
  260 
  261         if s['status'] != 'downloading':
  262             return
  263 
  264         if s.get('eta') is not None:
  265             s['_eta_str'] = self.format_eta(s['eta'])
  266         else:
  267             s['_eta_str'] = 'Unknown ETA'
  268 
  269         if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
  270             s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
  271         elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
  272             s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
  273         else:
  274             if s.get('downloaded_bytes') == 0:
  275                 s['_percent_str'] = self.format_percent(0)
  276             else:
  277                 s['_percent_str'] = 'Unknown %'
  278 
  279         if s.get('speed') is not None:
  280             s['_speed_str'] = self.format_speed(s['speed'])
  281         else:
  282             s['_speed_str'] = 'Unknown speed'
  283 
  284         if s.get('total_bytes') is not None:
  285             s['_total_bytes_str'] = format_bytes(s['total_bytes'])
  286             msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
  287         elif s.get('total_bytes_estimate') is not None:
  288             s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
  289             msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
  290         else:
  291             if s.get('downloaded_bytes') is not None:
  292                 s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
  293                 if s.get('elapsed'):
  294                     s['_elapsed_str'] = self.format_seconds(s['elapsed'])
  295                     msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
  296                 else:
  297                     msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
  298             else:
  299                 msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s'
  300 
  301         self._report_progress_status(msg_template % s)
  302 
  303     def report_resuming_byte(self, resume_len):
  304         """Report attempt to resume at given byte."""
  305         self.to_screen('[download] Resuming download at byte %s' % resume_len)
  306 
  307     def report_retry(self, err, count, retries):
  308         """Report retry in case of HTTP error 5xx"""
  309         self.to_screen(
  310             '[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
  311             % (error_to_compat_str(err), count, self.format_retries(retries)))
  312 
  313     def report_file_already_downloaded(self, file_name):
  314         """Report file has already been fully downloaded."""
  315         try:
  316             self.to_screen('[download] %s has already been downloaded' % file_name)
  317         except UnicodeEncodeError:
  318             self.to_screen('[download] The file has already been downloaded')
  319 
  320     def report_unable_to_resume(self):
  321         """Report it was impossible to resume download."""
  322         self.to_screen('[download] Unable to resume')
  323 
  324     def download(self, filename, info_dict):
  325         """Download to a filename using the info from info_dict
  326         Return True on success and False otherwise
  327         """
  328 
  329         nooverwrites_and_exists = (
  330             self.params.get('nooverwrites', False) and
  331             os.path.exists(encodeFilename(filename))
  332         )
  333 
  334         if not hasattr(filename, 'write'):
  335             continuedl_and_exists = (
  336                 self.params.get('continuedl', True) and
  337                 os.path.isfile(encodeFilename(filename)) and
  338                 not self.params.get('nopart', False)
  339             )
  340 
  341             # Check file already present
  342             if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
  343                 self.report_file_already_downloaded(filename)
  344                 self._hook_progress({
  345                     'filename': filename,
  346                     'status': 'finished',
  347                     'total_bytes': os.path.getsize(encodeFilename(filename)),
  348                 })
  349                 return True
  350 
  351         min_sleep_interval = self.params.get('sleep_interval')
  352         if min_sleep_interval:
  353             max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
  354             sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
  355             self.to_screen(
  356                 '[download] Sleeping %s seconds...' % (
  357                     int(sleep_interval) if sleep_interval.is_integer()
  358                     else '%.2f' % sleep_interval))
  359             time.sleep(sleep_interval)
  360 
  361         return self.real_download(filename, info_dict)
  362 
  363     def real_download(self, filename, info_dict):
  364         """Real download process. Redefine in subclasses."""
  365         raise NotImplementedError('This method must be implemented by subclasses')
  366 
  367     def _hook_progress(self, status):
  368         for ph in self._progress_hooks:
  369             ph(status)
  370 
  371     def add_progress_hook(self, ph):
  372         # See YoutubeDl.py (search for progress_hooks) for a description of
  373         # this interface
  374         self._progress_hooks.append(ph)
  375 
  376     def _debug_cmd(self, args, exe=None):
  377         if not self.params.get('verbose', False):
  378             return
  379 
  380         str_args = [decodeArgument(a) for a in args]
  381 
  382         if exe is None:
  383             exe = os.path.basename(str_args[0])
  384 
  385         self.to_screen('[debug] %s command line: %s' % (
  386             exe, shell_quote(str_args)))

Generated by cgit