summaryrefslogtreecommitdiff
path: root/youtube_dl/utils.py
blob: e3c3ccff904fce867d46e34b01d4168f8548899d (plain)
    1 #!/usr/bin/env python
    2 # coding: utf-8
    3 
    4 from __future__ import unicode_literals
    5 
    6 import base64
    7 import binascii
    8 import calendar
    9 import codecs
   10 import collections
   11 import contextlib
   12 import ctypes
   13 import datetime
   14 import email.utils
   15 import email.header
   16 import errno
   17 import functools
   18 import gzip
   19 import io
   20 import itertools
   21 import json
   22 import locale
   23 import math
   24 import operator
   25 import os
   26 import platform
   27 import random
   28 import re
   29 import socket
   30 import ssl
   31 import subprocess
   32 import sys
   33 import tempfile
   34 import time
   35 import traceback
   36 import unicodedata
   37 import xml.etree.ElementTree
   38 import zlib
   39 
   40 from .compat import (
   41     compat_HTMLParseError,
   42     compat_HTMLParser,
   43     compat_HTTPError,
   44     compat_basestring,
   45     compat_chr,
   46     compat_collections_abc,
   47     compat_cookiejar,
   48     compat_ctypes_WINFUNCTYPE,
   49     compat_etree_fromstring,
   50     compat_expanduser,
   51     compat_html_entities,
   52     compat_html_entities_html5,
   53     compat_http_client,
   54     compat_integer_types,
   55     compat_kwargs,
   56     compat_os_name,
   57     compat_parse_qs,
   58     compat_shlex_quote,
   59     compat_str,
   60     compat_struct_pack,
   61     compat_struct_unpack,
   62     compat_urllib_error,
   63     compat_urllib_parse,
   64     compat_urllib_parse_urlencode,
   65     compat_urllib_parse_urlparse,
   66     compat_urllib_parse_unquote_plus,
   67     compat_urllib_request,
   68     compat_urlparse,
   69     compat_xpath,
   70 )
   71 
   72 from .socks import (
   73     ProxyType,
   74     sockssocket,
   75 )
   76 
   77 
   78 def register_socks_protocols():
   79     # "Register" SOCKS protocols
   80     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
   81     # URLs with protocols not in urlparse.uses_netloc are not handled correctly
   82     for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
   83         if scheme not in compat_urlparse.uses_netloc:
   84             compat_urlparse.uses_netloc.append(scheme)
   85 
   86 
# Type object of a compiled regular expression. Older Pythons expose no
# public name for it, so it is derived by compiling an empty pattern.
compiled_regex_type = type(re.compile(''))
   89 
   90 
   91 def random_user_agent():
   92     _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
   93     _CHROME_VERSIONS = (
   94         '74.0.3729.129',
   95         '76.0.3780.3',
   96         '76.0.3780.2',
   97         '74.0.3729.128',
   98         '76.0.3780.1',
   99         '76.0.3780.0',
  100         '75.0.3770.15',
  101         '74.0.3729.127',
  102         '74.0.3729.126',
  103         '76.0.3779.1',
  104         '76.0.3779.0',
  105         '75.0.3770.14',
  106         '74.0.3729.125',
  107         '76.0.3778.1',
  108         '76.0.3778.0',
  109         '75.0.3770.13',
  110         '74.0.3729.124',
  111         '74.0.3729.123',
  112         '73.0.3683.121',
  113         '76.0.3777.1',
  114         '76.0.3777.0',
  115         '75.0.3770.12',
  116         '74.0.3729.122',
  117         '76.0.3776.4',
  118         '75.0.3770.11',
  119         '74.0.3729.121',
  120         '76.0.3776.3',
  121         '76.0.3776.2',
  122         '73.0.3683.120',
  123         '74.0.3729.120',
  124         '74.0.3729.119',
  125         '74.0.3729.118',
  126         '76.0.3776.1',
  127         '76.0.3776.0',
  128         '76.0.3775.5',
  129         '75.0.3770.10',
  130         '74.0.3729.117',
  131         '76.0.3775.4',
  132         '76.0.3775.3',
  133         '74.0.3729.116',
  134         '75.0.3770.9',
  135         '76.0.3775.2',
  136         '76.0.3775.1',
  137         '76.0.3775.0',
  138         '75.0.3770.8',
  139         '74.0.3729.115',
  140         '74.0.3729.114',
  141         '76.0.3774.1',
  142         '76.0.3774.0',
  143         '75.0.3770.7',
  144         '74.0.3729.113',
  145         '74.0.3729.112',
  146         '74.0.3729.111',
  147         '76.0.3773.1',
  148         '76.0.3773.0',
  149         '75.0.3770.6',
  150         '74.0.3729.110',
  151         '74.0.3729.109',
  152         '76.0.3772.1',
  153         '76.0.3772.0',
  154         '75.0.3770.5',
  155         '74.0.3729.108',
  156         '74.0.3729.107',
  157         '76.0.3771.1',
  158         '76.0.3771.0',
  159         '75.0.3770.4',
  160         '74.0.3729.106',
  161         '74.0.3729.105',
  162         '75.0.3770.3',
  163         '74.0.3729.104',
  164         '74.0.3729.103',
  165         '74.0.3729.102',
  166         '75.0.3770.2',
  167         '74.0.3729.101',
  168         '75.0.3770.1',
  169         '75.0.3770.0',
  170         '74.0.3729.100',
  171         '75.0.3769.5',
  172         '75.0.3769.4',
  173         '74.0.3729.99',
  174         '75.0.3769.3',
  175         '75.0.3769.2',
  176         '75.0.3768.6',
  177         '74.0.3729.98',
  178         '75.0.3769.1',
  179         '75.0.3769.0',
  180         '74.0.3729.97',
  181         '73.0.3683.119',
  182         '73.0.3683.118',
  183         '74.0.3729.96',
  184         '75.0.3768.5',
  185         '75.0.3768.4',
  186         '75.0.3768.3',
  187         '75.0.3768.2',
  188         '74.0.3729.95',
  189         '74.0.3729.94',
  190         '75.0.3768.1',
  191         '75.0.3768.0',
  192         '74.0.3729.93',
  193         '74.0.3729.92',
  194         '73.0.3683.117',
  195         '74.0.3729.91',
  196         '75.0.3766.3',
  197         '74.0.3729.90',
  198         '75.0.3767.2',
  199         '75.0.3767.1',
  200         '75.0.3767.0',
  201         '74.0.3729.89',
  202         '73.0.3683.116',
  203         '75.0.3766.2',
  204         '74.0.3729.88',
  205         '75.0.3766.1',
  206         '75.0.3766.0',
  207         '74.0.3729.87',
  208         '73.0.3683.115',
  209         '74.0.3729.86',
  210         '75.0.3765.1',
  211         '75.0.3765.0',
  212         '74.0.3729.85',
  213         '73.0.3683.114',
  214         '74.0.3729.84',
  215         '75.0.3764.1',
  216         '75.0.3764.0',
  217         '74.0.3729.83',
  218         '73.0.3683.113',
  219         '75.0.3763.2',
  220         '75.0.3761.4',
  221         '74.0.3729.82',
  222         '75.0.3763.1',
  223         '75.0.3763.0',
  224         '74.0.3729.81',
  225         '73.0.3683.112',
  226         '75.0.3762.1',
  227         '75.0.3762.0',
  228         '74.0.3729.80',
  229         '75.0.3761.3',
  230         '74.0.3729.79',
  231         '73.0.3683.111',
  232         '75.0.3761.2',
  233         '74.0.3729.78',
  234         '74.0.3729.77',
  235         '75.0.3761.1',
  236         '75.0.3761.0',
  237         '73.0.3683.110',
  238         '74.0.3729.76',
  239         '74.0.3729.75',
  240         '75.0.3760.0',
  241         '74.0.3729.74',
  242         '75.0.3759.8',
  243         '75.0.3759.7',
  244         '75.0.3759.6',
  245         '74.0.3729.73',
  246         '75.0.3759.5',
  247         '74.0.3729.72',
  248         '73.0.3683.109',
  249         '75.0.3759.4',
  250         '75.0.3759.3',
  251         '74.0.3729.71',
  252         '75.0.3759.2',
  253         '74.0.3729.70',
  254         '73.0.3683.108',
  255         '74.0.3729.69',
  256         '75.0.3759.1',
  257         '75.0.3759.0',
  258         '74.0.3729.68',
  259         '73.0.3683.107',
  260         '74.0.3729.67',
  261         '75.0.3758.1',
  262         '75.0.3758.0',
  263         '74.0.3729.66',
  264         '73.0.3683.106',
  265         '74.0.3729.65',
  266         '75.0.3757.1',
  267         '75.0.3757.0',
  268         '74.0.3729.64',
  269         '73.0.3683.105',
  270         '74.0.3729.63',
  271         '75.0.3756.1',
  272         '75.0.3756.0',
  273         '74.0.3729.62',
  274         '73.0.3683.104',
  275         '75.0.3755.3',
  276         '75.0.3755.2',
  277         '73.0.3683.103',
  278         '75.0.3755.1',
  279         '75.0.3755.0',
  280         '74.0.3729.61',
  281         '73.0.3683.102',
  282         '74.0.3729.60',
  283         '75.0.3754.2',
  284         '74.0.3729.59',
  285         '75.0.3753.4',
  286         '74.0.3729.58',
  287         '75.0.3754.1',
  288         '75.0.3754.0',
  289         '74.0.3729.57',
  290         '73.0.3683.101',
  291         '75.0.3753.3',
  292         '75.0.3752.2',
  293         '75.0.3753.2',
  294         '74.0.3729.56',
  295         '75.0.3753.1',
  296         '75.0.3753.0',
  297         '74.0.3729.55',
  298         '73.0.3683.100',
  299         '74.0.3729.54',
  300         '75.0.3752.1',
  301         '75.0.3752.0',
  302         '74.0.3729.53',
  303         '73.0.3683.99',
  304         '74.0.3729.52',
  305         '75.0.3751.1',
  306         '75.0.3751.0',
  307         '74.0.3729.51',
  308         '73.0.3683.98',
  309         '74.0.3729.50',
  310         '75.0.3750.0',
  311         '74.0.3729.49',
  312         '74.0.3729.48',
  313         '74.0.3729.47',
  314         '75.0.3749.3',
  315         '74.0.3729.46',
  316         '73.0.3683.97',
  317         '75.0.3749.2',
  318         '74.0.3729.45',
  319         '75.0.3749.1',
  320         '75.0.3749.0',
  321         '74.0.3729.44',
  322         '73.0.3683.96',
  323         '74.0.3729.43',
  324         '74.0.3729.42',
  325         '75.0.3748.1',
  326         '75.0.3748.0',
  327         '74.0.3729.41',
  328         '75.0.3747.1',
  329         '73.0.3683.95',
  330         '75.0.3746.4',
  331         '74.0.3729.40',
  332         '74.0.3729.39',
  333         '75.0.3747.0',
  334         '75.0.3746.3',
  335         '75.0.3746.2',
  336         '74.0.3729.38',
  337         '75.0.3746.1',
  338         '75.0.3746.0',
  339         '74.0.3729.37',
  340         '73.0.3683.94',
  341         '75.0.3745.5',
  342         '75.0.3745.4',
  343         '75.0.3745.3',
  344         '75.0.3745.2',
  345         '74.0.3729.36',
  346         '75.0.3745.1',
  347         '75.0.3745.0',
  348         '75.0.3744.2',
  349         '74.0.3729.35',
  350         '73.0.3683.93',
  351         '74.0.3729.34',
  352         '75.0.3744.1',
  353         '75.0.3744.0',
  354         '74.0.3729.33',
  355         '73.0.3683.92',
  356         '74.0.3729.32',
  357         '74.0.3729.31',
  358         '73.0.3683.91',
  359         '75.0.3741.2',
  360         '75.0.3740.5',
  361         '74.0.3729.30',
  362         '75.0.3741.1',
  363         '75.0.3741.0',
  364         '74.0.3729.29',
  365         '75.0.3740.4',
  366         '73.0.3683.90',
  367         '74.0.3729.28',
  368         '75.0.3740.3',
  369         '73.0.3683.89',
  370         '75.0.3740.2',
  371         '74.0.3729.27',
  372         '75.0.3740.1',
  373         '75.0.3740.0',
  374         '74.0.3729.26',
  375         '73.0.3683.88',
  376         '73.0.3683.87',
  377         '74.0.3729.25',
  378         '75.0.3739.1',
  379         '75.0.3739.0',
  380         '73.0.3683.86',
  381         '74.0.3729.24',
  382         '73.0.3683.85',
  383         '75.0.3738.4',
  384         '75.0.3738.3',
  385         '75.0.3738.2',
  386         '75.0.3738.1',
  387         '75.0.3738.0',
  388         '74.0.3729.23',
  389         '73.0.3683.84',
  390         '74.0.3729.22',
  391         '74.0.3729.21',
  392         '75.0.3737.1',
  393         '75.0.3737.0',
  394         '74.0.3729.20',
  395         '73.0.3683.83',
  396         '74.0.3729.19',
  397         '75.0.3736.1',
  398         '75.0.3736.0',
  399         '74.0.3729.18',
  400         '73.0.3683.82',
  401         '74.0.3729.17',
  402         '75.0.3735.1',
  403         '75.0.3735.0',
  404         '74.0.3729.16',
  405         '73.0.3683.81',
  406         '75.0.3734.1',
  407         '75.0.3734.0',
  408         '74.0.3729.15',
  409         '73.0.3683.80',
  410         '74.0.3729.14',
  411         '75.0.3733.1',
  412         '75.0.3733.0',
  413         '75.0.3732.1',
  414         '74.0.3729.13',
  415         '74.0.3729.12',
  416         '73.0.3683.79',
  417         '74.0.3729.11',
  418         '75.0.3732.0',
  419         '74.0.3729.10',
  420         '73.0.3683.78',
  421         '74.0.3729.9',
  422         '74.0.3729.8',
  423         '74.0.3729.7',
  424         '75.0.3731.3',
  425         '75.0.3731.2',
  426         '75.0.3731.0',
  427         '74.0.3729.6',
  428         '73.0.3683.77',
  429         '73.0.3683.76',
  430         '75.0.3730.5',
  431         '75.0.3730.4',
  432         '73.0.3683.75',
  433         '74.0.3729.5',
  434         '73.0.3683.74',
  435         '75.0.3730.3',
  436         '75.0.3730.2',
  437         '74.0.3729.4',
  438         '73.0.3683.73',
  439         '73.0.3683.72',
  440         '75.0.3730.1',
  441         '75.0.3730.0',
  442         '74.0.3729.3',
  443         '73.0.3683.71',
  444         '74.0.3729.2',
  445         '73.0.3683.70',
  446         '74.0.3729.1',
  447         '74.0.3729.0',
  448         '74.0.3726.4',
  449         '73.0.3683.69',
  450         '74.0.3726.3',
  451         '74.0.3728.0',
  452         '74.0.3726.2',
  453         '73.0.3683.68',
  454         '74.0.3726.1',
  455         '74.0.3726.0',
  456         '74.0.3725.4',
  457         '73.0.3683.67',
  458         '73.0.3683.66',
  459         '74.0.3725.3',
  460         '74.0.3725.2',
  461         '74.0.3725.1',
  462         '74.0.3724.8',
  463         '74.0.3725.0',
  464         '73.0.3683.65',
  465         '74.0.3724.7',
  466         '74.0.3724.6',
  467         '74.0.3724.5',
  468         '74.0.3724.4',
  469         '74.0.3724.3',
  470         '74.0.3724.2',
  471         '74.0.3724.1',
  472         '74.0.3724.0',
  473         '73.0.3683.64',
  474         '74.0.3723.1',
  475         '74.0.3723.0',
  476         '73.0.3683.63',
  477         '74.0.3722.1',
  478         '74.0.3722.0',
  479         '73.0.3683.62',
  480         '74.0.3718.9',
  481         '74.0.3702.3',
  482         '74.0.3721.3',
  483         '74.0.3721.2',
  484         '74.0.3721.1',
  485         '74.0.3721.0',
  486         '74.0.3720.6',
  487         '73.0.3683.61',
  488         '72.0.3626.122',
  489         '73.0.3683.60',
  490         '74.0.3720.5',
  491         '72.0.3626.121',
  492         '74.0.3718.8',
  493         '74.0.3720.4',
  494         '74.0.3720.3',
  495         '74.0.3718.7',
  496         '74.0.3720.2',
  497         '74.0.3720.1',
  498         '74.0.3720.0',
  499         '74.0.3718.6',
  500         '74.0.3719.5',
  501         '73.0.3683.59',
  502         '74.0.3718.5',
  503         '74.0.3718.4',
  504         '74.0.3719.4',
  505         '74.0.3719.3',
  506         '74.0.3719.2',
  507         '74.0.3719.1',
  508         '73.0.3683.58',
  509         '74.0.3719.0',
  510         '73.0.3683.57',
  511         '73.0.3683.56',
  512         '74.0.3718.3',
  513         '73.0.3683.55',
  514         '74.0.3718.2',
  515         '74.0.3718.1',
  516         '74.0.3718.0',
  517         '73.0.3683.54',
  518         '74.0.3717.2',
  519         '73.0.3683.53',
  520         '74.0.3717.1',
  521         '74.0.3717.0',
  522         '73.0.3683.52',
  523         '74.0.3716.1',
  524         '74.0.3716.0',
  525         '73.0.3683.51',
  526         '74.0.3715.1',
  527         '74.0.3715.0',
  528         '73.0.3683.50',
  529         '74.0.3711.2',
  530         '74.0.3714.2',
  531         '74.0.3713.3',
  532         '74.0.3714.1',
  533         '74.0.3714.0',
  534         '73.0.3683.49',
  535         '74.0.3713.1',
  536         '74.0.3713.0',
  537         '72.0.3626.120',
  538         '73.0.3683.48',
  539         '74.0.3712.2',
  540         '74.0.3712.1',
  541         '74.0.3712.0',
  542         '73.0.3683.47',
  543         '72.0.3626.119',
  544         '73.0.3683.46',
  545         '74.0.3710.2',
  546         '72.0.3626.118',
  547         '74.0.3711.1',
  548         '74.0.3711.0',
  549         '73.0.3683.45',
  550         '72.0.3626.117',
  551         '74.0.3710.1',
  552         '74.0.3710.0',
  553         '73.0.3683.44',
  554         '72.0.3626.116',
  555         '74.0.3709.1',
  556         '74.0.3709.0',
  557         '74.0.3704.9',
  558         '73.0.3683.43',
  559         '72.0.3626.115',
  560         '74.0.3704.8',
  561         '74.0.3704.7',
  562         '74.0.3708.0',
  563         '74.0.3706.7',
  564         '74.0.3704.6',
  565         '73.0.3683.42',
  566         '72.0.3626.114',
  567         '74.0.3706.6',
  568         '72.0.3626.113',
  569         '74.0.3704.5',
  570         '74.0.3706.5',
  571         '74.0.3706.4',
  572         '74.0.3706.3',
  573         '74.0.3706.2',
  574         '74.0.3706.1',
  575         '74.0.3706.0',
  576         '73.0.3683.41',
  577         '72.0.3626.112',
  578         '74.0.3705.1',
  579         '74.0.3705.0',
  580         '73.0.3683.40',
  581         '72.0.3626.111',
  582         '73.0.3683.39',
  583         '74.0.3704.4',
  584         '73.0.3683.38',
  585         '74.0.3704.3',
  586         '74.0.3704.2',
  587         '74.0.3704.1',
  588         '74.0.3704.0',
  589         '73.0.3683.37',
  590         '72.0.3626.110',
  591         '72.0.3626.109',
  592         '74.0.3703.3',
  593         '74.0.3703.2',
  594         '73.0.3683.36',
  595         '74.0.3703.1',
  596         '74.0.3703.0',
  597         '73.0.3683.35',
  598         '72.0.3626.108',
  599         '74.0.3702.2',
  600         '74.0.3699.3',
  601         '74.0.3702.1',
  602         '74.0.3702.0',
  603         '73.0.3683.34',
  604         '72.0.3626.107',
  605         '73.0.3683.33',
  606         '74.0.3701.1',
  607         '74.0.3701.0',
  608         '73.0.3683.32',
  609         '73.0.3683.31',
  610         '72.0.3626.105',
  611         '74.0.3700.1',
  612         '74.0.3700.0',
  613         '73.0.3683.29',
  614         '72.0.3626.103',
  615         '74.0.3699.2',
  616         '74.0.3699.1',
  617         '74.0.3699.0',
  618         '73.0.3683.28',
  619         '72.0.3626.102',
  620         '73.0.3683.27',
  621         '73.0.3683.26',
  622         '74.0.3698.0',
  623         '74.0.3696.2',
  624         '72.0.3626.101',
  625         '73.0.3683.25',
  626         '74.0.3696.1',
  627         '74.0.3696.0',
  628         '74.0.3694.8',
  629         '72.0.3626.100',
  630         '74.0.3694.7',
  631         '74.0.3694.6',
  632         '74.0.3694.5',
  633         '74.0.3694.4',
  634         '72.0.3626.99',
  635         '72.0.3626.98',
  636         '74.0.3694.3',
  637         '73.0.3683.24',
  638         '72.0.3626.97',
  639         '72.0.3626.96',
  640         '72.0.3626.95',
  641         '73.0.3683.23',
  642         '72.0.3626.94',
  643         '73.0.3683.22',
  644         '73.0.3683.21',
  645         '72.0.3626.93',
  646         '74.0.3694.2',
  647         '72.0.3626.92',
  648         '74.0.3694.1',
  649         '74.0.3694.0',
  650         '74.0.3693.6',
  651         '73.0.3683.20',
  652         '72.0.3626.91',
  653         '74.0.3693.5',
  654         '74.0.3693.4',
  655         '74.0.3693.3',
  656         '74.0.3693.2',
  657         '73.0.3683.19',
  658         '74.0.3693.1',
  659         '74.0.3693.0',
  660         '73.0.3683.18',
  661         '72.0.3626.90',
  662         '74.0.3692.1',
  663         '74.0.3692.0',
  664         '73.0.3683.17',
  665         '72.0.3626.89',
  666         '74.0.3687.3',
  667         '74.0.3691.1',
  668         '74.0.3691.0',
  669         '73.0.3683.16',
  670         '72.0.3626.88',
  671         '72.0.3626.87',
  672         '73.0.3683.15',
  673         '74.0.3690.1',
  674         '74.0.3690.0',
  675         '73.0.3683.14',
  676         '72.0.3626.86',
  677         '73.0.3683.13',
  678         '73.0.3683.12',
  679         '74.0.3689.1',
  680         '74.0.3689.0',
  681         '73.0.3683.11',
  682         '72.0.3626.85',
  683         '73.0.3683.10',
  684         '72.0.3626.84',
  685         '73.0.3683.9',
  686         '74.0.3688.1',
  687         '74.0.3688.0',
  688         '73.0.3683.8',
  689         '72.0.3626.83',
  690         '74.0.3687.2',
  691         '74.0.3687.1',
  692         '74.0.3687.0',
  693         '73.0.3683.7',
  694         '72.0.3626.82',
  695         '74.0.3686.4',
  696         '72.0.3626.81',
  697         '74.0.3686.3',
  698         '74.0.3686.2',
  699         '74.0.3686.1',
  700         '74.0.3686.0',
  701         '73.0.3683.6',
  702         '72.0.3626.80',
  703         '74.0.3685.1',
  704         '74.0.3685.0',
  705         '73.0.3683.5',
  706         '72.0.3626.79',
  707         '74.0.3684.1',
  708         '74.0.3684.0',
  709         '73.0.3683.4',
  710         '72.0.3626.78',
  711         '72.0.3626.77',
  712         '73.0.3683.3',
  713         '73.0.3683.2',
  714         '72.0.3626.76',
  715         '73.0.3683.1',
  716         '73.0.3683.0',
  717         '72.0.3626.75',
  718         '71.0.3578.141',
  719         '73.0.3682.1',
  720         '73.0.3682.0',
  721         '72.0.3626.74',
  722         '71.0.3578.140',
  723         '73.0.3681.4',
  724         '73.0.3681.3',
  725         '73.0.3681.2',
  726         '73.0.3681.1',
  727         '73.0.3681.0',
  728         '72.0.3626.73',
  729         '71.0.3578.139',
  730         '72.0.3626.72',
  731         '72.0.3626.71',
  732         '73.0.3680.1',
  733         '73.0.3680.0',
  734         '72.0.3626.70',
  735         '71.0.3578.138',
  736         '73.0.3678.2',
  737         '73.0.3679.1',
  738         '73.0.3679.0',
  739         '72.0.3626.69',
  740         '71.0.3578.137',
  741         '73.0.3678.1',
  742         '73.0.3678.0',
  743         '71.0.3578.136',
  744         '73.0.3677.1',
  745         '73.0.3677.0',
  746         '72.0.3626.68',
  747         '72.0.3626.67',
  748         '71.0.3578.135',
  749         '73.0.3676.1',
  750         '73.0.3676.0',
  751         '73.0.3674.2',
  752         '72.0.3626.66',
  753         '71.0.3578.134',
  754         '73.0.3674.1',
  755         '73.0.3674.0',
  756         '72.0.3626.65',
  757         '71.0.3578.133',
  758         '73.0.3673.2',
  759         '73.0.3673.1',
  760         '73.0.3673.0',
  761         '72.0.3626.64',
  762         '71.0.3578.132',
  763         '72.0.3626.63',
  764         '72.0.3626.62',
  765         '72.0.3626.61',
  766         '72.0.3626.60',
  767         '73.0.3672.1',
  768         '73.0.3672.0',
  769         '72.0.3626.59',
  770         '71.0.3578.131',
  771         '73.0.3671.3',
  772         '73.0.3671.2',
  773         '73.0.3671.1',
  774         '73.0.3671.0',
  775         '72.0.3626.58',
  776         '71.0.3578.130',
  777         '73.0.3670.1',
  778         '73.0.3670.0',
  779         '72.0.3626.57',
  780         '71.0.3578.129',
  781         '73.0.3669.1',
  782         '73.0.3669.0',
  783         '72.0.3626.56',
  784         '71.0.3578.128',
  785         '73.0.3668.2',
  786         '73.0.3668.1',
  787         '73.0.3668.0',
  788         '72.0.3626.55',
  789         '71.0.3578.127',
  790         '73.0.3667.2',
  791         '73.0.3667.1',
  792         '73.0.3667.0',
  793         '72.0.3626.54',
  794         '71.0.3578.126',
  795         '73.0.3666.1',
  796         '73.0.3666.0',
  797         '72.0.3626.53',
  798         '71.0.3578.125',
  799         '73.0.3665.4',
  800         '73.0.3665.3',
  801         '72.0.3626.52',
  802         '73.0.3665.2',
  803         '73.0.3664.4',
  804         '73.0.3665.1',
  805         '73.0.3665.0',
  806         '72.0.3626.51',
  807         '71.0.3578.124',
  808         '72.0.3626.50',
  809         '73.0.3664.3',
  810         '73.0.3664.2',
  811         '73.0.3664.1',
  812         '73.0.3664.0',
  813         '73.0.3663.2',
  814         '72.0.3626.49',
  815         '71.0.3578.123',
  816         '73.0.3663.1',
  817         '73.0.3663.0',
  818         '72.0.3626.48',
  819         '71.0.3578.122',
  820         '73.0.3662.1',
  821         '73.0.3662.0',
  822         '72.0.3626.47',
  823         '71.0.3578.121',
  824         '73.0.3661.1',
  825         '72.0.3626.46',
  826         '73.0.3661.0',
  827         '72.0.3626.45',
  828         '71.0.3578.120',
  829         '73.0.3660.2',
  830         '73.0.3660.1',
  831         '73.0.3660.0',
  832         '72.0.3626.44',
  833         '71.0.3578.119',
  834         '73.0.3659.1',
  835         '73.0.3659.0',
  836         '72.0.3626.43',
  837         '71.0.3578.118',
  838         '73.0.3658.1',
  839         '73.0.3658.0',
  840         '72.0.3626.42',
  841         '71.0.3578.117',
  842         '73.0.3657.1',
  843         '73.0.3657.0',
  844         '72.0.3626.41',
  845         '71.0.3578.116',
  846         '73.0.3656.1',
  847         '73.0.3656.0',
  848         '72.0.3626.40',
  849         '71.0.3578.115',
  850         '73.0.3655.1',
  851         '73.0.3655.0',
  852         '72.0.3626.39',
  853         '71.0.3578.114',
  854         '73.0.3654.1',
  855         '73.0.3654.0',
  856         '72.0.3626.38',
  857         '71.0.3578.113',
  858         '73.0.3653.1',
  859         '73.0.3653.0',
  860         '72.0.3626.37',
  861         '71.0.3578.112',
  862         '73.0.3652.1',
  863         '73.0.3652.0',
  864         '72.0.3626.36',
  865         '71.0.3578.111',
  866         '73.0.3651.1',
  867         '73.0.3651.0',
  868         '72.0.3626.35',
  869         '71.0.3578.110',
  870         '73.0.3650.1',
  871         '73.0.3650.0',
  872         '72.0.3626.34',
  873         '71.0.3578.109',
  874         '73.0.3649.1',
  875         '73.0.3649.0',
  876         '72.0.3626.33',
  877         '71.0.3578.108',
  878         '73.0.3648.2',
  879         '73.0.3648.1',
  880         '73.0.3648.0',
  881         '72.0.3626.32',
  882         '71.0.3578.107',
  883         '73.0.3647.2',
  884         '73.0.3647.1',
  885         '73.0.3647.0',
  886         '72.0.3626.31',
  887         '71.0.3578.106',
  888         '73.0.3635.3',
  889         '73.0.3646.2',
  890         '73.0.3646.1',
  891         '73.0.3646.0',
  892         '72.0.3626.30',
  893         '71.0.3578.105',
  894         '72.0.3626.29',
  895         '73.0.3645.2',
  896         '73.0.3645.1',
  897         '73.0.3645.0',
  898         '72.0.3626.28',
  899         '71.0.3578.104',
  900         '72.0.3626.27',
  901         '72.0.3626.26',
  902         '72.0.3626.25',
  903         '72.0.3626.24',
  904         '73.0.3644.0',
  905         '73.0.3643.2',
  906         '72.0.3626.23',
  907         '71.0.3578.103',
  908         '73.0.3643.1',
  909         '73.0.3643.0',
  910         '72.0.3626.22',
  911         '71.0.3578.102',
  912         '73.0.3642.1',
  913         '73.0.3642.0',
  914         '72.0.3626.21',
  915         '71.0.3578.101',
  916         '73.0.3641.1',
  917         '73.0.3641.0',
  918         '72.0.3626.20',
  919         '71.0.3578.100',
  920         '72.0.3626.19',
  921         '73.0.3640.1',
  922         '73.0.3640.0',
  923         '72.0.3626.18',
  924         '73.0.3639.1',
  925         '71.0.3578.99',
  926         '73.0.3639.0',
  927         '72.0.3626.17',
  928         '73.0.3638.2',
  929         '72.0.3626.16',
  930         '73.0.3638.1',
  931         '73.0.3638.0',
  932         '72.0.3626.15',
  933         '71.0.3578.98',
  934         '73.0.3635.2',
  935         '71.0.3578.97',
  936         '73.0.3637.1',
  937         '73.0.3637.0',
  938         '72.0.3626.14',
  939         '71.0.3578.96',
  940         '71.0.3578.95',
  941         '72.0.3626.13',
  942         '71.0.3578.94',
  943         '73.0.3636.2',
  944         '71.0.3578.93',
  945         '73.0.3636.1',
  946         '73.0.3636.0',
  947         '72.0.3626.12',
  948         '71.0.3578.92',
  949         '73.0.3635.1',
  950         '73.0.3635.0',
  951         '72.0.3626.11',
  952         '71.0.3578.91',
  953         '73.0.3634.2',
  954         '73.0.3634.1',
  955         '73.0.3634.0',
  956         '72.0.3626.10',
  957         '71.0.3578.90',
  958         '71.0.3578.89',
  959         '73.0.3633.2',
  960         '73.0.3633.1',
  961         '73.0.3633.0',
  962         '72.0.3610.4',
  963         '72.0.3626.9',
  964         '71.0.3578.88',
  965         '73.0.3632.5',
  966         '73.0.3632.4',
  967         '73.0.3632.3',
  968         '73.0.3632.2',
  969         '73.0.3632.1',
  970         '73.0.3632.0',
  971         '72.0.3626.8',
  972         '71.0.3578.87',
  973         '73.0.3631.2',
  974         '73.0.3631.1',
  975         '73.0.3631.0',
  976         '72.0.3626.7',
  977         '71.0.3578.86',
  978         '72.0.3626.6',
  979         '73.0.3630.1',
  980         '73.0.3630.0',
  981         '72.0.3626.5',
  982         '71.0.3578.85',
  983         '72.0.3626.4',
  984         '73.0.3628.3',
  985         '73.0.3628.2',
  986         '73.0.3629.1',
  987         '73.0.3629.0',
  988         '72.0.3626.3',
  989         '71.0.3578.84',
  990         '73.0.3628.1',
  991         '73.0.3628.0',
  992         '71.0.3578.83',
  993         '73.0.3627.1',
  994         '73.0.3627.0',
  995         '72.0.3626.2',
  996         '71.0.3578.82',
  997         '71.0.3578.81',
  998         '71.0.3578.80',
  999         '72.0.3626.1',
 1000         '72.0.3626.0',
 1001         '71.0.3578.79',
 1002         '70.0.3538.124',
 1003         '71.0.3578.78',
 1004         '72.0.3623.4',
 1005         '72.0.3625.2',
 1006         '72.0.3625.1',
 1007         '72.0.3625.0',
 1008         '71.0.3578.77',
 1009         '70.0.3538.123',
 1010         '72.0.3624.4',
 1011         '72.0.3624.3',
 1012         '72.0.3624.2',
 1013         '71.0.3578.76',
 1014         '72.0.3624.1',
 1015         '72.0.3624.0',
 1016         '72.0.3623.3',
 1017         '71.0.3578.75',
 1018         '70.0.3538.122',
 1019         '71.0.3578.74',
 1020         '72.0.3623.2',
 1021         '72.0.3610.3',
 1022         '72.0.3623.1',
 1023         '72.0.3623.0',
 1024         '72.0.3622.3',
 1025         '72.0.3622.2',
 1026         '71.0.3578.73',
 1027         '70.0.3538.121',
 1028         '72.0.3622.1',
 1029         '72.0.3622.0',
 1030         '71.0.3578.72',
 1031         '70.0.3538.120',
 1032         '72.0.3621.1',
 1033         '72.0.3621.0',
 1034         '71.0.3578.71',
 1035         '70.0.3538.119',
 1036         '72.0.3620.1',
 1037         '72.0.3620.0',
 1038         '71.0.3578.70',
 1039         '70.0.3538.118',
 1040         '71.0.3578.69',
 1041         '72.0.3619.1',
 1042         '72.0.3619.0',
 1043         '71.0.3578.68',
 1044         '70.0.3538.117',
 1045         '71.0.3578.67',
 1046         '72.0.3618.1',
 1047         '72.0.3618.0',
 1048         '71.0.3578.66',
 1049         '70.0.3538.116',
 1050         '72.0.3617.1',
 1051         '72.0.3617.0',
 1052         '71.0.3578.65',
 1053         '70.0.3538.115',
 1054         '72.0.3602.3',
 1055         '71.0.3578.64',
 1056         '72.0.3616.1',
 1057         '72.0.3616.0',
 1058         '71.0.3578.63',
 1059         '70.0.3538.114',
 1060         '71.0.3578.62',
 1061         '72.0.3615.1',
 1062         '72.0.3615.0',
 1063         '71.0.3578.61',
 1064         '70.0.3538.113',
 1065         '72.0.3614.1',
 1066         '72.0.3614.0',
 1067         '71.0.3578.60',
 1068         '70.0.3538.112',
 1069         '72.0.3613.1',
 1070         '72.0.3613.0',
 1071         '71.0.3578.59',
 1072         '70.0.3538.111',
 1073         '72.0.3612.2',
 1074         '72.0.3612.1',
 1075         '72.0.3612.0',
 1076         '70.0.3538.110',
 1077         '71.0.3578.58',
 1078         '70.0.3538.109',
 1079         '72.0.3611.2',
 1080         '72.0.3611.1',
 1081         '72.0.3611.0',
 1082         '71.0.3578.57',
 1083         '70.0.3538.108',
 1084         '72.0.3610.2',
 1085         '71.0.3578.56',
 1086         '71.0.3578.55',
 1087         '72.0.3610.1',
 1088         '72.0.3610.0',
 1089         '71.0.3578.54',
 1090         '70.0.3538.107',
 1091         '71.0.3578.53',
 1092         '72.0.3609.3',
 1093         '71.0.3578.52',
 1094         '72.0.3609.2',
 1095         '71.0.3578.51',
 1096         '72.0.3608.5',
 1097         '72.0.3609.1',
 1098         '72.0.3609.0',
 1099         '71.0.3578.50',
 1100         '70.0.3538.106',
 1101         '72.0.3608.4',
 1102         '72.0.3608.3',
 1103         '72.0.3608.2',
 1104         '71.0.3578.49',
 1105         '72.0.3608.1',
 1106         '72.0.3608.0',
 1107         '70.0.3538.105',
 1108         '71.0.3578.48',
 1109         '72.0.3607.1',
 1110         '72.0.3607.0',
 1111         '71.0.3578.47',
 1112         '70.0.3538.104',
 1113         '72.0.3606.2',
 1114         '72.0.3606.1',
 1115         '72.0.3606.0',
 1116         '71.0.3578.46',
 1117         '70.0.3538.103',
 1118         '70.0.3538.102',
 1119         '72.0.3605.3',
 1120         '72.0.3605.2',
 1121         '72.0.3605.1',
 1122         '72.0.3605.0',
 1123         '71.0.3578.45',
 1124         '70.0.3538.101',
 1125         '71.0.3578.44',
 1126         '71.0.3578.43',
 1127         '70.0.3538.100',
 1128         '70.0.3538.99',
 1129         '71.0.3578.42',
 1130         '72.0.3604.1',
 1131         '72.0.3604.0',
 1132         '71.0.3578.41',
 1133         '70.0.3538.98',
 1134         '71.0.3578.40',
 1135         '72.0.3603.2',
 1136         '72.0.3603.1',
 1137         '72.0.3603.0',
 1138         '71.0.3578.39',
 1139         '70.0.3538.97',
 1140         '72.0.3602.2',
 1141         '71.0.3578.38',
 1142         '71.0.3578.37',
 1143         '72.0.3602.1',
 1144         '72.0.3602.0',
 1145         '71.0.3578.36',
 1146         '70.0.3538.96',
 1147         '72.0.3601.1',
 1148         '72.0.3601.0',
 1149         '71.0.3578.35',
 1150         '70.0.3538.95',
 1151         '72.0.3600.1',
 1152         '72.0.3600.0',
 1153         '71.0.3578.34',
 1154         '70.0.3538.94',
 1155         '72.0.3599.3',
 1156         '72.0.3599.2',
 1157         '72.0.3599.1',
 1158         '72.0.3599.0',
 1159         '71.0.3578.33',
 1160         '70.0.3538.93',
 1161         '72.0.3598.1',
 1162         '72.0.3598.0',
 1163         '71.0.3578.32',
 1164         '70.0.3538.87',
 1165         '72.0.3597.1',
 1166         '72.0.3597.0',
 1167         '72.0.3596.2',
 1168         '71.0.3578.31',
 1169         '70.0.3538.86',
 1170         '71.0.3578.30',
 1171         '71.0.3578.29',
 1172         '72.0.3596.1',
 1173         '72.0.3596.0',
 1174         '71.0.3578.28',
 1175         '70.0.3538.85',
 1176         '72.0.3595.2',
 1177         '72.0.3591.3',
 1178         '72.0.3595.1',
 1179         '72.0.3595.0',
 1180         '71.0.3578.27',
 1181         '70.0.3538.84',
 1182         '72.0.3594.1',
 1183         '72.0.3594.0',
 1184         '71.0.3578.26',
 1185         '70.0.3538.83',
 1186         '72.0.3593.2',
 1187         '72.0.3593.1',
 1188         '72.0.3593.0',
 1189         '71.0.3578.25',
 1190         '70.0.3538.82',
 1191         '72.0.3589.3',
 1192         '72.0.3592.2',
 1193         '72.0.3592.1',
 1194         '72.0.3592.0',
 1195         '71.0.3578.24',
 1196         '72.0.3589.2',
 1197         '70.0.3538.81',
 1198         '70.0.3538.80',
 1199         '72.0.3591.2',
 1200         '72.0.3591.1',
 1201         '72.0.3591.0',
 1202         '71.0.3578.23',
 1203         '70.0.3538.79',
 1204         '71.0.3578.22',
 1205         '72.0.3590.1',
 1206         '72.0.3590.0',
 1207         '71.0.3578.21',
 1208         '70.0.3538.78',
 1209         '70.0.3538.77',
 1210         '72.0.3589.1',
 1211         '72.0.3589.0',
 1212         '71.0.3578.20',
 1213         '70.0.3538.76',
 1214         '71.0.3578.19',
 1215         '70.0.3538.75',
 1216         '72.0.3588.1',
 1217         '72.0.3588.0',
 1218         '71.0.3578.18',
 1219         '70.0.3538.74',
 1220         '72.0.3586.2',
 1221         '72.0.3587.0',
 1222         '71.0.3578.17',
 1223         '70.0.3538.73',
 1224         '72.0.3586.1',
 1225         '72.0.3586.0',
 1226         '71.0.3578.16',
 1227         '70.0.3538.72',
 1228         '72.0.3585.1',
 1229         '72.0.3585.0',
 1230         '71.0.3578.15',
 1231         '70.0.3538.71',
 1232         '71.0.3578.14',
 1233         '72.0.3584.1',
 1234         '72.0.3584.0',
 1235         '71.0.3578.13',
 1236         '70.0.3538.70',
 1237         '72.0.3583.2',
 1238         '71.0.3578.12',
 1239         '72.0.3583.1',
 1240         '72.0.3583.0',
 1241         '71.0.3578.11',
 1242         '70.0.3538.69',
 1243         '71.0.3578.10',
 1244         '72.0.3582.0',
 1245         '72.0.3581.4',
 1246         '71.0.3578.9',
 1247         '70.0.3538.67',
 1248         '72.0.3581.3',
 1249         '72.0.3581.2',
 1250         '72.0.3581.1',
 1251         '72.0.3581.0',
 1252         '71.0.3578.8',
 1253         '70.0.3538.66',
 1254         '72.0.3580.1',
 1255         '72.0.3580.0',
 1256         '71.0.3578.7',
 1257         '70.0.3538.65',
 1258         '71.0.3578.6',
 1259         '72.0.3579.1',
 1260         '72.0.3579.0',
 1261         '71.0.3578.5',
 1262         '70.0.3538.64',
 1263         '71.0.3578.4',
 1264         '71.0.3578.3',
 1265         '71.0.3578.2',
 1266         '71.0.3578.1',
 1267         '71.0.3578.0',
 1268         '70.0.3538.63',
 1269         '69.0.3497.128',
 1270         '70.0.3538.62',
 1271         '70.0.3538.61',
 1272         '70.0.3538.60',
 1273         '70.0.3538.59',
 1274         '71.0.3577.1',
 1275         '71.0.3577.0',
 1276         '70.0.3538.58',
 1277         '69.0.3497.127',
 1278         '71.0.3576.2',
 1279         '71.0.3576.1',
 1280         '71.0.3576.0',
 1281         '70.0.3538.57',
 1282         '70.0.3538.56',
 1283         '71.0.3575.2',
 1284         '70.0.3538.55',
 1285         '69.0.3497.126',
 1286         '70.0.3538.54',
 1287         '71.0.3575.1',
 1288         '71.0.3575.0',
 1289         '71.0.3574.1',
 1290         '71.0.3574.0',
 1291         '70.0.3538.53',
 1292         '69.0.3497.125',
 1293         '70.0.3538.52',
 1294         '71.0.3573.1',
 1295         '71.0.3573.0',
 1296         '70.0.3538.51',
 1297         '69.0.3497.124',
 1298         '71.0.3572.1',
 1299         '71.0.3572.0',
 1300         '70.0.3538.50',
 1301         '69.0.3497.123',
 1302         '71.0.3571.2',
 1303         '70.0.3538.49',
 1304         '69.0.3497.122',
 1305         '71.0.3571.1',
 1306         '71.0.3571.0',
 1307         '70.0.3538.48',
 1308         '69.0.3497.121',
 1309         '71.0.3570.1',
 1310         '71.0.3570.0',
 1311         '70.0.3538.47',
 1312         '69.0.3497.120',
 1313         '71.0.3568.2',
 1314         '71.0.3569.1',
 1315         '71.0.3569.0',
 1316         '70.0.3538.46',
 1317         '69.0.3497.119',
 1318         '70.0.3538.45',
 1319         '71.0.3568.1',
 1320         '71.0.3568.0',
 1321         '70.0.3538.44',
 1322         '69.0.3497.118',
 1323         '70.0.3538.43',
 1324         '70.0.3538.42',
 1325         '71.0.3567.1',
 1326         '71.0.3567.0',
 1327         '70.0.3538.41',
 1328         '69.0.3497.117',
 1329         '71.0.3566.1',
 1330         '71.0.3566.0',
 1331         '70.0.3538.40',
 1332         '69.0.3497.116',
 1333         '71.0.3565.1',
 1334         '71.0.3565.0',
 1335         '70.0.3538.39',
 1336         '69.0.3497.115',
 1337         '71.0.3564.1',
 1338         '71.0.3564.0',
 1339         '70.0.3538.38',
 1340         '69.0.3497.114',
 1341         '71.0.3563.0',
 1342         '71.0.3562.2',
 1343         '70.0.3538.37',
 1344         '69.0.3497.113',
 1345         '70.0.3538.36',
 1346         '70.0.3538.35',
 1347         '71.0.3562.1',
 1348         '71.0.3562.0',
 1349         '70.0.3538.34',
 1350         '69.0.3497.112',
 1351         '70.0.3538.33',
 1352         '71.0.3561.1',
 1353         '71.0.3561.0',
 1354         '70.0.3538.32',
 1355         '69.0.3497.111',
 1356         '71.0.3559.6',
 1357         '71.0.3560.1',
 1358         '71.0.3560.0',
 1359         '71.0.3559.5',
 1360         '71.0.3559.4',
 1361         '70.0.3538.31',
 1362         '69.0.3497.110',
 1363         '71.0.3559.3',
 1364         '70.0.3538.30',
 1365         '69.0.3497.109',
 1366         '71.0.3559.2',
 1367         '71.0.3559.1',
 1368         '71.0.3559.0',
 1369         '70.0.3538.29',
 1370         '69.0.3497.108',
 1371         '71.0.3558.2',
 1372         '71.0.3558.1',
 1373         '71.0.3558.0',
 1374         '70.0.3538.28',
 1375         '69.0.3497.107',
 1376         '71.0.3557.2',
 1377         '71.0.3557.1',
 1378         '71.0.3557.0',
 1379         '70.0.3538.27',
 1380         '69.0.3497.106',
 1381         '71.0.3554.4',
 1382         '70.0.3538.26',
 1383         '71.0.3556.1',
 1384         '71.0.3556.0',
 1385         '70.0.3538.25',
 1386         '71.0.3554.3',
 1387         '69.0.3497.105',
 1388         '71.0.3554.2',
 1389         '70.0.3538.24',
 1390         '69.0.3497.104',
 1391         '71.0.3555.2',
 1392         '70.0.3538.23',
 1393         '71.0.3555.1',
 1394         '71.0.3555.0',
 1395         '70.0.3538.22',
 1396         '69.0.3497.103',
 1397         '71.0.3554.1',
 1398         '71.0.3554.0',
 1399         '70.0.3538.21',
 1400         '69.0.3497.102',
 1401         '71.0.3553.3',
 1402         '70.0.3538.20',
 1403         '69.0.3497.101',
 1404         '71.0.3553.2',
 1405         '69.0.3497.100',
 1406         '71.0.3553.1',
 1407         '71.0.3553.0',
 1408         '70.0.3538.19',
 1409         '69.0.3497.99',
 1410         '69.0.3497.98',
 1411         '69.0.3497.97',
 1412         '71.0.3552.6',
 1413         '71.0.3552.5',
 1414         '71.0.3552.4',
 1415         '71.0.3552.3',
 1416         '71.0.3552.2',
 1417         '71.0.3552.1',
 1418         '71.0.3552.0',
 1419         '70.0.3538.18',
 1420         '69.0.3497.96',
 1421         '71.0.3551.3',
 1422         '71.0.3551.2',
 1423         '71.0.3551.1',
 1424         '71.0.3551.0',
 1425         '70.0.3538.17',
 1426         '69.0.3497.95',
 1427         '71.0.3550.3',
 1428         '71.0.3550.2',
 1429         '71.0.3550.1',
 1430         '71.0.3550.0',
 1431         '70.0.3538.16',
 1432         '69.0.3497.94',
 1433         '71.0.3549.1',
 1434         '71.0.3549.0',
 1435         '70.0.3538.15',
 1436         '69.0.3497.93',
 1437         '69.0.3497.92',
 1438         '71.0.3548.1',
 1439         '71.0.3548.0',
 1440         '70.0.3538.14',
 1441         '69.0.3497.91',
 1442         '71.0.3547.1',
 1443         '71.0.3547.0',
 1444         '70.0.3538.13',
 1445         '69.0.3497.90',
 1446         '71.0.3546.2',
 1447         '69.0.3497.89',
 1448         '71.0.3546.1',
 1449         '71.0.3546.0',
 1450         '70.0.3538.12',
 1451         '69.0.3497.88',
 1452         '71.0.3545.4',
 1453         '71.0.3545.3',
 1454         '71.0.3545.2',
 1455         '71.0.3545.1',
 1456         '71.0.3545.0',
 1457         '70.0.3538.11',
 1458         '69.0.3497.87',
 1459         '71.0.3544.5',
 1460         '71.0.3544.4',
 1461         '71.0.3544.3',
 1462         '71.0.3544.2',
 1463         '71.0.3544.1',
 1464         '71.0.3544.0',
 1465         '69.0.3497.86',
 1466         '70.0.3538.10',
 1467         '69.0.3497.85',
 1468         '70.0.3538.9',
 1469         '69.0.3497.84',
 1470         '71.0.3543.4',
 1471         '70.0.3538.8',
 1472         '71.0.3543.3',
 1473         '71.0.3543.2',
 1474         '71.0.3543.1',
 1475         '71.0.3543.0',
 1476         '70.0.3538.7',
 1477         '69.0.3497.83',
 1478         '71.0.3542.2',
 1479         '71.0.3542.1',
 1480         '71.0.3542.0',
 1481         '70.0.3538.6',
 1482         '69.0.3497.82',
 1483         '69.0.3497.81',
 1484         '71.0.3541.1',
 1485         '71.0.3541.0',
 1486         '70.0.3538.5',
 1487         '69.0.3497.80',
 1488         '71.0.3540.1',
 1489         '71.0.3540.0',
 1490         '70.0.3538.4',
 1491         '69.0.3497.79',
 1492         '70.0.3538.3',
 1493         '71.0.3539.1',
 1494         '71.0.3539.0',
 1495         '69.0.3497.78',
 1496         '68.0.3440.134',
 1497         '69.0.3497.77',
 1498         '70.0.3538.2',
 1499         '70.0.3538.1',
 1500         '70.0.3538.0',
 1501         '69.0.3497.76',
 1502         '68.0.3440.133',
 1503         '69.0.3497.75',
 1504         '70.0.3537.2',
 1505         '70.0.3537.1',
 1506         '70.0.3537.0',
 1507         '69.0.3497.74',
 1508         '68.0.3440.132',
 1509         '70.0.3536.0',
 1510         '70.0.3535.5',
 1511         '70.0.3535.4',
 1512         '70.0.3535.3',
 1513         '69.0.3497.73',
 1514         '68.0.3440.131',
 1515         '70.0.3532.8',
 1516         '70.0.3532.7',
 1517         '69.0.3497.72',
 1518         '69.0.3497.71',
 1519         '70.0.3535.2',
 1520         '70.0.3535.1',
 1521         '70.0.3535.0',
 1522         '69.0.3497.70',
 1523         '68.0.3440.130',
 1524         '69.0.3497.69',
 1525         '68.0.3440.129',
 1526         '70.0.3534.4',
 1527         '70.0.3534.3',
 1528         '70.0.3534.2',
 1529         '70.0.3534.1',
 1530         '70.0.3534.0',
 1531         '69.0.3497.68',
 1532         '68.0.3440.128',
 1533         '70.0.3533.2',
 1534         '70.0.3533.1',
 1535         '70.0.3533.0',
 1536         '69.0.3497.67',
 1537         '68.0.3440.127',
 1538         '70.0.3532.6',
 1539         '70.0.3532.5',
 1540         '70.0.3532.4',
 1541         '69.0.3497.66',
 1542         '68.0.3440.126',
 1543         '70.0.3532.3',
 1544         '70.0.3532.2',
 1545         '70.0.3532.1',
 1546         '69.0.3497.60',
 1547         '69.0.3497.65',
 1548         '69.0.3497.64',
 1549         '70.0.3532.0',
 1550         '70.0.3531.0',
 1551         '70.0.3530.4',
 1552         '70.0.3530.3',
 1553         '70.0.3530.2',
 1554         '69.0.3497.58',
 1555         '68.0.3440.125',
 1556         '69.0.3497.57',
 1557         '69.0.3497.56',
 1558         '69.0.3497.55',
 1559         '69.0.3497.54',
 1560         '70.0.3530.1',
 1561         '70.0.3530.0',
 1562         '69.0.3497.53',
 1563         '68.0.3440.124',
 1564         '69.0.3497.52',
 1565         '70.0.3529.3',
 1566         '70.0.3529.2',
 1567         '70.0.3529.1',
 1568         '70.0.3529.0',
 1569         '69.0.3497.51',
 1570         '70.0.3528.4',
 1571         '68.0.3440.123',
 1572         '70.0.3528.3',
 1573         '70.0.3528.2',
 1574         '70.0.3528.1',
 1575         '70.0.3528.0',
 1576         '69.0.3497.50',
 1577         '68.0.3440.122',
 1578         '70.0.3527.1',
 1579         '70.0.3527.0',
 1580         '69.0.3497.49',
 1581         '68.0.3440.121',
 1582         '70.0.3526.1',
 1583         '70.0.3526.0',
 1584         '68.0.3440.120',
 1585         '69.0.3497.48',
 1586         '69.0.3497.47',
 1587         '68.0.3440.119',
 1588         '68.0.3440.118',
 1589         '70.0.3525.5',
 1590         '70.0.3525.4',
 1591         '70.0.3525.3',
 1592         '68.0.3440.117',
 1593         '69.0.3497.46',
 1594         '70.0.3525.2',
 1595         '70.0.3525.1',
 1596         '70.0.3525.0',
 1597         '69.0.3497.45',
 1598         '68.0.3440.116',
 1599         '70.0.3524.4',
 1600         '70.0.3524.3',
 1601         '69.0.3497.44',
 1602         '70.0.3524.2',
 1603         '70.0.3524.1',
 1604         '70.0.3524.0',
 1605         '70.0.3523.2',
 1606         '69.0.3497.43',
 1607         '68.0.3440.115',
 1608         '70.0.3505.9',
 1609         '69.0.3497.42',
 1610         '70.0.3505.8',
 1611         '70.0.3523.1',
 1612         '70.0.3523.0',
 1613         '69.0.3497.41',
 1614         '68.0.3440.114',
 1615         '70.0.3505.7',
 1616         '69.0.3497.40',
 1617         '70.0.3522.1',
 1618         '70.0.3522.0',
 1619         '70.0.3521.2',
 1620         '69.0.3497.39',
 1621         '68.0.3440.113',
 1622         '70.0.3505.6',
 1623         '70.0.3521.1',
 1624         '70.0.3521.0',
 1625         '69.0.3497.38',
 1626         '68.0.3440.112',
 1627         '70.0.3520.1',
 1628         '70.0.3520.0',
 1629         '69.0.3497.37',
 1630         '68.0.3440.111',
 1631         '70.0.3519.3',
 1632         '70.0.3519.2',
 1633         '70.0.3519.1',
 1634         '70.0.3519.0',
 1635         '69.0.3497.36',
 1636         '68.0.3440.110',
 1637         '70.0.3518.1',
 1638         '70.0.3518.0',
 1639         '69.0.3497.35',
 1640         '69.0.3497.34',
 1641         '68.0.3440.109',
 1642         '70.0.3517.1',
 1643         '70.0.3517.0',
 1644         '69.0.3497.33',
 1645         '68.0.3440.108',
 1646         '69.0.3497.32',
 1647         '70.0.3516.3',
 1648         '70.0.3516.2',
 1649         '70.0.3516.1',
 1650         '70.0.3516.0',
 1651         '69.0.3497.31',
 1652         '68.0.3440.107',
 1653         '70.0.3515.4',
 1654         '68.0.3440.106',
 1655         '70.0.3515.3',
 1656         '70.0.3515.2',
 1657         '70.0.3515.1',
 1658         '70.0.3515.0',
 1659         '69.0.3497.30',
 1660         '68.0.3440.105',
 1661         '68.0.3440.104',
 1662         '70.0.3514.2',
 1663         '70.0.3514.1',
 1664         '70.0.3514.0',
 1665         '69.0.3497.29',
 1666         '68.0.3440.103',
 1667         '70.0.3513.1',
 1668         '70.0.3513.0',
 1669         '69.0.3497.28',
 1670     )
 1671     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
 1672 
 1673 
# Default HTTP headers for outgoing requests; 'User-Agent' is randomized
# once per process via random_user_agent() above.
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Named alternative User-Agent strings, selectable by key.
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
 1686 
 1687 
# Sentinel for "no default was supplied" (distinct from an explicit None).
NO_DEFAULT = object()
# Identity function, usable as a default transform/callback.
IDENTITY = lambda x: x

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Full month names keyed by language code — presumably consulted when
# parsing textual dates; confirm against the date-parsing helpers.
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
 1701 
# Timezone names for RFC2822 obs-zone
# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
# Values are UTC offsets in whole hours.
TIMEZONE_NAMES = {
    'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
    'AST': -4, 'ADT': -3,  # Atlantic (used in Canada)
    'EST': -5, 'EDT': -4,  # Eastern
    'CST': -6, 'CDT': -5,  # Central
    'MST': -7, 'MDT': -6,  # Mountain
    'PST': -8, 'PDT': -7   # Pacific
}
 1712 
# File extensions recognized as media (containers, audio codecs,
# playlists/manifests such as m3u8/f4m/smil).
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
# Each accented letter maps positionally onto its ASCII replacement;
# multi-character replacements ('AE', 'OE', 'TH', 'ss', ...) are passed as
# one-element lists so the zip stays aligned with the key string.
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
 1732 
# strptime() patterns tried in order when parsing free-form date strings.
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y%m%d',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

# Additional patterns for locales that write the day before the month.
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
    '%d-%m-%Y %H:%M',
])

# Additional patterns for locales that write the month before the day.
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches packed (obfuscated) JavaScript of the form
# }('...', base, count, '...'.split('|')) and captures its four arguments.
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Extracts the body of a <script type="application/ld+json"> block
# into the named group 'json_ld'.
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
 1800 
 1801 
 1802 def preferredencoding():
 1803     """Get preferred encoding.
 1804 
 1805     Returns the best encoding scheme for the system, based on
 1806     locale.getpreferredencoding() and some further tweaks.
 1807     """
 1808     try:
 1809         pref = locale.getpreferredencoding()
 1810         'TEST'.encode(pref)
 1811     except Exception:
 1812         pref = 'UTF-8'
 1813 
 1814     return pref
 1815 
 1816 
 1817 def write_json_file(obj, fn):
 1818     """ Encode obj as JSON and write it to fn, atomically if possible """
 1819 
 1820     fn = encodeFilename(fn)
 1821     if sys.version_info < (3, 0) and sys.platform != 'win32':
 1822         encoding = get_filesystem_encoding()
 1823         # os.path.basename returns a bytes object, but NamedTemporaryFile
 1824         # will fail if the filename contains non ascii characters unless we
 1825         # use a unicode object
 1826         path_basename = lambda f: os.path.basename(fn).decode(encoding)
 1827         # the same for os.path.dirname
 1828         path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
 1829     else:
 1830         path_basename = os.path.basename
 1831         path_dirname = os.path.dirname
 1832 
 1833     args = {
 1834         'suffix': '.tmp',
 1835         'prefix': path_basename(fn) + '.',
 1836         'dir': path_dirname(fn),
 1837         'delete': False,
 1838     }
 1839 
 1840     # In Python 2.x, json.dump expects a bytestream.
 1841     # In Python 3.x, it writes to a character stream
 1842     if sys.version_info < (3, 0):
 1843         args['mode'] = 'wb'
 1844     else:
 1845         args.update({
 1846             'mode': 'w',
 1847             'encoding': 'utf-8',
 1848         })
 1849 
 1850     tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
 1851 
 1852     try:
 1853         with tf:
 1854             json.dump(obj, tf)
 1855         if sys.platform == 'win32':
 1856             # Need to remove existing file on Windows, else os.rename raises
 1857             # WindowsError or FileExistsError.
 1858             try:
 1859                 os.unlink(fn)
 1860             except OSError:
 1861                 pass
 1862         try:
 1863             mask = os.umask(0)
 1864             os.umask(mask)
 1865             os.chmod(tf.name, 0o666 & ~mask)
 1866         except OSError:
 1867             pass
 1868         os.rename(tf.name, fn)
 1869     except Exception:
 1870         try:
 1871             os.remove(tf.name)
 1872         except OSError:
 1873             pass
 1874         raise
 1875 
 1876 
 1877 if sys.version_info >= (2, 7):
 1878     def find_xpath_attr(node, xpath, key, val=None):
 1879         """ Find the xpath xpath[@key=val] """
 1880         assert re.match(r'^[a-zA-Z_-]+$', key)
 1881         expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
 1882         return node.find(expr)
 1883 else:
 1884     def find_xpath_attr(node, xpath, key, val=None):
 1885         for f in node.findall(compat_xpath(xpath)):
 1886             if key not in f.attrib:
 1887                 continue
 1888             if val is None or f.attrib.get(key) == val:
 1889                 return f
 1890         return None
 1891 
 1892 # On python2.6 the xml.etree.ElementTree.Element methods don't support
 1893 # the namespace parameter
 1894 
 1895 
 1896 def xpath_with_ns(path, ns_map):
 1897     components = [c.split(':') for c in path.split('/')]
 1898     replaced = []
 1899     for c in components:
 1900         if len(c) == 1:
 1901             replaced.append(c[0])
 1902         else:
 1903             ns, tag = c
 1904             replaced.append('{%s}%s' % (ns_map[ns], tag))
 1905     return '/'.join(replaced)
 1906 
 1907 
 1908 def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
 1909     def _find_xpath(xpath):
 1910         return node.find(compat_xpath(xpath))
 1911 
 1912     if isinstance(xpath, (str, compat_str)):
 1913         n = _find_xpath(xpath)
 1914     else:
 1915         for xp in xpath:
 1916             n = _find_xpath(xp)
 1917             if n is not None:
 1918                 break
 1919 
 1920     if n is None:
 1921         if default is not NO_DEFAULT:
 1922             return default
 1923         elif fatal:
 1924             name = xpath if name is None else name
 1925             raise ExtractorError('Could not find XML element %s' % name)
 1926         else:
 1927             return None
 1928     return n
 1929 
 1930 
 1931 def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
 1932     n = xpath_element(node, xpath, name, fatal=fatal, default=default)
 1933     if n is None or n == default:
 1934         return n
 1935     if n.text is None:
 1936         if default is not NO_DEFAULT:
 1937             return default
 1938         elif fatal:
 1939             name = xpath if name is None else name
 1940             raise ExtractorError('Could not find XML element\'s text %s' % name)
 1941         else:
 1942             return None
 1943     return n.text
 1944 
 1945 
 1946 def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
 1947     n = find_xpath_attr(node, xpath, key)
 1948     if n is None:
 1949         if default is not NO_DEFAULT:
 1950             return default
 1951         elif fatal:
 1952             name = '%s[@%s]' % (xpath, key) if name is None else name
 1953             raise ExtractorError('Could not find XML attribute %s' % name)
 1954         else:
 1955             return None
 1956     return n.attrib[key]
 1957 
 1958 
 1959 def get_element_by_id(id, html):
 1960     """Return the content of the tag with the specified ID in the passed HTML document"""
 1961     return get_element_by_attribute('id', id, html)
 1962 
 1963 
 1964 def get_element_by_class(class_name, html):
 1965     """Return the content of the first tag with the specified class in the passed HTML document"""
 1966     retval = get_elements_by_class(class_name, html)
 1967     return retval[0] if retval else None
 1968 
 1969 
 1970 def get_element_by_attribute(attribute, value, html, escape_value=True):
 1971     retval = get_elements_by_attribute(attribute, value, html, escape_value)
 1972     return retval[0] if retval else None
 1973 
 1974 
 1975 def get_elements_by_class(class_name, html):
 1976     """Return the content of all tags with the specified class in the passed HTML document as a list"""
 1977     return get_elements_by_attribute(
 1978         'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
 1979         html, escape_value=False)
 1980 
 1981 
 1982 def get_elements_by_attribute(attribute, value, html, escape_value=True):
 1983     """Return the content of the tag with the specified attribute in the passed HTML document"""
 1984 
 1985     value = re.escape(value) if escape_value else value
 1986 
 1987     retlist = []
 1988     for m in re.finditer(r'''(?xs)
 1989         <([a-zA-Z0-9:._-]+)
 1990          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
 1991          \s+%s=['"]?%s['"]?
 1992          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
 1993         \s*>
 1994         (?P<content>.*?)
 1995         </\1>
 1996     ''' % (re.escape(attribute), value), html):
 1997         res = m.group('content')
 1998 
 1999         if res.startswith('"') or res.startswith("'"):
 2000             res = res[1:-1]
 2001 
 2002         retlist.append(unescapeHTML(res))
 2003 
 2004     return retlist
 2005 
 2006 
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""
    def __init__(self):
        # Attributes of the most recently parsed start tag (empty until fed).
        self.attrs = {}
        # Explicit base-class call for Python 2 compatibility.
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # A later start tag overwrites earlier ones; callers are expected
        # to feed a single element (see class docstring).
        self.attrs = dict(attrs)
 2015 
 2016 
 2017 def extract_attributes(html_element):
 2018     """Given a string for an HTML element such as
 2019     <el
 2020          a="foo" B="bar" c="&98;az" d=boz
 2021          empty= noval entity="&amp;"
 2022          sq='"' dq="'"
 2023     >
 2024     Decode and return a dictionary of attributes.
 2025     {
 2026         'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
 2027         'empty': '', 'noval': None, 'entity': '&',
 2028         'sq': '"', 'dq': '\''
 2029     }.
 2030     NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
 2031     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
 2032     """
 2033     parser = HTMLAttributeParser()
 2034     try:
 2035         parser.feed(html_element)
 2036         parser.close()
 2037     # Older Python may throw HTMLParseError in case of malformed HTML
 2038     except compat_HTMLParseError:
 2039         pass
 2040     return parser.attrs
 2041 
 2042 
 2043 def clean_html(html):
 2044     """Clean an HTML snippet into a readable string"""
 2045 
 2046     if html is None:  # Convenience for sanitizing descriptions etc.
 2047         return html
 2048 
 2049     # Newline vs <br />
 2050     html = html.replace('\n', ' ')
 2051     html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
 2052     html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
 2053     # Strip html tags
 2054     html = re.sub('<.*?>', '', html)
 2055     # Replace html entities
 2056     html = unescapeHTML(html)
 2057     return html.strip()
 2058 
 2059 
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            # '-' means standard output.
            if sys.platform == 'win32':
                import msvcrt
                # Put stdout into binary mode so piped data is not
                # newline-translated on Windows.
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            # Prefer the underlying byte buffer (Python 3) over the text
            # wrapper when available.
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # A permission error will not be fixed by renaming; re-raise as-is.
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            # Sanitizing changed nothing, so retrying would be pointless.
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
 2090 
 2091 
 2092 def timeconvert(timestr):
 2093     """Convert RFC 2822 defined time string into system timestamp"""
 2094     timestamp = None
 2095     timetuple = email.utils.parsedate_tz(timestr)
 2096     if timetuple is not None:
 2097         timestamp = email.utils.mktime_tz(timetuple)
 2098     return timestamp
 2099 
 2100 
 2101 def sanitize_filename(s, restricted=False, is_id=False):
 2102     """Sanitizes a string so it could be used as part of a filename.
 2103     If restricted is set, use a stricter subset of allowed characters.
 2104     Set is_id if this is not an arbitrary string, but an ID that should be kept
 2105     if possible.
 2106     """
 2107     def replace_insane(char):
 2108         if restricted and char in ACCENT_CHARS:
 2109             return ACCENT_CHARS[char]
 2110         if char == '?' or ord(char) < 32 or ord(char) == 127:
 2111             return ''
 2112         elif char == '"':
 2113             return '' if restricted else '\''
 2114         elif char == ':':
 2115             return '_-' if restricted else ' -'
 2116         elif char in '\\/|*<>':
 2117             return '_'
 2118         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
 2119             return '_'
 2120         if restricted and ord(char) > 127:
 2121             return '_'
 2122         return char
 2123 
 2124     # Replace look-alike Unicode glyphs
 2125     if restricted and not is_id:
 2126         s = unicodedata.normalize('NFKC', s)
 2127     # Handle timestamps
 2128     s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
 2129     result = ''.join(map(replace_insane, s))
 2130     if not is_id:
 2131         while '__' in result:
 2132             result = result.replace('__', '_')
 2133         result = result.strip('_')
 2134         # Common case of "Foreign band name - English song title"
 2135         if restricted and result.startswith('-_'):
 2136             result = result[2:]
 2137         if result.startswith('-'):
 2138             result = '_' + result[len('-'):]
 2139         result = result.lstrip('.')
 2140         if not result:
 2141             result = '_'
 2142     return result
 2143 
 2144 
 2145 def sanitize_path(s):
 2146     """Sanitizes and normalizes path on Windows"""
 2147     if sys.platform != 'win32':
 2148         return s
 2149     drive_or_unc, _ = os.path.splitdrive(s)
 2150     if sys.version_info < (2, 7) and not drive_or_unc:
 2151         drive_or_unc, _ = os.path.splitunc(s)
 2152     norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
 2153     if drive_or_unc:
 2154         norm_path.pop(0)
 2155     sanitized_path = [
 2156         path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
 2157         for path_part in norm_path]
 2158     if drive_or_unc:
 2159         sanitized_path.insert(0, drive_or_unc + os.path.sep)
 2160     return os.path.join(*sanitized_path)
 2161 
 2162 
 2163 def sanitize_url(url):
 2164     # Prepend protocol-less URLs with `http:` scheme in order to mitigate
 2165     # the number of unwanted failures due to missing protocol
 2166     if url.startswith('//'):
 2167         return 'http:%s' % url
 2168     # Fix some common typos seen so far
 2169     COMMON_TYPOS = (
 2170         # https://github.com/ytdl-org/youtube-dl/issues/15649
 2171         (r'^httpss://', r'https://'),
 2172         # https://bx1.be/lives/direct-tv/
 2173         (r'^rmtp([es]?)://', r'rtmp\1://'),
 2174     )
 2175     for mistake, fixup in COMMON_TYPOS:
 2176         if re.match(mistake, url):
 2177             return re.sub(mistake, fixup, url)
 2178     return escape_url(url)
 2179 
 2180 
 2181 def sanitized_Request(url, *args, **kwargs):
 2182     return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
 2183 
 2184 
 2185 def expand_path(s):
 2186     """Expand shell variables and ~"""
 2187     return os.path.expandvars(compat_expanduser(s))
 2188 
 2189 
 2190 def orderedSet(iterable):
 2191     """ Remove all duplicates from the input iterable """
 2192     res = []
 2193     for el in iterable:
 2194         if el not in res:
 2195             res.append(el)
 2196     return res
 2197 
 2198 
 2199 def _htmlentity_transform(entity_with_semicolon):
 2200     """Transforms an HTML entity to a character."""
 2201     entity = entity_with_semicolon[:-1]
 2202 
 2203     # Known non-numeric HTML entity
 2204     if entity in compat_html_entities.name2codepoint:
 2205         return compat_chr(compat_html_entities.name2codepoint[entity])
 2206 
 2207     # TODO: HTML5 allows entities without a semicolon. For example,
 2208     # '&Eacuteric' should be decoded as 'Éric'.
 2209     if entity_with_semicolon in compat_html_entities_html5:
 2210         return compat_html_entities_html5[entity_with_semicolon]
 2211 
 2212     mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
 2213     if mobj is not None:
 2214         numstr = mobj.group(1)
 2215         if numstr.startswith('x'):
 2216             base = 16
 2217             numstr = '0%s' % numstr
 2218         else:
 2219             base = 10
 2220         # See https://github.com/ytdl-org/youtube-dl/issues/7518
 2221         try:
 2222             return compat_chr(int(numstr, base))
 2223         except ValueError:
 2224             pass
 2225 
 2226     # Unknown entity in name, return its literal representation
 2227     return '&%s;' % entity
 2228 
 2229 
 2230 def unescapeHTML(s):
 2231     if s is None:
 2232         return None
 2233     assert type(s) == compat_str
 2234 
 2235     return re.sub(
 2236         r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
 2237 
 2238 
 2239 def process_communicate_or_kill(p, *args, **kwargs):
 2240     try:
 2241         return p.communicate(*args, **kwargs)
 2242     except BaseException:  # Including KeyboardInterrupt
 2243         p.kill()
 2244         p.wait()
 2245         raise
 2246 
 2247 
 2248 def get_subprocess_encoding():
 2249     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
 2250         # For subprocess calls, encode with locale encoding
 2251         # Refer to http://stackoverflow.com/a/9951851/35070
 2252         encoding = preferredencoding()
 2253     else:
 2254         encoding = sys.getfilesystemencoding()
 2255     if encoding is None:
 2256         encoding = 'utf-8'
 2257     return encoding
 2258 
 2259 
 2260 def encodeFilename(s, for_subprocess=False):
 2261     """
 2262     @param s The name of the file
 2263     """
 2264 
 2265     assert type(s) == compat_str
 2266 
 2267     # Python 3 has a Unicode API
 2268     if sys.version_info >= (3, 0):
 2269         return s
 2270 
 2271     # Pass '' directly to use Unicode APIs on Windows 2000 and up
 2272     # (Detecting Windows NT 4 is tricky because 'major >= 4' would
 2273     # match Windows 9x series as well. Besides, NT 4 is obsolete.)
 2274     if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
 2275         return s
 2276 
 2277     # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
 2278     if sys.platform.startswith('java'):
 2279         return s
 2280 
 2281     return s.encode(get_subprocess_encoding(), 'ignore')
 2282 
 2283 
 2284 def decodeFilename(b, for_subprocess=False):
 2285 
 2286     if sys.version_info >= (3, 0):
 2287         return b
 2288 
 2289     if not isinstance(b, bytes):
 2290         return b
 2291 
 2292     return b.decode(get_subprocess_encoding(), 'ignore')
 2293 
 2294 
 2295 def encodeArgument(s):
 2296     if not isinstance(s, compat_str):
 2297         # Legacy code that uses byte strings
 2298         # Uncomment the following line after fixing all post processors
 2299         # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
 2300         s = s.decode('ascii')
 2301     return encodeFilename(s, True)
 2302 
 2303 
 2304 def decodeArgument(b):
 2305     return decodeFilename(b, True)
 2306 
 2307 
 2308 def decodeOption(optval):
 2309     if optval is None:
 2310         return optval
 2311     if isinstance(optval, bytes):
 2312         optval = optval.decode(preferredencoding())
 2313 
 2314     assert isinstance(optval, compat_str)
 2315     return optval
 2316 
 2317 
 2318 def formatSeconds(secs):
 2319     if secs > 3600:
 2320         return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
 2321     elif secs > 60:
 2322         return '%d:%02d' % (secs // 60, secs % 60)
 2323     else:
 2324         return '%d' % secs
 2325 
 2326 
def make_HTTPS_handler(params, **kwargs):
    """Create a YoutubeDLHTTPSHandler configured from the options dict.

    Only 'nocheckcertificate' is read from params here; extra kwargs are
    forwarded to the handler. Tries progressively older SSL setups so the
    same code works from modern Python 3 down to Python 2.6.
    """

    # https://www.rfc-editor.org/info/rfc7301
    ALPN_PROTOCOLS = ['http/1.1']

    def set_alpn_protocols(ctx):
        # From https://github.com/yt-dlp/yt-dlp/commit/2c6dcb65fb612fc5bc5c61937bf438d3c473d8d0
        # Thanks @coletdjnz
        # Some servers may (wrongly) reject requests if ALPN extension is not sent. See:
        # https://github.com/python/cpython/issues/85140
        # https://github.com/yt-dlp/yt-dlp/issues/3878
        try:
            ctx.set_alpn_protocols(ALPN_PROTOCOLS)
        except (AttributeError, NotImplementedError):
            # Python < 2.7.10, not ssl.HAS_ALPN
            pass

    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        set_alpn_protocols(context)
        if opts_no_check_certificate:
            # Disable both hostname and certificate verification
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE

        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        # No usable SSLContext support; let the handler use defaults
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        set_alpn_protocols(context)
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
 2369 
 2370 
 2371 def bug_reports_message():
 2372     if ytdl_is_updateable():
 2373         update_cmd = 'type  youtube-dl -U  to update'
 2374     else:
 2375         update_cmd = 'see  https://yt-dl.org/update  on how to update'
 2376     msg = '; please report this issue on https://yt-dl.org/bug .'
 2377     msg += ' Make sure you are using the latest version; %s.' % update_cmd
 2378     msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
 2379     return msg
 2380 
 2381 
 2382 class YoutubeDLError(Exception):
 2383     """Base exception for YoutubeDL errors."""
 2384     pass
 2385 
 2386 
 2387 class ExtractorError(YoutubeDLError):
 2388     """Error during info extraction."""
 2389 
 2390     def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
 2391         """ tb, if given, is the original traceback (so that it can be printed out).
 2392         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
 2393         """
 2394 
 2395         if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
 2396             expected = True
 2397         if video_id is not None:
 2398             msg = video_id + ': ' + msg
 2399         if cause:
 2400             msg += ' (caused by %r)' % cause
 2401         if not expected:
 2402             msg += bug_reports_message()
 2403         super(ExtractorError, self).__init__(msg)
 2404 
 2405         self.traceback = tb
 2406         self.exc_info = sys.exc_info()  # preserve original exception
 2407         self.cause = cause
 2408         self.video_id = video_id
 2409 
 2410     def format_traceback(self):
 2411         if self.traceback is None:
 2412             return None
 2413         return ''.join(traceback.format_tb(self.traceback))
 2414 
 2415 
 2416 class UnsupportedError(ExtractorError):
 2417     def __init__(self, url):
 2418         super(UnsupportedError, self).__init__(
 2419             'Unsupported URL: %s' % url, expected=True)
 2420         self.url = url
 2421 
 2422 
 2423 class RegexNotFoundError(ExtractorError):
 2424     """Error when a regex didn't match"""
 2425     pass
 2426 
 2427 
 2428 class GeoRestrictedError(ExtractorError):
 2429     """Geographic restriction Error exception.
 2430 
 2431     This exception may be thrown when a video is not available from your
 2432     geographic location due to geographic restrictions imposed by a website.
 2433     """
 2434     def __init__(self, msg, countries=None):
 2435         super(GeoRestrictedError, self).__init__(msg, expected=True)
 2436         self.msg = msg
 2437         self.countries = countries
 2438 
 2439 
 2440 class DownloadError(YoutubeDLError):
 2441     """Download Error exception.
 2442 
 2443     This exception may be thrown by FileDownloader objects if they are not
 2444     configured to continue on errors. They will contain the appropriate
 2445     error message.
 2446     """
 2447 
 2448     def __init__(self, msg, exc_info=None):
 2449         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
 2450         super(DownloadError, self).__init__(msg)
 2451         self.exc_info = exc_info
 2452 
 2453 
 2454 class SameFileError(YoutubeDLError):
 2455     """Same File exception.
 2456 
 2457     This exception will be thrown by FileDownloader objects if they detect
 2458     multiple files would have to be downloaded to the same file on disk.
 2459     """
 2460     pass
 2461 
 2462 
 2463 class PostProcessingError(YoutubeDLError):
 2464     """Post Processing exception.
 2465 
 2466     This exception may be raised by PostProcessor's .run() method to
 2467     indicate an error in the postprocessing task.
 2468     """
 2469 
 2470     def __init__(self, msg):
 2471         super(PostProcessingError, self).__init__(msg)
 2472         self.msg = msg
 2473 
 2474 
 2475 class MaxDownloadsReached(YoutubeDLError):
 2476     """ --max-downloads limit has been reached. """
 2477     pass
 2478 
 2479 
 2480 class UnavailableVideoError(YoutubeDLError):
 2481     """Unavailable Format exception.
 2482 
 2483     This exception will be thrown when a video is requested
 2484     in a format that is not available for that video.
 2485     """
 2486     pass
 2487 
 2488 
 2489 class ContentTooShortError(YoutubeDLError):
 2490     """Content Too Short exception.
 2491 
 2492     This exception may be raised by FileDownloader objects when a file they
 2493     download is too small for what the server announced first, indicating
 2494     the connection was probably interrupted.
 2495     """
 2496 
 2497     def __init__(self, downloaded, expected):
 2498         super(ContentTooShortError, self).__init__(
 2499             'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
 2500         )
 2501         # Both in bytes
 2502         self.downloaded = downloaded
 2503         self.expected = expected
 2504 
 2505 
 2506 class XAttrMetadataError(YoutubeDLError):
 2507     def __init__(self, code=None, msg='Unknown error'):
 2508         super(XAttrMetadataError, self).__init__(msg)
 2509         self.code = code
 2510         self.msg = msg
 2511 
 2512         # Parsing code and msg
 2513         if (self.code in (errno.ENOSPC, errno.EDQUOT)
 2514                 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
 2515             self.reason = 'NO_SPACE'
 2516         elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
 2517             self.reason = 'VALUE_TOO_LONG'
 2518         else:
 2519             self.reason = 'NOT_SUPPORTED'
 2520 
 2521 
 2522 class XAttrUnavailableError(YoutubeDLError):
 2523     pass
 2524 
 2525 
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Instantiate http_class, honouring the handler's 'source_address' option.

    When 'source_address' is set in the handler's params, the connection is
    patched so outgoing sockets bind to that local address and only address
    families matching it (IPv4 vs IPv6) are tried.
    """
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # Pick the address family from the source address's syntax
            # ('.' means dotted-quad IPv4, anything else is treated as IPv6)
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    # Bind to the requested local address before connecting
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    # Remember the failure and try the next resolved address
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
 2589 
 2590 
 2591 def handle_youtubedl_headers(headers):
 2592     filtered_headers = headers
 2593 
 2594     if 'Youtubedl-no-compression' in filtered_headers:
 2595         filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
 2596         del filtered_headers['Youtubedl-no-compression']
 2597 
 2598     return filtered_headers
 2599 
 2600 
class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
    """Handler for HTTP requests and responses.

    This class, when installed with an OpenerDirector, automatically adds
    the standard headers to every HTTP request and handles gzipped and
    deflated responses from web servers. If compression is to be avoided in
    a particular request, the original request in the program code only has
    to include the HTTP header "Youtubedl-no-compression", which will be
    removed before making the real request.

    Part of this code was copied from:

    http://techknack.net/python-urllib2-handlers/

    Andrew Rowls, the author of that code, agreed to release it to the
    public domain.
    """

    def __init__(self, params, *args, **kwargs):
        # params: youtube-dl options dict, consulted later by helpers
        # (e.g. _create_http_connection reads 'source_address' from it)
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params

    def http_open(self, req):
        # Tunnel through a SOCKS proxy when the internal 'Ytdl-socks-proxy'
        # header was attached to the request; the header itself is internal
        # and must not go out on the wire.
        conn_class = compat_http_client.HTTPConnection

        socks_proxy = req.headers.get('Ytdl-socks-proxy')
        if socks_proxy:
            conn_class = make_socks_conn_class(conn_class, socks_proxy)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(functools.partial(
            _create_http_connection, self, conn_class, False),
            req)

    @staticmethod
    def deflate(data):
        # Handle both raw deflate streams and zlib-wrapped ones
        try:
            return zlib.decompress(data, -zlib.MAX_WBITS)
        except zlib.error:
            return zlib.decompress(data)

    def http_request(self, req):
        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
        # always respected by websites, some tend to give out URLs with non percent-encoded
        # non-ASCII characters (see telemb.py, ard.py [#3412])
        # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
        # To work around aforementioned issue we will replace request's original URL with
        # percent-encoded one
        # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
        # the code of this workaround has been moved here from YoutubeDL.urlopen()
        url = req.get_full_url()
        url_escaped = escape_url(url)

        # Substitute URL if any change after escaping
        if url != url_escaped:
            req = update_Request(req, url=url_escaped)

        # Add any standard header that the caller did not set explicitly
        for h, v in std_headers.items():
            # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
            # The dict keys are capitalized because of this bug by urllib
            if h.capitalize() not in req.headers:
                req.add_header(h, v)

        # Strip the internal 'Youtubedl-no-compression' marker (and
        # Accept-Encoding along with it) before the request goes out
        req.headers = handle_youtubedl_headers(req.headers)

        if sys.version_info < (2, 7) and '#' in req.get_full_url():
            # Python 2.6 is brain-dead when it comes to fragments
            req._Request__original = req._Request__original.partition('#')[0]
            req._Request__r_type = req._Request__r_type.partition('#')[0]

        return req

    def http_response(self, req, resp):
        old_resp = resp
        # gzip
        if resp.headers.get('Content-encoding', '') == 'gzip':
            content = resp.read()
            gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
            try:
                uncompressed = io.BytesIO(gz.read())
            except IOError as original_ioerror:
                # There may be junk at the end of the file
                # See http://stackoverflow.com/q/4928560/35070 for details
                # Retry decompression with up to 1023 trailing bytes removed
                for i in range(1, 1024):
                    try:
                        gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
                        uncompressed = io.BytesIO(gz.read())
                    except IOError:
                        continue
                    break
                else:
                    raise original_ioerror
            resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # deflate
        if resp.headers.get('Content-encoding', '') == 'deflate':
            gz = io.BytesIO(self.deflate(resp.read()))
            resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
            resp.msg = old_resp.msg
            del resp.headers['Content-encoding']
        # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
        # https://github.com/ytdl-org/youtube-dl/issues/6457).
        if 300 <= resp.code < 400:
            location = resp.headers.get('Location')
            if location:
                # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
                if sys.version_info >= (3, 0):
                    location = location.encode('iso-8859-1').decode('utf-8')
                else:
                    location = location.decode('utf-8')
                location_escaped = escape_url(location)
                if location != location_escaped:
                    del resp.headers['Location']
                    if sys.version_info < (3, 0):
                        location_escaped = location_escaped.encode('utf-8')
                    resp.headers['Location'] = location_escaped
        return resp

    https_request = http_request
    https_response = http_response
 2722 
 2723 
 2724 def make_socks_conn_class(base_class, socks_proxy):
 2725     assert issubclass(base_class, (
 2726         compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
 2727 
 2728     url_components = compat_urlparse.urlparse(socks_proxy)
 2729     if url_components.scheme.lower() == 'socks5':
 2730         socks_type = ProxyType.SOCKS5
 2731     elif url_components.scheme.lower() in ('socks', 'socks4'):
 2732         socks_type = ProxyType.SOCKS4
 2733     elif url_components.scheme.lower() == 'socks4a':
 2734         socks_type = ProxyType.SOCKS4A
 2735 
 2736     def unquote_if_non_empty(s):
 2737         if not s:
 2738             return s
 2739         return compat_urllib_parse_unquote_plus(s)
 2740 
 2741     proxy_args = (
 2742         socks_type,
 2743         url_components.hostname, url_components.port or 1080,
 2744         True,  # Remote DNS
 2745         unquote_if_non_empty(url_components.username),
 2746         unquote_if_non_empty(url_components.password),
 2747     )
 2748 
 2749     class SocksConnection(base_class):
 2750         def connect(self):
 2751             self.sock = sockssocket()
 2752             self.sock.setproxy(*proxy_args)
 2753             if type(self.timeout) in (int, float):
 2754                 self.sock.settimeout(self.timeout)
 2755             self.sock.connect((self.host, self.port))
 2756 
 2757             if isinstance(self, compat_http_client.HTTPSConnection):
 2758                 if hasattr(self, '_context'):  # Python > 2.6
 2759                     self.sock = self._context.wrap_socket(
 2760                         self.sock, server_hostname=self.host)
 2761                 else:
 2762                     self.sock = ssl.wrap_socket(self.sock)
 2763 
 2764     return SocksConnection
 2765 
 2766 
 2767 class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
 2768     def __init__(self, params, https_conn_class=None, *args, **kwargs):
 2769         compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
 2770         self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
 2771         self._params = params
 2772 
 2773     def https_open(self, req):
 2774         kwargs = {}
 2775         conn_class = self._https_conn_class
 2776 
 2777         if hasattr(self, '_context'):  # python > 2.6
 2778             kwargs['context'] = self._context
 2779         if hasattr(self, '_check_hostname'):  # python 3.x
 2780             kwargs['check_hostname'] = self._check_hostname
 2781 
 2782         socks_proxy = req.headers.get('Ytdl-socks-proxy')
 2783         if socks_proxy:
 2784             conn_class = make_socks_conn_class(conn_class, socks_proxy)
 2785             del req.headers['Ytdl-socks-proxy']
 2786 
 2787         return self.do_open(functools.partial(
 2788             _create_http_connection, self, conn_class, True),
 2789             req, **kwargs)
 2790 
 2791 
 2792 class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
 2793     """
 2794     See [1] for cookie file format.
 2795 
 2796     1. https://curl.haxx.se/docs/http-cookies.html
 2797     """
 2798     _HTTPONLY_PREFIX = '#HttpOnly_'
 2799     _ENTRY_LEN = 7
 2800     _HEADER = '''# Netscape HTTP Cookie File
 2801 # This file is generated by youtube-dl.  Do not edit.
 2802 
 2803 '''
 2804     _CookieFileEntry = collections.namedtuple(
 2805         'CookieFileEntry',
 2806         ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
 2807 
    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.

        Raises ValueError when no filename is given and the jar has none.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        # NOTE(review): this mutates the cookies in the jar itself, not just
        # the serialized output
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                # Honour discard/expiry flags unless explicitly ignored
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                # One tab-separated Netscape-format line per cookie
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')
 2859 
 2860     def load(self, filename=None, ignore_discard=False, ignore_expires=False):
 2861         """Load cookies from a file."""
 2862         if filename is None:
 2863             if self.filename is not None:
 2864                 filename = self.filename
 2865             else:
 2866                 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
 2867 
 2868         def prepare_line(line):
 2869             if line.startswith(self._HTTPONLY_PREFIX):
 2870                 line = line[len(self._HTTPONLY_PREFIX):]
 2871             # comments and empty lines are fine
 2872             if line.startswith('#') or not line.strip():
 2873                 return line
 2874             cookie_list = line.split('\t')
 2875             if len(cookie_list) != self._ENTRY_LEN:
 2876                 raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
 2877             cookie = self._CookieFileEntry(*cookie_list)
 2878             if cookie.expires_at and not cookie.expires_at.isdigit():
 2879                 raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
 2880             return line
 2881 
 2882         cf = io.StringIO()
 2883         with io.open(filename, encoding='utf-8') as f:
 2884             for line in f:
 2885                 try:
 2886                     cf.write(prepare_line(line))
 2887                 except compat_cookiejar.LoadError as e:
 2888                     write_string(
 2889                         'WARNING: skipping cookie file entry due to %s: %r\n'
 2890                         % (e, line), sys.stderr)
 2891                     continue
 2892         cf.seek(0)
 2893         self._really_load(cf, filename, ignore_discard, ignore_expires)
 2894         # Session cookies are denoted by either `expires` field set to
 2895         # an empty string or 0. MozillaCookieJar only recognizes the former
 2896         # (see [1]). So we need force the latter to be recognized as session
 2897         # cookies on our own.
 2898         # Session cookies may be important for cookies-based authentication,
 2899         # e.g. usually, when user does not check 'Remember me' check box while
 2900         # logging in on a site, some important cookies are stored as session
 2901         # cookies so that not recognizing them will result in failed login.
 2902         # 1. https://bugs.python.org/issue17164
 2903         for cookie in self:
 2904             # Treat `expires=0` cookies as session cookies
 2905             if cookie.expires == 0:
 2906                 cookie.expires = None
 2907                 cookie.discard = True
 2908 
 2909 
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """HTTPCookieProcessor that applies the same cookie handling to HTTPS."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # NOTE: the workaround below is currently disabled; kept for reference.
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # Route HTTPS traffic through the same (HTTP) cookie logic.
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
 2932 
 2933 
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    # Handle 301/303/307/308 with the stock 302 logic (adds 308 support).
    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        # Follow redirects only for GET/HEAD, plus POST on 301/302/303.
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # On python 2 urlh.geturl() may sometimes return redirect URL
        # as byte string instead of unicode. This workaround allows
        # to force it always return unicode.
        if sys.version_info[0] < 3:
            newurl = compat_str(newurl)

        # Be conciliant with URIs containing a space.  This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # Drop body-describing headers: the redirected request carries no body.
        CONTENT_HEADERS = ("content-length", "content-type")
        # NB: don't use dict comprehension for python 2.6 compatibility
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True)
 2989 
 2990 
 2991 def extract_timezone(date_str):
 2992     m = re.search(
 2993         r'''(?x)
 2994             ^.{8,}?                                              # >=8 char non-TZ prefix, if present
 2995             (?P<tz>Z|                                            # just the UTC Z, or
 2996                 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or
 2997                    (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits
 2998                    [ ]?                                          # optional space
 2999                 (?P<sign>\+|-)                                   # +/-
 3000                 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})       # hh[:]mm
 3001             $)
 3002         ''', date_str)
 3003     if not m:
 3004         m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
 3005         timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
 3006         if timezone is not None:
 3007             date_str = date_str[:-len(m.group('tz'))]
 3008         timezone = datetime.timedelta(hours=timezone or 0)
 3009     else:
 3010         date_str = date_str[:-len(m.group('tz'))]
 3011         if not m.group('sign'):
 3012             timezone = datetime.timedelta()
 3013         else:
 3014             sign = 1 if m.group('sign') == '+' else -1
 3015             timezone = datetime.timedelta(
 3016                 hours=sign * int(m.group('hours')),
 3017                 minutes=sign * int(m.group('minutes')))
 3018     return timezone, date_str
 3019 
 3020 
 3021 def parse_iso8601(date_str, delimiter='T', timezone=None):
 3022     """ Return a UNIX timestamp from the given date """
 3023 
 3024     if date_str is None:
 3025         return None
 3026 
 3027     date_str = re.sub(r'\.[0-9]+', '', date_str)
 3028 
 3029     if timezone is None:
 3030         timezone, date_str = extract_timezone(date_str)
 3031 
 3032     try:
 3033         date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
 3034         dt = datetime.datetime.strptime(date_str, date_format) - timezone
 3035         return calendar.timegm(dt.timetuple())
 3036     except ValueError:
 3037         pass
 3038 
 3039 
 3040 def date_formats(day_first=True):
 3041     return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
 3042 
 3043 
 3044 def unified_strdate(date_str, day_first=True):
 3045     """Return a string with the date in the format YYYYMMDD"""
 3046 
 3047     if date_str is None:
 3048         return None
 3049     upload_date = None
 3050     # Replace commas
 3051     date_str = date_str.replace(',', ' ')
 3052     # Remove AM/PM + timezone
 3053     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
 3054     _, date_str = extract_timezone(date_str)
 3055 
 3056     for expression in date_formats(day_first):
 3057         try:
 3058             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
 3059         except ValueError:
 3060             pass
 3061     if upload_date is None:
 3062         timetuple = email.utils.parsedate_tz(date_str)
 3063         if timetuple:
 3064             try:
 3065                 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
 3066             except ValueError:
 3067                 pass
 3068     if upload_date is not None:
 3069         return compat_str(upload_date)
 3070 
 3071 
 3072 def unified_timestamp(date_str, day_first=True):
 3073     if date_str is None:
 3074         return None
 3075 
 3076     date_str = re.sub(r'\s+', ' ', re.sub(
 3077         r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))
 3078 
 3079     pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
 3080     timezone, date_str = extract_timezone(date_str)
 3081 
 3082     # Remove AM/PM + timezone
 3083     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
 3084 
 3085     # Remove unrecognized timezones from ISO 8601 alike timestamps
 3086     m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
 3087     if m:
 3088         date_str = date_str[:-len(m.group('tz'))]
 3089 
 3090     # Python only supports microseconds, so remove nanoseconds
 3091     m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
 3092     if m:
 3093         date_str = m.group(1)
 3094 
 3095     for expression in date_formats(day_first):
 3096         try:
 3097             dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
 3098             return calendar.timegm(dt.timetuple())
 3099         except ValueError:
 3100             pass
 3101     timetuple = email.utils.parsedate_tz(date_str)
 3102     if timetuple:
 3103         return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
 3104 
 3105 
 3106 def determine_ext(url, default_ext='unknown_video'):
 3107     if url is None or '.' not in url:
 3108         return default_ext
 3109     guess = url.partition('?')[0].rpartition('.')[2]
 3110     if re.match(r'^[A-Za-z0-9]+$', guess):
 3111         return guess
 3112     # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
 3113     elif guess.rstrip('/') in KNOWN_EXTENSIONS:
 3114         return guess.rstrip('/')
 3115     else:
 3116         return default_ext
 3117 
 3118 
 3119 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
 3120     return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
 3121 
 3122 
 3123 def date_from_str(date_str):
 3124     """
 3125     Return a datetime object from a string in the format YYYYMMDD or
 3126     (now|today)[+-][0-9](day|week|month|year)(s)?"""
 3127     today = datetime.date.today()
 3128     if date_str in ('now', 'today'):
 3129         return today
 3130     if date_str == 'yesterday':
 3131         return today - datetime.timedelta(days=1)
 3132     match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
 3133     if match is not None:
 3134         sign = match.group('sign')
 3135         time = int(match.group('time'))
 3136         if sign == '-':
 3137             time = -time
 3138         unit = match.group('unit')
 3139         # A bad approximation?
 3140         if unit == 'month':
 3141             unit = 'day'
 3142             time *= 30
 3143         elif unit == 'year':
 3144             unit = 'day'
 3145             time *= 365
 3146         unit += 's'
 3147         delta = datetime.timedelta(**{unit: time})
 3148         return today + delta
 3149     return datetime.datetime.strptime(date_str, '%Y%m%d').date()
 3150 
 3151 
 3152 def hyphenate_date(date_str):
 3153     """
 3154     Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
 3155     match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
 3156     if match is not None:
 3157         return '-'.join(match.groups())
 3158     else:
 3159         return date_str
 3160 
 3161 
 3162 class DateRange(object):
 3163     """Represents a time interval between two dates"""
 3164 
 3165     def __init__(self, start=None, end=None):
 3166         """start and end must be strings in the format accepted by date"""
 3167         if start is not None:
 3168             self.start = date_from_str(start)
 3169         else:
 3170             self.start = datetime.datetime.min.date()
 3171         if end is not None:
 3172             self.end = date_from_str(end)
 3173         else:
 3174             self.end = datetime.datetime.max.date()
 3175         if self.start > self.end:
 3176             raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
 3177 
 3178     @classmethod
 3179     def day(cls, day):
 3180         """Returns a range that only contains the given day"""
 3181         return cls(day, day)
 3182 
 3183     def __contains__(self, date):
 3184         """Check if the date is in the range"""
 3185         if not isinstance(date, datetime.date):
 3186             date = date_from_str(date)
 3187         return self.start <= date <= self.end
 3188 
 3189     def __str__(self):
 3190         return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
 3191 
 3192 
 3193 def platform_name():
 3194     """ Returns the platform name as a compat_str """
 3195     res = platform.platform()
 3196     if isinstance(res, bytes):
 3197         res = res.decode(preferredencoding())
 3198 
 3199     assert isinstance(res, compat_str)
 3200     return res
 3201 
 3202 
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Map Python file descriptors (stdout=1, stderr=2) to the Win32
    # STD_OUTPUT_HANDLE / STD_ERROR_HANDLE constants.
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # WriteConsoleW only works on a real console; redirected/remote
        # handles must fall back to the regular write path.
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual Plane
        # (these occupy two UTF-16 code units and need special handling).
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
 3276 
 3277 
 3278 def write_string(s, out=None, encoding=None):
 3279     if out is None:
 3280         out = sys.stderr
 3281     assert type(s) == compat_str
 3282 
 3283     if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
 3284         if _windows_write_string(s, out):
 3285             return
 3286 
 3287     if ('b' in getattr(out, 'mode', '')
 3288             or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
 3289         byt = s.encode(encoding or preferredencoding(), 'ignore')
 3290         out.write(byt)
 3291     elif hasattr(out, 'buffer'):
 3292         enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
 3293         byt = s.encode(enc, 'ignore')
 3294         out.buffer.write(byt)
 3295     else:
 3296         out.write(s)
 3297     out.flush()
 3298 
 3299 
 3300 def bytes_to_intlist(bs):
 3301     if not bs:
 3302         return []
 3303     if isinstance(bs[0], int):  # Python 3
 3304         return list(bs)
 3305     else:
 3306         return [ord(c) for c in bs]
 3307 
 3308 
 3309 def intlist_to_bytes(xs):
 3310     if not xs:
 3311         return b''
 3312     return compat_struct_pack('%dB' % len(xs), *xs)
 3313 
 3314 
# Cross-platform file locking
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    # OVERLAPPED structure required by LockFileEx/UnlockFileEx; the offset
    # fields select where in the file the lock starts.
    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Byte range covering the whole file (low/high halves of the length)
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        # Lock the entire file; 0x2 = LOCKFILE_EXCLUSIVE_LOCK
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        # Must be called with the OVERLAPPED pointer stored by _lock_file
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
 3388 
 3389 
 3390 class locked_file(object):
 3391     def __init__(self, filename, mode, encoding=None):
 3392         assert mode in ['r', 'a', 'w']
 3393         self.f = io.open(filename, mode, encoding=encoding)
 3394         self.mode = mode
 3395 
 3396     def __enter__(self):
 3397         exclusive = self.mode != 'r'
 3398         try:
 3399             _lock_file(self.f, exclusive)
 3400         except IOError:
 3401             self.f.close()
 3402             raise
 3403         return self
 3404 
 3405     def __exit__(self, etype, value, traceback):
 3406         try:
 3407             _unlock_file(self.f)
 3408         finally:
 3409             self.f.close()
 3410 
 3411     def __iter__(self):
 3412         return iter(self.f)
 3413 
 3414     def write(self, *args):
 3415         return self.f.write(*args)
 3416 
 3417     def read(self, *args):
 3418         return self.f.read(*args)
 3419 
 3420 
 3421 def get_filesystem_encoding():
 3422     encoding = sys.getfilesystemencoding()
 3423     return encoding if encoding is not None else 'utf-8'
 3424 
 3425 
 3426 def shell_quote(args):
 3427     quoted_args = []
 3428     encoding = get_filesystem_encoding()
 3429     for a in args:
 3430         if isinstance(a, bytes):
 3431             # We may get a filename encoded with 'encodeFilename'
 3432             a = a.decode(encoding)
 3433         quoted_args.append(compat_shlex_quote(a))
 3434     return ' '.join(quoted_args)
 3435 
 3436 
 3437 def smuggle_url(url, data):
 3438     """ Pass additional data in a URL for internal use. """
 3439 
 3440     url, idata = unsmuggle_url(url, {})
 3441     data.update(idata)
 3442     sdata = compat_urllib_parse_urlencode(
 3443         {'__youtubedl_smuggle': json.dumps(data)})
 3444     return url + '#' + sdata
 3445 
 3446 
 3447 def unsmuggle_url(smug_url, default=None):
 3448     if '#__youtubedl_smuggle' not in smug_url:
 3449         return smug_url, default
 3450     url, _, sdata = smug_url.rpartition('#')
 3451     jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
 3452     data = json.loads(jsond)
 3453     return url, data
 3454 
 3455 
 3456 def format_bytes(bytes):
 3457     if bytes is None:
 3458         return 'N/A'
 3459     if type(bytes) is str:
 3460         bytes = float(bytes)
 3461     if bytes == 0.0:
 3462         exponent = 0
 3463     else:
 3464         exponent = int(math.log(bytes, 1024.0))
 3465     suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
 3466     converted = float(bytes) / float(1024 ** exponent)
 3467     return '%.2f%s' % (converted, suffix)
 3468 
 3469 
 3470 def lookup_unit_table(unit_table, s):
 3471     units_re = '|'.join(re.escape(u) for u in unit_table)
 3472     m = re.match(
 3473         r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
 3474     if not m:
 3475         return None
 3476     num_str = m.group('num').replace(',', '.')
 3477     mult = unit_table[m.group('unit')]
 3478     return int(float(num_str) * mult)
 3479 
 3480 
def parse_filesize(s):
    """Parse a human-readable file size (e.g. '5.6 MiB') into bytes.

    Returns an int, or None when *s* is None or unparseable.
    """
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'kb': 1000,
        'kilobytes': 1000,
        'kibibytes': 1024,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'mb': 1000 ** 2,
        'megabytes': 1000 ** 2,
        'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3,
        'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'tb': 1000 ** 4,
        'terabytes': 1000 ** 4,
        'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'pb': 1000 ** 5,
        'petabytes': 1000 ** 5,
        'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'eb': 1000 ** 6,
        'exabytes': 1000 ** 6,
        'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7,
        'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
        'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8,
        'yobibytes': 1024 ** 8,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
 3550 
 3551 
 3552 def parse_count(s):
 3553     if s is None:
 3554         return None
 3555 
 3556     s = s.strip()
 3557 
 3558     if re.match(r'^[\d,.]+$', s):
 3559         return str_to_int(s)
 3560 
 3561     _UNIT_TABLE = {
 3562         'k': 1000,
 3563         'K': 1000,
 3564         'm': 1000 ** 2,
 3565         'M': 1000 ** 2,
 3566         'kk': 1000 ** 2,
 3567         'KK': 1000 ** 2,
 3568     }
 3569 
 3570     return lookup_unit_table(_UNIT_TABLE, s)
 3571 
 3572 
 3573 def parse_resolution(s):
 3574     if s is None:
 3575         return {}
 3576 
 3577     mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
 3578     if mobj:
 3579         return {
 3580             'width': int(mobj.group('w')),
 3581             'height': int(mobj.group('h')),
 3582         }
 3583 
 3584     mobj = re.search(r'\b(\d+)[pPiI]\b', s)
 3585     if mobj:
 3586         return {'height': int(mobj.group(1))}
 3587 
 3588     mobj = re.search(r'\b([48])[kK]\b', s)
 3589     if mobj:
 3590         return {'height': int(mobj.group(1)) * 540}
 3591 
 3592     return {}
 3593 
 3594 
 3595 def parse_bitrate(s):
 3596     if not isinstance(s, compat_str):
 3597         return
 3598     mobj = re.search(r'\b(\d+)\s*kbps', s)
 3599     if mobj:
 3600         return int(mobj.group(1))
 3601 
 3602 
 3603 def month_by_name(name, lang='en'):
 3604     """ Return the number of a month by (locale-independently) English name """
 3605 
 3606     month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
 3607 
 3608     try:
 3609         return month_names.index(name) + 1
 3610     except ValueError:
 3611         return None
 3612 
 3613 
 3614 def month_by_abbreviation(abbrev):
 3615     """ Return the number of a month by (locale-independently) English
 3616         abbreviations """
 3617 
 3618     try:
 3619         return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
 3620     except ValueError:
 3621         return None
 3622 
 3623 
 3624 def fix_xml_ampersands(xml_str):
 3625     """Replace all the '&' by '&amp;' in XML"""
 3626     return re.sub(
 3627         r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
 3628         '&amp;',
 3629         xml_str)
 3630 
 3631 
def setproctitle(title):
    """Set the process name shown by tools like ps, via libc prctl (Linux only).

    Silently does nothing on platforms without a loadable libc.so.6 or
    without the prctl symbol.
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        # no glibc available (non-Linux); nothing to do
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    # prctl expects a writable C buffer, not a Python string
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # 15 is PR_SET_NAME (see linux/prctl.h)
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
 3656 
 3657 
 3658 def remove_start(s, start):
 3659     return s[len(start):] if s is not None and s.startswith(start) else s
 3660 
 3661 
 3662 def remove_end(s, end):
 3663     return s[:-len(end)] if s is not None and s.endswith(end) else s
 3664 
 3665 
 3666 def remove_quotes(s):
 3667     if s is None or len(s) < 2:
 3668         return s
 3669     for quote in ('"', "'", ):
 3670         if s[0] == quote and s[-1] == quote:
 3671             return s[1:-1]
 3672     return s
 3673 
 3674 
 3675 def url_basename(url):
 3676     path = compat_urlparse.urlparse(url).path
 3677     return path.strip('/').split('/')[-1]
 3678 
 3679 
 3680 def base_url(url):
 3681     return re.match(r'https?://[^?#&]+/', url).group()
 3682 
 3683 
 3684 def urljoin(base, path):
 3685     if isinstance(path, bytes):
 3686         path = path.decode('utf-8')
 3687     if not isinstance(path, compat_str) or not path:
 3688         return None
 3689     if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
 3690         return path
 3691     if isinstance(base, bytes):
 3692         base = base.decode('utf-8')
 3693     if not isinstance(base, compat_str) or not re.match(
 3694             r'^(?:https?:)?//', base):
 3695         return None
 3696     return compat_urlparse.urljoin(base, path)
 3697 
 3698 
 3699 class HEADRequest(compat_urllib_request.Request):
 3700     def get_method(self):
 3701         return 'HEAD'
 3702 
 3703 
 3704 class PUTRequest(compat_urllib_request.Request):
 3705     def get_method(self):
 3706         return 'PUT'
 3707 
 3708 
 3709 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
 3710     if get_attr:
 3711         if v is not None:
 3712             v = getattr(v, get_attr, None)
 3713     if v in (None, ''):
 3714         return default
 3715     try:
 3716         return int(v) * invscale // scale
 3717     except (ValueError, TypeError, OverflowError):
 3718         return default
 3719 
 3720 
 3721 def str_or_none(v, default=None):
 3722     return default if v is None else compat_str(v)
 3723 
 3724 
 3725 def str_to_int(int_str):
 3726     """ A more relaxed version of int_or_none """
 3727     if isinstance(int_str, compat_integer_types):
 3728         return int_str
 3729     elif isinstance(int_str, compat_str):
 3730         int_str = re.sub(r'[,\.\+]', '', int_str)
 3731         return int_or_none(int_str)
 3732 
 3733 
 3734 def float_or_none(v, scale=1, invscale=1, default=None):
 3735     if v is None:
 3736         return default
 3737     try:
 3738         return float(v) * invscale / scale
 3739     except (ValueError, TypeError):
 3740         return default
 3741 
 3742 
 3743 def bool_or_none(v, default=None):
 3744     return v if isinstance(v, bool) else default
 3745 
 3746 
 3747 def strip_or_none(v, default=None):
 3748     return v.strip() if isinstance(v, compat_str) else default
 3749 
 3750 
 3751 def url_or_none(url):
 3752     if not url or not isinstance(url, compat_str):
 3753         return None
 3754     url = url.strip()
 3755     return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
 3756 
 3757 
 3758 def parse_duration(s):
 3759     if not isinstance(s, compat_basestring):
 3760         return None
 3761 
 3762     s = s.strip()
 3763 
 3764     days, hours, mins, secs, ms = [None] * 5
 3765     m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
 3766     if m:
 3767         days, hours, mins, secs, ms = m.groups()
 3768     else:
 3769         m = re.match(
 3770             r'''(?ix)(?:P?
 3771                 (?:
 3772                     [0-9]+\s*y(?:ears?)?\s*
 3773                 )?
 3774                 (?:
 3775                     [0-9]+\s*m(?:onths?)?\s*
 3776                 )?
 3777                 (?:
 3778                     [0-9]+\s*w(?:eeks?)?\s*
 3779                 )?
 3780                 (?:
 3781                     (?P<days>[0-9]+)\s*d(?:ays?)?\s*
 3782                 )?
 3783                 T)?
 3784                 (?:
 3785                     (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
 3786                 )?
 3787                 (?:
 3788                     (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
 3789                 )?
 3790                 (?:
 3791                     (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
 3792                 )?Z?$''', s)
 3793         if m:
 3794             days, hours, mins, secs, ms = m.groups()
 3795         else:
 3796             m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
 3797             if m:
 3798                 hours, mins = m.groups()
 3799             else:
 3800                 return None
 3801 
 3802     duration = 0
 3803     if secs:
 3804         duration += float(secs)
 3805     if mins:
 3806         duration += float(mins) * 60
 3807     if hours:
 3808         duration += float(hours) * 60 * 60
 3809     if days:
 3810         duration += float(days) * 24 * 60 * 60
 3811     if ms:
 3812         duration += float(ms)
 3813     return duration
 3814 
 3815 
 3816 def prepend_extension(filename, ext, expected_real_ext=None):
 3817     name, real_ext = os.path.splitext(filename)
 3818     return (
 3819         '{0}.{1}{2}'.format(name, ext, real_ext)
 3820         if not expected_real_ext or real_ext[1:] == expected_real_ext
 3821         else '{0}.{1}'.format(filename, ext))
 3822 
 3823 
 3824 def replace_extension(filename, ext, expected_real_ext=None):
 3825     name, real_ext = os.path.splitext(filename)
 3826     return '{0}.{1}'.format(
 3827         name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
 3828         ext)
 3829 
 3830 
 3831 def check_executable(exe, args=[]):
 3832     """ Checks if the given binary is installed somewhere in PATH, and returns its name.
 3833     args can be a list of arguments for a short output (like -version) """
 3834     try:
 3835         process_communicate_or_kill(subprocess.Popen(
 3836             [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE))
 3837     except OSError:
 3838         return False
 3839     return exe
 3840 
 3841 
 3842 def get_exe_version(exe, args=['--version'],
 3843                     version_re=None, unrecognized='present'):
 3844     """ Returns the version of the specified executable,
 3845     or False if the executable is not present """
 3846     try:
 3847         # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
 3848         # SIGTTOU if youtube-dl is run in the background.
 3849         # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
 3850         out, _ = process_communicate_or_kill(subprocess.Popen(
 3851             [encodeArgument(exe)] + args,
 3852             stdin=subprocess.PIPE,
 3853             stdout=subprocess.PIPE, stderr=subprocess.STDOUT))
 3854     except OSError:
 3855         return False
 3856     if isinstance(out, bytes):  # Python 2.x
 3857         out = out.decode('ascii', 'ignore')
 3858     return detect_exe_version(out, version_re, unrecognized)
 3859 
 3860 
 3861 def detect_exe_version(output, version_re=None, unrecognized='present'):
 3862     assert isinstance(output, compat_str)
 3863     if version_re is None:
 3864         version_re = r'version\s+([-0-9._a-zA-Z]+)'
 3865     m = re.search(version_re, output)
 3866     if m:
 3867         return m.group(1)
 3868     else:
 3869         return unrecognized
 3870 
 3871 
class LazyList(compat_collections_abc.Sequence):
    """Lazy immutable list from an iterable
    Note that slices of a LazyList are lists and not LazyList"""

    class IndexError(IndexError):
        # Shadows the builtin name inside this class so callers can catch
        # LazyList.IndexError; still a subclass of the builtin IndexError.
        def __init__(self, cause=None):
            if cause:
                # reproduce `raise from`
                self.__cause__ = cause
            super(IndexError, self).__init__()

    def __init__(self, iterable, **kwargs):
        # kwarg-only
        reverse = kwargs.get('reverse', False)  # iterate/index from the end
        _cache = kwargs.get('_cache')  # internal: share a cache (used by __reversed__/__copy__)

        self._iterable = iter(iterable)
        self._cache = [] if _cache is None else _cache
        self._reversed = reverse

    def __iter__(self):
        if self._reversed:
            # We need to consume the entire iterable to iterate in reverse
            for item in self.exhaust():
                yield item
            return
        # Serve already-cached items first, then pull (and cache) the rest lazily
        for item in self._cache:
            yield item
        for item in self._iterable:
            self._cache.append(item)
            yield item

    def _exhaust(self):
        # Pull every remaining item into the cache (always forward order)
        self._cache.extend(self._iterable)
        self._iterable = []  # Discard the emptied iterable to make it pickle-able
        return self._cache

    def exhaust(self):
        """Evaluate the entire iterable"""
        return self._exhaust()[::-1 if self._reversed else 1]

    @staticmethod
    def _reverse_index(x):
        # Map a forward index to the equivalent from-the-end index (~x == -x - 1)
        return None if x is None else ~x

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            if self._reversed:
                idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self._reversed:
                idx = self._reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')
        # Negative or open-ended-against-direction indexing needs the full length
        if ((start or 0) < 0 or (stop or 0) < 0
                or (start is None and step < 0)
                or (stop is None and step > 0)):
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            self._exhaust()
            try:
                return self._cache[idx]
            except IndexError as e:
                raise self.IndexError(e)
        # Otherwise consume only as many items as the index/slice requires
        n = max(start or 0, stop or 0) - len(self._cache) + 1
        if n > 0:
            self._cache.extend(itertools.islice(self._iterable, n))
        try:
            return self._cache[idx]
        except IndexError as e:
            raise self.IndexError(e)

    def __bool__(self):
        # Truthy iff at least one element exists (probe the cheap end)
        try:
            self[-1] if self._reversed else self[0]
        except self.IndexError:
            return False
        return True

    def __len__(self):
        self._exhaust()
        return len(self._cache)

    def __reversed__(self):
        # Shares the cache with the original; only the direction flag differs
        return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)

    def __copy__(self):
        return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
 3969 
 3970 
 3971 class PagedList(object):
 3972     def __len__(self):
 3973         # This is only useful for tests
 3974         return len(self.getslice())
 3975 
 3976 
 3977 class OnDemandPagedList(PagedList):
 3978     def __init__(self, pagefunc, pagesize, use_cache=True):
 3979         self._pagefunc = pagefunc
 3980         self._pagesize = pagesize
 3981         self._use_cache = use_cache
 3982         if use_cache:
 3983             self._cache = {}
 3984 
 3985     def getslice(self, start=0, end=None):
 3986         res = []
 3987         for pagenum in itertools.count(start // self._pagesize):
 3988             firstid = pagenum * self._pagesize
 3989             nextfirstid = pagenum * self._pagesize + self._pagesize
 3990             if start >= nextfirstid:
 3991                 continue
 3992 
 3993             page_results = None
 3994             if self._use_cache:
 3995                 page_results = self._cache.get(pagenum)
 3996             if page_results is None:
 3997                 page_results = list(self._pagefunc(pagenum))
 3998             if self._use_cache:
 3999                 self._cache[pagenum] = page_results
 4000 
 4001             startv = (
 4002                 start % self._pagesize
 4003                 if firstid <= start < nextfirstid
 4004                 else 0)
 4005 
 4006             endv = (
 4007                 ((end - 1) % self._pagesize) + 1
 4008                 if (end is not None and firstid <= end <= nextfirstid)
 4009                 else None)
 4010 
 4011             if startv != 0 or endv is not None:
 4012                 page_results = page_results[startv:endv]
 4013             res.extend(page_results)
 4014 
 4015             # A little optimization - if current page is not "full", ie. does
 4016             # not contain page_size videos then we can assume that this page
 4017             # is the last one - there are no more ids on further pages -
 4018             # i.e. no need to query again.
 4019             if len(page_results) + startv < self._pagesize:
 4020                 break
 4021 
 4022             # If we got the whole page, but the next page is not interesting,
 4023             # break out early as well
 4024             if end == nextfirstid:
 4025                 break
 4026         return res
 4027 
 4028 
 4029 class InAdvancePagedList(PagedList):
 4030     def __init__(self, pagefunc, pagecount, pagesize):
 4031         self._pagefunc = pagefunc
 4032         self._pagecount = pagecount
 4033         self._pagesize = pagesize
 4034 
 4035     def getslice(self, start=0, end=None):
 4036         res = []
 4037         start_page = start // self._pagesize
 4038         end_page = (
 4039             self._pagecount if end is None else (end // self._pagesize + 1))
 4040         skip_elems = start - start_page * self._pagesize
 4041         only_more = None if end is None else end - start
 4042         for pagenum in range(start_page, end_page):
 4043             page = list(self._pagefunc(pagenum))
 4044             if skip_elems:
 4045                 page = page[skip_elems:]
 4046                 skip_elems = None
 4047             if only_more is not None:
 4048                 if len(page) < only_more:
 4049                     only_more -= len(page)
 4050                 else:
 4051                     page = page[:only_more]
 4052                     res.extend(page)
 4053                     break
 4054             res.extend(page)
 4055         return res
 4056 
 4057 
 4058 def uppercase_escape(s):
 4059     unicode_escape = codecs.getdecoder('unicode_escape')
 4060     return re.sub(
 4061         r'\\U[0-9a-fA-F]{8}',
 4062         lambda m: unicode_escape(m.group(0))[0],
 4063         s)
 4064 
 4065 
 4066 def lowercase_escape(s):
 4067     unicode_escape = codecs.getdecoder('unicode_escape')
 4068     return re.sub(
 4069         r'\\u[0-9a-fA-F]{4}',
 4070         lambda m: unicode_escape(m.group(0))[0],
 4071         s)
 4072 
 4073 
 4074 def escape_rfc3986(s):
 4075     """Escape non-ASCII characters as suggested by RFC 3986"""
 4076     if sys.version_info < (3, 0) and isinstance(s, compat_str):
 4077         s = s.encode('utf-8')
 4078     # ensure unicode: after quoting, it can always be converted
 4079     return compat_str(compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]"))
 4080 
 4081 
 4082 def escape_url(url):
 4083     """Escape URL as suggested by RFC 3986"""
 4084     url_parsed = compat_urllib_parse_urlparse(url)
 4085     return url_parsed._replace(
 4086         netloc=url_parsed.netloc.encode('idna').decode('ascii'),
 4087         path=escape_rfc3986(url_parsed.path),
 4088         params=escape_rfc3986(url_parsed.params),
 4089         query=escape_rfc3986(url_parsed.query),
 4090         fragment=escape_rfc3986(url_parsed.fragment)
 4091     ).geturl()
 4092 
 4093 
 4094 def read_batch_urls(batch_fd):
 4095     def fixup(url):
 4096         if not isinstance(url, compat_str):
 4097             url = url.decode('utf-8', 'replace')
 4098         BOM_UTF8 = '\xef\xbb\xbf'
 4099         if url.startswith(BOM_UTF8):
 4100             url = url[len(BOM_UTF8):]
 4101         url = url.strip()
 4102         if url.startswith(('#', ';', ']')):
 4103             return False
 4104         return url
 4105 
 4106     with contextlib.closing(batch_fd) as fd:
 4107         return [url for url in map(fixup, fd) if url]
 4108 
 4109 
 4110 def urlencode_postdata(*args, **kargs):
 4111     return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
 4112 
 4113 
 4114 def update_url_query(url, query):
 4115     if not query:
 4116         return url
 4117     parsed_url = compat_urlparse.urlparse(url)
 4118     qs = compat_parse_qs(parsed_url.query)
 4119     qs.update(query)
 4120     return compat_urlparse.urlunparse(parsed_url._replace(
 4121         query=compat_urllib_parse_urlencode(qs, True)))
 4122 
 4123 
 4124 def update_Request(req, url=None, data=None, headers={}, query={}):
 4125     req_headers = req.headers.copy()
 4126     req_headers.update(headers)
 4127     req_data = data or req.data
 4128     req_url = update_url_query(url or req.get_full_url(), query)
 4129     req_get_method = req.get_method()
 4130     if req_get_method == 'HEAD':
 4131         req_type = HEADRequest
 4132     elif req_get_method == 'PUT':
 4133         req_type = PUTRequest
 4134     else:
 4135         req_type = compat_urllib_request.Request
 4136     new_req = req_type(
 4137         req_url, data=req_data, headers=req_headers,
 4138         origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
 4139     if hasattr(req, 'timeout'):
 4140         new_req.timeout = req.timeout
 4141     return new_req
 4142 
 4143 
 4144 def _multipart_encode_impl(data, boundary):
 4145     content_type = 'multipart/form-data; boundary=%s' % boundary
 4146 
 4147     out = b''
 4148     for k, v in data.items():
 4149         out += b'--' + boundary.encode('ascii') + b'\r\n'
 4150         if isinstance(k, compat_str):
 4151             k = k.encode('utf-8')
 4152         if isinstance(v, compat_str):
 4153             v = v.encode('utf-8')
 4154         # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
 4155         # suggests sending UTF-8 directly. Firefox sends UTF-8, too
 4156         content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
 4157         if boundary.encode('ascii') in content:
 4158             raise ValueError('Boundary overlaps with data')
 4159         out += content
 4160 
 4161     out += b'--' + boundary.encode('ascii') + b'--\r\n'
 4162 
 4163     return out, content_type
 4164 
 4165 
 4166 def multipart_encode(data, boundary=None):
 4167     '''
 4168     Encode a dict to RFC 7578-compliant form-data
 4169 
 4170     data:
 4171         A dict where keys and values can be either Unicode or bytes-like
 4172         objects.
 4173     boundary:
 4174         If specified a Unicode object, it's used as the boundary. Otherwise
 4175         a random boundary is generated.
 4176 
 4177     Reference: https://tools.ietf.org/html/rfc7578
 4178     '''
 4179     has_specified_boundary = boundary is not None
 4180 
 4181     while True:
 4182         if boundary is None:
 4183             boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
 4184 
 4185         try:
 4186             out, content_type = _multipart_encode_impl(data, boundary)
 4187             break
 4188         except ValueError:
 4189             if has_specified_boundary:
 4190                 raise
 4191             boundary = None
 4192 
 4193     return out, content_type
 4194 
 4195 
 4196 def variadic(x, allowed_types=(compat_str, bytes, dict)):
 4197     return x if isinstance(x, compat_collections_abc.Iterable) and not isinstance(x, allowed_types) else (x,)
 4198 
 4199 
 4200 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
 4201     if isinstance(key_or_keys, (list, tuple)):
 4202         for key in key_or_keys:
 4203             if key not in d or d[key] is None or skip_false_values and not d[key]:
 4204                 continue
 4205             return d[key]
 4206         return default
 4207     return d.get(key_or_keys, default)
 4208 
 4209 
 4210 def try_call(*funcs, **kwargs):
 4211 
 4212     # parameter defaults
 4213     expected_type = kwargs.get('expected_type')
 4214     fargs = kwargs.get('args', [])
 4215     fkwargs = kwargs.get('kwargs', {})
 4216 
 4217     for f in funcs:
 4218         try:
 4219             val = f(*fargs, **fkwargs)
 4220         except (AttributeError, KeyError, TypeError, IndexError, ZeroDivisionError):
 4221             pass
 4222         else:
 4223             if expected_type is None or isinstance(val, expected_type):
 4224                 return val
 4225 
 4226 
 4227 def try_get(src, getter, expected_type=None):
 4228     if not isinstance(getter, (list, tuple)):
 4229         getter = [getter]
 4230     for get in getter:
 4231         try:
 4232             v = get(src)
 4233         except (AttributeError, KeyError, TypeError, IndexError):
 4234             pass
 4235         else:
 4236             if expected_type is None or isinstance(v, expected_type):
 4237                 return v
 4238 
 4239 
 4240 def merge_dicts(*dicts):
 4241     merged = {}
 4242     for a_dict in dicts:
 4243         for k, v in a_dict.items():
 4244             if v is None:
 4245                 continue
 4246             if (k not in merged
 4247                     or (isinstance(v, compat_str) and v
 4248                         and isinstance(merged[k], compat_str)
 4249                         and not merged[k])):
 4250                 merged[k] = v
 4251     return merged
 4252 
 4253 
 4254 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
 4255     return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
 4256 
 4257 
# MPAA film ratings mapped to the age limit they imply
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


# US TV Parental Guidelines ratings mapped to the age limit they imply
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
 4275 
 4276 
 4277 def parse_age_limit(s):
 4278     if type(s) == int:
 4279         return s if 0 <= s <= 21 else None
 4280     if not isinstance(s, compat_basestring):
 4281         return None
 4282     m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
 4283     if m:
 4284         return int(m.group('age'))
 4285     if s in US_RATINGS:
 4286         return US_RATINGS[s]
 4287     m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
 4288     if m:
 4289         return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
 4290     return None
 4291 
 4292 
 4293 def strip_jsonp(code):
 4294     return re.sub(
 4295         r'''(?sx)^
 4296             (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
 4297             (?:\s*&&\s*(?P=func_name))?
 4298             \s*\(\s*(?P<callback_data>.*)\);?
 4299             \s*?(?://[^\n]*)*$''',
 4300         r'\g<callback_data>', code)
 4301 
 4302 
def js_to_json(code):
    """Convert a JavaScript object literal into valid JSON text."""
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # (regex, base) pairs for integer literals JSON does not allow:
    # hexadecimal (0x...) and legacy octal (0...)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Rewrite one matched token into its JSON equivalent
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            # comments, '!' coercion prefixes and trailing commas vanish
            return ""

        if v[0] in ("'", '"'):
            # quoted string: normalize its escapes to JSON form
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            # bare token: convert hex/octal ints, quoting those used as keys
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    return '"%d":' % i if v.endswith(':') else '%d' % i

        # everything else (identifiers, unquoted object keys) gets quotes
        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
 4343 
 4344 
 4345 def qualities(quality_ids):
 4346     """ Get a numeric quality value out of a list of possible values """
 4347     def q(qid):
 4348         try:
 4349             return quality_ids.index(qid)
 4350         except ValueError:
 4351             return -1
 4352     return q
 4353 
 4354 
# Default output filename template: "<title>-<id>.<ext>"
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
 4356 
 4357 
 4358 def limit_length(s, length):
 4359     """ Add ellipses to overly long strings """
 4360     if s is None:
 4361         return None
 4362     ELLIPSES = '...'
 4363     if len(s) > length:
 4364         return s[:length - len(ELLIPSES)] + ELLIPSES
 4365     return s
 4366 
 4367 
 4368 def version_tuple(v):
 4369     return tuple(int(e) for e in re.split(r'[-.]', v))
 4370 
 4371 
 4372 def is_outdated_version(version, limit, assume_new=True):
 4373     if not version:
 4374         return not assume_new
 4375     try:
 4376         return version_tuple(version) < version_tuple(limit)
 4377     except ValueError:
 4378         return not assume_new
 4379 
 4380 
 4381 def ytdl_is_updateable():
 4382     """ Returns if youtube-dl can be updated with -U """
 4383     from zipimport import zipimporter
 4384 
 4385     return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
 4386 
 4387 
 4388 def args_to_str(args):
 4389     # Get a short string representation for a subprocess command
 4390     return ' '.join(compat_shlex_quote(a) for a in args)
 4391 
 4392 
 4393 def error_to_compat_str(err):
 4394     err_str = str(err)
 4395     # On python 2 error byte string must be decoded with proper
 4396     # encoding rather than ascii
 4397     if sys.version_info[0] < 3:
 4398         err_str = err_str.decode(preferredencoding())
 4399     return err_str
 4400 
 4401 
 4402 def mimetype2ext(mt):
 4403     if mt is None:
 4404         return None
 4405 
 4406     ext = {
 4407         'audio/mp4': 'm4a',
 4408         # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
 4409         # it's the most popular one
 4410         'audio/mpeg': 'mp3',
 4411     }.get(mt)
 4412     if ext is not None:
 4413         return ext
 4414 
 4415     _, _, res = mt.rpartition('/')
 4416     res = res.split(';')[0].strip().lower()
 4417 
 4418     return {
 4419         '3gpp': '3gp',
 4420         'smptett+xml': 'tt',
 4421         'ttaf+xml': 'dfxp',
 4422         'ttml+xml': 'ttml',
 4423         'x-flv': 'flv',
 4424         'x-mp4-fragmented': 'mp4',
 4425         'x-ms-sami': 'sami',
 4426         'x-ms-wmv': 'wmv',
 4427         'mpegurl': 'm3u8',
 4428         'x-mpegurl': 'm3u8',
 4429         'vnd.apple.mpegurl': 'm3u8',
 4430         'dash+xml': 'mpd',
 4431         'f4m+xml': 'f4m',
 4432         'hds+xml': 'f4m',
 4433         'vnd.ms-sstr+xml': 'ism',
 4434         'quicktime': 'mov',
 4435         'mp2t': 'ts',
 4436         'x-wav': 'wav',
 4437     }.get(res, res)
 4438 
 4439 
 4440 def parse_codecs(codecs_str):
 4441     # http://tools.ietf.org/html/rfc6381
 4442     if not codecs_str:
 4443         return {}
 4444     split_codecs = list(filter(None, map(
 4445         lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
 4446     vcodec, acodec = None, None
 4447     for full_codec in split_codecs:
 4448         codec = full_codec.split('.')[0]
 4449         if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
 4450             if not vcodec:
 4451                 vcodec = full_codec
 4452         elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
 4453             if not acodec:
 4454                 acodec = full_codec
 4455         else:
 4456             write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
 4457     if not vcodec and not acodec:
 4458         if len(split_codecs) == 2:
 4459             return {
 4460                 'vcodec': split_codecs[0],
 4461                 'acodec': split_codecs[1],
 4462             }
 4463     else:
 4464         return {
 4465             'vcodec': vcodec or 'none',
 4466             'acodec': acodec or 'none',
 4467         }
 4468     return {}
 4469 
 4470 
 4471 def urlhandle_detect_ext(url_handle):
 4472     getheader = url_handle.headers.get
 4473 
 4474     cd = getheader('Content-Disposition')
 4475     if cd:
 4476         m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
 4477         if m:
 4478             e = determine_ext(m.group('filename'), default_ext=None)
 4479             if e:
 4480                 return e
 4481 
 4482     return mimetype2ext(getheader('Content-Type'))
 4483 
 4484 
 4485 def encode_data_uri(data, mime_type):
 4486     return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
 4487 
 4488 
 4489 def age_restricted(content_limit, age_limit):
 4490     """ Returns True iff the content should be blocked """
 4491 
 4492     if age_limit is None:  # No limit set
 4493         return False
 4494     if content_limit is None:
 4495         return False  # Content available for everyone
 4496     return age_limit < content_limit
 4497 
 4498 
 4499 def is_html(first_bytes):
 4500     """ Detect whether a file contains HTML by examining its first bytes. """
 4501 
 4502     BOMS = [
 4503         (b'\xef\xbb\xbf', 'utf-8'),
 4504         (b'\x00\x00\xfe\xff', 'utf-32-be'),
 4505         (b'\xff\xfe\x00\x00', 'utf-32-le'),
 4506         (b'\xff\xfe', 'utf-16-le'),
 4507         (b'\xfe\xff', 'utf-16-be'),
 4508     ]
 4509     for bom, enc in BOMS:
 4510         if first_bytes.startswith(bom):
 4511             s = first_bytes[len(bom):].decode(enc, 'replace')
 4512             break
 4513     else:
 4514         s = first_bytes.decode('utf-8', 'replace')
 4515 
 4516     return re.match(r'^\s*<', s)
 4517 
 4518 
 4519 def determine_protocol(info_dict):
 4520     protocol = info_dict.get('protocol')
 4521     if protocol is not None:
 4522         return protocol
 4523 
 4524     url = info_dict['url']
 4525     if url.startswith('rtmp'):
 4526         return 'rtmp'
 4527     elif url.startswith('mms'):
 4528         return 'mms'
 4529     elif url.startswith('rtsp'):
 4530         return 'rtsp'
 4531 
 4532     ext = determine_ext(url)
 4533     if ext == 'm3u8':
 4534         return 'm3u8'
 4535     elif ext == 'f4m':
 4536         return 'f4m'
 4537 
 4538     return compat_urllib_parse_urlparse(url).scheme
 4539 
 4540 
 4541 def render_table(header_row, data):
 4542     """ Render a list of rows, each as a list of values """
 4543     table = [header_row] + data
 4544     max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
 4545     format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
 4546     return '\n'.join(format_str % tuple(row) for row in table)
 4547 
 4548 
def _match_one(filter_part, dct):
    """
    Evaluate one filter expression against the dict *dct*.

    Supports binary comparisons ('duration > 600', 'uploader = "x"') and
    unary presence tests ('is_live', '!is_live').  Raises ValueError for
    malformed filters or string comparisons with non-equality operators.
    """
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    # Binary form: <key> <op>[?] <value>; the optional '?' (none_inclusive)
    # makes the filter pass when the field is missing from dct.
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
            or m.group('strval') is not None
            # If the original field is a string and matching comparisonvalue is
            # a number we should respect the origin of the original field
            # and process comparison value as a string (see
            # https://github.com/ytdl-org/youtube-dl/issues/11082).
            or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            # String comparison: only equality/inequality make sense.
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape the quote character used to delimit the value.
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            # Numeric comparison: try a plain integer first, then a
            # human-readable file size ('500K', '1.2MiB'), with and without
            # an implied trailing 'B'.
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # Missing field: truthy only when the '?' suffix was given.
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    # Unary form: '<key>' tests present/True, '!<key>' tests absent/False;
    # boolean fields are compared by identity, others by None-ness.
    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
 4617 
 4618 
 4619 def match_str(filter_str, dct):
 4620     """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
 4621 
 4622     return all(
 4623         _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
 4624 
 4625 
 4626 def match_filter_func(filter_str):
 4627     def _match_func(info_dict):
 4628         if match_str(filter_str, info_dict):
 4629             return None
 4630         else:
 4631             video_title = info_dict.get('title', info_dict.get('id', 'video'))
 4632             return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
 4633     return _match_func
 4634 
 4635 
 4636 def parse_dfxp_time_expr(time_expr):
 4637     if not time_expr:
 4638         return
 4639 
 4640     mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
 4641     if mobj:
 4642         return float(mobj.group('time_offset'))
 4643 
 4644     mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
 4645     if mobj:
 4646         return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
 4647 
 4648 
 4649 def srt_subtitles_timecode(seconds):
 4650     return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
 4651 
 4652 
def dfxp2srt(dfxp_data):
    '''
    Convert DFXP/TTML subtitle data to SRT.

    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    '''
    # Superseded namespace URIs are rewritten (on the raw bytes, before
    # parsing) to the current ones so the XPath lookups below match both
    # legacy and modern documents.
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # TTML styling properties that can be expressed in SRT markup
    # (<font>, <b>, <i>, <u>).
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    # Shorthand for namespace-qualified XPath expressions.
    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # style id -> dict of resolved styling properties (filled in below).
    styles = {}
    # Styling inherited from the document's body/div, applied to all cues.
    default_style = {}

    class TTMLPElementParser(object):
        # XMLParser target that flattens one <p> element into SRT text.
        # NOTE(review): these class attributes are mutable and shared
        # between instances; this works only because a fresh instance is
        # used per paragraph and start()/end() pushes and pops stay
        # balanced on well-formed XML — confirm before reusing elsewhere.
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                # Effective style = document default, then the element's
                # referenced style, then inline tts:* attributes.
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect from an ancestor.
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    # Remember the merged style so nested elements can
                    # avoid re-emitting identical properties.
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close the tags opened for this element, innermost first.
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Re-serialize the paragraph and feed it through the flattener.
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve <style> elements, including parent references; the loop is
    # re-run once when a style refers to a parent defined later in the
    # document (forward reference).
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # A style referenced from <body> or <div> becomes the document default.
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    # Emit one numbered SRT cue per <p>; paragraphs without a begin time,
    # or with neither end time nor duration, are dropped.
    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
 4815 
 4816 
 4817 def cli_option(params, command_option, param):
 4818     param = params.get(param)
 4819     if param:
 4820         param = compat_str(param)
 4821     return [command_option, param] if param is not None else []
 4822 
 4823 
 4824 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
 4825     param = params.get(param)
 4826     if param is None:
 4827         return []
 4828     assert isinstance(param, bool)
 4829     if separator:
 4830         return [command_option + separator + (true_value if param else false_value)]
 4831     return [command_option, true_value if param else false_value]
 4832 
 4833 
 4834 def cli_valueless_option(params, command_option, param, expected_value=True):
 4835     param = params.get(param)
 4836     return [command_option] if param == expected_value else []
 4837 
 4838 
 4839 def cli_configuration_args(params, param, default=[]):
 4840     ex_args = params.get(param)
 4841     if ex_args is None:
 4842         return default
 4843     assert isinstance(ex_args, list)
 4844     return ex_args
 4845 
 4846 
 4847 class ISO639Utils(object):
 4848     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
 4849     _lang_map = {
 4850         'aa': 'aar',
 4851         'ab': 'abk',
 4852         'ae': 'ave',
 4853         'af': 'afr',
 4854         'ak': 'aka',
 4855         'am': 'amh',
 4856         'an': 'arg',
 4857         'ar': 'ara',
 4858         'as': 'asm',
 4859         'av': 'ava',
 4860         'ay': 'aym',
 4861         'az': 'aze',
 4862         'ba': 'bak',
 4863         'be': 'bel',
 4864         'bg': 'bul',
 4865         'bh': 'bih',
 4866         'bi': 'bis',
 4867         'bm': 'bam',
 4868         'bn': 'ben',
 4869         'bo': 'bod',
 4870         'br': 'bre',
 4871         'bs': 'bos',
 4872         'ca': 'cat',
 4873         'ce': 'che',
 4874         'ch': 'cha',
 4875         'co': 'cos',
 4876         'cr': 'cre',
 4877         'cs': 'ces',
 4878         'cu': 'chu',
 4879         'cv': 'chv',
 4880         'cy': 'cym',
 4881         'da': 'dan',
 4882         'de': 'deu',
 4883         'dv': 'div',
 4884         'dz': 'dzo',
 4885         'ee': 'ewe',
 4886         'el': 'ell',
 4887         'en': 'eng',
 4888         'eo': 'epo',
 4889         'es': 'spa',
 4890         'et': 'est',
 4891         'eu': 'eus',
 4892         'fa': 'fas',
 4893         'ff': 'ful',
 4894         'fi': 'fin',
 4895         'fj': 'fij',
 4896         'fo': 'fao',
 4897         'fr': 'fra',
 4898         'fy': 'fry',
 4899         'ga': 'gle',
 4900         'gd': 'gla',
 4901         'gl': 'glg',
 4902         'gn': 'grn',
 4903         'gu': 'guj',
 4904         'gv': 'glv',
 4905         'ha': 'hau',
 4906         'he': 'heb',
 4907         'iw': 'heb',  # Replaced by he in 1989 revision
 4908         'hi': 'hin',
 4909         'ho': 'hmo',
 4910         'hr': 'hrv',
 4911         'ht': 'hat',
 4912         'hu': 'hun',
 4913         'hy': 'hye',
 4914         'hz': 'her',
 4915         'ia': 'ina',
 4916         'id': 'ind',
 4917         'in': 'ind',  # Replaced by id in 1989 revision
 4918         'ie': 'ile',
 4919         'ig': 'ibo',
 4920         'ii': 'iii',
 4921         'ik': 'ipk',
 4922         'io': 'ido',
 4923         'is': 'isl',
 4924         'it': 'ita',
 4925         'iu': 'iku',
 4926         'ja': 'jpn',
 4927         'jv': 'jav',
 4928         'ka': 'kat',
 4929         'kg': 'kon',
 4930         'ki': 'kik',
 4931         'kj': 'kua',
 4932         'kk': 'kaz',
 4933         'kl': 'kal',
 4934         'km': 'khm',
 4935         'kn': 'kan',
 4936         'ko': 'kor',
 4937         'kr': 'kau',
 4938         'ks': 'kas',
 4939         'ku': 'kur',
 4940         'kv': 'kom',
 4941         'kw': 'cor',
 4942         'ky': 'kir',
 4943         'la': 'lat',
 4944         'lb': 'ltz',
 4945         'lg': 'lug',
 4946         'li': 'lim',
 4947         'ln': 'lin',
 4948         'lo': 'lao',
 4949         'lt': 'lit',
 4950         'lu': 'lub',
 4951         'lv': 'lav',
 4952         'mg': 'mlg',
 4953         'mh': 'mah',
 4954         'mi': 'mri',
 4955         'mk': 'mkd',
 4956         'ml': 'mal',
 4957         'mn': 'mon',
 4958         'mr': 'mar',
 4959         'ms': 'msa',
 4960         'mt': 'mlt',
 4961         'my': 'mya',
 4962         'na': 'nau',
 4963         'nb': 'nob',
 4964         'nd': 'nde',
 4965         'ne': 'nep',
 4966         'ng': 'ndo',
 4967         'nl': 'nld',
 4968         'nn': 'nno',
 4969         'no': 'nor',
 4970         'nr': 'nbl',
 4971         'nv': 'nav',
 4972         'ny': 'nya',
 4973         'oc': 'oci',
 4974         'oj': 'oji',
 4975         'om': 'orm',
 4976         'or': 'ori',
 4977         'os': 'oss',
 4978         'pa': 'pan',
 4979         'pi': 'pli',
 4980         'pl': 'pol',
 4981         'ps': 'pus',
 4982         'pt': 'por',
 4983         'qu': 'que',
 4984         'rm': 'roh',
 4985         'rn': 'run',
 4986         'ro': 'ron',
 4987         'ru': 'rus',
 4988         'rw': 'kin',
 4989         'sa': 'san',
 4990         'sc': 'srd',
 4991         'sd': 'snd',
 4992         'se': 'sme',
 4993         'sg': 'sag',
 4994         'si': 'sin',
 4995         'sk': 'slk',
 4996         'sl': 'slv',
 4997         'sm': 'smo',
 4998         'sn': 'sna',
 4999         'so': 'som',
 5000         'sq': 'sqi',
 5001         'sr': 'srp',
 5002         'ss': 'ssw',
 5003         'st': 'sot',
 5004         'su': 'sun',
 5005         'sv': 'swe',
 5006         'sw': 'swa',
 5007         'ta': 'tam',
 5008         'te': 'tel',
 5009         'tg': 'tgk',
 5010         'th': 'tha',
 5011         'ti': 'tir',
 5012         'tk': 'tuk',
 5013         'tl': 'tgl',
 5014         'tn': 'tsn',
 5015         'to': 'ton',
 5016         'tr': 'tur',
 5017         'ts': 'tso',
 5018         'tt': 'tat',
 5019         'tw': 'twi',
 5020         'ty': 'tah',
 5021         'ug': 'uig',
 5022         'uk': 'ukr',
 5023         'ur': 'urd',
 5024         'uz': 'uzb',
 5025         've': 'ven',
 5026         'vi': 'vie',
 5027         'vo': 'vol',
 5028         'wa': 'wln',
 5029         'wo': 'wol',
 5030         'xh': 'xho',
 5031         'yi': 'yid',
 5032         'ji': 'yid',  # Replaced by yi in 1989 revision
 5033         'yo': 'yor',
 5034         'za': 'zha',
 5035         'zh': 'zho',
 5036         'zu': 'zul',
 5037     }
 5038 
 5039     @classmethod
 5040     def short2long(cls, code):
 5041         """Convert language code from ISO 639-1 to ISO 639-2/T"""
 5042         return cls._lang_map.get(code[:2])
 5043 
 5044     @classmethod
 5045     def long2short(cls, code):
 5046         """Convert language code from ISO 639-2/T to ISO 639-1"""
 5047         for short_name, long_name in cls._lang_map.items():
 5048             if long_name == code:
 5049                 return short_name
 5050 
 5051 
 5052 class ISO3166Utils(object):
 5053     # From http://data.okfn.org/data/core/country-list
 5054     _country_map = {
 5055         'AF': 'Afghanistan',
 5056         'AX': 'Åland Islands',
 5057         'AL': 'Albania',
 5058         'DZ': 'Algeria',
 5059         'AS': 'American Samoa',
 5060         'AD': 'Andorra',
 5061         'AO': 'Angola',
 5062         'AI': 'Anguilla',
 5063         'AQ': 'Antarctica',
 5064         'AG': 'Antigua and Barbuda',
 5065         'AR': 'Argentina',
 5066         'AM': 'Armenia',
 5067         'AW': 'Aruba',
 5068         'AU': 'Australia',
 5069         'AT': 'Austria',
 5070         'AZ': 'Azerbaijan',
 5071         'BS': 'Bahamas',
 5072         'BH': 'Bahrain',
 5073         'BD': 'Bangladesh',
 5074         'BB': 'Barbados',
 5075         'BY': 'Belarus',
 5076         'BE': 'Belgium',
 5077         'BZ': 'Belize',
 5078         'BJ': 'Benin',
 5079         'BM': 'Bermuda',
 5080         'BT': 'Bhutan',
 5081         'BO': 'Bolivia, Plurinational State of',
 5082         'BQ': 'Bonaire, Sint Eustatius and Saba',
 5083         'BA': 'Bosnia and Herzegovina',
 5084         'BW': 'Botswana',
 5085         'BV': 'Bouvet Island',
 5086         'BR': 'Brazil',
 5087         'IO': 'British Indian Ocean Territory',
 5088         'BN': 'Brunei Darussalam',
 5089         'BG': 'Bulgaria',
 5090         'BF': 'Burkina Faso',
 5091         'BI': 'Burundi',
 5092         'KH': 'Cambodia',
 5093         'CM': 'Cameroon',
 5094         'CA': 'Canada',
 5095         'CV': 'Cape Verde',
 5096         'KY': 'Cayman Islands',
 5097         'CF': 'Central African Republic',
 5098         'TD': 'Chad',
 5099         'CL': 'Chile',
 5100         'CN': 'China',
 5101         'CX': 'Christmas Island',
 5102         'CC': 'Cocos (Keeling) Islands',
 5103         'CO': 'Colombia',
 5104         'KM': 'Comoros',
 5105         'CG': 'Congo',
 5106         'CD': 'Congo, the Democratic Republic of the',
 5107         'CK': 'Cook Islands',
 5108         'CR': 'Costa Rica',
 5109         'CI': 'Côte d\'Ivoire',
 5110         'HR': 'Croatia',
 5111         'CU': 'Cuba',
 5112         'CW': 'Curaçao',
 5113         'CY': 'Cyprus',
 5114         'CZ': 'Czech Republic',
 5115         'DK': 'Denmark',
 5116         'DJ': 'Djibouti',
 5117         'DM': 'Dominica',
 5118         'DO': 'Dominican Republic',
 5119         'EC': 'Ecuador',
 5120         'EG': 'Egypt',
 5121         'SV': 'El Salvador',
 5122         'GQ': 'Equatorial Guinea',
 5123         'ER': 'Eritrea',
 5124         'EE': 'Estonia',
 5125         'ET': 'Ethiopia',
 5126         'FK': 'Falkland Islands (Malvinas)',
 5127         'FO': 'Faroe Islands',
 5128         'FJ': 'Fiji',
 5129         'FI': 'Finland',
 5130         'FR': 'France',
 5131         'GF': 'French Guiana',
 5132         'PF': 'French Polynesia',
 5133         'TF': 'French Southern Territories',
 5134         'GA': 'Gabon',
 5135         'GM': 'Gambia',
 5136         'GE': 'Georgia',
 5137         'DE': 'Germany',
 5138         'GH': 'Ghana',
 5139         'GI': 'Gibraltar',
 5140         'GR': 'Greece',
 5141         'GL': 'Greenland',
 5142         'GD': 'Grenada',
 5143         'GP': 'Guadeloupe',
 5144         'GU': 'Guam',
 5145         'GT': 'Guatemala',
 5146         'GG': 'Guernsey',
 5147         'GN': 'Guinea',
 5148         'GW': 'Guinea-Bissau',
 5149         'GY': 'Guyana',
 5150         'HT': 'Haiti',
 5151         'HM': 'Heard Island and McDonald Islands',
 5152         'VA': 'Holy See (Vatican City State)',
 5153         'HN': 'Honduras',
 5154         'HK': 'Hong Kong',
 5155         'HU': 'Hungary',
 5156         'IS': 'Iceland',
 5157         'IN': 'India',
 5158         'ID': 'Indonesia',
 5159         'IR': 'Iran, Islamic Republic of',
 5160         'IQ': 'Iraq',
 5161         'IE': 'Ireland',
 5162         'IM': 'Isle of Man',
 5163         'IL': 'Israel',
 5164         'IT': 'Italy',
 5165         'JM': 'Jamaica',
 5166         'JP': 'Japan',
 5167         'JE': 'Jersey',
 5168         'JO': 'Jordan',
 5169         'KZ': 'Kazakhstan',
 5170         'KE': 'Kenya',
 5171         'KI': 'Kiribati',
 5172         'KP': 'Korea, Democratic People\'s Republic of',
 5173         'KR': 'Korea, Republic of',
 5174         'KW': 'Kuwait',
 5175         'KG': 'Kyrgyzstan',
 5176         'LA': 'Lao People\'s Democratic Republic',
 5177         'LV': 'Latvia',
 5178         'LB': 'Lebanon',
 5179         'LS': 'Lesotho',
 5180         'LR': 'Liberia',
 5181         'LY': 'Libya',
 5182         'LI': 'Liechtenstein',
 5183         'LT': 'Lithuania',
 5184         'LU': 'Luxembourg',
 5185         'MO': 'Macao',
 5186         'MK': 'Macedonia, the Former Yugoslav Republic of',
 5187         'MG': 'Madagascar',
 5188         'MW': 'Malawi',
 5189         'MY': 'Malaysia',
 5190         'MV': 'Maldives',
 5191         'ML': 'Mali',
 5192         'MT': 'Malta',
 5193         'MH': 'Marshall Islands',
 5194         'MQ': 'Martinique',
 5195         'MR': 'Mauritania',
 5196         'MU': 'Mauritius',
 5197         'YT': 'Mayotte',
 5198         'MX': 'Mexico',
 5199         'FM': 'Micronesia, Federated States of',
 5200         'MD': 'Moldova, Republic of',
 5201         'MC': 'Monaco',
 5202         'MN': 'Mongolia',
 5203         'ME': 'Montenegro',
 5204         'MS': 'Montserrat',
 5205         'MA': 'Morocco',
 5206         'MZ': 'Mozambique',
 5207         'MM': 'Myanmar',
 5208         'NA': 'Namibia',
 5209         'NR': 'Nauru',
 5210         'NP': 'Nepal',
 5211         'NL': 'Netherlands',
 5212         'NC': 'New Caledonia',
 5213         'NZ': 'New Zealand',
 5214         'NI': 'Nicaragua',
 5215         'NE': 'Niger',
 5216         'NG': 'Nigeria',
 5217         'NU': 'Niue',
 5218         'NF': 'Norfolk Island',
 5219         'MP': 'Northern Mariana Islands',
 5220         'NO': 'Norway',
 5221         'OM': 'Oman',
 5222         'PK': 'Pakistan',
 5223         'PW': 'Palau',
 5224         'PS': 'Palestine, State of',
 5225         'PA': 'Panama',
 5226         'PG': 'Papua New Guinea',
 5227         'PY': 'Paraguay',
 5228         'PE': 'Peru',
 5229         'PH': 'Philippines',
 5230         'PN': 'Pitcairn',
 5231         'PL': 'Poland',
 5232         'PT': 'Portugal',
 5233         'PR': 'Puerto Rico',
 5234         'QA': 'Qatar',
 5235         'RE': 'Réunion',
 5236         'RO': 'Romania',
 5237         'RU': 'Russian Federation',
 5238         'RW': 'Rwanda',
 5239         'BL': 'Saint Barthélemy',
 5240         'SH': 'Saint Helena, Ascension and Tristan da Cunha',
 5241         'KN': 'Saint Kitts and Nevis',
 5242         'LC': 'Saint Lucia',
 5243         'MF': 'Saint Martin (French part)',
 5244         'PM': 'Saint Pierre and Miquelon',
 5245         'VC': 'Saint Vincent and the Grenadines',
 5246         'WS': 'Samoa',
 5247         'SM': 'San Marino',
 5248         'ST': 'Sao Tome and Principe',
 5249         'SA': 'Saudi Arabia',
 5250         'SN': 'Senegal',
 5251         'RS': 'Serbia',
 5252         'SC': 'Seychelles',
 5253         'SL': 'Sierra Leone',
 5254         'SG': 'Singapore',
 5255         'SX': 'Sint Maarten (Dutch part)',
 5256         'SK': 'Slovakia',
 5257         'SI': 'Slovenia',
 5258         'SB': 'Solomon Islands',
 5259         'SO': 'Somalia',
 5260         'ZA': 'South Africa',
 5261         'GS': 'South Georgia and the South Sandwich Islands',
 5262         'SS': 'South Sudan',
 5263         'ES': 'Spain',
 5264         'LK': 'Sri Lanka',
 5265         'SD': 'Sudan',
 5266         'SR': 'Suriname',
 5267         'SJ': 'Svalbard and Jan Mayen',
 5268         'SZ': 'Swaziland',
 5269         'SE': 'Sweden',
 5270         'CH': 'Switzerland',
 5271         'SY': 'Syrian Arab Republic',
 5272         'TW': 'Taiwan, Province of China',
 5273         'TJ': 'Tajikistan',
 5274         'TZ': 'Tanzania, United Republic of',
 5275         'TH': 'Thailand',
 5276         'TL': 'Timor-Leste',
 5277         'TG': 'Togo',
 5278         'TK': 'Tokelau',
 5279         'TO': 'Tonga',
 5280         'TT': 'Trinidad and Tobago',
 5281         'TN': 'Tunisia',
 5282         'TR': 'Turkey',
 5283         'TM': 'Turkmenistan',
 5284         'TC': 'Turks and Caicos Islands',
 5285         'TV': 'Tuvalu',
 5286         'UG': 'Uganda',
 5287         'UA': 'Ukraine',
 5288         'AE': 'United Arab Emirates',
 5289         'GB': 'United Kingdom',
 5290         'US': 'United States',
 5291         'UM': 'United States Minor Outlying Islands',
 5292         'UY': 'Uruguay',
 5293         'UZ': 'Uzbekistan',
 5294         'VU': 'Vanuatu',
 5295         'VE': 'Venezuela, Bolivarian Republic of',
 5296         'VN': 'Viet Nam',
 5297         'VG': 'Virgin Islands, British',
 5298         'VI': 'Virgin Islands, U.S.',
 5299         'WF': 'Wallis and Futuna',
 5300         'EH': 'Western Sahara',
 5301         'YE': 'Yemen',
 5302         'ZM': 'Zambia',
 5303         'ZW': 'Zimbabwe',
 5304     }
 5305 
 5306     @classmethod
 5307     def short2full(cls, code):
 5308         """Convert an ISO 3166-2 country code to the corresponding full name"""
 5309         return cls._country_map.get(code.upper())
 5310 
 5311 
 5312 class GeoUtils(object):
 5313     # Major IPv4 address blocks per country
 5314     _country_ip_map = {
 5315         'AD': '46.172.224.0/19',
 5316         'AE': '94.200.0.0/13',
 5317         'AF': '149.54.0.0/17',
 5318         'AG': '209.59.64.0/18',
 5319         'AI': '204.14.248.0/21',
 5320         'AL': '46.99.0.0/16',
 5321         'AM': '46.70.0.0/15',
 5322         'AO': '105.168.0.0/13',
 5323         'AP': '182.50.184.0/21',
 5324         'AQ': '23.154.160.0/24',
 5325         'AR': '181.0.0.0/12',
 5326         'AS': '202.70.112.0/20',
 5327         'AT': '77.116.0.0/14',
 5328         'AU': '1.128.0.0/11',
 5329         'AW': '181.41.0.0/18',
 5330         'AX': '185.217.4.0/22',
 5331         'AZ': '5.197.0.0/16',
 5332         'BA': '31.176.128.0/17',
 5333         'BB': '65.48.128.0/17',
 5334         'BD': '114.130.0.0/16',
 5335         'BE': '57.0.0.0/8',
 5336         'BF': '102.178.0.0/15',
 5337         'BG': '95.42.0.0/15',
 5338         'BH': '37.131.0.0/17',
 5339         'BI': '154.117.192.0/18',
 5340         'BJ': '137.255.0.0/16',
 5341         'BL': '185.212.72.0/23',
 5342         'BM': '196.12.64.0/18',
 5343         'BN': '156.31.0.0/16',
 5344         'BO': '161.56.0.0/16',
 5345         'BQ': '161.0.80.0/20',
 5346         'BR': '191.128.0.0/12',
 5347         'BS': '24.51.64.0/18',
 5348         'BT': '119.2.96.0/19',
 5349         'BW': '168.167.0.0/16',
 5350         'BY': '178.120.0.0/13',
 5351         'BZ': '179.42.192.0/18',
 5352         'CA': '99.224.0.0/11',
 5353         'CD': '41.243.0.0/16',
 5354         'CF': '197.242.176.0/21',
 5355         'CG': '160.113.0.0/16',
 5356         'CH': '85.0.0.0/13',
 5357         'CI': '102.136.0.0/14',
 5358         'CK': '202.65.32.0/19',
 5359         'CL': '152.172.0.0/14',
 5360         'CM': '102.244.0.0/14',
 5361         'CN': '36.128.0.0/10',
 5362         'CO': '181.240.0.0/12',
 5363         'CR': '201.192.0.0/12',
 5364         'CU': '152.206.0.0/15',
 5365         'CV': '165.90.96.0/19',
 5366         'CW': '190.88.128.0/17',
 5367         'CY': '31.153.0.0/16',
 5368         'CZ': '88.100.0.0/14',
 5369         'DE': '53.0.0.0/8',
 5370         'DJ': '197.241.0.0/17',
 5371         'DK': '87.48.0.0/12',
 5372         'DM': '192.243.48.0/20',
 5373         'DO': '152.166.0.0/15',
 5374         'DZ': '41.96.0.0/12',
 5375         'EC': '186.68.0.0/15',
 5376         'EE': '90.190.0.0/15',
 5377         'EG': '156.160.0.0/11',
 5378         'ER': '196.200.96.0/20',
 5379         'ES': '88.0.0.0/11',
 5380         'ET': '196.188.0.0/14',
 5381         'EU': '2.16.0.0/13',
 5382         'FI': '91.152.0.0/13',
 5383         'FJ': '144.120.0.0/16',
 5384         'FK': '80.73.208.0/21',
 5385         'FM': '119.252.112.0/20',
 5386         'FO': '88.85.32.0/19',
 5387         'FR': '90.0.0.0/9',
 5388         'GA': '41.158.0.0/15',
 5389         'GB': '25.0.0.0/8',
 5390         'GD': '74.122.88.0/21',
 5391         'GE': '31.146.0.0/16',
 5392         'GF': '161.22.64.0/18',
 5393         'GG': '62.68.160.0/19',
 5394         'GH': '154.160.0.0/12',
 5395         'GI': '95.164.0.0/16',
 5396         'GL': '88.83.0.0/19',
 5397         'GM': '160.182.0.0/15',
 5398         'GN': '197.149.192.0/18',
 5399         'GP': '104.250.0.0/19',
 5400         'GQ': '105.235.224.0/20',
 5401         'GR': '94.64.0.0/13',
 5402         'GT': '168.234.0.0/16',
 5403         'GU': '168.123.0.0/16',
 5404         'GW': '197.214.80.0/20',
 5405         'GY': '181.41.64.0/18',
 5406         'HK': '113.252.0.0/14',
 5407         'HN': '181.210.0.0/16',
 5408         'HR': '93.136.0.0/13',
 5409         'HT': '148.102.128.0/17',
 5410         'HU': '84.0.0.0/14',
 5411         'ID': '39.192.0.0/10',
 5412         'IE': '87.32.0.0/12',
 5413         'IL': '79.176.0.0/13',
 5414         'IM': '5.62.80.0/20',
 5415         'IN': '117.192.0.0/10',
 5416         'IO': '203.83.48.0/21',
 5417         'IQ': '37.236.0.0/14',
 5418         'IR': '2.176.0.0/12',
 5419         'IS': '82.221.0.0/16',
 5420         'IT': '79.0.0.0/10',
 5421         'JE': '87.244.64.0/18',
 5422         'JM': '72.27.0.0/17',
 5423         'JO': '176.29.0.0/16',
 5424         'JP': '133.0.0.0/8',
 5425         'KE': '105.48.0.0/12',
 5426         'KG': '158.181.128.0/17',
 5427         'KH': '36.37.128.0/17',
 5428         'KI': '103.25.140.0/22',
 5429         'KM': '197.255.224.0/20',
 5430         'KN': '198.167.192.0/19',
 5431         'KP': '175.45.176.0/22',
 5432         'KR': '175.192.0.0/10',
 5433         'KW': '37.36.0.0/14',
 5434         'KY': '64.96.0.0/15',
 5435         'KZ': '2.72.0.0/13',
 5436         'LA': '115.84.64.0/18',
 5437         'LB': '178.135.0.0/16',
 5438         'LC': '24.92.144.0/20',
 5439         'LI': '82.117.0.0/19',
 5440         'LK': '112.134.0.0/15',
 5441         'LR': '102.183.0.0/16',
 5442         'LS': '129.232.0.0/17',
 5443         'LT': '78.56.0.0/13',
 5444         'LU': '188.42.0.0/16',
 5445         'LV': '46.109.0.0/16',
 5446         'LY': '41.252.0.0/14',
 5447         'MA': '105.128.0.0/11',
 5448         'MC': '88.209.64.0/18',
 5449         'MD': '37.246.0.0/16',
 5450         'ME': '178.175.0.0/17',
 5451         'MF': '74.112.232.0/21',
 5452         'MG': '154.126.0.0/17',
 5453         'MH': '117.103.88.0/21',
 5454         'MK': '77.28.0.0/15',
 5455         'ML': '154.118.128.0/18',
 5456         'MM': '37.111.0.0/17',
 5457         'MN': '49.0.128.0/17',
 5458         'MO': '60.246.0.0/16',
 5459         'MP': '202.88.64.0/20',
 5460         'MQ': '109.203.224.0/19',
 5461         'MR': '41.188.64.0/18',
 5462         'MS': '208.90.112.0/22',
 5463         'MT': '46.11.0.0/16',
 5464         'MU': '105.16.0.0/12',
 5465         'MV': '27.114.128.0/18',
 5466         'MW': '102.70.0.0/15',
 5467         'MX': '187.192.0.0/11',
 5468         'MY': '175.136.0.0/13',
 5469         'MZ': '197.218.0.0/15',
 5470         'NA': '41.182.0.0/16',
 5471         'NC': '101.101.0.0/18',
 5472         'NE': '197.214.0.0/18',
 5473         'NF': '203.17.240.0/22',
 5474         'NG': '105.112.0.0/12',
 5475         'NI': '186.76.0.0/15',
 5476         'NL': '145.96.0.0/11',
 5477         'NO': '84.208.0.0/13',
 5478         'NP': '36.252.0.0/15',
 5479         'NR': '203.98.224.0/19',
 5480         'NU': '49.156.48.0/22',
 5481         'NZ': '49.224.0.0/14',
 5482         'OM': '5.36.0.0/15',
 5483         'PA': '186.72.0.0/15',
 5484         'PE': '186.160.0.0/14',
 5485         'PF': '123.50.64.0/18',
 5486         'PG': '124.240.192.0/19',
 5487         'PH': '49.144.0.0/13',
 5488         'PK': '39.32.0.0/11',
 5489         'PL': '83.0.0.0/11',
 5490         'PM': '70.36.0.0/20',
 5491         'PR': '66.50.0.0/16',
 5492         'PS': '188.161.0.0/16',
 5493         'PT': '85.240.0.0/13',
 5494         'PW': '202.124.224.0/20',
 5495         'PY': '181.120.0.0/14',
 5496         'QA': '37.210.0.0/15',
 5497         'RE': '102.35.0.0/16',
 5498         'RO': '79.112.0.0/13',
 5499         'RS': '93.86.0.0/15',
 5500         'RU': '5.136.0.0/13',
 5501         'RW': '41.186.0.0/16',
 5502         'SA': '188.48.0.0/13',
 5503         'SB': '202.1.160.0/19',
 5504         'SC': '154.192.0.0/11',
 5505         'SD': '102.120.0.0/13',
 5506         'SE': '78.64.0.0/12',
 5507         'SG': '8.128.0.0/10',
 5508         'SI': '188.196.0.0/14',
 5509         'SK': '78.98.0.0/15',
 5510         'SL': '102.143.0.0/17',
 5511         'SM': '89.186.32.0/19',
 5512         'SN': '41.82.0.0/15',
 5513         'SO': '154.115.192.0/18',
 5514         'SR': '186.179.128.0/17',
 5515         'SS': '105.235.208.0/21',
 5516         'ST': '197.159.160.0/19',
 5517         'SV': '168.243.0.0/16',
 5518         'SX': '190.102.0.0/20',
 5519         'SY': '5.0.0.0/16',
 5520         'SZ': '41.84.224.0/19',
 5521         'TC': '65.255.48.0/20',
 5522         'TD': '154.68.128.0/19',
 5523         'TG': '196.168.0.0/14',
 5524         'TH': '171.96.0.0/13',
 5525         'TJ': '85.9.128.0/18',
 5526         'TK': '27.96.24.0/21',
 5527         'TL': '180.189.160.0/20',
 5528         'TM': '95.85.96.0/19',
 5529         'TN': '197.0.0.0/11',
 5530         'TO': '175.176.144.0/21',
 5531         'TR': '78.160.0.0/11',
 5532         'TT': '186.44.0.0/15',
 5533         'TV': '202.2.96.0/19',
 5534         'TW': '120.96.0.0/11',
 5535         'TZ': '156.156.0.0/14',
 5536         'UA': '37.52.0.0/14',
 5537         'UG': '102.80.0.0/13',
 5538         'US': '6.0.0.0/8',
 5539         'UY': '167.56.0.0/13',
 5540         'UZ': '84.54.64.0/18',
 5541         'VA': '212.77.0.0/19',
 5542         'VC': '207.191.240.0/21',
 5543         'VE': '186.88.0.0/13',
 5544         'VG': '66.81.192.0/20',
 5545         'VI': '146.226.0.0/16',
 5546         'VN': '14.160.0.0/11',
 5547         'VU': '202.80.32.0/20',
 5548         'WF': '117.20.32.0/21',
 5549         'WS': '202.4.32.0/19',
 5550         'YE': '134.35.0.0/16',
 5551         'YT': '41.242.116.0/22',
 5552         'ZA': '41.0.0.0/11',
 5553         'ZM': '102.144.0.0/13',
 5554         'ZW': '102.177.192.0/18',
 5555     }
 5556 
 5557     @classmethod
 5558     def random_ipv4(cls, code_or_block):
 5559         if len(code_or_block) == 2:
 5560             block = cls._country_ip_map.get(code_or_block.upper())
 5561             if not block:
 5562                 return None
 5563         else:
 5564             block = code_or_block
 5565         addr, preflen = block.split('/')
 5566         addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
 5567         addr_max = addr_min | (0xffffffff >> int(preflen))
 5568         return compat_str(socket.inet_ntoa(
 5569             compat_struct_pack('!L', random.randint(addr_min, addr_max))))
 5570 
 5571 
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler variant that lets individual requests override (or
    disable) the proxy via the 'Ytdl-request-proxy' request header, and
    that defers SOCKS proxies to youtube-dl's own http/https handlers."""

    def __init__(self, proxies=None):
        # Set default handlers
        # Install http/https openers that route through proxy_open with a
        # '__noproxy__' placeholder; the lambda keyword defaults bind the
        # current values of `type` and the bound method at definition time.
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # Per-request override: the header carries the proxy URL and is
        # stripped so it is never sent on the wire.
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        # SOCKS proxies are only flagged here; the actual socket wrapping
        # happens later in youtube-dl's http/https handlers.
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dl's http/https handlers do wrapping the socket with socks
            return None
        # Plain HTTP(S) proxy: fall back to the stock ProxyHandler logic.
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
 5595 
 5596 
 5597 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
 5598 # released into Public Domain
 5599 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
 5600 
 5601 def long_to_bytes(n, blocksize=0):
 5602     """long_to_bytes(n:long, blocksize:int) : string
 5603     Convert a long integer to a byte string.
 5604 
 5605     If optional blocksize is given and greater than zero, pad the front of the
 5606     byte string with binary zeros so that the length is a multiple of
 5607     blocksize.
 5608     """
 5609     # after much testing, this algorithm was deemed to be the fastest
 5610     s = b''
 5611     n = int(n)
 5612     while n > 0:
 5613         s = compat_struct_pack('>I', n & 0xffffffff) + s
 5614         n = n >> 32
 5615     # strip off leading zeros
 5616     for i in range(len(s)):
 5617         if s[i] != b'\000'[0]:
 5618             break
 5619     else:
 5620         # only happens when n == 0
 5621         s = b'\000'
 5622         i = 0
 5623     s = s[i:]
 5624     # add back some pad bytes.  this could be done more efficiently w.r.t. the
 5625     # de-padding being done above, but sigh...
 5626     if blocksize > 0 and len(s) % blocksize:
 5627         s = (blocksize - len(s) % blocksize) * b'\000' + s
 5628     return s
 5629 
 5630 
 5631 def bytes_to_long(s):
 5632     """bytes_to_long(string) : long
 5633     Convert a byte string to a long integer.
 5634 
 5635     This is (essentially) the inverse of long_to_bytes().
 5636     """
 5637     acc = 0
 5638     length = len(s)
 5639     if length % 4:
 5640         extra = (4 - length % 4)
 5641         s = b'\000' * extra + s
 5642         length = length + extra
 5643     for i in range(0, length, 4):
 5644         acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
 5645     return acc
 5646 
 5647 
 5648 def ohdave_rsa_encrypt(data, exponent, modulus):
 5649     '''
 5650     Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
 5651 
 5652     Input:
 5653         data: data to encrypt, bytes-like object
 5654         exponent, modulus: parameter e and N of RSA algorithm, both integer
 5655     Output: hex string of encrypted data
 5656 
 5657     Limitation: supports one block encryption only
 5658     '''
 5659 
 5660     payload = int(binascii.hexlify(data[::-1]), 16)
 5661     encrypted = pow(payload, exponent, modulus)
 5662     return '%x' % encrypted
 5663 
 5664 
 5665 def pkcs1pad(data, length):
 5666     """
 5667     Padding input data with PKCS#1 scheme
 5668 
 5669     @param {int[]} data        input data
 5670     @param {int}   length      target length
 5671     @returns {int[]}           padded data
 5672     """
 5673     if len(data) > length - 11:
 5674         raise ValueError('Input data too long for PKCS#1 padding')
 5675 
 5676     pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
 5677     return [0, 2] + pseudo_random + [0] + data
 5678 
 5679 
 5680 def encode_base_n(num, n, table=None):
 5681     FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
 5682     if not table:
 5683         table = FULL_TABLE[:n]
 5684 
 5685     if n > len(table):
 5686         raise ValueError('base %d exceeds table length %d' % (n, len(table)))
 5687 
 5688     if num == 0:
 5689         return table[0]
 5690 
 5691     ret = ''
 5692     while num:
 5693         ret = table[num % n] + ret
 5694         num = num // n
 5695     return ret
 5696 
 5697 
 5698 def decode_packed_codes(code):
 5699     mobj = re.search(PACKED_CODES_RE, code)
 5700     obfuscated_code, base, count, symbols = mobj.groups()
 5701     base = int(base)
 5702     count = int(count)
 5703     symbols = symbols.split('|')
 5704     symbol_table = {}
 5705 
 5706     while count:
 5707         count -= 1
 5708         base_n_count = encode_base_n(count, base)
 5709         symbol_table[base_n_count] = symbols[count] or base_n_count
 5710 
 5711     return re.sub(
 5712         r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
 5713         obfuscated_code)
 5714 
 5715 
 5716 def caesar(s, alphabet, shift):
 5717     if shift == 0:
 5718         return s
 5719     l = len(alphabet)
 5720     return ''.join(
 5721         alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
 5722         for c in s)
 5723 
 5724 
 5725 def rot47(s):
 5726     return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
 5727 
 5728 
 5729 def parse_m3u8_attributes(attrib):
 5730     info = {}
 5731     for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
 5732         if val.startswith('"'):
 5733             val = val[1:-1]
 5734         info[key] = val
 5735     return info
 5736 
 5737 
 5738 def urshift(val, n):
 5739     return val >> n if val >= 0 else (val + 0x100000000) >> n
 5740 
 5741 
 5742 # Based on png2str() written by @gdkchan and improved by @yokrysty
 5743 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode a PNG image into a (width, height, pixels) tuple.

    pixels is a list of rows; each row is a flat list of channel byte
    values.  The stride is computed as width * 3, i.e. this assumes
    3 bytes per pixel (8-bit RGB) — the IHDR bit depth and colour type
    are not validated here.  Raises IOError on a missing PNG signature
    or missing image data.
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]  # everything after the 8-byte PNG signature

    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Big-endian integer readers keyed by byte width.
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Split the stream into chunks: 4-byte length, 4-byte type, payload,
    # 4-byte CRC (the CRC is skipped, not verified).
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # The first chunk is IHDR (checked above): width/height are its first
    # two 32-bit fields.
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''

    # Image data may be split across multiple IDAT chunks; concatenate
    # them before inflating.
    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    stride = width * 3  # bytes per scanline (3 channels per pixel)
    pixels = []

    def _get_pixel(idx):
        # Fetch an already-decoded channel byte by its flat index into
        # the (conceptual) stride*height byte grid.
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    # Undo per-scanline filtering; each scanline is prefixed by one
    # filter-type byte.
    for y in range(height):
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # 'left' is the same channel of the previous pixel, i.e.
            # 3 bytes back; 'up' is the same byte one scanline above.
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Paeth predictor: pick whichever of left/up/up-left is
                # closest to the initial estimate p.
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
 5847 
 5848 
def write_xattr(path, key, value):
    """Set the extended attribute *key* to *value* (bytes) on the file at
    *path*, trying each available backend in turn: the pyxattr or xattr
    Python modules, NTFS Alternate Data Streams on Windows, and finally
    the setfattr/xattr command-line tools.

    Raises XAttrMetadataError when a backend fails and
    XAttrUnavailableError when no usable backend exists.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dl requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            # On other POSIX systems, shell out to the CLI tools.
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # CLI tools take the value as a (UTF-8) string argument.
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = process_communicate_or_kill(p)
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
 5931 
 5932 
 5933 def random_birthday(year_field, month_field, day_field):
 5934     start_date = datetime.date(1950, 1, 1)
 5935     end_date = datetime.date(1995, 12, 31)
 5936     offset = random.randint(0, (end_date - start_date).days)
 5937     random_date = start_date + datetime.timedelta(offset)
 5938     return {
 5939         year_field: str(random_date.year),
 5940         month_field: str(random_date.month),
 5941         day_field: str(random_date.day),
 5942     }
 5943 
 5944 
 5945 def clean_podcast_url(url):
 5946     return re.sub(r'''(?x)
 5947         (?:
 5948             (?:
 5949                 chtbl\.com/track|
 5950                 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
 5951                 play\.podtrac\.com
 5952             )/[^/]+|
 5953             (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
 5954             flex\.acast\.com|
 5955             pd(?:
 5956                 cn\.co| # https://podcorn.com/analytics-prefix/
 5957                 st\.fm # https://podsights.com/docs/
 5958             )/e
 5959         )/''', '', url)
 5960 
 5961 
def traverse_obj(obj, *paths, **kwargs):
    """
    Safely traverse nested `dict`s and `Sequence`s

    >>> obj = [{}, {"key": "value"}]
    >>> traverse_obj(obj, (1, "key"))
    "value"

    Each of the provided `paths` is tested and the first producing a valid result will be returned.
    The next path will also be tested if the path branched but no results could be found.
    Supported values for traversal are `Mapping`, `Sequence` and `re.Match`.
    A value of None is treated as the absence of a value.

    The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.

    The keys in the path can be one of:
        - `None`:           Return the current object.
        - `str`/`int`:      Return `obj[key]`. For `re.Match`, return `obj.group(key)`.
        - `slice`:          Branch out and return all values in `obj[key]`.
        - `Ellipsis`:       Branch out and return a list of all values.
        - `tuple`/`list`:   Branch out and return a list of all matching values.
                            Read as: `[traverse_obj(obj, branch) for branch in branches]`.
        - `function`:       Branch out and return values filtered by the function.
                            Read as: `[value for key, value in obj if function(key, value)]`.
                            For `Sequence`s, `key` is the index of the value.
        - `dict`            Transform the current object and return a matching dict.
                            Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.

        `tuple`, `list`, and `dict` all support nested paths and branches.

    @params paths           Paths which to traverse by.
    Keyword arguments:
    @param default          Value to return if the paths do not match.
    @param expected_type    If a `type`, only accept final values of this type.
                            If any other callable, try to call the function on each result.
    @param get_all          If `False`, return the first matching result, otherwise all matching ones.
    @param casesense        If `False`, consider string dictionary keys as case insensitive.

    The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API

    @param _is_user_input    Whether the keys are generated from user input.
                            If `True` strings get converted to `int`/`slice` if needed.
    @param _traverse_string  Whether to traverse into objects as strings.
                            If `True`, any non-compatible object will first be
                            converted into a string and then traversed into.


    @returns                The result of the object traversal.
                            If successful, `get_all=True`, and the path branches at least once,
                            then a list of results is returned instead.
                            A list is always returned if the last path branches and no `default` is given.
    """

    # parameter defaults
    default = kwargs.get('default', NO_DEFAULT)
    expected_type = kwargs.get('expected_type')
    get_all = kwargs.get('get_all', True)
    casesense = kwargs.get('casesense', True)
    _is_user_input = kwargs.get('_is_user_input', False)
    _traverse_string = kwargs.get('_traverse_string', False)

    # instant compat
    str = compat_str

    # "sequence" here excludes text types: strings are only traversed when
    # _traverse_string is set.
    is_sequence = lambda x: isinstance(x, compat_collections_abc.Sequence) and not isinstance(x, (str, bytes))
    # stand-in until compat_re_Match is added
    compat_re_Match = type(re.match('a', 'a'))
    # stand-in until casefold.py is added
    try:
        ''.casefold()
        compat_casefold = lambda s: s.casefold()
    except AttributeError:
        compat_casefold = lambda s: s.lower()
    casefold = lambda k: compat_casefold(k) if isinstance(k, str) else k

    # expected_type acts either as an isinstance filter (when it is a
    # type) or as a best-effort converter (when it is any other callable).
    if isinstance(expected_type, type):
        type_test = lambda val: val if isinstance(val, expected_type) else None
    else:
        type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,))

    def from_iterable(iterables):
        # chain.from_iterable(['ABC', 'DEF']) --> A B C D E F
        for it in iterables:
            for item in it:
                yield item

    def apply_key(key, obj):
        # Yield the value(s) that a single path component `key` selects
        # from `obj`; branching keys may yield any number of values.
        if obj is None:
            return

        elif key is None:
            yield obj

        elif isinstance(key, (list, tuple)):
            # Alternative branches: results of each sub-path, in order.
            for branch in key:
                _, result = apply_path(obj, branch)
                for item in result:
                    yield item

        elif key is Ellipsis:
            # Branch out over every value of the current object.
            result = []
            if isinstance(obj, compat_collections_abc.Mapping):
                result = obj.values()
            elif is_sequence(obj):
                result = obj
            elif isinstance(obj, compat_re_Match):
                result = obj.groups()
            elif _traverse_string:
                result = str(obj)
            for item in result:
                yield item

        elif callable(key):
            # Filter function over (key, value) pairs.
            if is_sequence(obj):
                iter_obj = enumerate(obj)
            elif isinstance(obj, compat_collections_abc.Mapping):
                iter_obj = obj.items()
            elif isinstance(obj, compat_re_Match):
                # Pair group numbers with the full match and its groups.
                iter_obj = enumerate(itertools.chain([obj.group()], obj.groups()))
            elif _traverse_string:
                iter_obj = enumerate(str(obj))
            else:
                return
            for item in (v for k, v in iter_obj if try_call(key, args=(k, v))):
                yield item

        elif isinstance(key, dict):
            # Transform: build a dict by traversing a sub-path per key.
            iter_obj = ((k, _traverse_obj(obj, v)) for k, v in key.items())
            yield dict((k, v if v is not None else default) for k, v in iter_obj
                       if v is not None or default is not NO_DEFAULT)

        elif isinstance(obj, compat_collections_abc.Mapping):
            # Plain mapping lookup, optionally case-insensitive
            # (keys are already casefolded by apply_path when not casesense).
            yield (obj.get(key) if casesense or (key in obj)
                   else next((v for k, v in obj.items() if casefold(k) == key), None))

        elif isinstance(obj, compat_re_Match):
            if isinstance(key, int) or casesense:
                try:
                    yield obj.group(key)
                    return
                except IndexError:
                    pass
            if not isinstance(key, str):
                return

            # Case-insensitive named-group lookup.
            yield next((v for k, v in obj.groupdict().items() if casefold(k) == key), None)

        else:
            if _is_user_input:
                key = (int_or_none(key) if ':' not in key
                       else slice(*map(int_or_none, key.split(':'))))

            if not isinstance(key, (int, slice)):
                return

            if not is_sequence(obj):
                if not _traverse_string:
                    return
                obj = str(obj)

            try:
                yield obj[key]
            except IndexError:
                pass

    def apply_path(start_obj, path):
        # Apply each component of `path` in turn; `objs` is the current
        # (possibly branched) set of intermediate results.
        objs = (start_obj,)
        has_branched = False

        for key in variadic(path):
            if _is_user_input and key == ':':
                key = Ellipsis

            if not casesense and isinstance(key, str):
                key = compat_casefold(key)

            if key is Ellipsis or isinstance(key, (list, tuple)) or callable(key):
                has_branched = True

            key_func = functools.partial(apply_key, key)
            objs = from_iterable(map(key_func, objs))

        return has_branched, objs

    def _traverse_obj(obj, path, use_list=True):
        has_branched, results = apply_path(obj, path)
        results = LazyList(x for x in map(type_test, results) if x is not None)

        # use_list: an empty list is still a valid result for a branched
        # final path when no default was supplied.
        if get_all and has_branched:
            return results.exhaust() if results or use_list else None

        return results[0] if results else None

    for index, path in enumerate(paths, 1):
        use_list = default is NO_DEFAULT and index == len(paths)
        result = _traverse_obj(obj, path, use_list)
        if result is not None:
            return result

    return None if default is NO_DEFAULT else default
 6162 
 6163 
 6164 def get_first(obj, keys, **kwargs):
 6165     return traverse_obj(obj, (Ellipsis,) + tuple(variadic(keys)), get_all=False, **kwargs)
 6166 
 6167 
 6168 def join_nonempty(*values, **kwargs):
 6169 
 6170     # parameter defaults
 6171     delim = kwargs.get('delim', '-')
 6172     from_dict = kwargs.get('from_dict')
 6173 
 6174     if from_dict is not None:
 6175         values = (traverse_obj(from_dict, variadic(v)) for v in values)
 6176     return delim.join(map(compat_str, filter(None, values)))

Generated by cgit