path: root/youtube_dl/utils.py
    1 #!/usr/bin/env python
    2 # coding: utf-8
    3 
    4 from __future__ import unicode_literals
    5 
    6 import base64
    7 import binascii
    8 import calendar
    9 import codecs
   10 import collections
   11 import contextlib
   12 import ctypes
   13 import datetime
   14 import email.utils
   15 import email.header
   16 import errno
   17 import functools
   18 import gzip
   19 import io
   20 import itertools
   21 import json
   22 import locale
   23 import math
   24 import operator
   25 import os
   26 import platform
   27 import random
   28 import re
   29 import socket
   30 import ssl
   31 import subprocess
   32 import sys
   33 import tempfile
   34 import time
   35 import traceback
   36 import unicodedata
   37 import xml.etree.ElementTree
   38 import zlib
   39 
   40 from .compat import (
   41     compat_HTMLParseError,
   42     compat_HTMLParser,
   43     compat_HTTPError,
   44     compat_basestring,
   45     compat_chr,
   46     compat_cookiejar,
   47     compat_ctypes_WINFUNCTYPE,
   48     compat_etree_fromstring,
   49     compat_expanduser,
   50     compat_html_entities,
   51     compat_html_entities_html5,
   52     compat_http_client,
   53     compat_integer_types,
   54     compat_kwargs,
   55     compat_os_name,
   56     compat_parse_qs,
   57     compat_shlex_quote,
   58     compat_str,
   59     compat_struct_pack,
   60     compat_struct_unpack,
   61     compat_urllib_error,
   62     compat_urllib_parse,
   63     compat_urllib_parse_urlencode,
   64     compat_urllib_parse_urlparse,
   65     compat_urllib_parse_unquote_plus,
   66     compat_urllib_request,
   67     compat_urlparse,
   68     compat_xpath,
   69 )
   70 
   71 from .socks import (
   72     ProxyType,
   73     sockssocket,
   74 )
   75 
   76 
   77 def register_socks_protocols():
   78     # "Register" SOCKS protocols
   79     # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
   80     # URLs with protocols not in urlparse.uses_netloc are not handled correctly
   81     for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
   82         if scheme not in compat_urlparse.uses_netloc:
   83             compat_urlparse.uses_netloc.append(scheme)
   84 
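       # Illustrative usage (a minimal sketch added by the editor, not part of
       # the upstream module): once the SOCKS schemes are registered, urlparse
       # splits their netloc like any http(s) URL.
       #   >>> register_socks_protocols()
       #   >>> parts = compat_urlparse.urlparse('socks5://127.0.0.1:1080')
       #   >>> parts.hostname, parts.port
       #   ('127.0.0.1', 1080)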
   85 
   86 # This is not clearly defined otherwise
   87 compiled_regex_type = type(re.compile(''))
   88 
   89 
   90 def random_user_agent():
   91     _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
   92     _CHROME_VERSIONS = (
   93         '74.0.3729.129',
   94         '76.0.3780.3',
   95         '76.0.3780.2',
   96         '74.0.3729.128',
   97         '76.0.3780.1',
   98         '76.0.3780.0',
   99         '75.0.3770.15',
  100         '74.0.3729.127',
  101         '74.0.3729.126',
  102         '76.0.3779.1',
  103         '76.0.3779.0',
  104         '75.0.3770.14',
  105         '74.0.3729.125',
  106         '76.0.3778.1',
  107         '76.0.3778.0',
  108         '75.0.3770.13',
  109         '74.0.3729.124',
  110         '74.0.3729.123',
  111         '73.0.3683.121',
  112         '76.0.3777.1',
  113         '76.0.3777.0',
  114         '75.0.3770.12',
  115         '74.0.3729.122',
  116         '76.0.3776.4',
  117         '75.0.3770.11',
  118         '74.0.3729.121',
  119         '76.0.3776.3',
  120         '76.0.3776.2',
  121         '73.0.3683.120',
  122         '74.0.3729.120',
  123         '74.0.3729.119',
  124         '74.0.3729.118',
  125         '76.0.3776.1',
  126         '76.0.3776.0',
  127         '76.0.3775.5',
  128         '75.0.3770.10',
  129         '74.0.3729.117',
  130         '76.0.3775.4',
  131         '76.0.3775.3',
  132         '74.0.3729.116',
  133         '75.0.3770.9',
  134         '76.0.3775.2',
  135         '76.0.3775.1',
  136         '76.0.3775.0',
  137         '75.0.3770.8',
  138         '74.0.3729.115',
  139         '74.0.3729.114',
  140         '76.0.3774.1',
  141         '76.0.3774.0',
  142         '75.0.3770.7',
  143         '74.0.3729.113',
  144         '74.0.3729.112',
  145         '74.0.3729.111',
  146         '76.0.3773.1',
  147         '76.0.3773.0',
  148         '75.0.3770.6',
  149         '74.0.3729.110',
  150         '74.0.3729.109',
  151         '76.0.3772.1',
  152         '76.0.3772.0',
  153         '75.0.3770.5',
  154         '74.0.3729.108',
  155         '74.0.3729.107',
  156         '76.0.3771.1',
  157         '76.0.3771.0',
  158         '75.0.3770.4',
  159         '74.0.3729.106',
  160         '74.0.3729.105',
  161         '75.0.3770.3',
  162         '74.0.3729.104',
  163         '74.0.3729.103',
  164         '74.0.3729.102',
  165         '75.0.3770.2',
  166         '74.0.3729.101',
  167         '75.0.3770.1',
  168         '75.0.3770.0',
  169         '74.0.3729.100',
  170         '75.0.3769.5',
  171         '75.0.3769.4',
  172         '74.0.3729.99',
  173         '75.0.3769.3',
  174         '75.0.3769.2',
  175         '75.0.3768.6',
  176         '74.0.3729.98',
  177         '75.0.3769.1',
  178         '75.0.3769.0',
  179         '74.0.3729.97',
  180         '73.0.3683.119',
  181         '73.0.3683.118',
  182         '74.0.3729.96',
  183         '75.0.3768.5',
  184         '75.0.3768.4',
  185         '75.0.3768.3',
  186         '75.0.3768.2',
  187         '74.0.3729.95',
  188         '74.0.3729.94',
  189         '75.0.3768.1',
  190         '75.0.3768.0',
  191         '74.0.3729.93',
  192         '74.0.3729.92',
  193         '73.0.3683.117',
  194         '74.0.3729.91',
  195         '75.0.3766.3',
  196         '74.0.3729.90',
  197         '75.0.3767.2',
  198         '75.0.3767.1',
  199         '75.0.3767.0',
  200         '74.0.3729.89',
  201         '73.0.3683.116',
  202         '75.0.3766.2',
  203         '74.0.3729.88',
  204         '75.0.3766.1',
  205         '75.0.3766.0',
  206         '74.0.3729.87',
  207         '73.0.3683.115',
  208         '74.0.3729.86',
  209         '75.0.3765.1',
  210         '75.0.3765.0',
  211         '74.0.3729.85',
  212         '73.0.3683.114',
  213         '74.0.3729.84',
  214         '75.0.3764.1',
  215         '75.0.3764.0',
  216         '74.0.3729.83',
  217         '73.0.3683.113',
  218         '75.0.3763.2',
  219         '75.0.3761.4',
  220         '74.0.3729.82',
  221         '75.0.3763.1',
  222         '75.0.3763.0',
  223         '74.0.3729.81',
  224         '73.0.3683.112',
  225         '75.0.3762.1',
  226         '75.0.3762.0',
  227         '74.0.3729.80',
  228         '75.0.3761.3',
  229         '74.0.3729.79',
  230         '73.0.3683.111',
  231         '75.0.3761.2',
  232         '74.0.3729.78',
  233         '74.0.3729.77',
  234         '75.0.3761.1',
  235         '75.0.3761.0',
  236         '73.0.3683.110',
  237         '74.0.3729.76',
  238         '74.0.3729.75',
  239         '75.0.3760.0',
  240         '74.0.3729.74',
  241         '75.0.3759.8',
  242         '75.0.3759.7',
  243         '75.0.3759.6',
  244         '74.0.3729.73',
  245         '75.0.3759.5',
  246         '74.0.3729.72',
  247         '73.0.3683.109',
  248         '75.0.3759.4',
  249         '75.0.3759.3',
  250         '74.0.3729.71',
  251         '75.0.3759.2',
  252         '74.0.3729.70',
  253         '73.0.3683.108',
  254         '74.0.3729.69',
  255         '75.0.3759.1',
  256         '75.0.3759.0',
  257         '74.0.3729.68',
  258         '73.0.3683.107',
  259         '74.0.3729.67',
  260         '75.0.3758.1',
  261         '75.0.3758.0',
  262         '74.0.3729.66',
  263         '73.0.3683.106',
  264         '74.0.3729.65',
  265         '75.0.3757.1',
  266         '75.0.3757.0',
  267         '74.0.3729.64',
  268         '73.0.3683.105',
  269         '74.0.3729.63',
  270         '75.0.3756.1',
  271         '75.0.3756.0',
  272         '74.0.3729.62',
  273         '73.0.3683.104',
  274         '75.0.3755.3',
  275         '75.0.3755.2',
  276         '73.0.3683.103',
  277         '75.0.3755.1',
  278         '75.0.3755.0',
  279         '74.0.3729.61',
  280         '73.0.3683.102',
  281         '74.0.3729.60',
  282         '75.0.3754.2',
  283         '74.0.3729.59',
  284         '75.0.3753.4',
  285         '74.0.3729.58',
  286         '75.0.3754.1',
  287         '75.0.3754.0',
  288         '74.0.3729.57',
  289         '73.0.3683.101',
  290         '75.0.3753.3',
  291         '75.0.3752.2',
  292         '75.0.3753.2',
  293         '74.0.3729.56',
  294         '75.0.3753.1',
  295         '75.0.3753.0',
  296         '74.0.3729.55',
  297         '73.0.3683.100',
  298         '74.0.3729.54',
  299         '75.0.3752.1',
  300         '75.0.3752.0',
  301         '74.0.3729.53',
  302         '73.0.3683.99',
  303         '74.0.3729.52',
  304         '75.0.3751.1',
  305         '75.0.3751.0',
  306         '74.0.3729.51',
  307         '73.0.3683.98',
  308         '74.0.3729.50',
  309         '75.0.3750.0',
  310         '74.0.3729.49',
  311         '74.0.3729.48',
  312         '74.0.3729.47',
  313         '75.0.3749.3',
  314         '74.0.3729.46',
  315         '73.0.3683.97',
  316         '75.0.3749.2',
  317         '74.0.3729.45',
  318         '75.0.3749.1',
  319         '75.0.3749.0',
  320         '74.0.3729.44',
  321         '73.0.3683.96',
  322         '74.0.3729.43',
  323         '74.0.3729.42',
  324         '75.0.3748.1',
  325         '75.0.3748.0',
  326         '74.0.3729.41',
  327         '75.0.3747.1',
  328         '73.0.3683.95',
  329         '75.0.3746.4',
  330         '74.0.3729.40',
  331         '74.0.3729.39',
  332         '75.0.3747.0',
  333         '75.0.3746.3',
  334         '75.0.3746.2',
  335         '74.0.3729.38',
  336         '75.0.3746.1',
  337         '75.0.3746.0',
  338         '74.0.3729.37',
  339         '73.0.3683.94',
  340         '75.0.3745.5',
  341         '75.0.3745.4',
  342         '75.0.3745.3',
  343         '75.0.3745.2',
  344         '74.0.3729.36',
  345         '75.0.3745.1',
  346         '75.0.3745.0',
  347         '75.0.3744.2',
  348         '74.0.3729.35',
  349         '73.0.3683.93',
  350         '74.0.3729.34',
  351         '75.0.3744.1',
  352         '75.0.3744.0',
  353         '74.0.3729.33',
  354         '73.0.3683.92',
  355         '74.0.3729.32',
  356         '74.0.3729.31',
  357         '73.0.3683.91',
  358         '75.0.3741.2',
  359         '75.0.3740.5',
  360         '74.0.3729.30',
  361         '75.0.3741.1',
  362         '75.0.3741.0',
  363         '74.0.3729.29',
  364         '75.0.3740.4',
  365         '73.0.3683.90',
  366         '74.0.3729.28',
  367         '75.0.3740.3',
  368         '73.0.3683.89',
  369         '75.0.3740.2',
  370         '74.0.3729.27',
  371         '75.0.3740.1',
  372         '75.0.3740.0',
  373         '74.0.3729.26',
  374         '73.0.3683.88',
  375         '73.0.3683.87',
  376         '74.0.3729.25',
  377         '75.0.3739.1',
  378         '75.0.3739.0',
  379         '73.0.3683.86',
  380         '74.0.3729.24',
  381         '73.0.3683.85',
  382         '75.0.3738.4',
  383         '75.0.3738.3',
  384         '75.0.3738.2',
  385         '75.0.3738.1',
  386         '75.0.3738.0',
  387         '74.0.3729.23',
  388         '73.0.3683.84',
  389         '74.0.3729.22',
  390         '74.0.3729.21',
  391         '75.0.3737.1',
  392         '75.0.3737.0',
  393         '74.0.3729.20',
  394         '73.0.3683.83',
  395         '74.0.3729.19',
  396         '75.0.3736.1',
  397         '75.0.3736.0',
  398         '74.0.3729.18',
  399         '73.0.3683.82',
  400         '74.0.3729.17',
  401         '75.0.3735.1',
  402         '75.0.3735.0',
  403         '74.0.3729.16',
  404         '73.0.3683.81',
  405         '75.0.3734.1',
  406         '75.0.3734.0',
  407         '74.0.3729.15',
  408         '73.0.3683.80',
  409         '74.0.3729.14',
  410         '75.0.3733.1',
  411         '75.0.3733.0',
  412         '75.0.3732.1',
  413         '74.0.3729.13',
  414         '74.0.3729.12',
  415         '73.0.3683.79',
  416         '74.0.3729.11',
  417         '75.0.3732.0',
  418         '74.0.3729.10',
  419         '73.0.3683.78',
  420         '74.0.3729.9',
  421         '74.0.3729.8',
  422         '74.0.3729.7',
  423         '75.0.3731.3',
  424         '75.0.3731.2',
  425         '75.0.3731.0',
  426         '74.0.3729.6',
  427         '73.0.3683.77',
  428         '73.0.3683.76',
  429         '75.0.3730.5',
  430         '75.0.3730.4',
  431         '73.0.3683.75',
  432         '74.0.3729.5',
  433         '73.0.3683.74',
  434         '75.0.3730.3',
  435         '75.0.3730.2',
  436         '74.0.3729.4',
  437         '73.0.3683.73',
  438         '73.0.3683.72',
  439         '75.0.3730.1',
  440         '75.0.3730.0',
  441         '74.0.3729.3',
  442         '73.0.3683.71',
  443         '74.0.3729.2',
  444         '73.0.3683.70',
  445         '74.0.3729.1',
  446         '74.0.3729.0',
  447         '74.0.3726.4',
  448         '73.0.3683.69',
  449         '74.0.3726.3',
  450         '74.0.3728.0',
  451         '74.0.3726.2',
  452         '73.0.3683.68',
  453         '74.0.3726.1',
  454         '74.0.3726.0',
  455         '74.0.3725.4',
  456         '73.0.3683.67',
  457         '73.0.3683.66',
  458         '74.0.3725.3',
  459         '74.0.3725.2',
  460         '74.0.3725.1',
  461         '74.0.3724.8',
  462         '74.0.3725.0',
  463         '73.0.3683.65',
  464         '74.0.3724.7',
  465         '74.0.3724.6',
  466         '74.0.3724.5',
  467         '74.0.3724.4',
  468         '74.0.3724.3',
  469         '74.0.3724.2',
  470         '74.0.3724.1',
  471         '74.0.3724.0',
  472         '73.0.3683.64',
  473         '74.0.3723.1',
  474         '74.0.3723.0',
  475         '73.0.3683.63',
  476         '74.0.3722.1',
  477         '74.0.3722.0',
  478         '73.0.3683.62',
  479         '74.0.3718.9',
  480         '74.0.3702.3',
  481         '74.0.3721.3',
  482         '74.0.3721.2',
  483         '74.0.3721.1',
  484         '74.0.3721.0',
  485         '74.0.3720.6',
  486         '73.0.3683.61',
  487         '72.0.3626.122',
  488         '73.0.3683.60',
  489         '74.0.3720.5',
  490         '72.0.3626.121',
  491         '74.0.3718.8',
  492         '74.0.3720.4',
  493         '74.0.3720.3',
  494         '74.0.3718.7',
  495         '74.0.3720.2',
  496         '74.0.3720.1',
  497         '74.0.3720.0',
  498         '74.0.3718.6',
  499         '74.0.3719.5',
  500         '73.0.3683.59',
  501         '74.0.3718.5',
  502         '74.0.3718.4',
  503         '74.0.3719.4',
  504         '74.0.3719.3',
  505         '74.0.3719.2',
  506         '74.0.3719.1',
  507         '73.0.3683.58',
  508         '74.0.3719.0',
  509         '73.0.3683.57',
  510         '73.0.3683.56',
  511         '74.0.3718.3',
  512         '73.0.3683.55',
  513         '74.0.3718.2',
  514         '74.0.3718.1',
  515         '74.0.3718.0',
  516         '73.0.3683.54',
  517         '74.0.3717.2',
  518         '73.0.3683.53',
  519         '74.0.3717.1',
  520         '74.0.3717.0',
  521         '73.0.3683.52',
  522         '74.0.3716.1',
  523         '74.0.3716.0',
  524         '73.0.3683.51',
  525         '74.0.3715.1',
  526         '74.0.3715.0',
  527         '73.0.3683.50',
  528         '74.0.3711.2',
  529         '74.0.3714.2',
  530         '74.0.3713.3',
  531         '74.0.3714.1',
  532         '74.0.3714.0',
  533         '73.0.3683.49',
  534         '74.0.3713.1',
  535         '74.0.3713.0',
  536         '72.0.3626.120',
  537         '73.0.3683.48',
  538         '74.0.3712.2',
  539         '74.0.3712.1',
  540         '74.0.3712.0',
  541         '73.0.3683.47',
  542         '72.0.3626.119',
  543         '73.0.3683.46',
  544         '74.0.3710.2',
  545         '72.0.3626.118',
  546         '74.0.3711.1',
  547         '74.0.3711.0',
  548         '73.0.3683.45',
  549         '72.0.3626.117',
  550         '74.0.3710.1',
  551         '74.0.3710.0',
  552         '73.0.3683.44',
  553         '72.0.3626.116',
  554         '74.0.3709.1',
  555         '74.0.3709.0',
  556         '74.0.3704.9',
  557         '73.0.3683.43',
  558         '72.0.3626.115',
  559         '74.0.3704.8',
  560         '74.0.3704.7',
  561         '74.0.3708.0',
  562         '74.0.3706.7',
  563         '74.0.3704.6',
  564         '73.0.3683.42',
  565         '72.0.3626.114',
  566         '74.0.3706.6',
  567         '72.0.3626.113',
  568         '74.0.3704.5',
  569         '74.0.3706.5',
  570         '74.0.3706.4',
  571         '74.0.3706.3',
  572         '74.0.3706.2',
  573         '74.0.3706.1',
  574         '74.0.3706.0',
  575         '73.0.3683.41',
  576         '72.0.3626.112',
  577         '74.0.3705.1',
  578         '74.0.3705.0',
  579         '73.0.3683.40',
  580         '72.0.3626.111',
  581         '73.0.3683.39',
  582         '74.0.3704.4',
  583         '73.0.3683.38',
  584         '74.0.3704.3',
  585         '74.0.3704.2',
  586         '74.0.3704.1',
  587         '74.0.3704.0',
  588         '73.0.3683.37',
  589         '72.0.3626.110',
  590         '72.0.3626.109',
  591         '74.0.3703.3',
  592         '74.0.3703.2',
  593         '73.0.3683.36',
  594         '74.0.3703.1',
  595         '74.0.3703.0',
  596         '73.0.3683.35',
  597         '72.0.3626.108',
  598         '74.0.3702.2',
  599         '74.0.3699.3',
  600         '74.0.3702.1',
  601         '74.0.3702.0',
  602         '73.0.3683.34',
  603         '72.0.3626.107',
  604         '73.0.3683.33',
  605         '74.0.3701.1',
  606         '74.0.3701.0',
  607         '73.0.3683.32',
  608         '73.0.3683.31',
  609         '72.0.3626.105',
  610         '74.0.3700.1',
  611         '74.0.3700.0',
  612         '73.0.3683.29',
  613         '72.0.3626.103',
  614         '74.0.3699.2',
  615         '74.0.3699.1',
  616         '74.0.3699.0',
  617         '73.0.3683.28',
  618         '72.0.3626.102',
  619         '73.0.3683.27',
  620         '73.0.3683.26',
  621         '74.0.3698.0',
  622         '74.0.3696.2',
  623         '72.0.3626.101',
  624         '73.0.3683.25',
  625         '74.0.3696.1',
  626         '74.0.3696.0',
  627         '74.0.3694.8',
  628         '72.0.3626.100',
  629         '74.0.3694.7',
  630         '74.0.3694.6',
  631         '74.0.3694.5',
  632         '74.0.3694.4',
  633         '72.0.3626.99',
  634         '72.0.3626.98',
  635         '74.0.3694.3',
  636         '73.0.3683.24',
  637         '72.0.3626.97',
  638         '72.0.3626.96',
  639         '72.0.3626.95',
  640         '73.0.3683.23',
  641         '72.0.3626.94',
  642         '73.0.3683.22',
  643         '73.0.3683.21',
  644         '72.0.3626.93',
  645         '74.0.3694.2',
  646         '72.0.3626.92',
  647         '74.0.3694.1',
  648         '74.0.3694.0',
  649         '74.0.3693.6',
  650         '73.0.3683.20',
  651         '72.0.3626.91',
  652         '74.0.3693.5',
  653         '74.0.3693.4',
  654         '74.0.3693.3',
  655         '74.0.3693.2',
  656         '73.0.3683.19',
  657         '74.0.3693.1',
  658         '74.0.3693.0',
  659         '73.0.3683.18',
  660         '72.0.3626.90',
  661         '74.0.3692.1',
  662         '74.0.3692.0',
  663         '73.0.3683.17',
  664         '72.0.3626.89',
  665         '74.0.3687.3',
  666         '74.0.3691.1',
  667         '74.0.3691.0',
  668         '73.0.3683.16',
  669         '72.0.3626.88',
  670         '72.0.3626.87',
  671         '73.0.3683.15',
  672         '74.0.3690.1',
  673         '74.0.3690.0',
  674         '73.0.3683.14',
  675         '72.0.3626.86',
  676         '73.0.3683.13',
  677         '73.0.3683.12',
  678         '74.0.3689.1',
  679         '74.0.3689.0',
  680         '73.0.3683.11',
  681         '72.0.3626.85',
  682         '73.0.3683.10',
  683         '72.0.3626.84',
  684         '73.0.3683.9',
  685         '74.0.3688.1',
  686         '74.0.3688.0',
  687         '73.0.3683.8',
  688         '72.0.3626.83',
  689         '74.0.3687.2',
  690         '74.0.3687.1',
  691         '74.0.3687.0',
  692         '73.0.3683.7',
  693         '72.0.3626.82',
  694         '74.0.3686.4',
  695         '72.0.3626.81',
  696         '74.0.3686.3',
  697         '74.0.3686.2',
  698         '74.0.3686.1',
  699         '74.0.3686.0',
  700         '73.0.3683.6',
  701         '72.0.3626.80',
  702         '74.0.3685.1',
  703         '74.0.3685.0',
  704         '73.0.3683.5',
  705         '72.0.3626.79',
  706         '74.0.3684.1',
  707         '74.0.3684.0',
  708         '73.0.3683.4',
  709         '72.0.3626.78',
  710         '72.0.3626.77',
  711         '73.0.3683.3',
  712         '73.0.3683.2',
  713         '72.0.3626.76',
  714         '73.0.3683.1',
  715         '73.0.3683.0',
  716         '72.0.3626.75',
  717         '71.0.3578.141',
  718         '73.0.3682.1',
  719         '73.0.3682.0',
  720         '72.0.3626.74',
  721         '71.0.3578.140',
  722         '73.0.3681.4',
  723         '73.0.3681.3',
  724         '73.0.3681.2',
  725         '73.0.3681.1',
  726         '73.0.3681.0',
  727         '72.0.3626.73',
  728         '71.0.3578.139',
  729         '72.0.3626.72',
  730         '72.0.3626.71',
  731         '73.0.3680.1',
  732         '73.0.3680.0',
  733         '72.0.3626.70',
  734         '71.0.3578.138',
  735         '73.0.3678.2',
  736         '73.0.3679.1',
  737         '73.0.3679.0',
  738         '72.0.3626.69',
  739         '71.0.3578.137',
  740         '73.0.3678.1',
  741         '73.0.3678.0',
  742         '71.0.3578.136',
  743         '73.0.3677.1',
  744         '73.0.3677.0',
  745         '72.0.3626.68',
  746         '72.0.3626.67',
  747         '71.0.3578.135',
  748         '73.0.3676.1',
  749         '73.0.3676.0',
  750         '73.0.3674.2',
  751         '72.0.3626.66',
  752         '71.0.3578.134',
  753         '73.0.3674.1',
  754         '73.0.3674.0',
  755         '72.0.3626.65',
  756         '71.0.3578.133',
  757         '73.0.3673.2',
  758         '73.0.3673.1',
  759         '73.0.3673.0',
  760         '72.0.3626.64',
  761         '71.0.3578.132',
  762         '72.0.3626.63',
  763         '72.0.3626.62',
  764         '72.0.3626.61',
  765         '72.0.3626.60',
  766         '73.0.3672.1',
  767         '73.0.3672.0',
  768         '72.0.3626.59',
  769         '71.0.3578.131',
  770         '73.0.3671.3',
  771         '73.0.3671.2',
  772         '73.0.3671.1',
  773         '73.0.3671.0',
  774         '72.0.3626.58',
  775         '71.0.3578.130',
  776         '73.0.3670.1',
  777         '73.0.3670.0',
  778         '72.0.3626.57',
  779         '71.0.3578.129',
  780         '73.0.3669.1',
  781         '73.0.3669.0',
  782         '72.0.3626.56',
  783         '71.0.3578.128',
  784         '73.0.3668.2',
  785         '73.0.3668.1',
  786         '73.0.3668.0',
  787         '72.0.3626.55',
  788         '71.0.3578.127',
  789         '73.0.3667.2',
  790         '73.0.3667.1',
  791         '73.0.3667.0',
  792         '72.0.3626.54',
  793         '71.0.3578.126',
  794         '73.0.3666.1',
  795         '73.0.3666.0',
  796         '72.0.3626.53',
  797         '71.0.3578.125',
  798         '73.0.3665.4',
  799         '73.0.3665.3',
  800         '72.0.3626.52',
  801         '73.0.3665.2',
  802         '73.0.3664.4',
  803         '73.0.3665.1',
  804         '73.0.3665.0',
  805         '72.0.3626.51',
  806         '71.0.3578.124',
  807         '72.0.3626.50',
  808         '73.0.3664.3',
  809         '73.0.3664.2',
  810         '73.0.3664.1',
  811         '73.0.3664.0',
  812         '73.0.3663.2',
  813         '72.0.3626.49',
  814         '71.0.3578.123',
  815         '73.0.3663.1',
  816         '73.0.3663.0',
  817         '72.0.3626.48',
  818         '71.0.3578.122',
  819         '73.0.3662.1',
  820         '73.0.3662.0',
  821         '72.0.3626.47',
  822         '71.0.3578.121',
  823         '73.0.3661.1',
  824         '72.0.3626.46',
  825         '73.0.3661.0',
  826         '72.0.3626.45',
  827         '71.0.3578.120',
  828         '73.0.3660.2',
  829         '73.0.3660.1',
  830         '73.0.3660.0',
  831         '72.0.3626.44',
  832         '71.0.3578.119',
  833         '73.0.3659.1',
  834         '73.0.3659.0',
  835         '72.0.3626.43',
  836         '71.0.3578.118',
  837         '73.0.3658.1',
  838         '73.0.3658.0',
  839         '72.0.3626.42',
  840         '71.0.3578.117',
  841         '73.0.3657.1',
  842         '73.0.3657.0',
  843         '72.0.3626.41',
  844         '71.0.3578.116',
  845         '73.0.3656.1',
  846         '73.0.3656.0',
  847         '72.0.3626.40',
  848         '71.0.3578.115',
  849         '73.0.3655.1',
  850         '73.0.3655.0',
  851         '72.0.3626.39',
  852         '71.0.3578.114',
  853         '73.0.3654.1',
  854         '73.0.3654.0',
  855         '72.0.3626.38',
  856         '71.0.3578.113',
  857         '73.0.3653.1',
  858         '73.0.3653.0',
  859         '72.0.3626.37',
  860         '71.0.3578.112',
  861         '73.0.3652.1',
  862         '73.0.3652.0',
  863         '72.0.3626.36',
  864         '71.0.3578.111',
  865         '73.0.3651.1',
  866         '73.0.3651.0',
  867         '72.0.3626.35',
  868         '71.0.3578.110',
  869         '73.0.3650.1',
  870         '73.0.3650.0',
  871         '72.0.3626.34',
  872         '71.0.3578.109',
  873         '73.0.3649.1',
  874         '73.0.3649.0',
  875         '72.0.3626.33',
  876         '71.0.3578.108',
  877         '73.0.3648.2',
  878         '73.0.3648.1',
  879         '73.0.3648.0',
  880         '72.0.3626.32',
  881         '71.0.3578.107',
  882         '73.0.3647.2',
  883         '73.0.3647.1',
  884         '73.0.3647.0',
  885         '72.0.3626.31',
  886         '71.0.3578.106',
  887         '73.0.3635.3',
  888         '73.0.3646.2',
  889         '73.0.3646.1',
  890         '73.0.3646.0',
  891         '72.0.3626.30',
  892         '71.0.3578.105',
  893         '72.0.3626.29',
  894         '73.0.3645.2',
  895         '73.0.3645.1',
  896         '73.0.3645.0',
  897         '72.0.3626.28',
  898         '71.0.3578.104',
  899         '72.0.3626.27',
  900         '72.0.3626.26',
  901         '72.0.3626.25',
  902         '72.0.3626.24',
  903         '73.0.3644.0',
  904         '73.0.3643.2',
  905         '72.0.3626.23',
  906         '71.0.3578.103',
  907         '73.0.3643.1',
  908         '73.0.3643.0',
  909         '72.0.3626.22',
  910         '71.0.3578.102',
  911         '73.0.3642.1',
  912         '73.0.3642.0',
  913         '72.0.3626.21',
  914         '71.0.3578.101',
  915         '73.0.3641.1',
  916         '73.0.3641.0',
  917         '72.0.3626.20',
  918         '71.0.3578.100',
  919         '72.0.3626.19',
  920         '73.0.3640.1',
  921         '73.0.3640.0',
  922         '72.0.3626.18',
  923         '73.0.3639.1',
  924         '71.0.3578.99',
  925         '73.0.3639.0',
  926         '72.0.3626.17',
  927         '73.0.3638.2',
  928         '72.0.3626.16',
  929         '73.0.3638.1',
  930         '73.0.3638.0',
  931         '72.0.3626.15',
  932         '71.0.3578.98',
  933         '73.0.3635.2',
  934         '71.0.3578.97',
  935         '73.0.3637.1',
  936         '73.0.3637.0',
  937         '72.0.3626.14',
  938         '71.0.3578.96',
  939         '71.0.3578.95',
  940         '72.0.3626.13',
  941         '71.0.3578.94',
  942         '73.0.3636.2',
  943         '71.0.3578.93',
  944         '73.0.3636.1',
  945         '73.0.3636.0',
  946         '72.0.3626.12',
  947         '71.0.3578.92',
  948         '73.0.3635.1',
  949         '73.0.3635.0',
  950         '72.0.3626.11',
  951         '71.0.3578.91',
  952         '73.0.3634.2',
  953         '73.0.3634.1',
  954         '73.0.3634.0',
  955         '72.0.3626.10',
  956         '71.0.3578.90',
  957         '71.0.3578.89',
  958         '73.0.3633.2',
  959         '73.0.3633.1',
  960         '73.0.3633.0',
  961         '72.0.3610.4',
  962         '72.0.3626.9',
  963         '71.0.3578.88',
  964         '73.0.3632.5',
  965         '73.0.3632.4',
  966         '73.0.3632.3',
  967         '73.0.3632.2',
  968         '73.0.3632.1',
  969         '73.0.3632.0',
  970         '72.0.3626.8',
  971         '71.0.3578.87',
  972         '73.0.3631.2',
  973         '73.0.3631.1',
  974         '73.0.3631.0',
  975         '72.0.3626.7',
  976         '71.0.3578.86',
  977         '72.0.3626.6',
  978         '73.0.3630.1',
  979         '73.0.3630.0',
  980         '72.0.3626.5',
  981         '71.0.3578.85',
  982         '72.0.3626.4',
  983         '73.0.3628.3',
  984         '73.0.3628.2',
  985         '73.0.3629.1',
  986         '73.0.3629.0',
  987         '72.0.3626.3',
  988         '71.0.3578.84',
  989         '73.0.3628.1',
  990         '73.0.3628.0',
  991         '71.0.3578.83',
  992         '73.0.3627.1',
  993         '73.0.3627.0',
  994         '72.0.3626.2',
  995         '71.0.3578.82',
  996         '71.0.3578.81',
  997         '71.0.3578.80',
  998         '72.0.3626.1',
  999         '72.0.3626.0',
 1000         '71.0.3578.79',
 1001         '70.0.3538.124',
 1002         '71.0.3578.78',
 1003         '72.0.3623.4',
 1004         '72.0.3625.2',
 1005         '72.0.3625.1',
 1006         '72.0.3625.0',
 1007         '71.0.3578.77',
 1008         '70.0.3538.123',
 1009         '72.0.3624.4',
 1010         '72.0.3624.3',
 1011         '72.0.3624.2',
 1012         '71.0.3578.76',
 1013         '72.0.3624.1',
 1014         '72.0.3624.0',
 1015         '72.0.3623.3',
 1016         '71.0.3578.75',
 1017         '70.0.3538.122',
 1018         '71.0.3578.74',
 1019         '72.0.3623.2',
 1020         '72.0.3610.3',
 1021         '72.0.3623.1',
 1022         '72.0.3623.0',
 1023         '72.0.3622.3',
 1024         '72.0.3622.2',
 1025         '71.0.3578.73',
 1026         '70.0.3538.121',
 1027         '72.0.3622.1',
 1028         '72.0.3622.0',
 1029         '71.0.3578.72',
 1030         '70.0.3538.120',
 1031         '72.0.3621.1',
 1032         '72.0.3621.0',
 1033         '71.0.3578.71',
 1034         '70.0.3538.119',
 1035         '72.0.3620.1',
 1036         '72.0.3620.0',
 1037         '71.0.3578.70',
 1038         '70.0.3538.118',
 1039         '71.0.3578.69',
 1040         '72.0.3619.1',
 1041         '72.0.3619.0',
 1042         '71.0.3578.68',
 1043         '70.0.3538.117',
 1044         '71.0.3578.67',
 1045         '72.0.3618.1',
 1046         '72.0.3618.0',
 1047         '71.0.3578.66',
 1048         '70.0.3538.116',
 1049         '72.0.3617.1',
 1050         '72.0.3617.0',
 1051         '71.0.3578.65',
 1052         '70.0.3538.115',
 1053         '72.0.3602.3',
 1054         '71.0.3578.64',
 1055         '72.0.3616.1',
 1056         '72.0.3616.0',
 1057         '71.0.3578.63',
 1058         '70.0.3538.114',
 1059         '71.0.3578.62',
 1060         '72.0.3615.1',
 1061         '72.0.3615.0',
 1062         '71.0.3578.61',
 1063         '70.0.3538.113',
 1064         '72.0.3614.1',
 1065         '72.0.3614.0',
 1066         '71.0.3578.60',
 1067         '70.0.3538.112',
 1068         '72.0.3613.1',
 1069         '72.0.3613.0',
 1070         '71.0.3578.59',
 1071         '70.0.3538.111',
 1072         '72.0.3612.2',
 1073         '72.0.3612.1',
 1074         '72.0.3612.0',
 1075         '70.0.3538.110',
 1076         '71.0.3578.58',
 1077         '70.0.3538.109',
 1078         '72.0.3611.2',
 1079         '72.0.3611.1',
 1080         '72.0.3611.0',
 1081         '71.0.3578.57',
 1082         '70.0.3538.108',
 1083         '72.0.3610.2',
 1084         '71.0.3578.56',
 1085         '71.0.3578.55',
 1086         '72.0.3610.1',
 1087         '72.0.3610.0',
 1088         '71.0.3578.54',
 1089         '70.0.3538.107',
 1090         '71.0.3578.53',
 1091         '72.0.3609.3',
 1092         '71.0.3578.52',
 1093         '72.0.3609.2',
 1094         '71.0.3578.51',
 1095         '72.0.3608.5',
 1096         '72.0.3609.1',
 1097         '72.0.3609.0',
 1098         '71.0.3578.50',
 1099         '70.0.3538.106',
 1100         '72.0.3608.4',
 1101         '72.0.3608.3',
 1102         '72.0.3608.2',
 1103         '71.0.3578.49',
 1104         '72.0.3608.1',
 1105         '72.0.3608.0',
 1106         '70.0.3538.105',
 1107         '71.0.3578.48',
 1108         '72.0.3607.1',
 1109         '72.0.3607.0',
 1110         '71.0.3578.47',
 1111         '70.0.3538.104',
 1112         '72.0.3606.2',
 1113         '72.0.3606.1',
 1114         '72.0.3606.0',
 1115         '71.0.3578.46',
 1116         '70.0.3538.103',
 1117         '70.0.3538.102',
 1118         '72.0.3605.3',
 1119         '72.0.3605.2',
 1120         '72.0.3605.1',
 1121         '72.0.3605.0',
 1122         '71.0.3578.45',
 1123         '70.0.3538.101',
 1124         '71.0.3578.44',
 1125         '71.0.3578.43',
 1126         '70.0.3538.100',
 1127         '70.0.3538.99',
 1128         '71.0.3578.42',
 1129         '72.0.3604.1',
 1130         '72.0.3604.0',
 1131         '71.0.3578.41',
 1132         '70.0.3538.98',
 1133         '71.0.3578.40',
 1134         '72.0.3603.2',
 1135         '72.0.3603.1',
 1136         '72.0.3603.0',
 1137         '71.0.3578.39',
 1138         '70.0.3538.97',
 1139         '72.0.3602.2',
 1140         '71.0.3578.38',
 1141         '71.0.3578.37',
 1142         '72.0.3602.1',
 1143         '72.0.3602.0',
 1144         '71.0.3578.36',
 1145         '70.0.3538.96',
 1146         '72.0.3601.1',
 1147         '72.0.3601.0',
 1148         '71.0.3578.35',
 1149         '70.0.3538.95',
 1150         '72.0.3600.1',
 1151         '72.0.3600.0',
 1152         '71.0.3578.34',
 1153         '70.0.3538.94',
 1154         '72.0.3599.3',
 1155         '72.0.3599.2',
 1156         '72.0.3599.1',
 1157         '72.0.3599.0',
 1158         '71.0.3578.33',
 1159         '70.0.3538.93',
 1160         '72.0.3598.1',
 1161         '72.0.3598.0',
 1162         '71.0.3578.32',
 1163         '70.0.3538.87',
 1164         '72.0.3597.1',
 1165         '72.0.3597.0',
 1166         '72.0.3596.2',
 1167         '71.0.3578.31',
 1168         '70.0.3538.86',
 1169         '71.0.3578.30',
 1170         '71.0.3578.29',
 1171         '72.0.3596.1',
 1172         '72.0.3596.0',
 1173         '71.0.3578.28',
 1174         '70.0.3538.85',
 1175         '72.0.3595.2',
 1176         '72.0.3591.3',
 1177         '72.0.3595.1',
 1178         '72.0.3595.0',
 1179         '71.0.3578.27',
 1180         '70.0.3538.84',
 1181         '72.0.3594.1',
 1182         '72.0.3594.0',
 1183         '71.0.3578.26',
 1184         '70.0.3538.83',
 1185         '72.0.3593.2',
 1186         '72.0.3593.1',
 1187         '72.0.3593.0',
 1188         '71.0.3578.25',
 1189         '70.0.3538.82',
 1190         '72.0.3589.3',
 1191         '72.0.3592.2',
 1192         '72.0.3592.1',
 1193         '72.0.3592.0',
 1194         '71.0.3578.24',
 1195         '72.0.3589.2',
 1196         '70.0.3538.81',
 1197         '70.0.3538.80',
 1198         '72.0.3591.2',
 1199         '72.0.3591.1',
 1200         '72.0.3591.0',
 1201         '71.0.3578.23',
 1202         '70.0.3538.79',
 1203         '71.0.3578.22',
 1204         '72.0.3590.1',
 1205         '72.0.3590.0',
 1206         '71.0.3578.21',
 1207         '70.0.3538.78',
 1208         '70.0.3538.77',
 1209         '72.0.3589.1',
 1210         '72.0.3589.0',
 1211         '71.0.3578.20',
 1212         '70.0.3538.76',
 1213         '71.0.3578.19',
 1214         '70.0.3538.75',
 1215         '72.0.3588.1',
 1216         '72.0.3588.0',
 1217         '71.0.3578.18',
 1218         '70.0.3538.74',
 1219         '72.0.3586.2',
 1220         '72.0.3587.0',
 1221         '71.0.3578.17',
 1222         '70.0.3538.73',
 1223         '72.0.3586.1',
 1224         '72.0.3586.0',
 1225         '71.0.3578.16',
 1226         '70.0.3538.72',
 1227         '72.0.3585.1',
 1228         '72.0.3585.0',
 1229         '71.0.3578.15',
 1230         '70.0.3538.71',
 1231         '71.0.3578.14',
 1232         '72.0.3584.1',
 1233         '72.0.3584.0',
 1234         '71.0.3578.13',
 1235         '70.0.3538.70',
 1236         '72.0.3583.2',
 1237         '71.0.3578.12',
 1238         '72.0.3583.1',
 1239         '72.0.3583.0',
 1240         '71.0.3578.11',
 1241         '70.0.3538.69',
 1242         '71.0.3578.10',
 1243         '72.0.3582.0',
 1244         '72.0.3581.4',
 1245         '71.0.3578.9',
 1246         '70.0.3538.67',
 1247         '72.0.3581.3',
 1248         '72.0.3581.2',
 1249         '72.0.3581.1',
 1250         '72.0.3581.0',
 1251         '71.0.3578.8',
 1252         '70.0.3538.66',
 1253         '72.0.3580.1',
 1254         '72.0.3580.0',
 1255         '71.0.3578.7',
 1256         '70.0.3538.65',
 1257         '71.0.3578.6',
 1258         '72.0.3579.1',
 1259         '72.0.3579.0',
 1260         '71.0.3578.5',
 1261         '70.0.3538.64',
 1262         '71.0.3578.4',
 1263         '71.0.3578.3',
 1264         '71.0.3578.2',
 1265         '71.0.3578.1',
 1266         '71.0.3578.0',
 1267         '70.0.3538.63',
 1268         '69.0.3497.128',
 1269         '70.0.3538.62',
 1270         '70.0.3538.61',
 1271         '70.0.3538.60',
 1272         '70.0.3538.59',
 1273         '71.0.3577.1',
 1274         '71.0.3577.0',
 1275         '70.0.3538.58',
 1276         '69.0.3497.127',
 1277         '71.0.3576.2',
 1278         '71.0.3576.1',
 1279         '71.0.3576.0',
 1280         '70.0.3538.57',
 1281         '70.0.3538.56',
 1282         '71.0.3575.2',
 1283         '70.0.3538.55',
 1284         '69.0.3497.126',
 1285         '70.0.3538.54',
 1286         '71.0.3575.1',
 1287         '71.0.3575.0',
 1288         '71.0.3574.1',
 1289         '71.0.3574.0',
 1290         '70.0.3538.53',
 1291         '69.0.3497.125',
 1292         '70.0.3538.52',
 1293         '71.0.3573.1',
 1294         '71.0.3573.0',
 1295         '70.0.3538.51',
 1296         '69.0.3497.124',
 1297         '71.0.3572.1',
 1298         '71.0.3572.0',
 1299         '70.0.3538.50',
 1300         '69.0.3497.123',
 1301         '71.0.3571.2',
 1302         '70.0.3538.49',
 1303         '69.0.3497.122',
 1304         '71.0.3571.1',
 1305         '71.0.3571.0',
 1306         '70.0.3538.48',
 1307         '69.0.3497.121',
 1308         '71.0.3570.1',
 1309         '71.0.3570.0',
 1310         '70.0.3538.47',
 1311         '69.0.3497.120',
 1312         '71.0.3568.2',
 1313         '71.0.3569.1',
 1314         '71.0.3569.0',
 1315         '70.0.3538.46',
 1316         '69.0.3497.119',
 1317         '70.0.3538.45',
 1318         '71.0.3568.1',
 1319         '71.0.3568.0',
 1320         '70.0.3538.44',
 1321         '69.0.3497.118',
 1322         '70.0.3538.43',
 1323         '70.0.3538.42',
 1324         '71.0.3567.1',
 1325         '71.0.3567.0',
 1326         '70.0.3538.41',
 1327         '69.0.3497.117',
 1328         '71.0.3566.1',
 1329         '71.0.3566.0',
 1330         '70.0.3538.40',
 1331         '69.0.3497.116',
 1332         '71.0.3565.1',
 1333         '71.0.3565.0',
 1334         '70.0.3538.39',
 1335         '69.0.3497.115',
 1336         '71.0.3564.1',
 1337         '71.0.3564.0',
 1338         '70.0.3538.38',
 1339         '69.0.3497.114',
 1340         '71.0.3563.0',
 1341         '71.0.3562.2',
 1342         '70.0.3538.37',
 1343         '69.0.3497.113',
 1344         '70.0.3538.36',
 1345         '70.0.3538.35',
 1346         '71.0.3562.1',
 1347         '71.0.3562.0',
 1348         '70.0.3538.34',
 1349         '69.0.3497.112',
 1350         '70.0.3538.33',
 1351         '71.0.3561.1',
 1352         '71.0.3561.0',
 1353         '70.0.3538.32',
 1354         '69.0.3497.111',
 1355         '71.0.3559.6',
 1356         '71.0.3560.1',
 1357         '71.0.3560.0',
 1358         '71.0.3559.5',
 1359         '71.0.3559.4',
 1360         '70.0.3538.31',
 1361         '69.0.3497.110',
 1362         '71.0.3559.3',
 1363         '70.0.3538.30',
 1364         '69.0.3497.109',
 1365         '71.0.3559.2',
 1366         '71.0.3559.1',
 1367         '71.0.3559.0',
 1368         '70.0.3538.29',
 1369         '69.0.3497.108',
 1370         '71.0.3558.2',
 1371         '71.0.3558.1',
 1372         '71.0.3558.0',
 1373         '70.0.3538.28',
 1374         '69.0.3497.107',
 1375         '71.0.3557.2',
 1376         '71.0.3557.1',
 1377         '71.0.3557.0',
 1378         '70.0.3538.27',
 1379         '69.0.3497.106',
 1380         '71.0.3554.4',
 1381         '70.0.3538.26',
 1382         '71.0.3556.1',
 1383         '71.0.3556.0',
 1384         '70.0.3538.25',
 1385         '71.0.3554.3',
 1386         '69.0.3497.105',
 1387         '71.0.3554.2',
 1388         '70.0.3538.24',
 1389         '69.0.3497.104',
 1390         '71.0.3555.2',
 1391         '70.0.3538.23',
 1392         '71.0.3555.1',
 1393         '71.0.3555.0',
 1394         '70.0.3538.22',
 1395         '69.0.3497.103',
 1396         '71.0.3554.1',
 1397         '71.0.3554.0',
 1398         '70.0.3538.21',
 1399         '69.0.3497.102',
 1400         '71.0.3553.3',
 1401         '70.0.3538.20',
 1402         '69.0.3497.101',
 1403         '71.0.3553.2',
 1404         '69.0.3497.100',
 1405         '71.0.3553.1',
 1406         '71.0.3553.0',
 1407         '70.0.3538.19',
 1408         '69.0.3497.99',
 1409         '69.0.3497.98',
 1410         '69.0.3497.97',
 1411         '71.0.3552.6',
 1412         '71.0.3552.5',
 1413         '71.0.3552.4',
 1414         '71.0.3552.3',
 1415         '71.0.3552.2',
 1416         '71.0.3552.1',
 1417         '71.0.3552.0',
 1418         '70.0.3538.18',
 1419         '69.0.3497.96',
 1420         '71.0.3551.3',
 1421         '71.0.3551.2',
 1422         '71.0.3551.1',
 1423         '71.0.3551.0',
 1424         '70.0.3538.17',
 1425         '69.0.3497.95',
 1426         '71.0.3550.3',
 1427         '71.0.3550.2',
 1428         '71.0.3550.1',
 1429         '71.0.3550.0',
 1430         '70.0.3538.16',
 1431         '69.0.3497.94',
 1432         '71.0.3549.1',
 1433         '71.0.3549.0',
 1434         '70.0.3538.15',
 1435         '69.0.3497.93',
 1436         '69.0.3497.92',
 1437         '71.0.3548.1',
 1438         '71.0.3548.0',
 1439         '70.0.3538.14',
 1440         '69.0.3497.91',
 1441         '71.0.3547.1',
 1442         '71.0.3547.0',
 1443         '70.0.3538.13',
 1444         '69.0.3497.90',
 1445         '71.0.3546.2',
 1446         '69.0.3497.89',
 1447         '71.0.3546.1',
 1448         '71.0.3546.0',
 1449         '70.0.3538.12',
 1450         '69.0.3497.88',
 1451         '71.0.3545.4',
 1452         '71.0.3545.3',
 1453         '71.0.3545.2',
 1454         '71.0.3545.1',
 1455         '71.0.3545.0',
 1456         '70.0.3538.11',
 1457         '69.0.3497.87',
 1458         '71.0.3544.5',
 1459         '71.0.3544.4',
 1460         '71.0.3544.3',
 1461         '71.0.3544.2',
 1462         '71.0.3544.1',
 1463         '71.0.3544.0',
 1464         '69.0.3497.86',
 1465         '70.0.3538.10',
 1466         '69.0.3497.85',
 1467         '70.0.3538.9',
 1468         '69.0.3497.84',
 1469         '71.0.3543.4',
 1470         '70.0.3538.8',
 1471         '71.0.3543.3',
 1472         '71.0.3543.2',
 1473         '71.0.3543.1',
 1474         '71.0.3543.0',
 1475         '70.0.3538.7',
 1476         '69.0.3497.83',
 1477         '71.0.3542.2',
 1478         '71.0.3542.1',
 1479         '71.0.3542.0',
 1480         '70.0.3538.6',
 1481         '69.0.3497.82',
 1482         '69.0.3497.81',
 1483         '71.0.3541.1',
 1484         '71.0.3541.0',
 1485         '70.0.3538.5',
 1486         '69.0.3497.80',
 1487         '71.0.3540.1',
 1488         '71.0.3540.0',
 1489         '70.0.3538.4',
 1490         '69.0.3497.79',
 1491         '70.0.3538.3',
 1492         '71.0.3539.1',
 1493         '71.0.3539.0',
 1494         '69.0.3497.78',
 1495         '68.0.3440.134',
 1496         '69.0.3497.77',
 1497         '70.0.3538.2',
 1498         '70.0.3538.1',
 1499         '70.0.3538.0',
 1500         '69.0.3497.76',
 1501         '68.0.3440.133',
 1502         '69.0.3497.75',
 1503         '70.0.3537.2',
 1504         '70.0.3537.1',
 1505         '70.0.3537.0',
 1506         '69.0.3497.74',
 1507         '68.0.3440.132',
 1508         '70.0.3536.0',
 1509         '70.0.3535.5',
 1510         '70.0.3535.4',
 1511         '70.0.3535.3',
 1512         '69.0.3497.73',
 1513         '68.0.3440.131',
 1514         '70.0.3532.8',
 1515         '70.0.3532.7',
 1516         '69.0.3497.72',
 1517         '69.0.3497.71',
 1518         '70.0.3535.2',
 1519         '70.0.3535.1',
 1520         '70.0.3535.0',
 1521         '69.0.3497.70',
 1522         '68.0.3440.130',
 1523         '69.0.3497.69',
 1524         '68.0.3440.129',
 1525         '70.0.3534.4',
 1526         '70.0.3534.3',
 1527         '70.0.3534.2',
 1528         '70.0.3534.1',
 1529         '70.0.3534.0',
 1530         '69.0.3497.68',
 1531         '68.0.3440.128',
 1532         '70.0.3533.2',
 1533         '70.0.3533.1',
 1534         '70.0.3533.0',
 1535         '69.0.3497.67',
 1536         '68.0.3440.127',
 1537         '70.0.3532.6',
 1538         '70.0.3532.5',
 1539         '70.0.3532.4',
 1540         '69.0.3497.66',
 1541         '68.0.3440.126',
 1542         '70.0.3532.3',
 1543         '70.0.3532.2',
 1544         '70.0.3532.1',
 1545         '69.0.3497.60',
 1546         '69.0.3497.65',
 1547         '69.0.3497.64',
 1548         '70.0.3532.0',
 1549         '70.0.3531.0',
 1550         '70.0.3530.4',
 1551         '70.0.3530.3',
 1552         '70.0.3530.2',
 1553         '69.0.3497.58',
 1554         '68.0.3440.125',
 1555         '69.0.3497.57',
 1556         '69.0.3497.56',
 1557         '69.0.3497.55',
 1558         '69.0.3497.54',
 1559         '70.0.3530.1',
 1560         '70.0.3530.0',
 1561         '69.0.3497.53',
 1562         '68.0.3440.124',
 1563         '69.0.3497.52',
 1564         '70.0.3529.3',
 1565         '70.0.3529.2',
 1566         '70.0.3529.1',
 1567         '70.0.3529.0',
 1568         '69.0.3497.51',
 1569         '70.0.3528.4',
 1570         '68.0.3440.123',
 1571         '70.0.3528.3',
 1572         '70.0.3528.2',
 1573         '70.0.3528.1',
 1574         '70.0.3528.0',
 1575         '69.0.3497.50',
 1576         '68.0.3440.122',
 1577         '70.0.3527.1',
 1578         '70.0.3527.0',
 1579         '69.0.3497.49',
 1580         '68.0.3440.121',
 1581         '70.0.3526.1',
 1582         '70.0.3526.0',
 1583         '68.0.3440.120',
 1584         '69.0.3497.48',
 1585         '69.0.3497.47',
 1586         '68.0.3440.119',
 1587         '68.0.3440.118',
 1588         '70.0.3525.5',
 1589         '70.0.3525.4',
 1590         '70.0.3525.3',
 1591         '68.0.3440.117',
 1592         '69.0.3497.46',
 1593         '70.0.3525.2',
 1594         '70.0.3525.1',
 1595         '70.0.3525.0',
 1596         '69.0.3497.45',
 1597         '68.0.3440.116',
 1598         '70.0.3524.4',
 1599         '70.0.3524.3',
 1600         '69.0.3497.44',
 1601         '70.0.3524.2',
 1602         '70.0.3524.1',
 1603         '70.0.3524.0',
 1604         '70.0.3523.2',
 1605         '69.0.3497.43',
 1606         '68.0.3440.115',
 1607         '70.0.3505.9',
 1608         '69.0.3497.42',
 1609         '70.0.3505.8',
 1610         '70.0.3523.1',
 1611         '70.0.3523.0',
 1612         '69.0.3497.41',
 1613         '68.0.3440.114',
 1614         '70.0.3505.7',
 1615         '69.0.3497.40',
 1616         '70.0.3522.1',
 1617         '70.0.3522.0',
 1618         '70.0.3521.2',
 1619         '69.0.3497.39',
 1620         '68.0.3440.113',
 1621         '70.0.3505.6',
 1622         '70.0.3521.1',
 1623         '70.0.3521.0',
 1624         '69.0.3497.38',
 1625         '68.0.3440.112',
 1626         '70.0.3520.1',
 1627         '70.0.3520.0',
 1628         '69.0.3497.37',
 1629         '68.0.3440.111',
 1630         '70.0.3519.3',
 1631         '70.0.3519.2',
 1632         '70.0.3519.1',
 1633         '70.0.3519.0',
 1634         '69.0.3497.36',
 1635         '68.0.3440.110',
 1636         '70.0.3518.1',
 1637         '70.0.3518.0',
 1638         '69.0.3497.35',
 1639         '69.0.3497.34',
 1640         '68.0.3440.109',
 1641         '70.0.3517.1',
 1642         '70.0.3517.0',
 1643         '69.0.3497.33',
 1644         '68.0.3440.108',
 1645         '69.0.3497.32',
 1646         '70.0.3516.3',
 1647         '70.0.3516.2',
 1648         '70.0.3516.1',
 1649         '70.0.3516.0',
 1650         '69.0.3497.31',
 1651         '68.0.3440.107',
 1652         '70.0.3515.4',
 1653         '68.0.3440.106',
 1654         '70.0.3515.3',
 1655         '70.0.3515.2',
 1656         '70.0.3515.1',
 1657         '70.0.3515.0',
 1658         '69.0.3497.30',
 1659         '68.0.3440.105',
 1660         '68.0.3440.104',
 1661         '70.0.3514.2',
 1662         '70.0.3514.1',
 1663         '70.0.3514.0',
 1664         '69.0.3497.29',
 1665         '68.0.3440.103',
 1666         '70.0.3513.1',
 1667         '70.0.3513.0',
 1668         '69.0.3497.28',
 1669     )
 1670     return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
 1671 
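       # Illustrative usage (a minimal sketch added by the editor, not part of
       # the upstream module): each call fills the Windows/Chrome template above
       # with one of the listed Chrome versions.
       #   >>> ua = random_user_agent()
       #   >>> ua.startswith('Mozilla/5.0 (Windows NT 10.0; Win64; x64)')
       #   True
       #   >>> 'Chrome/' in ua and ua.endswith('Safari/537.36')
       #   True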
 1672 
 1673 std_headers = {
 1674     'User-Agent': random_user_agent(),
 1675     'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
 1676     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
 1677     'Accept-Encoding': 'gzip, deflate',
 1678     'Accept-Language': 'en-us,en;q=0.5',
 1679 }
 1680 
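       # Illustrative usage (a minimal sketch added by the editor, not part of
       # the upstream module): std_headers is typically passed wholesale when
       # building HTTP requests; the URL below is hypothetical.
       #   >>> req = compat_urllib_request.Request(
       #   ...     'https://example.com/', headers=std_headers)
       #   >>> req.has_header('User-agent')
       #   True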
 1681 
 1682 USER_AGENTS = {
 1683     'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
 1684 }
 1685 
 1686 
 1687 NO_DEFAULT = object()
 1688 
 1689 ENGLISH_MONTH_NAMES = [
 1690     'January', 'February', 'March', 'April', 'May', 'June',
 1691     'July', 'August', 'September', 'October', 'November', 'December']
 1692 
 1693 MONTH_NAMES = {
 1694     'en': ENGLISH_MONTH_NAMES,
 1695     'fr': [
 1696         'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
 1697         'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
 1698 }
 1699 
 1700 # Timezone names for RFC2822 obs-zone
 1701 # From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
 1702 TIMEZONE_NAMES = {
 1703     'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
 1704     'AST': -4, 'ADT': -3,  # Atlantic (used in Canada)
 1705     'EST': -5, 'EDT': -4,  # Eastern
 1706     'CST': -6, 'CDT': -5,  # Central
 1707     'MST': -7, 'MDT': -6,  # Mountain
 1708     'PST': -8, 'PDT': -7   # Pacific
 1709 }
 1710 
 1711 KNOWN_EXTENSIONS = (
 1712     'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
 1713     'flv', 'f4v', 'f4a', 'f4b',
 1714     'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
 1715     'mkv', 'mka', 'mk3d',
 1716     'avi', 'divx',
 1717     'mov',
 1718     'asf', 'wmv', 'wma',
 1719     '3gp', '3g2',
 1720     'mp3',
 1721     'flac',
 1722     'ape',
 1723     'wav',
 1724     'f4f', 'f4m', 'm3u8', 'smil')
 1725 
 1726 # needed for sanitizing filenames in restricted mode
 1727 ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
 1728                         itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
 1729                                         'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
 1730 
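       # Illustrative usage (a minimal sketch added by the editor, not part of
       # the upstream module): each accented character maps to an ASCII
       # transliteration, which the restricted-mode filename sanitization relies on.
       #   >>> ''.join(ACCENT_CHARS.get(c, c) for c in 'Café Été') == 'Cafe Ete'
       #   True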
 1731 DATE_FORMATS = (
 1732     '%d %B %Y',
 1733     '%d %b %Y',
 1734     '%B %d %Y',
 1735     '%B %dst %Y',
 1736     '%B %dnd %Y',
 1737     '%B %drd %Y',
 1738     '%B %dth %Y',
 1739     '%b %d %Y',
 1740     '%b %dst %Y',
 1741     '%b %dnd %Y',
 1742     '%b %drd %Y',
 1743     '%b %dth %Y',
 1744     '%b %dst %Y %I:%M',
 1745     '%b %dnd %Y %I:%M',
 1746     '%b %drd %Y %I:%M',
 1747     '%b %dth %Y %I:%M',
 1748     '%Y %m %d',
 1749     '%Y-%m-%d',
 1750     '%Y.%m.%d.',
 1751     '%Y/%m/%d',
 1752     '%Y/%m/%d %H:%M',
 1753     '%Y/%m/%d %H:%M:%S',
 1754     '%Y%m%d%H%M',
 1755     '%Y%m%d%H%M%S',
 1756     '%Y%m%d',
 1757     '%Y-%m-%d %H:%M',
 1758     '%Y-%m-%d %H:%M:%S',
 1759     '%Y-%m-%d %H:%M:%S.%f',
 1760     '%Y-%m-%d %H:%M:%S:%f',
 1761     '%d.%m.%Y %H:%M',
 1762     '%d.%m.%Y %H.%M',
 1763     '%Y-%m-%dT%H:%M:%SZ',
 1764     '%Y-%m-%dT%H:%M:%S.%fZ',
 1765     '%Y-%m-%dT%H:%M:%S.%f0Z',
 1766     '%Y-%m-%dT%H:%M:%S',
 1767     '%Y-%m-%dT%H:%M:%S.%f',
 1768     '%Y-%m-%dT%H:%M',
 1769     '%b %d %Y at %H:%M',
 1770     '%b %d %Y at %H:%M:%S',
 1771     '%B %d %Y at %H:%M',
 1772     '%B %d %Y at %H:%M:%S',
 1773     '%H:%M %d-%b-%Y',
 1774 )
 1775 
 1776 DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
 1777 DATE_FORMATS_DAY_FIRST.extend([
 1778     '%d-%m-%Y',
 1779     '%d.%m.%Y',
 1780     '%d.%m.%y',
 1781     '%d/%m/%Y',
 1782     '%d/%m/%y',
 1783     '%d/%m/%Y %H:%M:%S',
 1784     '%d-%m-%Y %H:%M',
 1785 ])
 1786 
 1787 DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
 1788 DATE_FORMATS_MONTH_FIRST.extend([
 1789     '%m-%d-%Y',
 1790     '%m.%d.%Y',
 1791     '%m/%d/%Y',
 1792     '%m/%d/%y',
 1793     '%m/%d/%Y %H:%M:%S',
 1794 ])
 1795 
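       # Illustrative usage (a minimal sketch added by the editor, not part of
       # the upstream module): the date-parsing helpers later in this module try
       # these formats one by one with datetime.datetime.strptime(); the
       # *_DAY_FIRST/*_MONTH_FIRST variants only differ in how ambiguous numeric
       # dates are read.
       #   >>> datetime.datetime.strptime('2019/05/20 12:34', '%Y/%m/%d %H:%M')
       #   datetime.datetime(2019, 5, 20, 12, 34)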
 1796 PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
 1797 JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
 1798 
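       # Illustrative usage (a minimal sketch added by the editor, not part of
       # the upstream module): JSON_LD_RE pulls the raw JSON-LD payload out of
       # an HTML page.
       #   >>> html = ('<script type="application/ld+json">'
       #   ...         '{"@type": "VideoObject"}</script>')
       #   >>> re.search(JSON_LD_RE, html).group('json_ld') == '{"@type": "VideoObject"}'
       #   True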
 1799 
 1800 def preferredencoding():
 1801     """Get preferred encoding.
 1802 
 1803     Returns the best encoding scheme for the system, based on
 1804     locale.getpreferredencoding() and some further tweaks.
 1805     """
 1806     try:
 1807         pref = locale.getpreferredencoding()
 1808         'TEST'.encode(pref)
 1809     except Exception:
 1810         pref = 'UTF-8'
 1811 
 1812     return pref
 1813 
 1814 
 1815 def write_json_file(obj, fn):
 1816     """ Encode obj as JSON and write it to fn, atomically if possible """
 1817 
 1818     fn = encodeFilename(fn)
 1819     if sys.version_info < (3, 0) and sys.platform != 'win32':
 1820         encoding = get_filesystem_encoding()
  1821         # os.path.basename returns a bytes object, but NamedTemporaryFile
  1822         # will fail if the filename contains non-ASCII characters unless we
  1823         # use a unicode object
  1824         path_basename = lambda f: os.path.basename(f).decode(encoding)
  1825         # the same for os.path.dirname
  1826         path_dirname = lambda f: os.path.dirname(f).decode(encoding)
 1827     else:
 1828         path_basename = os.path.basename
 1829         path_dirname = os.path.dirname
 1830 
 1831     args = {
 1832         'suffix': '.tmp',
 1833         'prefix': path_basename(fn) + '.',
 1834         'dir': path_dirname(fn),
 1835         'delete': False,
 1836     }
 1837 
 1838     # In Python 2.x, json.dump expects a bytestream.
 1839     # In Python 3.x, it writes to a character stream
 1840     if sys.version_info < (3, 0):
 1841         args['mode'] = 'wb'
 1842     else:
 1843         args.update({
 1844             'mode': 'w',
 1845             'encoding': 'utf-8',
 1846         })
 1847 
 1848     tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
 1849 
 1850     try:
 1851         with tf:
 1852             json.dump(obj, tf)
 1853         if sys.platform == 'win32':
 1854             # Need to remove existing file on Windows, else os.rename raises
 1855             # WindowsError or FileExistsError.
 1856             try:
 1857                 os.unlink(fn)
 1858             except OSError:
 1859                 pass
 1860         try:
 1861             mask = os.umask(0)
 1862             os.umask(mask)
 1863             os.chmod(tf.name, 0o666 & ~mask)
 1864         except OSError:
 1865             pass
 1866         os.rename(tf.name, fn)
 1867     except Exception:
 1868         try:
 1869             os.remove(tf.name)
 1870         except OSError:
 1871             pass
 1872         raise
 1873 
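       # Illustrative usage (a minimal sketch added by the editor; the file name
       # is hypothetical): the data is written to a temporary file next to the
       # target and then renamed into place, so readers never see a half-written
       # JSON document.
       #   >>> write_json_file({'id': 'abc'}, 'demo.info.json')
       #   >>> json.load(io.open('demo.info.json', encoding='utf-8'))['id'] == 'abc'
       #   True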
 1874 
 1875 if sys.version_info >= (2, 7):
 1876     def find_xpath_attr(node, xpath, key, val=None):
 1877         """ Find the xpath xpath[@key=val] """
 1878         assert re.match(r'^[a-zA-Z_-]+$', key)
 1879         expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
 1880         return node.find(expr)
 1881 else:
 1882     def find_xpath_attr(node, xpath, key, val=None):
 1883         for f in node.findall(compat_xpath(xpath)):
 1884             if key not in f.attrib:
 1885                 continue
 1886             if val is None or f.attrib.get(key) == val:
 1887                 return f
 1888         return None
 1889 
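       # Illustrative usage (a minimal sketch added by the editor, not part of
       # the upstream module), on a small made-up document:
       #   >>> import xml.etree.ElementTree as ET
       #   >>> doc = ET.fromstring('<root><a k="v">hit</a><a>miss</a></root>')
       #   >>> find_xpath_attr(doc, './/a', 'k', 'v').text == 'hit'
       #   True
       #   >>> find_xpath_attr(doc, './/a', 'k', 'other') is None
       #   True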
  1890 # On Python 2.6, the xml.etree.ElementTree.Element methods don't support
  1891 # the namespaces parameter
 1892 
 1893 
 1894 def xpath_with_ns(path, ns_map):
 1895     components = [c.split(':') for c in path.split('/')]
 1896     replaced = []
 1897     for c in components:
 1898         if len(c) == 1:
 1899             replaced.append(c[0])
 1900         else:
 1901             ns, tag = c
 1902             replaced.append('{%s}%s' % (ns_map[ns], tag))
 1903     return '/'.join(replaced)
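
# Illustrative example: expand namespace prefixes before querying an
# ElementTree node (the SMIL namespace URI is just a sample mapping):
#
#     ns_map = {'smil': 'http://www.w3.org/2001/SMIL20/Language'}
#     xpath_with_ns('.//smil:video', ns_map)
#     # -> './/{http://www.w3.org/2001/SMIL20/Language}video'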
 1904 
 1905 
 1906 def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
 1907     def _find_xpath(xpath):
 1908         return node.find(compat_xpath(xpath))
 1909 
 1910     if isinstance(xpath, (str, compat_str)):
 1911         n = _find_xpath(xpath)
 1912     else:
 1913         for xp in xpath:
 1914             n = _find_xpath(xp)
 1915             if n is not None:
 1916                 break
 1917 
 1918     if n is None:
 1919         if default is not NO_DEFAULT:
 1920             return default
 1921         elif fatal:
 1922             name = xpath if name is None else name
 1923             raise ExtractorError('Could not find XML element %s' % name)
 1924         else:
 1925             return None
 1926     return n
 1927 
 1928 
 1929 def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
 1930     n = xpath_element(node, xpath, name, fatal=fatal, default=default)
 1931     if n is None or n == default:
 1932         return n
 1933     if n.text is None:
 1934         if default is not NO_DEFAULT:
 1935             return default
 1936         elif fatal:
 1937             name = xpath if name is None else name
 1938             raise ExtractorError('Could not find XML element\'s text %s' % name)
 1939         else:
 1940             return None
 1941     return n.text
 1942 
 1943 
 1944 def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
 1945     n = find_xpath_attr(node, xpath, key)
 1946     if n is None:
 1947         if default is not NO_DEFAULT:
 1948             return default
 1949         elif fatal:
 1950             name = '%s[@%s]' % (xpath, key) if name is None else name
 1951             raise ExtractorError('Could not find XML attribute %s' % name)
 1952         else:
 1953             return None
 1954     return n.attrib[key]
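
# Illustrative use of the xpath_* helpers above (hypothetical XML):
#
#     doc = compat_etree_fromstring('<root><media url="http://x/v.mp4">t</media></root>')
#     xpath_text(doc, './media', default=None)       # -> 't'
#     xpath_attr(doc, './media', 'url', fatal=True)  # -> 'http://x/v.mp4'
#     xpath_text(doc, './missing', default='n/a')    # -> 'n/a' (missing node, non-fatal)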
 1955 
 1956 
 1957 def get_element_by_id(id, html):
 1958     """Return the content of the tag with the specified ID in the passed HTML document"""
 1959     return get_element_by_attribute('id', id, html)
 1960 
 1961 
 1962 def get_element_by_class(class_name, html):
 1963     """Return the content of the first tag with the specified class in the passed HTML document"""
 1964     retval = get_elements_by_class(class_name, html)
 1965     return retval[0] if retval else None
 1966 
 1967 
 1968 def get_element_by_attribute(attribute, value, html, escape_value=True):
 1969     retval = get_elements_by_attribute(attribute, value, html, escape_value)
 1970     return retval[0] if retval else None
 1971 
 1972 
 1973 def get_elements_by_class(class_name, html):
 1974     """Return the content of all tags with the specified class in the passed HTML document as a list"""
 1975     return get_elements_by_attribute(
 1976         'class', r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name),
 1977         html, escape_value=False)
 1978 
 1979 
 1980 def get_elements_by_attribute(attribute, value, html, escape_value=True):
 1981     """Return the content of all tags with the specified attribute in the passed HTML document, as a list"""
 1982 
 1983     value = re.escape(value) if escape_value else value
 1984 
 1985     retlist = []
 1986     for m in re.finditer(r'''(?xs)
 1987         <([a-zA-Z0-9:._-]+)
 1988          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
 1989          \s+%s=['"]?%s['"]?
 1990          (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
 1991         \s*>
 1992         (?P<content>.*?)
 1993         </\1>
 1994     ''' % (re.escape(attribute), value), html):
 1995         res = m.group('content')
 1996 
 1997         if res.startswith('"') or res.startswith("'"):
 1998             res = res[1:-1]
 1999 
 2000         retlist.append(unescapeHTML(res))
 2001 
 2002     return retlist
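
# Illustrative examples for the get_element(s)_by_* helpers (hypothetical HTML):
#
#     html = '<div class="info">Hello</div><span id="x">&amp;</span>'
#     get_element_by_class('info', html)             # -> 'Hello'
#     get_element_by_id('x', html)                   # -> '&'
#     get_elements_by_attribute('class', 'info', html)  # -> ['Hello']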
 2003 
 2004 
 2005 class HTMLAttributeParser(compat_HTMLParser):
 2006     """Trivial HTML parser to gather the attributes for a single element"""
 2007     def __init__(self):
 2008         self.attrs = {}
 2009         compat_HTMLParser.__init__(self)
 2010 
 2011     def handle_starttag(self, tag, attrs):
 2012         self.attrs = dict(attrs)
 2013 
 2014 
 2015 def extract_attributes(html_element):
 2016     """Given a string for an HTML element such as
 2017     <el
 2018          a="foo" B="bar" c="&98;az" d=boz
 2019          empty= noval entity="&amp;"
 2020          sq='"' dq="'"
 2021     >
 2022     Decode and return a dictionary of attributes.
 2023     {
 2024         'a': 'foo', 'b': 'bar', 'c': 'baz', 'd': 'boz',
 2025         'empty': '', 'noval': None, 'entity': '&',
 2026         'sq': '"', 'dq': '\''
 2027     }.
 2028     NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
 2029     but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
 2030     """
 2031     parser = HTMLAttributeParser()
 2032     try:
 2033         parser.feed(html_element)
 2034         parser.close()
 2035     # Older Python may throw HTMLParseError in case of malformed HTML
 2036     except compat_HTMLParseError:
 2037         pass
 2038     return parser.attrs
 2039 
 2040 
 2041 def clean_html(html):
 2042     """Clean an HTML snippet into a readable string"""
 2043 
 2044     if html is None:  # Convenience for sanitizing descriptions etc.
 2045         return html
 2046 
 2047     # Newline vs <br />
 2048     html = html.replace('\n', ' ')
 2049     html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
 2050     html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
 2051     # Strip html tags
 2052     html = re.sub('<.*?>', '', html)
 2053     # Replace html entities
 2054     html = unescapeHTML(html)
 2055     return html.strip()
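
# Illustrative example:
#
#     clean_html('First line<br/>Second &amp; last</p><p>New paragraph')
#     # -> 'First line\nSecond & last\nNew paragraph'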
 2056 
 2057 
 2058 def sanitize_open(filename, open_mode):
 2059     """Try to open the given filename, and slightly tweak it if this fails.
 2060 
 2061     Attempts to open the given filename. If this fails, it tries to change
 2062     the filename slightly, step by step, until it's either able to open it
 2063     or it fails and raises a final exception, like the standard open()
 2064     function.
 2065 
 2066     It returns the tuple (stream, definitive_file_name).
 2067     """
 2068     try:
 2069         if filename == '-':
 2070             if sys.platform == 'win32':
 2071                 import msvcrt
 2072                 msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
 2073             return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
 2074         stream = open(encodeFilename(filename), open_mode)
 2075         return (stream, filename)
 2076     except (IOError, OSError) as err:
 2077         if err.errno in (errno.EACCES,):
 2078             raise
 2079 
 2080         # In case of error, try to remove win32 forbidden chars
 2081         alt_filename = sanitize_path(filename)
 2082         if alt_filename == filename:
 2083             raise
 2084         else:
 2085             # An exception here should be caught in the caller
 2086             stream = open(encodeFilename(alt_filename), open_mode)
 2087             return (stream, alt_filename)
 2088 
 2089 
 2090 def timeconvert(timestr):
 2091     """Convert RFC 2822 defined time string into system timestamp"""
 2092     timestamp = None
 2093     timetuple = email.utils.parsedate_tz(timestr)
 2094     if timetuple is not None:
 2095         timestamp = email.utils.mktime_tz(timetuple)
 2096     return timestamp
 2097 
 2098 
 2099 def sanitize_filename(s, restricted=False, is_id=False):
 2100     """Sanitizes a string so it can be used as part of a filename.
 2101     If restricted is set, use a stricter subset of allowed characters.
 2102     Set is_id if this is not an arbitrary string, but an ID that should be kept
 2103     if possible.
 2104     """
 2105     def replace_insane(char):
 2106         if restricted and char in ACCENT_CHARS:
 2107             return ACCENT_CHARS[char]
 2108         if char == '?' or ord(char) < 32 or ord(char) == 127:
 2109             return ''
 2110         elif char == '"':
 2111             return '' if restricted else '\''
 2112         elif char == ':':
 2113             return '_-' if restricted else ' -'
 2114         elif char in '\\/|*<>':
 2115             return '_'
 2116         if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
 2117             return '_'
 2118         if restricted and ord(char) > 127:
 2119             return '_'
 2120         return char
 2121 
 2122     # Replace look-alike Unicode glyphs
 2123     if restricted and not is_id:
 2124         s = unicodedata.normalize('NFKC', s)
 2125     # Handle timestamps
 2126     s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
 2127     result = ''.join(map(replace_insane, s))
 2128     if not is_id:
 2129         while '__' in result:
 2130             result = result.replace('__', '_')
 2131         result = result.strip('_')
 2132         # Common case of "Foreign band name - English song title"
 2133         if restricted and result.startswith('-_'):
 2134             result = result[2:]
 2135         if result.startswith('-'):
 2136             result = '_' + result[len('-'):]
 2137         result = result.lstrip('.')
 2138         if not result:
 2139             result = '_'
 2140     return result
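
# Illustrative behaviour (derived from replace_insane() above):
#
#     sanitize_filename('A/B: "C"?')                   # -> "A_B - 'C'"
#     sanitize_filename('A/B: "C"?', restricted=True)  # -> 'A_B_-_C'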
 2141 
 2142 
 2143 def sanitize_path(s):
 2144     """Sanitizes and normalizes path on Windows"""
 2145     if sys.platform != 'win32':
 2146         return s
 2147     drive_or_unc, _ = os.path.splitdrive(s)
 2148     if sys.version_info < (2, 7) and not drive_or_unc:
 2149         drive_or_unc, _ = os.path.splitunc(s)
 2150     norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
 2151     if drive_or_unc:
 2152         norm_path.pop(0)
 2153     sanitized_path = [
 2154         path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
 2155         for path_part in norm_path]
 2156     if drive_or_unc:
 2157         sanitized_path.insert(0, drive_or_unc + os.path.sep)
 2158     return os.path.join(*sanitized_path)
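
# Illustrative example (on win32; other platforms return the path unchanged):
#
#     sanitize_path(r'C:\videos\what?.mp4')   # -> r'C:\videos\what#.mp4'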
 2159 
 2160 
 2161 def sanitize_url(url):
 2162     # Prepend protocol-less URLs with the `http:` scheme in order to reduce
 2163     # the number of unwanted failures due to a missing protocol
 2164     if url.startswith('//'):
 2165         return 'http:%s' % url
 2166     # Fix some common typos seen so far
 2167     COMMON_TYPOS = (
 2168         # https://github.com/ytdl-org/youtube-dl/issues/15649
 2169         (r'^httpss://', r'https://'),
 2170         # https://bx1.be/lives/direct-tv/
 2171         (r'^rmtp([es]?)://', r'rtmp\1://'),
 2172     )
 2173     for mistake, fixup in COMMON_TYPOS:
 2174         if re.match(mistake, url):
 2175             return re.sub(mistake, fixup, url)
 2176     return escape_url(url)
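
# Illustrative examples (escape_url() is assumed to be defined further down
# in this module; the URLs are hypothetical):
#
#     sanitize_url('//example.com/video')      # -> 'http://example.com/video'
#     sanitize_url('httpss://example.com/x')   # -> 'https://example.com/x'
#     sanitize_url('rmtpe://host/stream')      # -> 'rtmpe://host/stream'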
 2177 
 2178 
 2179 def sanitized_Request(url, *args, **kwargs):
 2180     return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
 2181 
 2182 
 2183 def expand_path(s):
 2184     """Expand shell variables and ~"""
 2185     return os.path.expandvars(compat_expanduser(s))
 2186 
 2187 
 2188 def orderedSet(iterable):
 2189     """ Remove all duplicates from the input iterable """
 2190     res = []
 2191     for el in iterable:
 2192         if el not in res:
 2193             res.append(el)
 2194     return res
 2195 
 2196 
 2197 def _htmlentity_transform(entity_with_semicolon):
 2198     """Transforms an HTML entity to a character."""
 2199     entity = entity_with_semicolon[:-1]
 2200 
 2201     # Known non-numeric HTML entity
 2202     if entity in compat_html_entities.name2codepoint:
 2203         return compat_chr(compat_html_entities.name2codepoint[entity])
 2204 
 2205     # TODO: HTML5 allows entities without a semicolon. For example,
 2206     # '&Eacuteric' should be decoded as 'Éric'.
 2207     if entity_with_semicolon in compat_html_entities_html5:
 2208         return compat_html_entities_html5[entity_with_semicolon]
 2209 
 2210     mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
 2211     if mobj is not None:
 2212         numstr = mobj.group(1)
 2213         if numstr.startswith('x'):
 2214             base = 16
 2215             numstr = '0%s' % numstr
 2216         else:
 2217             base = 10
 2218         # See https://github.com/ytdl-org/youtube-dl/issues/7518
 2219         try:
 2220             return compat_chr(int(numstr, base))
 2221         except ValueError:
 2222             pass
 2223 
 2224     # Unknown entity in name, return its literal representation
 2225     return '&%s;' % entity
 2226 
 2227 
 2228 def unescapeHTML(s):
 2229     if s is None:
 2230         return None
 2231     assert type(s) == compat_str
 2232 
 2233     return re.sub(
 2234         r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
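
# Illustrative examples (behaviour follows _htmlentity_transform() above):
#
#     unescapeHTML('Ben &amp; Jerry&#39;s')   # -> "Ben & Jerry's"
#     unescapeHTML('&foobar;')                # -> '&foobar;' (unknown entity kept literally)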
 2235 
 2236 
 2237 def process_communicate_or_kill(p, *args, **kwargs):
 2238     try:
 2239         return p.communicate(*args, **kwargs)
 2240     except BaseException:  # Including KeyboardInterrupt
 2241         p.kill()
 2242         p.wait()
 2243         raise
 2244 
 2245 
 2246 def get_subprocess_encoding():
 2247     if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
 2248         # For subprocess calls, encode with locale encoding
 2249         # Refer to http://stackoverflow.com/a/9951851/35070
 2250         encoding = preferredencoding()
 2251     else:
 2252         encoding = sys.getfilesystemencoding()
 2253     if encoding is None:
 2254         encoding = 'utf-8'
 2255     return encoding
 2256 
 2257 
 2258 def encodeFilename(s, for_subprocess=False):
 2259     """
 2260     @param s The name of the file
 2261     """
 2262 
 2263     assert type(s) == compat_str
 2264 
 2265     # Python 3 has a Unicode API
 2266     if sys.version_info >= (3, 0):
 2267         return s
 2268 
 2269     # Pass '' directly to use Unicode APIs on Windows 2000 and up
 2270     # (Detecting Windows NT 4 is tricky because 'major >= 4' would
 2271     # match Windows 9x series as well. Besides, NT 4 is obsolete.)
 2272     if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
 2273         return s
 2274 
 2275     # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
 2276     if sys.platform.startswith('java'):
 2277         return s
 2278 
 2279     return s.encode(get_subprocess_encoding(), 'ignore')
 2280 
 2281 
 2282 def decodeFilename(b, for_subprocess=False):
 2283 
 2284     if sys.version_info >= (3, 0):
 2285         return b
 2286 
 2287     if not isinstance(b, bytes):
 2288         return b
 2289 
 2290     return b.decode(get_subprocess_encoding(), 'ignore')
 2291 
 2292 
 2293 def encodeArgument(s):
 2294     if not isinstance(s, compat_str):
 2295         # Legacy code that uses byte strings
 2296         # Uncomment the following line after fixing all post processors
 2297         # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
 2298         s = s.decode('ascii')
 2299     return encodeFilename(s, True)
 2300 
 2301 
 2302 def decodeArgument(b):
 2303     return decodeFilename(b, True)
 2304 
 2305 
 2306 def decodeOption(optval):
 2307     if optval is None:
 2308         return optval
 2309     if isinstance(optval, bytes):
 2310         optval = optval.decode(preferredencoding())
 2311 
 2312     assert isinstance(optval, compat_str)
 2313     return optval
 2314 
 2315 
 2316 def formatSeconds(secs):
 2317     if secs > 3600:
 2318         return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
 2319     elif secs > 60:
 2320         return '%d:%02d' % (secs // 60, secs % 60)
 2321     else:
 2322         return '%d' % secs
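
# Illustrative values:
#
#     formatSeconds(45)     # -> '45'
#     formatSeconds(125)    # -> '2:05'
#     formatSeconds(3725)   # -> '1:02:05'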
 2323 
 2324 
 2325 def make_HTTPS_handler(params, **kwargs):
 2326 
 2327     # https://www.rfc-editor.org/info/rfc7301
 2328     ALPN_PROTOCOLS = ['http/1.1']
 2329 
 2330     def set_alpn_protocols(ctx):
 2331         # From https://github.com/yt-dlp/yt-dlp/commit/2c6dcb65fb612fc5bc5c61937bf438d3c473d8d0
 2332         # Thanks @coletdjnz
 2333         # Some servers may (wrongly) reject requests if ALPN extension is not sent. See:
 2334         # https://github.com/python/cpython/issues/85140
 2335         # https://github.com/yt-dlp/yt-dlp/issues/3878
 2336         try:
 2337             ctx.set_alpn_protocols(ALPN_PROTOCOLS)
 2338         except (AttributeError, NotImplementedError):
 2339             # Python < 2.7.10, not ssl.HAS_ALPN
 2340             pass
 2341 
 2342     opts_no_check_certificate = params.get('nocheckcertificate', False)
 2343     if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
 2344         context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
 2345         set_alpn_protocols(context)
 2346         if opts_no_check_certificate:
 2347             context.check_hostname = False
 2348             context.verify_mode = ssl.CERT_NONE
 2349 
 2350         try:
 2351             return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
 2352         except TypeError:
 2353             # Python 2.7.8
 2354             # (create_default_context present but HTTPSHandler has no context=)
 2355             pass
 2356 
 2357     if sys.version_info < (3, 2):
 2358         return YoutubeDLHTTPSHandler(params, **kwargs)
 2359     else:  # Python < 3.4
 2360         context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
 2361         context.verify_mode = (ssl.CERT_NONE
 2362                                if opts_no_check_certificate
 2363                                else ssl.CERT_REQUIRED)
 2364         context.set_default_verify_paths()
 2365         set_alpn_protocols(context)
 2366         return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
 2367 
 2368 
 2369 def bug_reports_message():
 2370     if ytdl_is_updateable():
 2371         update_cmd = 'type  youtube-dl -U  to update'
 2372     else:
 2373         update_cmd = 'see  https://yt-dl.org/update  on how to update'
 2374     msg = '; please report this issue on https://yt-dl.org/bug .'
 2375     msg += ' Make sure you are using the latest version; %s.' % update_cmd
 2376     msg += ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
 2377     return msg
 2378 
 2379 
 2380 class YoutubeDLError(Exception):
 2381     """Base exception for YoutubeDL errors."""
 2382     pass
 2383 
 2384 
 2385 class ExtractorError(YoutubeDLError):
 2386     """Error during info extraction."""
 2387 
 2388     def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
 2389         """ tb, if given, is the original traceback (so that it can be printed out).
 2390         If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
 2391         """
 2392 
 2393         if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
 2394             expected = True
 2395         if video_id is not None:
 2396             msg = video_id + ': ' + msg
 2397         if cause:
 2398             msg += ' (caused by %r)' % cause
 2399         if not expected:
 2400             msg += bug_reports_message()
 2401         super(ExtractorError, self).__init__(msg)
 2402 
 2403         self.traceback = tb
 2404         self.exc_info = sys.exc_info()  # preserve original exception
 2405         self.cause = cause
 2406         self.video_id = video_id
 2407 
 2408     def format_traceback(self):
 2409         if self.traceback is None:
 2410             return None
 2411         return ''.join(traceback.format_tb(self.traceback))
 2412 
 2413 
 2414 class UnsupportedError(ExtractorError):
 2415     def __init__(self, url):
 2416         super(UnsupportedError, self).__init__(
 2417             'Unsupported URL: %s' % url, expected=True)
 2418         self.url = url
 2419 
 2420 
 2421 class RegexNotFoundError(ExtractorError):
 2422     """Error when a regex didn't match"""
 2423     pass
 2424 
 2425 
 2426 class GeoRestrictedError(ExtractorError):
 2427     """Geographic restriction Error exception.
 2428 
 2429     This exception may be thrown when a video is not available from your
 2430     geographic location due to geographic restrictions imposed by a website.
 2431     """
 2432     def __init__(self, msg, countries=None):
 2433         super(GeoRestrictedError, self).__init__(msg, expected=True)
 2434         self.msg = msg
 2435         self.countries = countries
 2436 
 2437 
 2438 class DownloadError(YoutubeDLError):
 2439     """Download Error exception.
 2440 
 2441     This exception may be thrown by FileDownloader objects if they are not
 2442     configured to continue on errors. They will contain the appropriate
 2443     error message.
 2444     """
 2445 
 2446     def __init__(self, msg, exc_info=None):
 2447         """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
 2448         super(DownloadError, self).__init__(msg)
 2449         self.exc_info = exc_info
 2450 
 2451 
 2452 class SameFileError(YoutubeDLError):
 2453     """Same File exception.
 2454 
 2455     This exception will be thrown by FileDownloader objects if they detect
 2456     multiple files would have to be downloaded to the same file on disk.
 2457     """
 2458     pass
 2459 
 2460 
 2461 class PostProcessingError(YoutubeDLError):
 2462     """Post Processing exception.
 2463 
 2464     This exception may be raised by PostProcessor's .run() method to
 2465     indicate an error in the postprocessing task.
 2466     """
 2467 
 2468     def __init__(self, msg):
 2469         super(PostProcessingError, self).__init__(msg)
 2470         self.msg = msg
 2471 
 2472 
 2473 class MaxDownloadsReached(YoutubeDLError):
 2474     """ --max-downloads limit has been reached. """
 2475     pass
 2476 
 2477 
 2478 class UnavailableVideoError(YoutubeDLError):
 2479     """Unavailable Format exception.
 2480 
 2481     This exception will be thrown when a video is requested
 2482     in a format that is not available for that video.
 2483     """
 2484     pass
 2485 
 2486 
 2487 class ContentTooShortError(YoutubeDLError):
 2488     """Content Too Short exception.
 2489 
 2490     This exception may be raised by FileDownloader objects when a file they
 2491     download is too small for what the server announced first, indicating
 2492     the connection was probably interrupted.
 2493     """
 2494 
 2495     def __init__(self, downloaded, expected):
 2496         super(ContentTooShortError, self).__init__(
 2497             'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
 2498         )
 2499         # Both in bytes
 2500         self.downloaded = downloaded
 2501         self.expected = expected
 2502 
 2503 
 2504 class XAttrMetadataError(YoutubeDLError):
 2505     def __init__(self, code=None, msg='Unknown error'):
 2506         super(XAttrMetadataError, self).__init__(msg)
 2507         self.code = code
 2508         self.msg = msg
 2509 
 2510         # Parsing code and msg
 2511         if (self.code in (errno.ENOSPC, errno.EDQUOT)
 2512                 or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
 2513             self.reason = 'NO_SPACE'
 2514         elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
 2515             self.reason = 'VALUE_TOO_LONG'
 2516         else:
 2517             self.reason = 'NOT_SUPPORTED'
 2518 
 2519 
 2520 class XAttrUnavailableError(YoutubeDLError):
 2521     pass
 2522 
 2523 
 2524 def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
 2525     # Work around a Python 2 bug (see http://bugs.python.org/issue17849) by requiring
 2526     # expected HTTP responses to be HTTP/1.0 or later (see also
 2527     # https://github.com/ytdl-org/youtube-dl/issues/6727)
 2528     if sys.version_info < (3, 0):
 2529         kwargs['strict'] = True
 2530     hc = http_class(*args, **compat_kwargs(kwargs))
 2531     source_address = ydl_handler._params.get('source_address')
 2532 
 2533     if source_address is not None:
 2534         # This works around _create_connection() from socket, which tries all
 2535         # address data from getaddrinfo() including IPv6. This filters the result from
 2536         # getaddrinfo() based on the source_address value.
 2537         # This is based on the cpython socket.create_connection() function.
 2538         # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
 2539         def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
 2540             host, port = address
 2541             err = None
 2542             addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
 2543             af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
 2544             ip_addrs = [addr for addr in addrs if addr[0] == af]
 2545             if addrs and not ip_addrs:
 2546                 ip_version = 'v4' if af == socket.AF_INET else 'v6'
 2547                 raise socket.error(
 2548                     "No remote IP%s addresses available for connect, can't use '%s' as source address"
 2549                     % (ip_version, source_address[0]))
 2550             for res in ip_addrs:
 2551                 af, socktype, proto, canonname, sa = res
 2552                 sock = None
 2553                 try:
 2554                     sock = socket.socket(af, socktype, proto)
 2555                     if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
 2556                         sock.settimeout(timeout)
 2557                     sock.bind(source_address)
 2558                     sock.connect(sa)
 2559                     err = None  # Explicitly break reference cycle
 2560                     return sock
 2561                 except socket.error as _:
 2562                     err = _
 2563                     if sock is not None:
 2564                         sock.close()
 2565             if err is not None:
 2566                 raise err
 2567             else:
 2568                 raise socket.error('getaddrinfo returns an empty list')
 2569         if hasattr(hc, '_create_connection'):
 2570             hc._create_connection = _create_connection
 2571         sa = (source_address, 0)
 2572         if hasattr(hc, 'source_address'):  # Python 2.7+
 2573             hc.source_address = sa
 2574         else:  # Python 2.6
 2575             def _hc_connect(self, *args, **kwargs):
 2576                 sock = _create_connection(
 2577                     (self.host, self.port), self.timeout, sa)
 2578                 if is_https:
 2579                     self.sock = ssl.wrap_socket(
 2580                         sock, self.key_file, self.cert_file,
 2581                         ssl_version=ssl.PROTOCOL_TLSv1)
 2582                 else:
 2583                     self.sock = sock
 2584             hc.connect = functools.partial(_hc_connect, hc)
 2585 
 2586     return hc
 2587 
 2588 
 2589 def handle_youtubedl_headers(headers):
 2590     filtered_headers = headers
 2591 
 2592     if 'Youtubedl-no-compression' in filtered_headers:
 2593         filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
 2594         del filtered_headers['Youtubedl-no-compression']
 2595 
 2596     return filtered_headers
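
# Illustrative example (hypothetical header dict):
#
#     handle_youtubedl_headers({
#         'User-Agent': 'youtube-dl',
#         'Accept-Encoding': 'gzip, deflate',
#         'Youtubedl-no-compression': 'True',
#     })
#     # -> {'User-Agent': 'youtube-dl'}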
 2597 
 2598 
 2599 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 2600     """Handler for HTTP requests and responses.
 2601 
 2602     This class, when installed with an OpenerDirector, automatically adds
 2603     the standard headers to every HTTP request and handles gzipped and
 2604     deflated responses from web servers. If compression is to be avoided in
 2605     a particular request, the original request in the program code only has
 2606     to include the HTTP header "Youtubedl-no-compression", which will be
 2607     removed before making the real request.
 2608 
 2609     Part of this code was copied from:
 2610 
 2611     http://techknack.net/python-urllib2-handlers/
 2612 
 2613     Andrew Rowls, the author of that code, agreed to release it to the
 2614     public domain.
 2615     """
 2616 
 2617     def __init__(self, params, *args, **kwargs):
 2618         compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
 2619         self._params = params
 2620 
 2621     def http_open(self, req):
 2622         conn_class = compat_http_client.HTTPConnection
 2623 
 2624         socks_proxy = req.headers.get('Ytdl-socks-proxy')
 2625         if socks_proxy:
 2626             conn_class = make_socks_conn_class(conn_class, socks_proxy)
 2627             del req.headers['Ytdl-socks-proxy']
 2628 
 2629         return self.do_open(functools.partial(
 2630             _create_http_connection, self, conn_class, False),
 2631             req)
 2632 
 2633     @staticmethod
 2634     def deflate(data):
 2635         try:
 2636             return zlib.decompress(data, -zlib.MAX_WBITS)
 2637         except zlib.error:
 2638             return zlib.decompress(data)
 2639 
 2640     def http_request(self, req):
 2641         # According to RFC 3986, URLs cannot contain non-ASCII characters; however, this is
 2642         # not always respected by websites, and some give out URLs with non-percent-encoded
 2643         # non-ASCII characters (see telemb.py, ard.py [#3412]).
 2644         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991),
 2645         # so to work around the aforementioned issue we replace the request's original URL
 2646         # with a percent-encoded one.
 2647         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
 2648         # the code of this workaround has been moved here from YoutubeDL.urlopen()
 2649         url = req.get_full_url()
 2650         url_escaped = escape_url(url)
 2651 
 2652         # Substitute the URL if it changed after escaping
 2653         if url != url_escaped:
 2654             req = update_Request(req, url=url_escaped)
 2655 
 2656         for h, v in std_headers.items():
 2657             # Capitalization is needed because of Python bug 2275: http://bugs.python.org/issue2275
 2658             # (urllib capitalizes the dict keys because of this bug)
 2659             if h.capitalize() not in req.headers:
 2660                 req.add_header(h, v)
 2661 
 2662         req.headers = handle_youtubedl_headers(req.headers)
 2663 
 2664         if sys.version_info < (2, 7) and '#' in req.get_full_url():
 2665             # Python 2.6 is brain-dead when it comes to fragments
 2666             req._Request__original = req._Request__original.partition('#')[0]
 2667             req._Request__r_type = req._Request__r_type.partition('#')[0]
 2668 
 2669         return req
 2670 
 2671     def http_response(self, req, resp):
 2672         old_resp = resp
 2673         # gzip
 2674         if resp.headers.get('Content-encoding', '') == 'gzip':
 2675             content = resp.read()
 2676             gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
 2677             try:
 2678                 uncompressed = io.BytesIO(gz.read())
 2679             except IOError as original_ioerror:
 2680                 # There may be junk at the end of the file
 2681                 # See http://stackoverflow.com/q/4928560/35070 for details
 2682                 for i in range(1, 1024):
 2683                     try:
 2684                         gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
 2685                         uncompressed = io.BytesIO(gz.read())
 2686                     except IOError:
 2687                         continue
 2688                     break
 2689                 else:
 2690                     raise original_ioerror
 2691             resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
 2692             resp.msg = old_resp.msg
 2693             del resp.headers['Content-encoding']
 2694         # deflate
 2695         if resp.headers.get('Content-encoding', '') == 'deflate':
 2696             gz = io.BytesIO(self.deflate(resp.read()))
 2697             resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
 2698             resp.msg = old_resp.msg
 2699             del resp.headers['Content-encoding']
 2700         # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
 2701         # https://github.com/ytdl-org/youtube-dl/issues/6457).
 2702         if 300 <= resp.code < 400:
 2703             location = resp.headers.get('Location')
 2704             if location:
 2705                 # Per RFC 2616 the default charset is iso-8859-1, which is respected by Python 3
 2706                 if sys.version_info >= (3, 0):
 2707                     location = location.encode('iso-8859-1').decode('utf-8')
 2708                 else:
 2709                     location = location.decode('utf-8')
 2710                 location_escaped = escape_url(location)
 2711                 if location != location_escaped:
 2712                     del resp.headers['Location']
 2713                     if sys.version_info < (3, 0):
 2714                         location_escaped = location_escaped.encode('utf-8')
 2715                     resp.headers['Location'] = location_escaped
 2716         return resp
 2717 
 2718     https_request = http_request
 2719     https_response = http_response
 2720 
 2721 
 2722 def make_socks_conn_class(base_class, socks_proxy):
 2723     assert issubclass(base_class, (
 2724         compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
 2725 
 2726     url_components = compat_urlparse.urlparse(socks_proxy)
 2727     if url_components.scheme.lower() == 'socks5':
 2728         socks_type = ProxyType.SOCKS5
 2729     elif url_components.scheme.lower() in ('socks', 'socks4'):
 2730         socks_type = ProxyType.SOCKS4
 2731     elif url_components.scheme.lower() == 'socks4a':
 2732         socks_type = ProxyType.SOCKS4A
 2733 
 2734     def unquote_if_non_empty(s):
 2735         if not s:
 2736             return s
 2737         return compat_urllib_parse_unquote_plus(s)
 2738 
 2739     proxy_args = (
 2740         socks_type,
 2741         url_components.hostname, url_components.port or 1080,
 2742         True,  # Remote DNS
 2743         unquote_if_non_empty(url_components.username),
 2744         unquote_if_non_empty(url_components.password),
 2745     )
 2746 
 2747     class SocksConnection(base_class):
 2748         def connect(self):
 2749             self.sock = sockssocket()
 2750             self.sock.setproxy(*proxy_args)
 2751             if type(self.timeout) in (int, float):
 2752                 self.sock.settimeout(self.timeout)
 2753             self.sock.connect((self.host, self.port))
 2754 
 2755             if isinstance(self, compat_http_client.HTTPSConnection):
 2756                 if hasattr(self, '_context'):  # Python > 2.6
 2757                     self.sock = self._context.wrap_socket(
 2758                         self.sock, server_hostname=self.host)
 2759                 else:
 2760                     self.sock = ssl.wrap_socket(self.sock)
 2761 
 2762     return SocksConnection
 2763 
 2764 
 2765 class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
 2766     def __init__(self, params, https_conn_class=None, *args, **kwargs):
 2767         compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
 2768         self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
 2769         self._params = params
 2770 
 2771     def https_open(self, req):
 2772         kwargs = {}
 2773         conn_class = self._https_conn_class
 2774 
 2775         if hasattr(self, '_context'):  # python > 2.6
 2776             kwargs['context'] = self._context
 2777         if hasattr(self, '_check_hostname'):  # python 3.x
 2778             kwargs['check_hostname'] = self._check_hostname
 2779 
 2780         socks_proxy = req.headers.get('Ytdl-socks-proxy')
 2781         if socks_proxy:
 2782             conn_class = make_socks_conn_class(conn_class, socks_proxy)
 2783             del req.headers['Ytdl-socks-proxy']
 2784 
 2785         return self.do_open(functools.partial(
 2786             _create_http_connection, self, conn_class, True),
 2787             req, **kwargs)
 2788 
 2789 
 2790 class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
 2791     """
 2792     See [1] for cookie file format.
 2793 
 2794     1. https://curl.haxx.se/docs/http-cookies.html
 2795     """
 2796     _HTTPONLY_PREFIX = '#HttpOnly_'
 2797     _ENTRY_LEN = 7
 2798     _HEADER = '''# Netscape HTTP Cookie File
 2799 # This file is generated by youtube-dl.  Do not edit.
 2800 
 2801 '''
 2802     _CookieFileEntry = collections.namedtuple(
 2803         'CookieFileEntry',
 2804         ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
 2805 
 2806     def save(self, filename=None, ignore_discard=False, ignore_expires=False):
 2807         """
 2808         Save cookies to a file.
 2809 
 2810         Most of the code is taken from CPython 3.8 and slightly adapted
 2811         to support cookie files with UTF-8 in both python 2 and 3.
 2812         """
 2813         if filename is None:
 2814             if self.filename is not None:
 2815                 filename = self.filename
 2816             else:
 2817                 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
 2818 
 2819         # Store session cookies with `expires` set to 0 instead of an empty
 2820         # string
 2821         for cookie in self:
 2822             if cookie.expires is None:
 2823                 cookie.expires = 0
 2824 
 2825         with io.open(filename, 'w', encoding='utf-8') as f:
 2826             f.write(self._HEADER)
 2827             now = time.time()
 2828             for cookie in self:
 2829                 if not ignore_discard and cookie.discard:
 2830                     continue
 2831                 if not ignore_expires and cookie.is_expired(now):
 2832                     continue
 2833                 if cookie.secure:
 2834                     secure = 'TRUE'
 2835                 else:
 2836                     secure = 'FALSE'
 2837                 if cookie.domain.startswith('.'):
 2838                     initial_dot = 'TRUE'
 2839                 else:
 2840                     initial_dot = 'FALSE'
 2841                 if cookie.expires is not None:
 2842                     expires = compat_str(cookie.expires)
 2843                 else:
 2844                     expires = ''
 2845                 if cookie.value is None:
 2846                     # cookies.txt regards 'Set-Cookie: foo' as a cookie
 2847                     # with no name, whereas http.cookiejar regards it as a
 2848                     # cookie with no value.
 2849                     name = ''
 2850                     value = cookie.name
 2851                 else:
 2852                     name = cookie.name
 2853                     value = cookie.value
 2854                 f.write(
 2855                     '\t'.join([cookie.domain, initial_dot, cookie.path,
 2856                                secure, expires, name, value]) + '\n')
 2857 
 2858     def load(self, filename=None, ignore_discard=False, ignore_expires=False):
 2859         """Load cookies from a file."""
 2860         if filename is None:
 2861             if self.filename is not None:
 2862                 filename = self.filename
 2863             else:
 2864                 raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
 2865 
 2866         def prepare_line(line):
 2867             if line.startswith(self._HTTPONLY_PREFIX):
 2868                 line = line[len(self._HTTPONLY_PREFIX):]
 2869             # comments and empty lines are fine
 2870             if line.startswith('#') or not line.strip():
 2871                 return line
 2872             cookie_list = line.split('\t')
 2873             if len(cookie_list) != self._ENTRY_LEN:
 2874                 raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
 2875             cookie = self._CookieFileEntry(*cookie_list)
 2876             if cookie.expires_at and not cookie.expires_at.isdigit():
 2877                 raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
 2878             return line
 2879 
 2880         cf = io.StringIO()
 2881         with io.open(filename, encoding='utf-8') as f:
 2882             for line in f:
 2883                 try:
 2884                     cf.write(prepare_line(line))
 2885                 except compat_cookiejar.LoadError as e:
 2886                     write_string(
 2887                         'WARNING: skipping cookie file entry due to %s: %r\n'
 2888                         % (e, line), sys.stderr)
 2889                     continue
 2890         cf.seek(0)
 2891         self._really_load(cf, filename, ignore_discard, ignore_expires)
 2892         # Session cookies are denoted by either `expires` field set to
 2893         # an empty string or 0. MozillaCookieJar only recognizes the former
 2894         # (see [1]). So we need to force the latter to be recognized as session
 2895         # cookies on our own.
 2896         # Session cookies may be important for cookies-based authentication,
 2897         # e.g. usually, when the user does not tick the 'Remember me' checkbox while
 2898         # logging in on a site, some important cookies are stored as session
 2899         # cookies, so failing to recognize them results in a failed login.
 2900         # 1. https://bugs.python.org/issue17164
 2901         for cookie in self:
 2902             # Treat `expires=0` cookies as session cookies
 2903             if cookie.expires == 0:
 2904                 cookie.expires = None
 2905                 cookie.discard = True
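
# Illustrative cookies.txt entry handled by YoutubeDLCookieJar (seven
# tab-separated fields matching _CookieFileEntry; the values are hypothetical):
#
#     '.example.com\tTRUE\t/\tFALSE\t0\tSESSIONID\tabc123'
#
# A leading '#HttpOnly_' prefix is stripped on load, and entries whose
# `expires_at` field is 0 are turned back into session cookies.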
 2906 
 2907 
 2908 class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
 2909     def __init__(self, cookiejar=None):
 2910         compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
 2911 
 2912     def http_response(self, request, response):
 2913         # Python 2 will choke on next HTTP request in row if there are non-ASCII
 2914         # characters in Set-Cookie HTTP header of last response (see
 2915         # https://github.com/ytdl-org/youtube-dl/issues/6769).
 2916         # In order to at least prevent crashing we will percent encode Set-Cookie
 2917         # header before HTTPCookieProcessor starts processing it.
 2918         # if sys.version_info < (3, 0) and response.headers:
 2919         #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
 2920         #         set_cookie = response.headers.get(set_cookie_header)
 2921         #         if set_cookie:
 2922         #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
 2923         #             if set_cookie != set_cookie_escaped:
 2924         #                 del response.headers[set_cookie_header]
 2925         #                 response.headers[set_cookie_header] = set_cookie_escaped
 2926         return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
 2927 
 2928     https_request = compat_urllib_request.HTTPCookieProcessor.http_request
 2929     https_response = http_response
 2930 
 2931 
 2932 class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
 2933     """YoutubeDL redirect handler
 2934 
 2935     The code is based on HTTPRedirectHandler implementation from CPython [1].
 2936 
 2937     This redirect handler solves two issues:
 2938      - ensures redirect URL is always unicode under python 2
 2939      - introduces support for experimental HTTP response status code
 2940        308 Permanent Redirect [2] used by some sites [3]
 2941 
 2942     1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
 2943     2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
 2944     3. https://github.com/ytdl-org/youtube-dl/issues/28768
 2945     """
 2946 
 2947     http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
 2948 
 2949     def redirect_request(self, req, fp, code, msg, headers, newurl):
 2950         """Return a Request or None in response to a redirect.
 2951 
 2952         This is called by the http_error_30x methods when a
 2953         redirection response is received.  If a redirection should
 2954         take place, return a new Request to allow http_error_30x to
 2955         perform the redirect.  Otherwise, raise HTTPError if no-one
 2956         else should try to handle this url.  Return None if you can't
 2957         but another Handler might.
 2958         """
 2959         m = req.get_method()
 2960         if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
 2961                  or code in (301, 302, 303) and m == "POST")):
 2962             raise compat_HTTPError(req.full_url, code, msg, headers, fp)
 2963         # Strictly (according to RFC 2616), 301 or 302 in response to
 2964         # a POST MUST NOT cause a redirection without confirmation
 2965         # from the user (of urllib.request, in this case).  In practice,
 2966         # essentially all clients do redirect in this case, so we do
 2967         # the same.
 2968 
 2969         # On Python 2 urlh.geturl() may sometimes return the redirect URL
 2970         # as a byte string instead of unicode. This workaround forces it
 2971         # to always return unicode.
 2972         if sys.version_info[0] < 3:
 2973             newurl = compat_str(newurl)
 2974 
 2975         # Be conciliant with URIs containing a space.  This is mainly
 2976         # redundant with the more complete encoding done in http_error_302(),
 2977         # but it is kept for compatibility with other callers.
 2978         newurl = newurl.replace(' ', '%20')
 2979 
 2980         CONTENT_HEADERS = ("content-length", "content-type")
 2981         # NB: don't use dict comprehension for python 2.6 compatibility
 2982         newheaders = dict((k, v) for k, v in req.headers.items()
 2983                           if k.lower() not in CONTENT_HEADERS)
 2984         return compat_urllib_request.Request(
 2985             newurl, headers=newheaders, origin_req_host=req.origin_req_host,
 2986             unverifiable=True)
 2987 
 2988 
 2989 def extract_timezone(date_str):
 2990     m = re.search(
 2991         r'''(?x)
 2992             ^.{8,}?                                              # >=8 char non-TZ prefix, if present
 2993             (?P<tz>Z|                                            # just the UTC Z, or
 2994                 (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or
 2995                    (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits
 2996                    [ ]?                                          # optional space
 2997                 (?P<sign>\+|-)                                   # +/-
 2998                 (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})       # hh[:]mm
 2999             $)
 3000         ''', date_str)
 3001     if not m:
 3002         m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
 3003         timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
 3004         if timezone is not None:
 3005             date_str = date_str[:-len(m.group('tz'))]
 3006         timezone = datetime.timedelta(hours=timezone or 0)
 3007     else:
 3008         date_str = date_str[:-len(m.group('tz'))]
 3009         if not m.group('sign'):
 3010             timezone = datetime.timedelta()
 3011         else:
 3012             sign = 1 if m.group('sign') == '+' else -1
 3013             timezone = datetime.timedelta(
 3014                 hours=sign * int(m.group('hours')),
 3015                 minutes=sign * int(m.group('minutes')))
 3016     return timezone, date_str
 3017 
 3018 
 3019 def parse_iso8601(date_str, delimiter='T', timezone=None):
 3020     """ Return a UNIX timestamp from the given date """
 3021 
 3022     if date_str is None:
 3023         return None
 3024 
 3025     date_str = re.sub(r'\.[0-9]+', '', date_str)
 3026 
 3027     if timezone is None:
 3028         timezone, date_str = extract_timezone(date_str)
 3029 
 3030     try:
 3031         date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
 3032         dt = datetime.datetime.strptime(date_str, date_format) - timezone
 3033         return calendar.timegm(dt.timetuple())
 3034     except ValueError:
 3035         pass
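
# Illustrative timestamps (pure UTC arithmetic, no local timezone involved):
#
#     parse_iso8601('2014-12-05T12:30:00Z')        # -> 1417782600
#     parse_iso8601('2014-12-05T12:30:00+01:00')   # -> 1417779000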
 3036 
 3037 
 3038 def date_formats(day_first=True):
 3039     return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
 3040 
 3041 
 3042 def unified_strdate(date_str, day_first=True):
 3043     """Return a string with the date in the format YYYYMMDD"""
 3044 
 3045     if date_str is None:
 3046         return None
 3047     upload_date = None
 3048     # Replace commas
 3049     date_str = date_str.replace(',', ' ')
 3050     # Remove AM/PM + timezone
 3051     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
 3052     _, date_str = extract_timezone(date_str)
 3053 
 3054     for expression in date_formats(day_first):
 3055         try:
 3056             upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
 3057         except ValueError:
 3058             pass
 3059     if upload_date is None:
 3060         timetuple = email.utils.parsedate_tz(date_str)
 3061         if timetuple:
 3062             try:
 3063                 upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
 3064             except ValueError:
 3065                 pass
 3066     if upload_date is not None:
 3067         return compat_str(upload_date)
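
# Illustrative conversions (these depend on the DATE_FORMATS_* tables defined
# earlier in this module):
#
#     unified_strdate('December 21, 2010')          # -> '20101221'
#     unified_strdate('8/7/2009')                   # -> '20090708' (day first)
#     unified_strdate('8/7/2009', day_first=False)  # -> '20090807'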
 3068 
 3069 
 3070 def unified_timestamp(date_str, day_first=True):
 3071     if date_str is None:
 3072         return None
 3073 
 3074     date_str = re.sub(r'\s+', ' ', re.sub(
 3075         r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))
 3076 
 3077     pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
 3078     timezone, date_str = extract_timezone(date_str)
 3079 
 3080     # Remove AM/PM + timezone
 3081     date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
 3082 
 3083     # Remove unrecognized timezones from ISO 8601 alike timestamps
 3084     m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
 3085     if m:
 3086         date_str = date_str[:-len(m.group('tz'))]
 3087 
 3088     # Python only supports microseconds, so remove nanoseconds
 3089     m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
 3090     if m:
 3091         date_str = m.group(1)
 3092 
 3093     for expression in date_formats(day_first):
 3094         try:
 3095             dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
 3096             return calendar.timegm(dt.timetuple())
 3097         except ValueError:
 3098             pass
 3099     timetuple = email.utils.parsedate_tz(date_str)
 3100     if timetuple:
 3101         return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
 3102 
 3103 
 3104 def determine_ext(url, default_ext='unknown_video'):
 3105     if url is None or '.' not in url:
 3106         return default_ext
 3107     guess = url.partition('?')[0].rpartition('.')[2]
 3108     if re.match(r'^[A-Za-z0-9]+$', guess):
 3109         return guess
 3110     # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
 3111     elif guess.rstrip('/') in KNOWN_EXTENSIONS:
 3112         return guess.rstrip('/')
 3113     else:
 3114         return default_ext
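
# Illustrative examples (hypothetical URLs):
#
#     determine_ext('http://example.com/video.mp4?download=1')   # -> 'mp4'
#     determine_ext('http://example.com/play')                   # -> 'unknown_video'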
 3115 
 3116 
 3117 def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
 3118     return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
 3119 
 3120 
 3121 def date_from_str(date_str):
 3122     """
 3123     Return a date object from a string in the format YYYYMMDD, 'now', 'today',
 3124     'yesterday' or (now|today)[+-][0-9](day|week|month|year)(s)?"""
 3125     today = datetime.date.today()
 3126     if date_str in ('now', 'today'):
 3127         return today
 3128     if date_str == 'yesterday':
 3129         return today - datetime.timedelta(days=1)
 3130     match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
 3131     if match is not None:
 3132         sign = match.group('sign')
 3133         time = int(match.group('time'))
 3134         if sign == '-':
 3135             time = -time
 3136         unit = match.group('unit')
 3137         # A bad approximation?
 3138         if unit == 'month':
 3139             unit = 'day'
 3140             time *= 30
 3141         elif unit == 'year':
 3142             unit = 'day'
 3143             time *= 365
 3144         unit += 's'
 3145         delta = datetime.timedelta(**{unit: time})
 3146         return today + delta
 3147     return datetime.datetime.strptime(date_str, '%Y%m%d').date()
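
# Illustrative examples (the relative forms are evaluated against today's date):
#
#     date_from_str('20200229')       # -> datetime.date(2020, 2, 29)
#     date_from_str('now-1week')      # -> today minus 7 days
#     date_from_str('today+2months')  # -> today plus 60 days (months approximated as 30 days)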
 3148 
 3149 
 3150 def hyphenate_date(date_str):
 3151     """
 3152     Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
 3153     match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
 3154     if match is not None:
 3155         return '-'.join(match.groups())
 3156     else:
 3157         return date_str
 3158 
 3159 
 3160 class DateRange(object):
 3161     """Represents a time interval between two dates"""
 3162 
 3163     def __init__(self, start=None, end=None):
 3164         """start and end must be strings in the format accepted by date"""
 3165         if start is not None:
 3166             self.start = date_from_str(start)
 3167         else:
 3168             self.start = datetime.datetime.min.date()
 3169         if end is not None:
 3170             self.end = date_from_str(end)
 3171         else:
 3172             self.end = datetime.datetime.max.date()
 3173         if self.start > self.end:
 3174             raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
 3175 
 3176     @classmethod
 3177     def day(cls, day):
 3178         """Returns a range that only contains the given day"""
 3179         return cls(day, day)
 3180 
 3181     def __contains__(self, date):
 3182         """Check if the date is in the range"""
 3183         if not isinstance(date, datetime.date):
 3184             date = date_from_str(date)
 3185         return self.start <= date <= self.end
 3186 
 3187     def __str__(self):
 3188         return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
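
# Illustrative usage:
#
#     '20100615' in DateRange('20100101', '20101231')   # -> True
#     '20110101' in DateRange('20100101', '20101231')   # -> False
#     str(DateRange.day('20200101'))   # -> '2020-01-01 - 2020-01-01'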
 3189 
 3190 
 3191 def platform_name():
 3192     """ Returns the platform name as a compat_str """
 3193     res = platform.platform()
 3194     if isinstance(res, bytes):
 3195         res = res.decode(preferredencoding())
 3196 
 3197     assert isinstance(res, compat_str)
 3198     return res
 3199 
 3200 
 3201 def _windows_write_string(s, out):
 3202     """ Returns True if the string was written using special methods,
 3203     False if it has yet to be written out."""
 3204     # Adapted from http://stackoverflow.com/a/3259271/35070
 3205 
 3206     import ctypes
 3207     import ctypes.wintypes
 3208 
 3209     WIN_OUTPUT_IDS = {
 3210         1: -11,
 3211         2: -12,
 3212     }
 3213 
 3214     try:
 3215         fileno = out.fileno()
 3216     except AttributeError:
 3217         # If the output stream doesn't have a fileno, it's virtual
 3218         return False
 3219     except io.UnsupportedOperation:
 3220         # Some strange Windows pseudo files?
 3221         return False
 3222     if fileno not in WIN_OUTPUT_IDS:
 3223         return False
 3224 
 3225     GetStdHandle = compat_ctypes_WINFUNCTYPE(
 3226         ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
 3227         ('GetStdHandle', ctypes.windll.kernel32))
 3228     h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
 3229 
 3230     WriteConsoleW = compat_ctypes_WINFUNCTYPE(
 3231         ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
 3232         ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
 3233         ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
 3234     written = ctypes.wintypes.DWORD(0)
 3235 
 3236     GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
 3237     FILE_TYPE_CHAR = 0x0002
 3238     FILE_TYPE_REMOTE = 0x8000
 3239     GetConsoleMode = compat_ctypes_WINFUNCTYPE(
 3240         ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
 3241         ctypes.POINTER(ctypes.wintypes.DWORD))(
 3242         ('GetConsoleMode', ctypes.windll.kernel32))
 3243     INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
 3244 
 3245     def not_a_console(handle):
 3246         if handle == INVALID_HANDLE_VALUE or handle is None:
 3247             return True
 3248         return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
 3249                 or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
 3250 
 3251     if not_a_console(h):
 3252         return False
 3253 
 3254     def next_nonbmp_pos(s):
 3255         try:
 3256             return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
 3257         except StopIteration:
 3258             return len(s)
 3259 
 3260     while s:
 3261         count = min(next_nonbmp_pos(s), 1024)
 3262 
 3263         ret = WriteConsoleW(
 3264             h, s, count if count else 2, ctypes.byref(written), None)
 3265         if ret == 0:
 3266             raise OSError('Failed to write string')
 3267         if not count:  # We just wrote a non-BMP character
 3268             assert written.value == 2
 3269             s = s[1:]
 3270         else:
 3271             assert written.value > 0
 3272             s = s[written.value:]
 3273     return True
 3274 
 3275 
 3276 def write_string(s, out=None, encoding=None):
 3277     if out is None:
 3278         out = sys.stderr
 3279     assert type(s) == compat_str
 3280 
 3281     if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
 3282         if _windows_write_string(s, out):
 3283             return
 3284 
 3285     if ('b' in getattr(out, 'mode', '')
 3286             or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
 3287         byt = s.encode(encoding or preferredencoding(), 'ignore')
 3288         out.write(byt)
 3289     elif hasattr(out, 'buffer'):
 3290         enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
 3291         byt = s.encode(enc, 'ignore')
 3292         out.buffer.write(byt)
 3293     else:
 3294         out.write(s)
 3295     out.flush()
 3296 
 3297 
 3298 def bytes_to_intlist(bs):
 3299     if not bs:
 3300         return []
 3301     if isinstance(bs[0], int):  # Python 3
 3302         return list(bs)
 3303     else:
 3304         return [ord(c) for c in bs]
 3305 
 3306 
 3307 def intlist_to_bytes(xs):
 3308     if not xs:
 3309         return b''
 3310     return compat_struct_pack('%dB' % len(xs), *xs)
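# Usage sketch (illustrative comment only, not executed); the two helpers are
# inverses of each other on both Python 2 and Python 3:
#   bytes_to_intlist(b'abc')        ->  [97, 98, 99]
#   intlist_to_bytes([97, 98, 99])  ->  b'abc'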
 3311 
 3312 
 3313 # Cross-platform file locking
 3314 if sys.platform == 'win32':
 3315     import ctypes.wintypes
 3316     import msvcrt
 3317 
 3318     class OVERLAPPED(ctypes.Structure):
 3319         _fields_ = [
 3320             ('Internal', ctypes.wintypes.LPVOID),
 3321             ('InternalHigh', ctypes.wintypes.LPVOID),
 3322             ('Offset', ctypes.wintypes.DWORD),
 3323             ('OffsetHigh', ctypes.wintypes.DWORD),
 3324             ('hEvent', ctypes.wintypes.HANDLE),
 3325         ]
 3326 
 3327     kernel32 = ctypes.windll.kernel32
 3328     LockFileEx = kernel32.LockFileEx
 3329     LockFileEx.argtypes = [
 3330         ctypes.wintypes.HANDLE,     # hFile
 3331         ctypes.wintypes.DWORD,      # dwFlags
 3332         ctypes.wintypes.DWORD,      # dwReserved
 3333         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
 3334         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
 3335         ctypes.POINTER(OVERLAPPED)  # Overlapped
 3336     ]
 3337     LockFileEx.restype = ctypes.wintypes.BOOL
 3338     UnlockFileEx = kernel32.UnlockFileEx
 3339     UnlockFileEx.argtypes = [
 3340         ctypes.wintypes.HANDLE,     # hFile
 3341         ctypes.wintypes.DWORD,      # dwReserved
 3342         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
 3343         ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
 3344         ctypes.POINTER(OVERLAPPED)  # Overlapped
 3345     ]
 3346     UnlockFileEx.restype = ctypes.wintypes.BOOL
 3347     whole_low = 0xffffffff
 3348     whole_high = 0x7fffffff
 3349 
 3350     def _lock_file(f, exclusive):
 3351         overlapped = OVERLAPPED()
 3352         overlapped.Offset = 0
 3353         overlapped.OffsetHigh = 0
 3354         overlapped.hEvent = 0
 3355         f._lock_file_overlapped_p = ctypes.pointer(overlapped)
 3356         handle = msvcrt.get_osfhandle(f.fileno())
 3357         if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
 3358                           whole_low, whole_high, f._lock_file_overlapped_p):
 3359             raise OSError('Locking file failed: %r' % ctypes.FormatError())
 3360 
 3361     def _unlock_file(f):
 3362         assert f._lock_file_overlapped_p
 3363         handle = msvcrt.get_osfhandle(f.fileno())
 3364         if not UnlockFileEx(handle, 0,
 3365                             whole_low, whole_high, f._lock_file_overlapped_p):
 3366             raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
 3367 
 3368 else:
 3369     # Some platforms, such as Jython, are missing fcntl
 3370     try:
 3371         import fcntl
 3372 
 3373         def _lock_file(f, exclusive):
 3374             fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)
 3375 
 3376         def _unlock_file(f):
 3377             fcntl.flock(f, fcntl.LOCK_UN)
 3378     except ImportError:
 3379         UNSUPPORTED_MSG = 'file locking is not supported on this platform'
 3380 
 3381         def _lock_file(f, exclusive):
 3382             raise IOError(UNSUPPORTED_MSG)
 3383 
 3384         def _unlock_file(f):
 3385             raise IOError(UNSUPPORTED_MSG)
 3386 
 3387 
 3388 class locked_file(object):
 3389     def __init__(self, filename, mode, encoding=None):
 3390         assert mode in ['r', 'a', 'w']
 3391         self.f = io.open(filename, mode, encoding=encoding)
 3392         self.mode = mode
 3393 
 3394     def __enter__(self):
 3395         exclusive = self.mode != 'r'
 3396         try:
 3397             _lock_file(self.f, exclusive)
 3398         except IOError:
 3399             self.f.close()
 3400             raise
 3401         return self
 3402 
 3403     def __exit__(self, etype, value, traceback):
 3404         try:
 3405             _unlock_file(self.f)
 3406         finally:
 3407             self.f.close()
 3408 
 3409     def __iter__(self):
 3410         return iter(self.f)
 3411 
 3412     def write(self, *args):
 3413         return self.f.write(*args)
 3414 
 3415     def read(self, *args):
 3416         return self.f.read(*args)
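# Usage sketch for locked_file (illustrative comment only, not executed;
# 'archive.txt' is a made-up file name):
#   with locked_file('archive.txt', 'a', encoding='utf-8') as f:
#       f.write('youtube abc123\n')
# The lock is acquired in __enter__ (shared for 'r', exclusive for 'a'/'w')
# and released, together with closing the file, in __exit__.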
 3417 
 3418 
 3419 def get_filesystem_encoding():
 3420     encoding = sys.getfilesystemencoding()
 3421     return encoding if encoding is not None else 'utf-8'
 3422 
 3423 
 3424 def shell_quote(args):
 3425     quoted_args = []
 3426     encoding = get_filesystem_encoding()
 3427     for a in args:
 3428         if isinstance(a, bytes):
 3429             # We may get a filename encoded with 'encodeFilename'
 3430             a = a.decode(encoding)
 3431         quoted_args.append(compat_shlex_quote(a))
 3432     return ' '.join(quoted_args)
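# Usage sketch (illustrative comment only, not executed; output shown for a
# POSIX shell, where compat_shlex_quote maps to pipes.quote/shlex.quote):
#   shell_quote(['ffmpeg', '-i', 'my file.mp4'])  ->  "ffmpeg -i 'my file.mp4'"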
 3433 
 3434 
 3435 def smuggle_url(url, data):
 3436     """ Pass additional data in a URL for internal use. """
 3437 
 3438     url, idata = unsmuggle_url(url, {})
 3439     data.update(idata)
 3440     sdata = compat_urllib_parse_urlencode(
 3441         {'__youtubedl_smuggle': json.dumps(data)})
 3442     return url + '#' + sdata
 3443 
 3444 
 3445 def unsmuggle_url(smug_url, default=None):
 3446     if '#__youtubedl_smuggle' not in smug_url:
 3447         return smug_url, default
 3448     url, _, sdata = smug_url.rpartition('#')
 3449     jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
 3450     data = json.loads(jsond)
 3451     return url, data
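# Usage sketch (illustrative comment only, not executed); the smuggled data
# travels in the URL fragment and round-trips through unsmuggle_url:
#   smuggled = smuggle_url('http://example.com/video', {'referer': 'http://example.com/'})
#   unsmuggle_url(smuggled)  ->  ('http://example.com/video', {'referer': 'http://example.com/'})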
 3452 
 3453 
 3454 def format_bytes(bytes):
 3455     if bytes is None:
 3456         return 'N/A'
 3457     if type(bytes) is str:
 3458         bytes = float(bytes)
 3459     if bytes == 0.0:
 3460         exponent = 0
 3461     else:
 3462         exponent = int(math.log(bytes, 1024.0))
 3463     suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
 3464     converted = float(bytes) / float(1024 ** exponent)
 3465     return '%.2f%s' % (converted, suffix)
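# Usage sketch (illustrative comment only, not executed):
#   format_bytes(None)     ->  'N/A'
#   format_bytes(1536)     ->  '1.50KiB'
#   format_bytes(5242880)  ->  '5.00MiB'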
 3466 
 3467 
 3468 def lookup_unit_table(unit_table, s):
 3469     units_re = '|'.join(re.escape(u) for u in unit_table)
 3470     m = re.match(
 3471         r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
 3472     if not m:
 3473         return None
 3474     num_str = m.group('num').replace(',', '.')
 3475     mult = unit_table[m.group('unit')]
 3476     return int(float(num_str) * mult)
 3477 
 3478 
 3479 def parse_filesize(s):
 3480     if s is None:
 3481         return None
 3482 
 3483     # The lower-case forms are of course incorrect and unofficial,
 3484     # but we support those too
 3485     _UNIT_TABLE = {
 3486         'B': 1,
 3487         'b': 1,
 3488         'bytes': 1,
 3489         'KiB': 1024,
 3490         'KB': 1000,
 3491         'kB': 1024,
 3492         'Kb': 1000,
 3493         'kb': 1000,
 3494         'kilobytes': 1000,
 3495         'kibibytes': 1024,
 3496         'MiB': 1024 ** 2,
 3497         'MB': 1000 ** 2,
 3498         'mB': 1024 ** 2,
 3499         'Mb': 1000 ** 2,
 3500         'mb': 1000 ** 2,
 3501         'megabytes': 1000 ** 2,
 3502         'mebibytes': 1024 ** 2,
 3503         'GiB': 1024 ** 3,
 3504         'GB': 1000 ** 3,
 3505         'gB': 1024 ** 3,
 3506         'Gb': 1000 ** 3,
 3507         'gb': 1000 ** 3,
 3508         'gigabytes': 1000 ** 3,
 3509         'gibibytes': 1024 ** 3,
 3510         'TiB': 1024 ** 4,
 3511         'TB': 1000 ** 4,
 3512         'tB': 1024 ** 4,
 3513         'Tb': 1000 ** 4,
 3514         'tb': 1000 ** 4,
 3515         'terabytes': 1000 ** 4,
 3516         'tebibytes': 1024 ** 4,
 3517         'PiB': 1024 ** 5,
 3518         'PB': 1000 ** 5,
 3519         'pB': 1024 ** 5,
 3520         'Pb': 1000 ** 5,
 3521         'pb': 1000 ** 5,
 3522         'petabytes': 1000 ** 5,
 3523         'pebibytes': 1024 ** 5,
 3524         'EiB': 1024 ** 6,
 3525         'EB': 1000 ** 6,
 3526         'eB': 1024 ** 6,
 3527         'Eb': 1000 ** 6,
 3528         'eb': 1000 ** 6,
 3529         'exabytes': 1000 ** 6,
 3530         'exbibytes': 1024 ** 6,
 3531         'ZiB': 1024 ** 7,
 3532         'ZB': 1000 ** 7,
 3533         'zB': 1024 ** 7,
 3534         'Zb': 1000 ** 7,
 3535         'zb': 1000 ** 7,
 3536         'zettabytes': 1000 ** 7,
 3537         'zebibytes': 1024 ** 7,
 3538         'YiB': 1024 ** 8,
 3539         'YB': 1000 ** 8,
 3540         'yB': 1024 ** 8,
 3541         'Yb': 1000 ** 8,
 3542         'yb': 1000 ** 8,
 3543         'yottabytes': 1000 ** 8,
 3544         'yobibytes': 1024 ** 8,
 3545     }
 3546 
 3547     return lookup_unit_table(_UNIT_TABLE, s)
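# Usage sketch (illustrative comment only, not executed; results follow the
# table above, including its deliberately lenient lower-case forms):
#   parse_filesize('1.5 GiB')  ->  1610612736   (1.5 * 1024 ** 3)
#   parse_filesize('500 kB')   ->  512000       (500 * 1024)
#   parse_filesize('10 MB')    ->  10000000     (10 * 1000 ** 2)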
 3548 
 3549 
 3550 def parse_count(s):
 3551     if s is None:
 3552         return None
 3553 
 3554     s = s.strip()
 3555 
 3556     if re.match(r'^[\d,.]+$', s):
 3557         return str_to_int(s)
 3558 
 3559     _UNIT_TABLE = {
 3560         'k': 1000,
 3561         'K': 1000,
 3562         'm': 1000 ** 2,
 3563         'M': 1000 ** 2,
 3564         'kk': 1000 ** 2,
 3565         'KK': 1000 ** 2,
 3566     }
 3567 
 3568     return lookup_unit_table(_UNIT_TABLE, s)
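# Usage sketch (illustrative comment only, not executed):
#   parse_count('1,234')  ->  1234
#   parse_count('1.5M')   ->  1500000
#   parse_count('17k')    ->  17000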
 3569 
 3570 
 3571 def parse_resolution(s):
 3572     if s is None:
 3573         return {}
 3574 
 3575     mobj = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
 3576     if mobj:
 3577         return {
 3578             'width': int(mobj.group('w')),
 3579             'height': int(mobj.group('h')),
 3580         }
 3581 
 3582     mobj = re.search(r'\b(\d+)[pPiI]\b', s)
 3583     if mobj:
 3584         return {'height': int(mobj.group(1))}
 3585 
 3586     mobj = re.search(r'\b([48])[kK]\b', s)
 3587     if mobj:
 3588         return {'height': int(mobj.group(1)) * 540}
 3589 
 3590     return {}
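# Usage sketch (illustrative comment only, not executed):
#   parse_resolution('1920x1080')  ->  {'width': 1920, 'height': 1080}
#   parse_resolution('720p')       ->  {'height': 720}
#   parse_resolution('4K')         ->  {'height': 2160}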
 3591 
 3592 
 3593 def parse_bitrate(s):
 3594     if not isinstance(s, compat_str):
 3595         return
 3596     mobj = re.search(r'\b(\d+)\s*kbps', s)
 3597     if mobj:
 3598         return int(mobj.group(1))
 3599 
 3600 
 3601 def month_by_name(name, lang='en'):
 3602     """ Return the number of a month by its (locale-independent) name in the given language """
 3603 
 3604     month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
 3605 
 3606     try:
 3607         return month_names.index(name) + 1
 3608     except ValueError:
 3609         return None
 3610 
 3611 
 3612 def month_by_abbreviation(abbrev):
 3613     """ Return the number of a month by its (locale-independent) English
 3614         three-letter abbreviation """
 3615 
 3616     try:
 3617         return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
 3618     except ValueError:
 3619         return None
 3620 
 3621 
 3622 def fix_xml_ampersands(xml_str):
 3623     """Replace every '&' that is not already part of a recognized entity with '&amp;' in XML"""
 3624     return re.sub(
 3625         r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
 3626         '&amp;',
 3627         xml_str)
 3628 
 3629 
 3630 def setproctitle(title):
 3631     assert isinstance(title, compat_str)
 3632 
 3633     # ctypes in Jython is not complete
 3634     # http://bugs.jython.org/issue2148
 3635     if sys.platform.startswith('java'):
 3636         return
 3637 
 3638     try:
 3639         libc = ctypes.cdll.LoadLibrary('libc.so.6')
 3640     except OSError:
 3641         return
 3642     except TypeError:
 3643         # LoadLibrary in Windows Python 2.7.13 only expects
 3644         # a bytestring, but since unicode_literals turns
 3645         # every string into a unicode string, it fails.
 3646         return
 3647     title_bytes = title.encode('utf-8')
 3648     buf = ctypes.create_string_buffer(len(title_bytes))
 3649     buf.value = title_bytes
 3650     try:
 3651         libc.prctl(15, buf, 0, 0, 0)
 3652     except AttributeError:
 3653         return  # Strange libc, just skip this
 3654 
 3655 
 3656 def remove_start(s, start):
 3657     return s[len(start):] if s is not None and s.startswith(start) else s
 3658 
 3659 
 3660 def remove_end(s, end):
 3661     return s[:-len(end)] if s is not None and s.endswith(end) else s
 3662 
 3663 
 3664 def remove_quotes(s):
 3665     if s is None or len(s) < 2:
 3666         return s
 3667     for quote in ('"', "'", ):
 3668         if s[0] == quote and s[-1] == quote:
 3669             return s[1:-1]
 3670     return s
 3671 
 3672 
 3673 def url_basename(url):
 3674     path = compat_urlparse.urlparse(url).path
 3675     return path.strip('/').split('/')[-1]
 3676 
 3677 
 3678 def base_url(url):
 3679     return re.match(r'https?://[^?#&]+/', url).group()
 3680 
 3681 
 3682 def urljoin(base, path):
 3683     if isinstance(path, bytes):
 3684         path = path.decode('utf-8')
 3685     if not isinstance(path, compat_str) or not path:
 3686         return None
 3687     if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
 3688         return path
 3689     if isinstance(base, bytes):
 3690         base = base.decode('utf-8')
 3691     if not isinstance(base, compat_str) or not re.match(
 3692             r'^(?:https?:)?//', base):
 3693         return None
 3694     return compat_urlparse.urljoin(base, path)
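# Usage sketch for urljoin (illustrative comment only, not executed):
#   urljoin('https://example.com/a/', 'b/c.mp4')              ->  'https://example.com/a/b/c.mp4'
#   urljoin('https://example.com/a/', '//cdn.example.com/c')  ->  '//cdn.example.com/c'
#   urljoin(None, '/c.mp4')                                   ->  None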
 3695 
 3696 
 3697 class HEADRequest(compat_urllib_request.Request):
 3698     def get_method(self):
 3699         return 'HEAD'
 3700 
 3701 
 3702 class PUTRequest(compat_urllib_request.Request):
 3703     def get_method(self):
 3704         return 'PUT'
 3705 
 3706 
 3707 def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
 3708     if get_attr:
 3709         if v is not None:
 3710             v = getattr(v, get_attr, None)
 3711     if v in (None, ''):
 3712         return default
 3713     try:
 3714         return int(v) * invscale // scale
 3715     except (ValueError, TypeError, OverflowError):
 3716         return default
 3717 
 3718 
 3719 def str_or_none(v, default=None):
 3720     return default if v is None else compat_str(v)
 3721 
 3722 
 3723 def str_to_int(int_str):
 3724     """ A more relaxed version of int_or_none """
 3725     if isinstance(int_str, compat_integer_types):
 3726         return int_str
 3727     elif isinstance(int_str, compat_str):
 3728         int_str = re.sub(r'[,\.\+]', '', int_str)
 3729         return int_or_none(int_str)
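# Usage sketch (illustrative comment only, not executed):
#   int_or_none('42')                ->  42
#   int_or_none(None, default=0)     ->  0
#   int_or_none('2048', scale=1024)  ->  2
#   str_to_int('1,000,000')          ->  1000000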
 3730 
 3731 
 3732 def float_or_none(v, scale=1, invscale=1, default=None):
 3733     if v is None:
 3734         return default
 3735     try:
 3736         return float(v) * invscale / scale
 3737     except (ValueError, TypeError):
 3738         return default
 3739 
 3740 
 3741 def bool_or_none(v, default=None):
 3742     return v if isinstance(v, bool) else default
 3743 
 3744 
 3745 def strip_or_none(v, default=None):
 3746     return v.strip() if isinstance(v, compat_str) else default
 3747 
 3748 
 3749 def url_or_none(url):
 3750     if not url or not isinstance(url, compat_str):
 3751         return None
 3752     url = url.strip()
 3753     return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
 3754 
 3755 
 3756 def parse_duration(s):
 3757     if not isinstance(s, compat_basestring):
 3758         return None
 3759 
 3760     s = s.strip()
 3761 
 3762     days, hours, mins, secs, ms = [None] * 5
 3763     m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
 3764     if m:
 3765         days, hours, mins, secs, ms = m.groups()
 3766     else:
 3767         m = re.match(
 3768             r'''(?ix)(?:P?
 3769                 (?:
 3770                     [0-9]+\s*y(?:ears?)?\s*
 3771                 )?
 3772                 (?:
 3773                     [0-9]+\s*m(?:onths?)?\s*
 3774                 )?
 3775                 (?:
 3776                     [0-9]+\s*w(?:eeks?)?\s*
 3777                 )?
 3778                 (?:
 3779                     (?P<days>[0-9]+)\s*d(?:ays?)?\s*
 3780                 )?
 3781                 T)?
 3782                 (?:
 3783                     (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
 3784                 )?
 3785                 (?:
 3786                     (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
 3787                 )?
 3788                 (?:
 3789                     (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
 3790                 )?Z?$''', s)
 3791         if m:
 3792             days, hours, mins, secs, ms = m.groups()
 3793         else:
 3794             m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
 3795             if m:
 3796                 hours, mins = m.groups()
 3797             else:
 3798                 return None
 3799 
 3800     duration = 0
 3801     if secs:
 3802         duration += float(secs)
 3803     if mins:
 3804         duration += float(mins) * 60
 3805     if hours:
 3806         duration += float(hours) * 60 * 60
 3807     if days:
 3808         duration += float(days) * 24 * 60 * 60
 3809     if ms:
 3810         duration += float(ms)
 3811     return duration
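# Usage sketch for parse_duration (illustrative comment only, not executed);
# it accepts colon-separated, ISO 8601-like and free-form duration strings:
#   parse_duration('1:02:03')    ->  3723.0
#   parse_duration('PT1H30M')    ->  5400.0
#   parse_duration('3 min 10s')  ->  190.0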
 3812 
 3813 
 3814 def prepend_extension(filename, ext, expected_real_ext=None):
 3815     name, real_ext = os.path.splitext(filename)
 3816     return (
 3817         '{0}.{1}{2}'.format(name, ext, real_ext)
 3818         if not expected_real_ext or real_ext[1:] == expected_real_ext
 3819         else '{0}.{1}'.format(filename, ext))
 3820 
 3821 
 3822 def replace_extension(filename, ext, expected_real_ext=None):
 3823     name, real_ext = os.path.splitext(filename)
 3824     return '{0}.{1}'.format(
 3825         name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
 3826         ext)
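# Usage sketch (illustrative comment only, not executed):
#   prepend_extension('video.mp4', 'temp')             ->  'video.temp.mp4'
#   prepend_extension('video.unknown', 'temp', 'mp4')  ->  'video.unknown.temp'
#   replace_extension('video.mp4', 'mkv')              ->  'video.mkv'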
 3827 
 3828 
 3829 def check_executable(exe, args=[]):
 3830     """ Checks if the given binary is installed somewhere in PATH, and returns its name.
 3831     args can be a list of arguments for a short output (like -version) """
 3832     try:
 3833         process_communicate_or_kill(subprocess.Popen(
 3834             [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE))
 3835     except OSError:
 3836         return False
 3837     return exe
 3838 
 3839 
 3840 def get_exe_version(exe, args=['--version'],
 3841                     version_re=None, unrecognized='present'):
 3842     """ Returns the version of the specified executable,
 3843     or False if the executable is not present """
 3844     try:
 3845         # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
 3846         # SIGTTOU if youtube-dl is run in the background.
 3847         # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
 3848         out, _ = process_communicate_or_kill(subprocess.Popen(
 3849             [encodeArgument(exe)] + args,
 3850             stdin=subprocess.PIPE,
 3851             stdout=subprocess.PIPE, stderr=subprocess.STDOUT))
 3852     except OSError:
 3853         return False
 3854     if isinstance(out, bytes):  # Python 2.x
 3855         out = out.decode('ascii', 'ignore')
 3856     return detect_exe_version(out, version_re, unrecognized)
 3857 
 3858 
 3859 def detect_exe_version(output, version_re=None, unrecognized='present'):
 3860     assert isinstance(output, compat_str)
 3861     if version_re is None:
 3862         version_re = r'version\s+([-0-9._a-zA-Z]+)'
 3863     m = re.search(version_re, output)
 3864     if m:
 3865         return m.group(1)
 3866     else:
 3867         return unrecognized
 3868 
 3869 
 3870 class PagedList(object):
 3871     def __len__(self):
 3872         # This is only useful for tests
 3873         return len(self.getslice())
 3874 
 3875 
 3876 class OnDemandPagedList(PagedList):
 3877     def __init__(self, pagefunc, pagesize, use_cache=True):
 3878         self._pagefunc = pagefunc
 3879         self._pagesize = pagesize
 3880         self._use_cache = use_cache
 3881         if use_cache:
 3882             self._cache = {}
 3883 
 3884     def getslice(self, start=0, end=None):
 3885         res = []
 3886         for pagenum in itertools.count(start // self._pagesize):
 3887             firstid = pagenum * self._pagesize
 3888             nextfirstid = pagenum * self._pagesize + self._pagesize
 3889             if start >= nextfirstid:
 3890                 continue
 3891 
 3892             page_results = None
 3893             if self._use_cache:
 3894                 page_results = self._cache.get(pagenum)
 3895             if page_results is None:
 3896                 page_results = list(self._pagefunc(pagenum))
 3897             if self._use_cache:
 3898                 self._cache[pagenum] = page_results
 3899 
 3900             startv = (
 3901                 start % self._pagesize
 3902                 if firstid <= start < nextfirstid
 3903                 else 0)
 3904 
 3905             endv = (
 3906                 ((end - 1) % self._pagesize) + 1
 3907                 if (end is not None and firstid <= end <= nextfirstid)
 3908                 else None)
 3909 
 3910             if startv != 0 or endv is not None:
 3911                 page_results = page_results[startv:endv]
 3912             res.extend(page_results)
 3913 
 3914             # A little optimization: if the current page is not "full", i.e. does
 3915             # not contain page_size videos, then we can assume that this page
 3916             # is the last one - there are no more ids on further pages,
 3917             # so there is no need to query again.
 3918             if len(page_results) + startv < self._pagesize:
 3919                 break
 3920 
 3921             # If we got the whole page, but the next page is not interesting,
 3922             # break out early as well
 3923             if end == nextfirstid:
 3924                 break
 3925         return res
 3926 
 3927 
 3928 class InAdvancePagedList(PagedList):
 3929     def __init__(self, pagefunc, pagecount, pagesize):
 3930         self._pagefunc = pagefunc
 3931         self._pagecount = pagecount
 3932         self._pagesize = pagesize
 3933 
 3934     def getslice(self, start=0, end=None):
 3935         res = []
 3936         start_page = start // self._pagesize
 3937         end_page = (
 3938             self._pagecount if end is None else (end // self._pagesize + 1))
 3939         skip_elems = start - start_page * self._pagesize
 3940         only_more = None if end is None else end - start
 3941         for pagenum in range(start_page, end_page):
 3942             page = list(self._pagefunc(pagenum))
 3943             if skip_elems:
 3944                 page = page[skip_elems:]
 3945                 skip_elems = None
 3946             if only_more is not None:
 3947                 if len(page) < only_more:
 3948                     only_more -= len(page)
 3949                 else:
 3950                     page = page[:only_more]
 3951                     res.extend(page)
 3952                     break
 3953             res.extend(page)
 3954         return res
 3955 
 3956 
 3957 def uppercase_escape(s):
 3958     unicode_escape = codecs.getdecoder('unicode_escape')
 3959     return re.sub(
 3960         r'\\U[0-9a-fA-F]{8}',
 3961         lambda m: unicode_escape(m.group(0))[0],
 3962         s)
 3963 
 3964 
 3965 def lowercase_escape(s):
 3966     unicode_escape = codecs.getdecoder('unicode_escape')
 3967     return re.sub(
 3968         r'\\u[0-9a-fA-F]{4}',
 3969         lambda m: unicode_escape(m.group(0))[0],
 3970         s)
 3971 
 3972 
 3973 def escape_rfc3986(s):
 3974     """Escape non-ASCII characters as suggested by RFC 3986"""
 3975     if sys.version_info < (3, 0) and isinstance(s, compat_str):
 3976         s = s.encode('utf-8')
 3977     # ensure unicode: after quoting, the result is ASCII-only and can always be converted
 3978     return compat_str(compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]"))
 3979 
 3980 
 3981 def escape_url(url):
 3982     """Escape URL as suggested by RFC 3986"""
 3983     url_parsed = compat_urllib_parse_urlparse(url)
 3984     return url_parsed._replace(
 3985         netloc=url_parsed.netloc.encode('idna').decode('ascii'),
 3986         path=escape_rfc3986(url_parsed.path),
 3987         params=escape_rfc3986(url_parsed.params),
 3988         query=escape_rfc3986(url_parsed.query),
 3989         fragment=escape_rfc3986(url_parsed.fragment)
 3990     ).geturl()
 3991 
 3992 
 3993 def read_batch_urls(batch_fd):
 3994     def fixup(url):
 3995         if not isinstance(url, compat_str):
 3996             url = url.decode('utf-8', 'replace')
 3997         BOM_UTF8 = '\xef\xbb\xbf'
 3998         if url.startswith(BOM_UTF8):
 3999             url = url[len(BOM_UTF8):]
 4000         url = url.strip()
 4001         if url.startswith(('#', ';', ']')):
 4002             return False
 4003         return url
 4004 
 4005     with contextlib.closing(batch_fd) as fd:
 4006         return [url for url in map(fixup, fd) if url]
 4007 
 4008 
 4009 def urlencode_postdata(*args, **kargs):
 4010     return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
 4011 
 4012 
 4013 def update_url_query(url, query):
 4014     if not query:
 4015         return url
 4016     parsed_url = compat_urlparse.urlparse(url)
 4017     qs = compat_parse_qs(parsed_url.query)
 4018     qs.update(query)
 4019     return compat_urlparse.urlunparse(parsed_url._replace(
 4020         query=compat_urllib_parse_urlencode(qs, True)))
 4021 
 4022 
 4023 def update_Request(req, url=None, data=None, headers={}, query={}):
 4024     req_headers = req.headers.copy()
 4025     req_headers.update(headers)
 4026     req_data = data or req.data
 4027     req_url = update_url_query(url or req.get_full_url(), query)
 4028     req_get_method = req.get_method()
 4029     if req_get_method == 'HEAD':
 4030         req_type = HEADRequest
 4031     elif req_get_method == 'PUT':
 4032         req_type = PUTRequest
 4033     else:
 4034         req_type = compat_urllib_request.Request
 4035     new_req = req_type(
 4036         req_url, data=req_data, headers=req_headers,
 4037         origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
 4038     if hasattr(req, 'timeout'):
 4039         new_req.timeout = req.timeout
 4040     return new_req
 4041 
 4042 
 4043 def _multipart_encode_impl(data, boundary):
 4044     content_type = 'multipart/form-data; boundary=%s' % boundary
 4045 
 4046     out = b''
 4047     for k, v in data.items():
 4048         out += b'--' + boundary.encode('ascii') + b'\r\n'
 4049         if isinstance(k, compat_str):
 4050             k = k.encode('utf-8')
 4051         if isinstance(v, compat_str):
 4052             v = v.encode('utf-8')
 4053         # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
 4054         # suggests sending UTF-8 directly. Firefox sends UTF-8, too
 4055         content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
 4056         if boundary.encode('ascii') in content:
 4057             raise ValueError('Boundary overlaps with data')
 4058         out += content
 4059 
 4060     out += b'--' + boundary.encode('ascii') + b'--\r\n'
 4061 
 4062     return out, content_type
 4063 
 4064 
 4065 def multipart_encode(data, boundary=None):
 4066     '''
 4067     Encode a dict to RFC 7578-compliant form-data
 4068 
 4069     data:
 4070         A dict where keys and values can be either Unicode or bytes-like
 4071         objects.
 4072     boundary:
 4073         If specified, it must be a Unicode object and is used as the boundary.
 4074         Otherwise a random boundary is generated.
 4075 
 4076     Reference: https://tools.ietf.org/html/rfc7578
 4077     '''
 4078     has_specified_boundary = boundary is not None
 4079 
 4080     while True:
 4081         if boundary is None:
 4082             boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
 4083 
 4084         try:
 4085             out, content_type = _multipart_encode_impl(data, boundary)
 4086             break
 4087         except ValueError:
 4088             if has_specified_boundary:
 4089                 raise
 4090             boundary = None
 4091 
 4092     return out, content_type
 4093 
 4094 
 4095 def dict_get(d, key_or_keys, default=None, skip_false_values=True):
 4096     if isinstance(key_or_keys, (list, tuple)):
 4097         for key in key_or_keys:
 4098             if key not in d or d[key] is None or skip_false_values and not d[key]:
 4099                 continue
 4100             return d[key]
 4101         return default
 4102     return d.get(key_or_keys, default)
 4103 
 4104 
 4105 def try_get(src, getter, expected_type=None):
 4106     if not isinstance(getter, (list, tuple)):
 4107         getter = [getter]
 4108     for get in getter:
 4109         try:
 4110             v = get(src)
 4111         except (AttributeError, KeyError, TypeError, IndexError):
 4112             pass
 4113         else:
 4114             if expected_type is None or isinstance(v, expected_type):
 4115                 return v
 4116 
 4117 
 4118 def merge_dicts(*dicts):
 4119     merged = {}
 4120     for a_dict in dicts:
 4121         for k, v in a_dict.items():
 4122             if v is None:
 4123                 continue
 4124             if (k not in merged
 4125                     or (isinstance(v, compat_str) and v
 4126                         and isinstance(merged[k], compat_str)
 4127                         and not merged[k])):
 4128                 merged[k] = v
 4129     return merged
 4130 
 4131 
 4132 def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
 4133     return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
 4134 
 4135 
 4136 US_RATINGS = {
 4137     'G': 0,
 4138     'PG': 10,
 4139     'PG-13': 13,
 4140     'R': 16,
 4141     'NC': 18,
 4142 }
 4143 
 4144 
 4145 TV_PARENTAL_GUIDELINES = {
 4146     'TV-Y': 0,
 4147     'TV-Y7': 7,
 4148     'TV-G': 0,
 4149     'TV-PG': 0,
 4150     'TV-14': 14,
 4151     'TV-MA': 17,
 4152 }
 4153 
 4154 
 4155 def parse_age_limit(s):
 4156     if type(s) == int:
 4157         return s if 0 <= s <= 21 else None
 4158     if not isinstance(s, compat_basestring):
 4159         return None
 4160     m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
 4161     if m:
 4162         return int(m.group('age'))
 4163     if s in US_RATINGS:
 4164         return US_RATINGS[s]
 4165     m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
 4166     if m:
 4167         return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
 4168     return None
 4169 
 4170 
 4171 def strip_jsonp(code):
 4172     return re.sub(
 4173         r'''(?sx)^
 4174             (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
 4175             (?:\s*&&\s*(?P=func_name))?
 4176             \s*\(\s*(?P<callback_data>.*)\);?
 4177             \s*?(?://[^\n]*)*$''',
 4178         r'\g<callback_data>', code)
 4179 
 4180 
 4181 def js_to_json(code):
 4182     COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
 4183     SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
 4184     INTEGER_TABLE = (
 4185         (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
 4186         (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
 4187     )
 4188 
 4189     def fix_kv(m):
 4190         v = m.group(0)
 4191         if v in ('true', 'false', 'null'):
 4192             return v
 4193         elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
 4194             return ""
 4195 
 4196         if v[0] in ("'", '"'):
 4197             v = re.sub(r'(?s)\\.|"', lambda m: {
 4198                 '"': '\\"',
 4199                 "\\'": "'",
 4200                 '\\\n': '',
 4201                 '\\x': '\\u00',
 4202             }.get(m.group(0), m.group(0)), v[1:-1])
 4203         else:
 4204             for regex, base in INTEGER_TABLE:
 4205                 im = re.match(regex, v)
 4206                 if im:
 4207                     i = int(im.group(1), base)
 4208                     return '"%d":' % i if v.endswith(':') else '%d' % i
 4209 
 4210         return '"%s"' % v
 4211 
 4212     return re.sub(r'''(?sx)
 4213         "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
 4214         '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
 4215         {comment}|,(?={skip}[\]}}])|
 4216         (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
 4217         \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
 4218         [0-9]+(?={skip}:)|
 4219         !+
 4220         '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
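# Usage sketch for js_to_json (illustrative comment only, not executed):
# unquoted keys are quoted, single quotes become double quotes, hex/octal
# literals are converted and trailing commas are dropped, e.g.
#   js_to_json("{abc: 'def', ghi: 0x1A,}")  ->  '{"abc": "def", "ghi": 26}'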
 4221 
 4222 
 4223 def qualities(quality_ids):
 4224     """ Get a numeric quality value out of a list of possible values """
 4225     def q(qid):
 4226         try:
 4227             return quality_ids.index(qid)
 4228         except ValueError:
 4229             return -1
 4230     return q
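# Usage sketch for qualities (illustrative comment only, not executed):
#   q = qualities(['144p', '360p', '720p'])
#   q('720p')   ->  2
#   q('360p')   ->  1
#   q('2160p')  ->  -1   (unknown values sort below all listed ones)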
 4231 
 4232 
 4233 DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
 4234 
 4235 
 4236 def limit_length(s, length):
 4237     """ Add ellipses to overly long strings """
 4238     if s is None:
 4239         return None
 4240     ELLIPSES = '...'
 4241     if len(s) > length:
 4242         return s[:length - len(ELLIPSES)] + ELLIPSES
 4243     return s
 4244 
 4245 
 4246 def version_tuple(v):
 4247     return tuple(int(e) for e in re.split(r'[-.]', v))
 4248 
 4249 
 4250 def is_outdated_version(version, limit, assume_new=True):
 4251     if not version:
 4252         return not assume_new
 4253     try:
 4254         return version_tuple(version) < version_tuple(limit)
 4255     except ValueError:
 4256         return not assume_new
 4257 
 4258 
 4259 def ytdl_is_updateable():
 4260     """ Returns True if youtube-dl can be updated with -U """
 4261     from zipimport import zipimporter
 4262 
 4263     return isinstance(globals().get('__loader__'), zipimporter) or hasattr(sys, 'frozen')
 4264 
 4265 
 4266 def args_to_str(args):
 4267     # Get a short string representation for a subprocess command
 4268     return ' '.join(compat_shlex_quote(a) for a in args)
 4269 
 4270 
 4271 def error_to_compat_str(err):
 4272     err_str = str(err)
 4273     # On python 2 error byte string must be decoded with proper
 4274     # encoding rather than ascii
 4275     if sys.version_info[0] < 3:
 4276         err_str = err_str.decode(preferredencoding())
 4277     return err_str
 4278 
 4279 
 4280 def mimetype2ext(mt):
 4281     if mt is None:
 4282         return None
 4283 
 4284     ext = {
 4285         'audio/mp4': 'm4a',
 4286         # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
 4287         # it's the most popular one
 4288         'audio/mpeg': 'mp3',
 4289     }.get(mt)
 4290     if ext is not None:
 4291         return ext
 4292 
 4293     _, _, res = mt.rpartition('/')
 4294     res = res.split(';')[0].strip().lower()
 4295 
 4296     return {
 4297         '3gpp': '3gp',
 4298         'smptett+xml': 'tt',
 4299         'ttaf+xml': 'dfxp',
 4300         'ttml+xml': 'ttml',
 4301         'x-flv': 'flv',
 4302         'x-mp4-fragmented': 'mp4',
 4303         'x-ms-sami': 'sami',
 4304         'x-ms-wmv': 'wmv',
 4305         'mpegurl': 'm3u8',
 4306         'x-mpegurl': 'm3u8',
 4307         'vnd.apple.mpegurl': 'm3u8',
 4308         'dash+xml': 'mpd',
 4309         'f4m+xml': 'f4m',
 4310         'hds+xml': 'f4m',
 4311         'vnd.ms-sstr+xml': 'ism',
 4312         'quicktime': 'mov',
 4313         'mp2t': 'ts',
 4314         'x-wav': 'wav',
 4315     }.get(res, res)
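# Usage sketch (illustrative comment only, not executed):
#   mimetype2ext('audio/mp4')                             ->  'm4a'
#   mimetype2ext('application/x-mpegURL; charset=UTF-8')  ->  'm3u8'
#   mimetype2ext('video/x-flv')                           ->  'flv'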
 4316 
 4317 
 4318 def parse_codecs(codecs_str):
 4319     # http://tools.ietf.org/html/rfc6381
 4320     if not codecs_str:
 4321         return {}
 4322     split_codecs = list(filter(None, map(
 4323         lambda str: str.strip(), codecs_str.strip().strip(',').split(','))))
 4324     vcodec, acodec = None, None
 4325     for full_codec in split_codecs:
 4326         codec = full_codec.split('.')[0]
 4327         if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
 4328             if not vcodec:
 4329                 vcodec = full_codec
 4330         elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
 4331             if not acodec:
 4332                 acodec = full_codec
 4333         else:
 4334             write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
 4335     if not vcodec and not acodec:
 4336         if len(split_codecs) == 2:
 4337             return {
 4338                 'vcodec': split_codecs[0],
 4339                 'acodec': split_codecs[1],
 4340             }
 4341     else:
 4342         return {
 4343             'vcodec': vcodec or 'none',
 4344             'acodec': acodec or 'none',
 4345         }
 4346     return {}
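# Usage sketch (illustrative comment only, not executed):
#   parse_codecs('avc1.64001f, mp4a.40.2')  ->  {'vcodec': 'avc1.64001f', 'acodec': 'mp4a.40.2'}
#   parse_codecs('vp9')                     ->  {'vcodec': 'vp9', 'acodec': 'none'}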
 4347 
 4348 
 4349 def urlhandle_detect_ext(url_handle):
 4350     getheader = url_handle.headers.get
 4351 
 4352     cd = getheader('Content-Disposition')
 4353     if cd:
 4354         m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
 4355         if m:
 4356             e = determine_ext(m.group('filename'), default_ext=None)
 4357             if e:
 4358                 return e
 4359 
 4360     return mimetype2ext(getheader('Content-Type'))
 4361 
 4362 
 4363 def encode_data_uri(data, mime_type):
 4364     return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
 4365 
 4366 
 4367 def age_restricted(content_limit, age_limit):
 4368     """ Returns True iff the content should be blocked """
 4369 
 4370     if age_limit is None:  # No limit set
 4371         return False
 4372     if content_limit is None:
 4373         return False  # Content available for everyone
 4374     return age_limit < content_limit
 4375 
 4376 
 4377 def is_html(first_bytes):
 4378     """ Detect whether a file contains HTML by examining its first bytes. """
 4379 
 4380     BOMS = [
 4381         (b'\xef\xbb\xbf', 'utf-8'),
 4382         (b'\x00\x00\xfe\xff', 'utf-32-be'),
 4383         (b'\xff\xfe\x00\x00', 'utf-32-le'),
 4384         (b'\xff\xfe', 'utf-16-le'),
 4385         (b'\xfe\xff', 'utf-16-be'),
 4386     ]
 4387     for bom, enc in BOMS:
 4388         if first_bytes.startswith(bom):
 4389             s = first_bytes[len(bom):].decode(enc, 'replace')
 4390             break
 4391     else:
 4392         s = first_bytes.decode('utf-8', 'replace')
 4393 
 4394     return re.match(r'^\s*<', s)
 4395 
 4396 
 4397 def determine_protocol(info_dict):
 4398     protocol = info_dict.get('protocol')
 4399     if protocol is not None:
 4400         return protocol
 4401 
 4402     url = info_dict['url']
 4403     if url.startswith('rtmp'):
 4404         return 'rtmp'
 4405     elif url.startswith('mms'):
 4406         return 'mms'
 4407     elif url.startswith('rtsp'):
 4408         return 'rtsp'
 4409 
 4410     ext = determine_ext(url)
 4411     if ext == 'm3u8':
 4412         return 'm3u8'
 4413     elif ext == 'f4m':
 4414         return 'f4m'
 4415 
 4416     return compat_urllib_parse_urlparse(url).scheme
 4417 
 4418 
 4419 def render_table(header_row, data):
 4420     """ Render a list of rows, each as a list of values """
 4421     table = [header_row] + data
 4422     max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
 4423     format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
 4424     return '\n'.join(format_str % tuple(row) for row in table)
 4425 
 4426 
 4427 def _match_one(filter_part, dct):
 4428     COMPARISON_OPERATORS = {
 4429         '<': operator.lt,
 4430         '<=': operator.le,
 4431         '>': operator.gt,
 4432         '>=': operator.ge,
 4433         '=': operator.eq,
 4434         '!=': operator.ne,
 4435     }
 4436     operator_rex = re.compile(r'''(?x)\s*
 4437         (?P<key>[a-z_]+)
 4438         \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
 4439         (?:
 4440             (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
 4441             (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
 4442             (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
 4443         )
 4444         \s*$
 4445         ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
 4446     m = operator_rex.search(filter_part)
 4447     if m:
 4448         op = COMPARISON_OPERATORS[m.group('op')]
 4449         actual_value = dct.get(m.group('key'))
 4450         if (m.group('quotedstrval') is not None
 4451             or m.group('strval') is not None
 4452             # If the original field is a string and the matching comparison value is
 4453             # a number we should respect the origin of the original field
 4454             # and process comparison value as a string (see
 4455             # https://github.com/ytdl-org/youtube-dl/issues/11082).
 4456             or actual_value is not None and m.group('intval') is not None
 4457                 and isinstance(actual_value, compat_str)):
 4458             if m.group('op') not in ('=', '!='):
 4459                 raise ValueError(
 4460                     'Operator %s does not support string values!' % m.group('op'))
 4461             comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
 4462             quote = m.group('quote')
 4463             if quote is not None:
 4464                 comparison_value = comparison_value.replace(r'\%s' % quote, quote)
 4465         else:
 4466             try:
 4467                 comparison_value = int(m.group('intval'))
 4468             except ValueError:
 4469                 comparison_value = parse_filesize(m.group('intval'))
 4470                 if comparison_value is None:
 4471                     comparison_value = parse_filesize(m.group('intval') + 'B')
 4472                 if comparison_value is None:
 4473                     raise ValueError(
 4474                         'Invalid integer value %r in filter part %r' % (
 4475                             m.group('intval'), filter_part))
 4476         if actual_value is None:
 4477             return m.group('none_inclusive')
 4478         return op(actual_value, comparison_value)
 4479 
 4480     UNARY_OPERATORS = {
 4481         '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
 4482         '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
 4483     }
 4484     operator_rex = re.compile(r'''(?x)\s*
 4485         (?P<op>%s)\s*(?P<key>[a-z_]+)
 4486         \s*$
 4487         ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
 4488     m = operator_rex.search(filter_part)
 4489     if m:
 4490         op = UNARY_OPERATORS[m.group('op')]
 4491         actual_value = dct.get(m.group('key'))
 4492         return op(actual_value)
 4493 
 4494     raise ValueError('Invalid filter part %r' % filter_part)
 4495 
 4496 
 4497 def match_str(filter_str, dct):
 4498     """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or False """
 4499 
 4500     return all(
 4501         _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
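# Usage sketch for match_str (illustrative comment only, not executed; the
# field names mirror metadata keys as used with --match-filter):
#   match_str('duration < 60', {'duration': 30})                   ->  True
#   match_str('like_count > 100 & title', {'like_count': 190,
#                                          'title': 'example'})    ->  True
#   match_str('is_live', {'is_live': False})                       ->  False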
 4502 
 4503 
 4504 def match_filter_func(filter_str):
 4505     def _match_func(info_dict):
 4506         if match_str(filter_str, info_dict):
 4507             return None
 4508         else:
 4509             video_title = info_dict.get('title', info_dict.get('id', 'video'))
 4510             return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
 4511     return _match_func
 4512 
 4513 
 4514 def parse_dfxp_time_expr(time_expr):
 4515     if not time_expr:
 4516         return
 4517 
 4518     mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
 4519     if mobj:
 4520         return float(mobj.group('time_offset'))
 4521 
 4522     mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
 4523     if mobj:
 4524         return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
 4525 
 4526 
 4527 def srt_subtitles_timecode(seconds):
 4528     return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
 4529 
 4530 
 4531 def dfxp2srt(dfxp_data):
 4532     '''
 4533     @param dfxp_data A bytes-like object containing DFXP data
 4534     @returns A unicode object containing converted SRT data
 4535     '''
 4536     LEGACY_NAMESPACES = (
 4537         (b'http://www.w3.org/ns/ttml', [
 4538             b'http://www.w3.org/2004/11/ttaf1',
 4539             b'http://www.w3.org/2006/04/ttaf1',
 4540             b'http://www.w3.org/2006/10/ttaf1',
 4541         ]),
 4542         (b'http://www.w3.org/ns/ttml#styling', [
 4543             b'http://www.w3.org/ns/ttml#style',
 4544         ]),
 4545     )
 4546 
 4547     SUPPORTED_STYLING = [
 4548         'color',
 4549         'fontFamily',
 4550         'fontSize',
 4551         'fontStyle',
 4552         'fontWeight',
 4553         'textDecoration'
 4554     ]
 4555 
 4556     _x = functools.partial(xpath_with_ns, ns_map={
 4557         'xml': 'http://www.w3.org/XML/1998/namespace',
 4558         'ttml': 'http://www.w3.org/ns/ttml',
 4559         'tts': 'http://www.w3.org/ns/ttml#styling',
 4560     })
 4561 
 4562     styles = {}
 4563     default_style = {}
 4564 
 4565     class TTMLPElementParser(object):
 4566         _out = ''
 4567         _unclosed_elements = []
 4568         _applied_styles = []
 4569 
 4570         def start(self, tag, attrib):
 4571             if tag in (_x('ttml:br'), 'br'):
 4572                 self._out += '\n'
 4573             else:
 4574                 unclosed_elements = []
 4575                 style = {}
 4576                 element_style_id = attrib.get('style')
 4577                 if default_style:
 4578                     style.update(default_style)
 4579                 if element_style_id:
 4580                     style.update(styles.get(element_style_id, {}))
 4581                 for prop in SUPPORTED_STYLING:
 4582                     prop_val = attrib.get(_x('tts:' + prop))
 4583                     if prop_val:
 4584                         style[prop] = prop_val
 4585                 if style:
 4586                     font = ''
 4587                     for k, v in sorted(style.items()):
 4588                         if self._applied_styles and self._applied_styles[-1].get(k) == v:
 4589                             continue
 4590                         if k == 'color':
 4591                             font += ' color="%s"' % v
 4592                         elif k == 'fontSize':
 4593                             font += ' size="%s"' % v
 4594                         elif k == 'fontFamily':
 4595                             font += ' face="%s"' % v
 4596                         elif k == 'fontWeight' and v == 'bold':
 4597                             self._out += '<b>'
 4598                             unclosed_elements.append('b')
 4599                         elif k == 'fontStyle' and v == 'italic':
 4600                             self._out += '<i>'
 4601                             unclosed_elements.append('i')
 4602                         elif k == 'textDecoration' and v == 'underline':
 4603                             self._out += '<u>'
 4604                             unclosed_elements.append('u')
 4605                     if font:
 4606                         self._out += '<font' + font + '>'
 4607                         unclosed_elements.append('font')
 4608                     applied_style = {}
 4609                     if self._applied_styles:
 4610                         applied_style.update(self._applied_styles[-1])
 4611                     applied_style.update(style)
 4612                     self._applied_styles.append(applied_style)
 4613                 self._unclosed_elements.append(unclosed_elements)
 4614 
 4615         def end(self, tag):
 4616             if tag not in (_x('ttml:br'), 'br'):
 4617                 unclosed_elements = self._unclosed_elements.pop()
 4618                 for element in reversed(unclosed_elements):
 4619                     self._out += '</%s>' % element
 4620                 if unclosed_elements and self._applied_styles:
 4621                     self._applied_styles.pop()
 4622 
 4623         def data(self, data):
 4624             self._out += data
 4625 
 4626         def close(self):
 4627             return self._out.strip()
 4628 
 4629     def parse_node(node):
 4630         target = TTMLPElementParser()
 4631         parser = xml.etree.ElementTree.XMLParser(target=target)
 4632         parser.feed(xml.etree.ElementTree.tostring(node))
 4633         return parser.close()
 4634 
 4635     for k, v in LEGACY_NAMESPACES:
 4636         for ns in v:
 4637             dfxp_data = dfxp_data.replace(ns, k)
 4638 
 4639     dfxp = compat_etree_fromstring(dfxp_data)
 4640     out = []
 4641     paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
 4642 
 4643     if not paras:
 4644         raise ValueError('Invalid dfxp/TTML subtitle')
 4645 
 4646     repeat = False
 4647     while True:
 4648         for style in dfxp.findall(_x('.//ttml:style')):
 4649             style_id = style.get('id') or style.get(_x('xml:id'))
 4650             if not style_id:
 4651                 continue
 4652             parent_style_id = style.get('style')
 4653             if parent_style_id:
 4654                 if parent_style_id not in styles:
 4655                     repeat = True
 4656                     continue
 4657                 styles[style_id] = styles[parent_style_id].copy()
 4658             for prop in SUPPORTED_STYLING:
 4659                 prop_val = style.get(_x('tts:' + prop))
 4660                 if prop_val:
 4661                     styles.setdefault(style_id, {})[prop] = prop_val
 4662         if repeat:
 4663             repeat = False
 4664         else:
 4665             break
 4666 
 4667     for p in ('body', 'div'):
 4668         ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
 4669         if ele is None:
 4670             continue
 4671         style = styles.get(ele.get('style'))
 4672         if not style:
 4673             continue
 4674         default_style.update(style)
 4675 
 4676     for para, index in zip(paras, itertools.count(1)):
 4677         begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
 4678         end_time = parse_dfxp_time_expr(para.attrib.get('end'))
 4679         dur = parse_dfxp_time_expr(para.attrib.get('dur'))
 4680         if begin_time is None:
 4681             continue
 4682         if not end_time:
 4683             if not dur:
 4684                 continue
 4685             end_time = begin_time + dur
 4686         out.append('%d\n%s --> %s\n%s\n\n' % (
 4687             index,
 4688             srt_subtitles_timecode(begin_time),
 4689             srt_subtitles_timecode(end_time),
 4690             parse_node(para)))
 4691 
 4692     return ''.join(out)
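# Usage sketch for dfxp2srt (illustrative comment only, not executed; a minimal
# single-cue TTML document made up for this example):
#   dfxp2srt(b'<tt xmlns="http://www.w3.org/ns/ttml"><body><div>'
#            b'<p begin="0" end="1.5">Hello</p></div></body></tt>')
#       ->  '1\n00:00:00,000 --> 00:00:01,500\nHello\n\n'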
 4693 
 4694 
 4695 def cli_option(params, command_option, param):
 4696     param = params.get(param)
 4697     if param:
 4698         param = compat_str(param)
 4699     return [command_option, param] if param is not None else []
 4700 
 4701 
 4702 def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
 4703     param = params.get(param)
 4704     if param is None:
 4705         return []
 4706     assert isinstance(param, bool)
 4707     if separator:
 4708         return [command_option + separator + (true_value if param else false_value)]
 4709     return [command_option, true_value if param else false_value]
 4710 
 4711 
 4712 def cli_valueless_option(params, command_option, param, expected_value=True):
 4713     param = params.get(param)
 4714     return [command_option] if param == expected_value else []
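# Usage sketch for the cli_* helpers (illustrative comment only, not executed;
# the params/option names are made up for this example):
#   cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy')
#       ->  ['--proxy', '127.0.0.1:3128']
#   cli_bool_option({'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate')
#       ->  ['--no-check-certificate', 'true']
#   cli_valueless_option({'quiet': True}, '--quiet', 'quiet')
#       ->  ['--quiet']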
 4715 
 4716 
 4717 def cli_configuration_args(params, param, default=[]):
 4718     ex_args = params.get(param)
 4719     if ex_args is None:
 4720         return default
 4721     assert isinstance(ex_args, list)
 4722     return ex_args
 4723 
 4724 
 4725 class ISO639Utils(object):
 4726     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
 4727     _lang_map = {
 4728         'aa': 'aar',
 4729         'ab': 'abk',
 4730         'ae': 'ave',
 4731         'af': 'afr',
 4732         'ak': 'aka',
 4733         'am': 'amh',
 4734         'an': 'arg',
 4735         'ar': 'ara',
 4736         'as': 'asm',
 4737         'av': 'ava',
 4738         'ay': 'aym',
 4739         'az': 'aze',
 4740         'ba': 'bak',
 4741         'be': 'bel',
 4742         'bg': 'bul',
 4743         'bh': 'bih',
 4744         'bi': 'bis',
 4745         'bm': 'bam',
 4746         'bn': 'ben',
 4747         'bo': 'bod',
 4748         'br': 'bre',
 4749         'bs': 'bos',
 4750         'ca': 'cat',
 4751         'ce': 'che',
 4752         'ch': 'cha',
 4753         'co': 'cos',
 4754         'cr': 'cre',
 4755         'cs': 'ces',
 4756         'cu': 'chu',
 4757         'cv': 'chv',
 4758         'cy': 'cym',
 4759         'da': 'dan',
 4760         'de': 'deu',
 4761         'dv': 'div',
 4762         'dz': 'dzo',
 4763         'ee': 'ewe',
 4764         'el': 'ell',
 4765         'en': 'eng',
 4766         'eo': 'epo',
 4767         'es': 'spa',
 4768         'et': 'est',
 4769         'eu': 'eus',
 4770         'fa': 'fas',
 4771         'ff': 'ful',
 4772         'fi': 'fin',
 4773         'fj': 'fij',
 4774         'fo': 'fao',
 4775         'fr': 'fra',
 4776         'fy': 'fry',
 4777         'ga': 'gle',
 4778         'gd': 'gla',
 4779         'gl': 'glg',
 4780         'gn': 'grn',
 4781         'gu': 'guj',
 4782         'gv': 'glv',
 4783         'ha': 'hau',
 4784         'he': 'heb',
 4785         'iw': 'heb',  # Replaced by he in 1989 revision
 4786         'hi': 'hin',
 4787         'ho': 'hmo',
 4788         'hr': 'hrv',
 4789         'ht': 'hat',
 4790         'hu': 'hun',
 4791         'hy': 'hye',
 4792         'hz': 'her',
 4793         'ia': 'ina',
 4794         'id': 'ind',
 4795         'in': 'ind',  # Replaced by id in 1989 revision
 4796         'ie': 'ile',
 4797         'ig': 'ibo',
 4798         'ii': 'iii',
 4799         'ik': 'ipk',
 4800         'io': 'ido',
 4801         'is': 'isl',
 4802         'it': 'ita',
 4803         'iu': 'iku',
 4804         'ja': 'jpn',
 4805         'jv': 'jav',
 4806         'ka': 'kat',
 4807         'kg': 'kon',
 4808         'ki': 'kik',
 4809         'kj': 'kua',
 4810         'kk': 'kaz',
 4811         'kl': 'kal',
 4812         'km': 'khm',
 4813         'kn': 'kan',
 4814         'ko': 'kor',
 4815         'kr': 'kau',
 4816         'ks': 'kas',
 4817         'ku': 'kur',
 4818         'kv': 'kom',
 4819         'kw': 'cor',
 4820         'ky': 'kir',
 4821         'la': 'lat',
 4822         'lb': 'ltz',
 4823         'lg': 'lug',
 4824         'li': 'lim',
 4825         'ln': 'lin',
 4826         'lo': 'lao',
 4827         'lt': 'lit',
 4828         'lu': 'lub',
 4829         'lv': 'lav',
 4830         'mg': 'mlg',
 4831         'mh': 'mah',
 4832         'mi': 'mri',
 4833         'mk': 'mkd',
 4834         'ml': 'mal',
 4835         'mn': 'mon',
 4836         'mr': 'mar',
 4837         'ms': 'msa',
 4838         'mt': 'mlt',
 4839         'my': 'mya',
 4840         'na': 'nau',
 4841         'nb': 'nob',
 4842         'nd': 'nde',
 4843         'ne': 'nep',
 4844         'ng': 'ndo',
 4845         'nl': 'nld',
 4846         'nn': 'nno',
 4847         'no': 'nor',
 4848         'nr': 'nbl',
 4849         'nv': 'nav',
 4850         'ny': 'nya',
 4851         'oc': 'oci',
 4852         'oj': 'oji',
 4853         'om': 'orm',
 4854         'or': 'ori',
 4855         'os': 'oss',
 4856         'pa': 'pan',
 4857         'pi': 'pli',
 4858         'pl': 'pol',
 4859         'ps': 'pus',
 4860         'pt': 'por',
 4861         'qu': 'que',
 4862         'rm': 'roh',
 4863         'rn': 'run',
 4864         'ro': 'ron',
 4865         'ru': 'rus',
 4866         'rw': 'kin',
 4867         'sa': 'san',
 4868         'sc': 'srd',
 4869         'sd': 'snd',
 4870         'se': 'sme',
 4871         'sg': 'sag',
 4872         'si': 'sin',
 4873         'sk': 'slk',
 4874         'sl': 'slv',
 4875         'sm': 'smo',
 4876         'sn': 'sna',
 4877         'so': 'som',
 4878         'sq': 'sqi',
 4879         'sr': 'srp',
 4880         'ss': 'ssw',
 4881         'st': 'sot',
 4882         'su': 'sun',
 4883         'sv': 'swe',
 4884         'sw': 'swa',
 4885         'ta': 'tam',
 4886         'te': 'tel',
 4887         'tg': 'tgk',
 4888         'th': 'tha',
 4889         'ti': 'tir',
 4890         'tk': 'tuk',
 4891         'tl': 'tgl',
 4892         'tn': 'tsn',
 4893         'to': 'ton',
 4894         'tr': 'tur',
 4895         'ts': 'tso',
 4896         'tt': 'tat',
 4897         'tw': 'twi',
 4898         'ty': 'tah',
 4899         'ug': 'uig',
 4900         'uk': 'ukr',
 4901         'ur': 'urd',
 4902         'uz': 'uzb',
 4903         've': 'ven',
 4904         'vi': 'vie',
 4905         'vo': 'vol',
 4906         'wa': 'wln',
 4907         'wo': 'wol',
 4908         'xh': 'xho',
 4909         'yi': 'yid',
 4910         'ji': 'yid',  # Replaced by yi in 1989 revision
 4911         'yo': 'yor',
 4912         'za': 'zha',
 4913         'zh': 'zho',
 4914         'zu': 'zul',
 4915     }
 4916 
 4917     @classmethod
 4918     def short2long(cls, code):
 4919         """Convert language code from ISO 639-1 to ISO 639-2/T"""
 4920         return cls._lang_map.get(code[:2])
 4921 
 4922     @classmethod
 4923     def long2short(cls, code):
 4924         """Convert language code from ISO 639-2/T to ISO 639-1"""
 4925         for short_name, long_name in cls._lang_map.items():
 4926             if long_name == code:
 4927                 return short_name
 4928 
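# Illustrative lookups against the table above:
#   ISO639Utils.short2long('en')  -> 'eng'
#   ISO639Utils.long2short('deu') -> 'de'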
 4929 
 4930 class ISO3166Utils(object):
 4931     # From http://data.okfn.org/data/core/country-list
 4932     _country_map = {
 4933         'AF': 'Afghanistan',
 4934         'AX': 'Åland Islands',
 4935         'AL': 'Albania',
 4936         'DZ': 'Algeria',
 4937         'AS': 'American Samoa',
 4938         'AD': 'Andorra',
 4939         'AO': 'Angola',
 4940         'AI': 'Anguilla',
 4941         'AQ': 'Antarctica',
 4942         'AG': 'Antigua and Barbuda',
 4943         'AR': 'Argentina',
 4944         'AM': 'Armenia',
 4945         'AW': 'Aruba',
 4946         'AU': 'Australia',
 4947         'AT': 'Austria',
 4948         'AZ': 'Azerbaijan',
 4949         'BS': 'Bahamas',
 4950         'BH': 'Bahrain',
 4951         'BD': 'Bangladesh',
 4952         'BB': 'Barbados',
 4953         'BY': 'Belarus',
 4954         'BE': 'Belgium',
 4955         'BZ': 'Belize',
 4956         'BJ': 'Benin',
 4957         'BM': 'Bermuda',
 4958         'BT': 'Bhutan',
 4959         'BO': 'Bolivia, Plurinational State of',
 4960         'BQ': 'Bonaire, Sint Eustatius and Saba',
 4961         'BA': 'Bosnia and Herzegovina',
 4962         'BW': 'Botswana',
 4963         'BV': 'Bouvet Island',
 4964         'BR': 'Brazil',
 4965         'IO': 'British Indian Ocean Territory',
 4966         'BN': 'Brunei Darussalam',
 4967         'BG': 'Bulgaria',
 4968         'BF': 'Burkina Faso',
 4969         'BI': 'Burundi',
 4970         'KH': 'Cambodia',
 4971         'CM': 'Cameroon',
 4972         'CA': 'Canada',
 4973         'CV': 'Cape Verde',
 4974         'KY': 'Cayman Islands',
 4975         'CF': 'Central African Republic',
 4976         'TD': 'Chad',
 4977         'CL': 'Chile',
 4978         'CN': 'China',
 4979         'CX': 'Christmas Island',
 4980         'CC': 'Cocos (Keeling) Islands',
 4981         'CO': 'Colombia',
 4982         'KM': 'Comoros',
 4983         'CG': 'Congo',
 4984         'CD': 'Congo, the Democratic Republic of the',
 4985         'CK': 'Cook Islands',
 4986         'CR': 'Costa Rica',
 4987         'CI': 'Côte d\'Ivoire',
 4988         'HR': 'Croatia',
 4989         'CU': 'Cuba',
 4990         'CW': 'Curaçao',
 4991         'CY': 'Cyprus',
 4992         'CZ': 'Czech Republic',
 4993         'DK': 'Denmark',
 4994         'DJ': 'Djibouti',
 4995         'DM': 'Dominica',
 4996         'DO': 'Dominican Republic',
 4997         'EC': 'Ecuador',
 4998         'EG': 'Egypt',
 4999         'SV': 'El Salvador',
 5000         'GQ': 'Equatorial Guinea',
 5001         'ER': 'Eritrea',
 5002         'EE': 'Estonia',
 5003         'ET': 'Ethiopia',
 5004         'FK': 'Falkland Islands (Malvinas)',
 5005         'FO': 'Faroe Islands',
 5006         'FJ': 'Fiji',
 5007         'FI': 'Finland',
 5008         'FR': 'France',
 5009         'GF': 'French Guiana',
 5010         'PF': 'French Polynesia',
 5011         'TF': 'French Southern Territories',
 5012         'GA': 'Gabon',
 5013         'GM': 'Gambia',
 5014         'GE': 'Georgia',
 5015         'DE': 'Germany',
 5016         'GH': 'Ghana',
 5017         'GI': 'Gibraltar',
 5018         'GR': 'Greece',
 5019         'GL': 'Greenland',
 5020         'GD': 'Grenada',
 5021         'GP': 'Guadeloupe',
 5022         'GU': 'Guam',
 5023         'GT': 'Guatemala',
 5024         'GG': 'Guernsey',
 5025         'GN': 'Guinea',
 5026         'GW': 'Guinea-Bissau',
 5027         'GY': 'Guyana',
 5028         'HT': 'Haiti',
 5029         'HM': 'Heard Island and McDonald Islands',
 5030         'VA': 'Holy See (Vatican City State)',
 5031         'HN': 'Honduras',
 5032         'HK': 'Hong Kong',
 5033         'HU': 'Hungary',
 5034         'IS': 'Iceland',
 5035         'IN': 'India',
 5036         'ID': 'Indonesia',
 5037         'IR': 'Iran, Islamic Republic of',
 5038         'IQ': 'Iraq',
 5039         'IE': 'Ireland',
 5040         'IM': 'Isle of Man',
 5041         'IL': 'Israel',
 5042         'IT': 'Italy',
 5043         'JM': 'Jamaica',
 5044         'JP': 'Japan',
 5045         'JE': 'Jersey',
 5046         'JO': 'Jordan',
 5047         'KZ': 'Kazakhstan',
 5048         'KE': 'Kenya',
 5049         'KI': 'Kiribati',
 5050         'KP': 'Korea, Democratic People\'s Republic of',
 5051         'KR': 'Korea, Republic of',
 5052         'KW': 'Kuwait',
 5053         'KG': 'Kyrgyzstan',
 5054         'LA': 'Lao People\'s Democratic Republic',
 5055         'LV': 'Latvia',
 5056         'LB': 'Lebanon',
 5057         'LS': 'Lesotho',
 5058         'LR': 'Liberia',
 5059         'LY': 'Libya',
 5060         'LI': 'Liechtenstein',
 5061         'LT': 'Lithuania',
 5062         'LU': 'Luxembourg',
 5063         'MO': 'Macao',
 5064         'MK': 'Macedonia, the Former Yugoslav Republic of',
 5065         'MG': 'Madagascar',
 5066         'MW': 'Malawi',
 5067         'MY': 'Malaysia',
 5068         'MV': 'Maldives',
 5069         'ML': 'Mali',
 5070         'MT': 'Malta',
 5071         'MH': 'Marshall Islands',
 5072         'MQ': 'Martinique',
 5073         'MR': 'Mauritania',
 5074         'MU': 'Mauritius',
 5075         'YT': 'Mayotte',
 5076         'MX': 'Mexico',
 5077         'FM': 'Micronesia, Federated States of',
 5078         'MD': 'Moldova, Republic of',
 5079         'MC': 'Monaco',
 5080         'MN': 'Mongolia',
 5081         'ME': 'Montenegro',
 5082         'MS': 'Montserrat',
 5083         'MA': 'Morocco',
 5084         'MZ': 'Mozambique',
 5085         'MM': 'Myanmar',
 5086         'NA': 'Namibia',
 5087         'NR': 'Nauru',
 5088         'NP': 'Nepal',
 5089         'NL': 'Netherlands',
 5090         'NC': 'New Caledonia',
 5091         'NZ': 'New Zealand',
 5092         'NI': 'Nicaragua',
 5093         'NE': 'Niger',
 5094         'NG': 'Nigeria',
 5095         'NU': 'Niue',
 5096         'NF': 'Norfolk Island',
 5097         'MP': 'Northern Mariana Islands',
 5098         'NO': 'Norway',
 5099         'OM': 'Oman',
 5100         'PK': 'Pakistan',
 5101         'PW': 'Palau',
 5102         'PS': 'Palestine, State of',
 5103         'PA': 'Panama',
 5104         'PG': 'Papua New Guinea',
 5105         'PY': 'Paraguay',
 5106         'PE': 'Peru',
 5107         'PH': 'Philippines',
 5108         'PN': 'Pitcairn',
 5109         'PL': 'Poland',
 5110         'PT': 'Portugal',
 5111         'PR': 'Puerto Rico',
 5112         'QA': 'Qatar',
 5113         'RE': 'Réunion',
 5114         'RO': 'Romania',
 5115         'RU': 'Russian Federation',
 5116         'RW': 'Rwanda',
 5117         'BL': 'Saint Barthélemy',
 5118         'SH': 'Saint Helena, Ascension and Tristan da Cunha',
 5119         'KN': 'Saint Kitts and Nevis',
 5120         'LC': 'Saint Lucia',
 5121         'MF': 'Saint Martin (French part)',
 5122         'PM': 'Saint Pierre and Miquelon',
 5123         'VC': 'Saint Vincent and the Grenadines',
 5124         'WS': 'Samoa',
 5125         'SM': 'San Marino',
 5126         'ST': 'Sao Tome and Principe',
 5127         'SA': 'Saudi Arabia',
 5128         'SN': 'Senegal',
 5129         'RS': 'Serbia',
 5130         'SC': 'Seychelles',
 5131         'SL': 'Sierra Leone',
 5132         'SG': 'Singapore',
 5133         'SX': 'Sint Maarten (Dutch part)',
 5134         'SK': 'Slovakia',
 5135         'SI': 'Slovenia',
 5136         'SB': 'Solomon Islands',
 5137         'SO': 'Somalia',
 5138         'ZA': 'South Africa',
 5139         'GS': 'South Georgia and the South Sandwich Islands',
 5140         'SS': 'South Sudan',
 5141         'ES': 'Spain',
 5142         'LK': 'Sri Lanka',
 5143         'SD': 'Sudan',
 5144         'SR': 'Suriname',
 5145         'SJ': 'Svalbard and Jan Mayen',
 5146         'SZ': 'Swaziland',
 5147         'SE': 'Sweden',
 5148         'CH': 'Switzerland',
 5149         'SY': 'Syrian Arab Republic',
 5150         'TW': 'Taiwan, Province of China',
 5151         'TJ': 'Tajikistan',
 5152         'TZ': 'Tanzania, United Republic of',
 5153         'TH': 'Thailand',
 5154         'TL': 'Timor-Leste',
 5155         'TG': 'Togo',
 5156         'TK': 'Tokelau',
 5157         'TO': 'Tonga',
 5158         'TT': 'Trinidad and Tobago',
 5159         'TN': 'Tunisia',
 5160         'TR': 'Turkey',
 5161         'TM': 'Turkmenistan',
 5162         'TC': 'Turks and Caicos Islands',
 5163         'TV': 'Tuvalu',
 5164         'UG': 'Uganda',
 5165         'UA': 'Ukraine',
 5166         'AE': 'United Arab Emirates',
 5167         'GB': 'United Kingdom',
 5168         'US': 'United States',
 5169         'UM': 'United States Minor Outlying Islands',
 5170         'UY': 'Uruguay',
 5171         'UZ': 'Uzbekistan',
 5172         'VU': 'Vanuatu',
 5173         'VE': 'Venezuela, Bolivarian Republic of',
 5174         'VN': 'Viet Nam',
 5175         'VG': 'Virgin Islands, British',
 5176         'VI': 'Virgin Islands, U.S.',
 5177         'WF': 'Wallis and Futuna',
 5178         'EH': 'Western Sahara',
 5179         'YE': 'Yemen',
 5180         'ZM': 'Zambia',
 5181         'ZW': 'Zimbabwe',
 5182     }
 5183 
 5184     @classmethod
 5185     def short2full(cls, code):
 5186         """Convert an ISO 3166-1 alpha-2 country code to the corresponding full name"""
 5187         return cls._country_map.get(code.upper())
 5188 
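# Illustrative lookup: ISO3166Utils.short2full('de') -> 'Germany'
# (the code is upper-cased before the lookup, so 'DE' works equally well).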
 5189 
 5190 class GeoUtils(object):
 5191     # Major IPv4 address blocks per country
 5192     _country_ip_map = {
 5193         'AD': '46.172.224.0/19',
 5194         'AE': '94.200.0.0/13',
 5195         'AF': '149.54.0.0/17',
 5196         'AG': '209.59.64.0/18',
 5197         'AI': '204.14.248.0/21',
 5198         'AL': '46.99.0.0/16',
 5199         'AM': '46.70.0.0/15',
 5200         'AO': '105.168.0.0/13',
 5201         'AP': '182.50.184.0/21',
 5202         'AQ': '23.154.160.0/24',
 5203         'AR': '181.0.0.0/12',
 5204         'AS': '202.70.112.0/20',
 5205         'AT': '77.116.0.0/14',
 5206         'AU': '1.128.0.0/11',
 5207         'AW': '181.41.0.0/18',
 5208         'AX': '185.217.4.0/22',
 5209         'AZ': '5.197.0.0/16',
 5210         'BA': '31.176.128.0/17',
 5211         'BB': '65.48.128.0/17',
 5212         'BD': '114.130.0.0/16',
 5213         'BE': '57.0.0.0/8',
 5214         'BF': '102.178.0.0/15',
 5215         'BG': '95.42.0.0/15',
 5216         'BH': '37.131.0.0/17',
 5217         'BI': '154.117.192.0/18',
 5218         'BJ': '137.255.0.0/16',
 5219         'BL': '185.212.72.0/23',
 5220         'BM': '196.12.64.0/18',
 5221         'BN': '156.31.0.0/16',
 5222         'BO': '161.56.0.0/16',
 5223         'BQ': '161.0.80.0/20',
 5224         'BR': '191.128.0.0/12',
 5225         'BS': '24.51.64.0/18',
 5226         'BT': '119.2.96.0/19',
 5227         'BW': '168.167.0.0/16',
 5228         'BY': '178.120.0.0/13',
 5229         'BZ': '179.42.192.0/18',
 5230         'CA': '99.224.0.0/11',
 5231         'CD': '41.243.0.0/16',
 5232         'CF': '197.242.176.0/21',
 5233         'CG': '160.113.0.0/16',
 5234         'CH': '85.0.0.0/13',
 5235         'CI': '102.136.0.0/14',
 5236         'CK': '202.65.32.0/19',
 5237         'CL': '152.172.0.0/14',
 5238         'CM': '102.244.0.0/14',
 5239         'CN': '36.128.0.0/10',
 5240         'CO': '181.240.0.0/12',
 5241         'CR': '201.192.0.0/12',
 5242         'CU': '152.206.0.0/15',
 5243         'CV': '165.90.96.0/19',
 5244         'CW': '190.88.128.0/17',
 5245         'CY': '31.153.0.0/16',
 5246         'CZ': '88.100.0.0/14',
 5247         'DE': '53.0.0.0/8',
 5248         'DJ': '197.241.0.0/17',
 5249         'DK': '87.48.0.0/12',
 5250         'DM': '192.243.48.0/20',
 5251         'DO': '152.166.0.0/15',
 5252         'DZ': '41.96.0.0/12',
 5253         'EC': '186.68.0.0/15',
 5254         'EE': '90.190.0.0/15',
 5255         'EG': '156.160.0.0/11',
 5256         'ER': '196.200.96.0/20',
 5257         'ES': '88.0.0.0/11',
 5258         'ET': '196.188.0.0/14',
 5259         'EU': '2.16.0.0/13',
 5260         'FI': '91.152.0.0/13',
 5261         'FJ': '144.120.0.0/16',
 5262         'FK': '80.73.208.0/21',
 5263         'FM': '119.252.112.0/20',
 5264         'FO': '88.85.32.0/19',
 5265         'FR': '90.0.0.0/9',
 5266         'GA': '41.158.0.0/15',
 5267         'GB': '25.0.0.0/8',
 5268         'GD': '74.122.88.0/21',
 5269         'GE': '31.146.0.0/16',
 5270         'GF': '161.22.64.0/18',
 5271         'GG': '62.68.160.0/19',
 5272         'GH': '154.160.0.0/12',
 5273         'GI': '95.164.0.0/16',
 5274         'GL': '88.83.0.0/19',
 5275         'GM': '160.182.0.0/15',
 5276         'GN': '197.149.192.0/18',
 5277         'GP': '104.250.0.0/19',
 5278         'GQ': '105.235.224.0/20',
 5279         'GR': '94.64.0.0/13',
 5280         'GT': '168.234.0.0/16',
 5281         'GU': '168.123.0.0/16',
 5282         'GW': '197.214.80.0/20',
 5283         'GY': '181.41.64.0/18',
 5284         'HK': '113.252.0.0/14',
 5285         'HN': '181.210.0.0/16',
 5286         'HR': '93.136.0.0/13',
 5287         'HT': '148.102.128.0/17',
 5288         'HU': '84.0.0.0/14',
 5289         'ID': '39.192.0.0/10',
 5290         'IE': '87.32.0.0/12',
 5291         'IL': '79.176.0.0/13',
 5292         'IM': '5.62.80.0/20',
 5293         'IN': '117.192.0.0/10',
 5294         'IO': '203.83.48.0/21',
 5295         'IQ': '37.236.0.0/14',
 5296         'IR': '2.176.0.0/12',
 5297         'IS': '82.221.0.0/16',
 5298         'IT': '79.0.0.0/10',
 5299         'JE': '87.244.64.0/18',
 5300         'JM': '72.27.0.0/17',
 5301         'JO': '176.29.0.0/16',
 5302         'JP': '133.0.0.0/8',
 5303         'KE': '105.48.0.0/12',
 5304         'KG': '158.181.128.0/17',
 5305         'KH': '36.37.128.0/17',
 5306         'KI': '103.25.140.0/22',
 5307         'KM': '197.255.224.0/20',
 5308         'KN': '198.167.192.0/19',
 5309         'KP': '175.45.176.0/22',
 5310         'KR': '175.192.0.0/10',
 5311         'KW': '37.36.0.0/14',
 5312         'KY': '64.96.0.0/15',
 5313         'KZ': '2.72.0.0/13',
 5314         'LA': '115.84.64.0/18',
 5315         'LB': '178.135.0.0/16',
 5316         'LC': '24.92.144.0/20',
 5317         'LI': '82.117.0.0/19',
 5318         'LK': '112.134.0.0/15',
 5319         'LR': '102.183.0.0/16',
 5320         'LS': '129.232.0.0/17',
 5321         'LT': '78.56.0.0/13',
 5322         'LU': '188.42.0.0/16',
 5323         'LV': '46.109.0.0/16',
 5324         'LY': '41.252.0.0/14',
 5325         'MA': '105.128.0.0/11',
 5326         'MC': '88.209.64.0/18',
 5327         'MD': '37.246.0.0/16',
 5328         'ME': '178.175.0.0/17',
 5329         'MF': '74.112.232.0/21',
 5330         'MG': '154.126.0.0/17',
 5331         'MH': '117.103.88.0/21',
 5332         'MK': '77.28.0.0/15',
 5333         'ML': '154.118.128.0/18',
 5334         'MM': '37.111.0.0/17',
 5335         'MN': '49.0.128.0/17',
 5336         'MO': '60.246.0.0/16',
 5337         'MP': '202.88.64.0/20',
 5338         'MQ': '109.203.224.0/19',
 5339         'MR': '41.188.64.0/18',
 5340         'MS': '208.90.112.0/22',
 5341         'MT': '46.11.0.0/16',
 5342         'MU': '105.16.0.0/12',
 5343         'MV': '27.114.128.0/18',
 5344         'MW': '102.70.0.0/15',
 5345         'MX': '187.192.0.0/11',
 5346         'MY': '175.136.0.0/13',
 5347         'MZ': '197.218.0.0/15',
 5348         'NA': '41.182.0.0/16',
 5349         'NC': '101.101.0.0/18',
 5350         'NE': '197.214.0.0/18',
 5351         'NF': '203.17.240.0/22',
 5352         'NG': '105.112.0.0/12',
 5353         'NI': '186.76.0.0/15',
 5354         'NL': '145.96.0.0/11',
 5355         'NO': '84.208.0.0/13',
 5356         'NP': '36.252.0.0/15',
 5357         'NR': '203.98.224.0/19',
 5358         'NU': '49.156.48.0/22',
 5359         'NZ': '49.224.0.0/14',
 5360         'OM': '5.36.0.0/15',
 5361         'PA': '186.72.0.0/15',
 5362         'PE': '186.160.0.0/14',
 5363         'PF': '123.50.64.0/18',
 5364         'PG': '124.240.192.0/19',
 5365         'PH': '49.144.0.0/13',
 5366         'PK': '39.32.0.0/11',
 5367         'PL': '83.0.0.0/11',
 5368         'PM': '70.36.0.0/20',
 5369         'PR': '66.50.0.0/16',
 5370         'PS': '188.161.0.0/16',
 5371         'PT': '85.240.0.0/13',
 5372         'PW': '202.124.224.0/20',
 5373         'PY': '181.120.0.0/14',
 5374         'QA': '37.210.0.0/15',
 5375         'RE': '102.35.0.0/16',
 5376         'RO': '79.112.0.0/13',
 5377         'RS': '93.86.0.0/15',
 5378         'RU': '5.136.0.0/13',
 5379         'RW': '41.186.0.0/16',
 5380         'SA': '188.48.0.0/13',
 5381         'SB': '202.1.160.0/19',
 5382         'SC': '154.192.0.0/11',
 5383         'SD': '102.120.0.0/13',
 5384         'SE': '78.64.0.0/12',
 5385         'SG': '8.128.0.0/10',
 5386         'SI': '188.196.0.0/14',
 5387         'SK': '78.98.0.0/15',
 5388         'SL': '102.143.0.0/17',
 5389         'SM': '89.186.32.0/19',
 5390         'SN': '41.82.0.0/15',
 5391         'SO': '154.115.192.0/18',
 5392         'SR': '186.179.128.0/17',
 5393         'SS': '105.235.208.0/21',
 5394         'ST': '197.159.160.0/19',
 5395         'SV': '168.243.0.0/16',
 5396         'SX': '190.102.0.0/20',
 5397         'SY': '5.0.0.0/16',
 5398         'SZ': '41.84.224.0/19',
 5399         'TC': '65.255.48.0/20',
 5400         'TD': '154.68.128.0/19',
 5401         'TG': '196.168.0.0/14',
 5402         'TH': '171.96.0.0/13',
 5403         'TJ': '85.9.128.0/18',
 5404         'TK': '27.96.24.0/21',
 5405         'TL': '180.189.160.0/20',
 5406         'TM': '95.85.96.0/19',
 5407         'TN': '197.0.0.0/11',
 5408         'TO': '175.176.144.0/21',
 5409         'TR': '78.160.0.0/11',
 5410         'TT': '186.44.0.0/15',
 5411         'TV': '202.2.96.0/19',
 5412         'TW': '120.96.0.0/11',
 5413         'TZ': '156.156.0.0/14',
 5414         'UA': '37.52.0.0/14',
 5415         'UG': '102.80.0.0/13',
 5416         'US': '6.0.0.0/8',
 5417         'UY': '167.56.0.0/13',
 5418         'UZ': '84.54.64.0/18',
 5419         'VA': '212.77.0.0/19',
 5420         'VC': '207.191.240.0/21',
 5421         'VE': '186.88.0.0/13',
 5422         'VG': '66.81.192.0/20',
 5423         'VI': '146.226.0.0/16',
 5424         'VN': '14.160.0.0/11',
 5425         'VU': '202.80.32.0/20',
 5426         'WF': '117.20.32.0/21',
 5427         'WS': '202.4.32.0/19',
 5428         'YE': '134.35.0.0/16',
 5429         'YT': '41.242.116.0/22',
 5430         'ZA': '41.0.0.0/11',
 5431         'ZM': '102.144.0.0/13',
 5432         'ZW': '102.177.192.0/18',
 5433     }
 5434 
 5435     @classmethod
 5436     def random_ipv4(cls, code_or_block):
 5437         if len(code_or_block) == 2:
 5438             block = cls._country_ip_map.get(code_or_block.upper())
 5439             if not block:
 5440                 return None
 5441         else:
 5442             block = code_or_block
 5443         addr, preflen = block.split('/')
 5444         addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
 5445         addr_max = addr_min | (0xffffffff >> int(preflen))
 5446         return compat_str(socket.inet_ntoa(
 5447             compat_struct_pack('!L', random.randint(addr_min, addr_max))))
 5448 
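# Illustrative use (a sketch): GeoUtils.random_ipv4('US') returns a random
# address inside the 6.0.0.0/8 block listed above (e.g. '6.12.34.56'), while an
# explicit CIDR block such as GeoUtils.random_ipv4('192.0.2.0/24') is used as-is.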
 5449 
 5450 class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
 5451     def __init__(self, proxies=None):
 5452         # Set default handlers
 5453         for type in ('http', 'https'):
 5454             setattr(self, '%s_open' % type,
 5455                     lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
 5456                         meth(r, proxy, type))
 5457         compat_urllib_request.ProxyHandler.__init__(self, proxies)
 5458 
 5459     def proxy_open(self, req, proxy, type):
 5460         req_proxy = req.headers.get('Ytdl-request-proxy')
 5461         if req_proxy is not None:
 5462             proxy = req_proxy
 5463             del req.headers['Ytdl-request-proxy']
 5464 
 5465         if proxy == '__noproxy__':
 5466             return None  # No Proxy
 5467         if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
 5468             req.add_header('Ytdl-socks-proxy', proxy)
 5469             # The actual SOCKS wrapping of the socket is done by youtube-dl's http/https handlers
 5470             return None
 5471         return compat_urllib_request.ProxyHandler.proxy_open(
 5472             self, req, proxy, type)
 5473 
 5474 
 5475 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
 5476 # released into Public Domain
 5477 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
 5478 
 5479 def long_to_bytes(n, blocksize=0):
 5480     """long_to_bytes(n:long, blocksize:int) : string
 5481     Convert a long integer to a byte string.
 5482 
 5483     If optional blocksize is given and greater than zero, pad the front of the
 5484     byte string with binary zeros so that the length is a multiple of
 5485     blocksize.
 5486     """
 5487     # after much testing, this algorithm was deemed to be the fastest
 5488     s = b''
 5489     n = int(n)
 5490     while n > 0:
 5491         s = compat_struct_pack('>I', n & 0xffffffff) + s
 5492         n = n >> 32
 5493     # strip off leading zeros
 5494     for i in range(len(s)):
 5495         if s[i] != b'\000'[0]:
 5496             break
 5497     else:
 5498         # only happens when n == 0
 5499         s = b'\000'
 5500         i = 0
 5501     s = s[i:]
 5502     # add back some pad bytes.  this could be done more efficiently w.r.t. the
 5503     # de-padding being done above, but sigh...
 5504     if blocksize > 0 and len(s) % blocksize:
 5505         s = (blocksize - len(s) % blocksize) * b'\000' + s
 5506     return s
 5507 
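# Illustrative conversions (not executed; shown only to document the behaviour):
#   long_to_bytes(65537)    -> b'\x01\x00\x01'
#   long_to_bytes(65537, 4) -> b'\x00\x01\x00\x01'  (front-padded to a 4-byte multiple)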
 5508 
 5509 def bytes_to_long(s):
 5510     """bytes_to_long(string) : long
 5511     Convert a byte string to a long integer.
 5512 
 5513     This is (essentially) the inverse of long_to_bytes().
 5514     """
 5515     acc = 0
 5516     length = len(s)
 5517     if length % 4:
 5518         extra = (4 - length % 4)
 5519         s = b'\000' * extra + s
 5520         length = length + extra
 5521     for i in range(0, length, 4):
 5522         acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
 5523     return acc
 5524 
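# Illustrative conversion, the inverse of the above:
#   bytes_to_long(b'\x01\x00\x01') -> 65537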
 5525 
 5526 def ohdave_rsa_encrypt(data, exponent, modulus):
 5527     '''
 5528     Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
 5529 
 5530     Input:
 5531         data: data to encrypt, bytes-like object
 5532         exponent, modulus: parameter e and N of RSA algorithm, both integer
 5533     Output: hex string of encrypted data
 5534 
 5535     Limitation: supports single-block encryption only
 5536     '''
 5537 
 5538     payload = int(binascii.hexlify(data[::-1]), 16)
 5539     encrypted = pow(payload, exponent, modulus)
 5540     return '%x' % encrypted
 5541 
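# Illustrative call with tiny, insecure demo parameters (callers pass the
# site's published e and N): ohdave_rsa_encrypt(b'aa', 3, 3233) interprets the
# reversed bytes as the integer 0x6161, computes pow(0x6161, 3, 3233) and
# returns the result as a lowercase hex string.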
 5542 
 5543 def pkcs1pad(data, length):
 5544     """
 5545     Pad input data using the PKCS#1 scheme
 5546 
 5547     @param {int[]} data        input data
 5548     @param {int}   length      target length
 5549     @returns {int[]}           padded data
 5550     """
 5551     if len(data) > length - 11:
 5552         raise ValueError('Input data too long for PKCS#1 padding')
 5553 
 5554     pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
 5555     return [0, 2] + pseudo_random + [0] + data
 5556 
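# Illustrative result: pkcs1pad([1, 2, 3], 16) -> [0, 2, r1, ..., r10, 0, 1, 2, 3]
# (16 items in total, where each r_i is a random integer in [0, 254]; note that,
# unlike strict PKCS#1 v1.5, the padding bytes here may therefore be zero).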
 5557 
 5558 def encode_base_n(num, n, table=None):
 5559     FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
 5560     if not table:
 5561         table = FULL_TABLE[:n]
 5562 
 5563     if n > len(table):
 5564         raise ValueError('base %d exceeds table length %d' % (n, len(table)))
 5565 
 5566     if num == 0:
 5567         return table[0]
 5568 
 5569     ret = ''
 5570     while num:
 5571         ret = table[num % n] + ret
 5572         num = num // n
 5573     return ret
 5574 
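# Illustrative encodings using the default table:
#   encode_base_n(0, 30)   -> '0'
#   encode_base_n(255, 16) -> 'ff'
#   encode_base_n(62, 62)  -> '10'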
 5575 
 5576 def decode_packed_codes(code):
 5577     mobj = re.search(PACKED_CODES_RE, code)
 5578     obfuscated_code, base, count, symbols = mobj.groups()
 5579     base = int(base)
 5580     count = int(count)
 5581     symbols = symbols.split('|')
 5582     symbol_table = {}
 5583 
 5584     while count:
 5585         count -= 1
 5586         base_n_count = encode_base_n(count, base)
 5587         symbol_table[base_n_count] = symbols[count] or base_n_count
 5588 
 5589     return re.sub(
 5590         r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
 5591         obfuscated_code)
 5592 
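# decode_packed_codes() undoes the common JavaScript "packer" style obfuscation
# (eval(function(p,a,c,k,e,...){...})): every \w+ token in the packed source is
# replaced with its entry from the symbol table reconstructed above.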
 5593 
 5594 def caesar(s, alphabet, shift):
 5595     if shift == 0:
 5596         return s
 5597     l = len(alphabet)
 5598     return ''.join(
 5599         alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
 5600         for c in s)
 5601 
 5602 
 5603 def rot47(s):
 5604     return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47)
 5605 
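# Illustrative behaviour: rot47() rotates the 94 printable ASCII characters
# ('!' through '~') by 47 positions, so rot47('abc') -> '234' and applying it
# twice returns the original string.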
 5606 
 5607 def parse_m3u8_attributes(attrib):
 5608     info = {}
 5609     for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
 5610         if val.startswith('"'):
 5611             val = val[1:-1]
 5612         info[key] = val
 5613     return info
 5614 
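# Illustrative parse of an M3U8 attribute list:
#   parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="mp4a.40.2,avc1.4d401f"')
#       -> {'BANDWIDTH': '1280000', 'CODECS': 'mp4a.40.2,avc1.4d401f'}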
 5615 
 5616 def urshift(val, n):
 5617     return val >> n if val >= 0 else (val + 0x100000000) >> n
 5618 
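# urshift() mimics JavaScript's unsigned right shift (>>>) for 32-bit values:
#   urshift(-1, 1) -> 0x7fffffff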
 5619 
 5620 # Based on png2str() written by @gdkchan and improved by @yokrysty
 5621 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
 5622 def decode_png(png_data):
 5623     # Reference: https://www.w3.org/TR/PNG/
 5624     header = png_data[8:]
 5625 
 5626     if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
 5627         raise IOError('Not a valid PNG file.')
 5628 
 5629     int_map = {1: '>B', 2: '>H', 4: '>I'}
 5630     unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
 5631 
 5632     chunks = []
 5633 
 5634     while header:
 5635         length = unpack_integer(header[:4])
 5636         header = header[4:]
 5637 
 5638         chunk_type = header[:4]
 5639         header = header[4:]
 5640 
 5641         chunk_data = header[:length]
 5642         header = header[length:]
 5643 
 5644         header = header[4:]  # Skip CRC
 5645 
 5646         chunks.append({
 5647             'type': chunk_type,
 5648             'length': length,
 5649             'data': chunk_data
 5650         })
 5651 
 5652     ihdr = chunks[0]['data']
 5653 
 5654     width = unpack_integer(ihdr[:4])
 5655     height = unpack_integer(ihdr[4:8])
 5656 
 5657     idat = b''
 5658 
 5659     for chunk in chunks:
 5660         if chunk['type'] == b'IDAT':
 5661             idat += chunk['data']
 5662 
 5663     if not idat:
 5664         raise IOError('Unable to read PNG data.')
 5665 
 5666     decompressed_data = bytearray(zlib.decompress(idat))
 5667 
 5668     stride = width * 3
 5669     pixels = []
 5670 
 5671     def _get_pixel(idx):
 5672         x = idx % stride
 5673         y = idx // stride
 5674         return pixels[y][x]
 5675 
 5676     for y in range(height):
 5677         basePos = y * (1 + stride)
 5678         filter_type = decompressed_data[basePos]
 5679 
 5680         current_row = []
 5681 
 5682         pixels.append(current_row)
 5683 
 5684         for x in range(stride):
 5685             color = decompressed_data[1 + basePos + x]
 5686             basex = y * stride + x
 5687             left = 0
 5688             up = 0
 5689 
 5690             if x > 2:
 5691                 left = _get_pixel(basex - 3)
 5692             if y > 0:
 5693                 up = _get_pixel(basex - stride)
 5694 
 5695             if filter_type == 1:  # Sub
 5696                 color = (color + left) & 0xff
 5697             elif filter_type == 2:  # Up
 5698                 color = (color + up) & 0xff
 5699             elif filter_type == 3:  # Average
 5700                 color = (color + ((left + up) >> 1)) & 0xff
 5701             elif filter_type == 4:  # Paeth
 5702                 a = left
 5703                 b = up
 5704                 c = 0
 5705 
 5706                 if x > 2 and y > 0:
 5707                     c = _get_pixel(basex - stride - 3)
 5708 
 5709                 p = a + b - c
 5710 
 5711                 pa = abs(p - a)
 5712                 pb = abs(p - b)
 5713                 pc = abs(p - c)
 5714 
 5715                 if pa <= pb and pa <= pc:
 5716                     color = (color + a) & 0xff
 5717                 elif pb <= pc:
 5718                     color = (color + b) & 0xff
 5719                 else:
 5720                     color = (color + c) & 0xff
 5721 
 5722             current_row.append(color)
 5723 
 5724     return width, height, pixels
 5725 
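# Illustrative use (a sketch, assuming an 8-bit RGB image, which is the layout
# the decoder above expects):
#   width, height, pixels = decode_png(png_bytes)
#   # pixels[y] is a flat row of width * 3 values (R, G, B per pixel) with the
#   # PNG scanline filters already undone.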
 5726 
 5727 def write_xattr(path, key, value):
 5728     # This mess below finds the best xattr tool for the job
 5729     try:
 5730         # try the pyxattr module...
 5731         import xattr
 5732 
 5733         if hasattr(xattr, 'set'):  # pyxattr
 5734             # Unicode arguments are not supported in python-pyxattr until
 5735             # version 0.5.0
 5736             # See https://github.com/ytdl-org/youtube-dl/issues/5498
 5737             pyxattr_required_version = '0.5.0'
 5738             if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
 5739                 # TODO: fallback to CLI tools
 5740                 raise XAttrUnavailableError(
 5741                     'python-pyxattr is detected but is too old. '
 5742                     'youtube-dl requires %s or above while your version is %s. '
 5743                     'Falling back to other xattr implementations' % (
 5744                         pyxattr_required_version, xattr.__version__))
 5745 
 5746             setxattr = xattr.set
 5747         else:  # xattr
 5748             setxattr = xattr.setxattr
 5749 
 5750         try:
 5751             setxattr(path, key, value)
 5752         except EnvironmentError as e:
 5753             raise XAttrMetadataError(e.errno, e.strerror)
 5754 
 5755     except ImportError:
 5756         if compat_os_name == 'nt':
 5757             # Write xattrs to NTFS Alternate Data Streams:
 5758             # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
 5759             assert ':' not in key
 5760             assert os.path.exists(path)
 5761 
 5762             ads_fn = path + ':' + key
 5763             try:
 5764                 with open(ads_fn, 'wb') as f:
 5765                     f.write(value)
 5766             except EnvironmentError as e:
 5767                 raise XAttrMetadataError(e.errno, e.strerror)
 5768         else:
 5769             user_has_setfattr = check_executable('setfattr', ['--version'])
 5770             user_has_xattr = check_executable('xattr', ['-h'])
 5771 
 5772             if user_has_setfattr or user_has_xattr:
 5773 
 5774                 value = value.decode('utf-8')
 5775                 if user_has_setfattr:
 5776                     executable = 'setfattr'
 5777                     opts = ['-n', key, '-v', value]
 5778                 elif user_has_xattr:
 5779                     executable = 'xattr'
 5780                     opts = ['-w', key, value]
 5781 
 5782                 cmd = ([encodeFilename(executable, True)]
 5783                        + [encodeArgument(o) for o in opts]
 5784                        + [encodeFilename(path, True)])
 5785 
 5786                 try:
 5787                     p = subprocess.Popen(
 5788                         cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
 5789                 except EnvironmentError as e:
 5790                     raise XAttrMetadataError(e.errno, e.strerror)
 5791                 stdout, stderr = process_communicate_or_kill(p)
 5792                 stderr = stderr.decode('utf-8', 'replace')
 5793                 if p.returncode != 0:
 5794                     raise XAttrMetadataError(p.returncode, stderr)
 5795 
 5796             else:
 5797                 # On Unix, but neither pyxattr, setfattr nor xattr could be found.
 5798                 if sys.platform.startswith('linux'):
 5799                     raise XAttrUnavailableError(
 5800                         "Couldn't find a tool to set the xattrs. "
 5801                         "Install either the python 'pyxattr' or 'xattr' "
 5802                         "modules, or the GNU 'attr' package "
 5803                         "(which contains the 'setfattr' tool).")
 5804                 else:
 5805                     raise XAttrUnavailableError(
 5806                         "Couldn't find a tool to set the xattrs. "
 5807                         "Install either the python 'xattr' module, "
 5808                         "or the 'xattr' binary.")
 5809 
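# Illustrative call (a sketch; key and value are placeholders):
#   write_xattr('video.mp4', 'user.xdg.referrer.url', b'https://example.com/')
# which stores the value via pyxattr/xattr when available, an NTFS alternate
# data stream on Windows, or the setfattr/xattr command-line tools otherwise.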
 5810 
 5811 def random_birthday(year_field, month_field, day_field):
 5812     start_date = datetime.date(1950, 1, 1)
 5813     end_date = datetime.date(1995, 12, 31)
 5814     offset = random.randint(0, (end_date - start_date).days)
 5815     random_date = start_date + datetime.timedelta(offset)
 5816     return {
 5817         year_field: str(random_date.year),
 5818         month_field: str(random_date.month),
 5819         day_field: str(random_date.day),
 5820     }
 5821 
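# Illustrative result: random_birthday('birth_year', 'birth_month', 'birth_day')
# might return {'birth_year': '1984', 'birth_month': '7', 'birth_day': '23'},
# with the date drawn uniformly between 1950-01-01 and 1995-12-31.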
 5822 
 5823 def clean_podcast_url(url):
 5824     return re.sub(r'''(?x)
 5825         (?:
 5826             (?:
 5827                 chtbl\.com/track|
 5828                 media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
 5829                 play\.podtrac\.com
 5830             )/[^/]+|
 5831             (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
 5832             flex\.acast\.com|
 5833             pd(?:
 5834                 cn\.co| # https://podcorn.com/analytics-prefix/
 5835                 st\.fm # https://podsights.com/docs/
 5836             )/e
 5837         )/''', '', url)
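
# Illustrative cleanup of a tracking prefix:
#   clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/traffic.megaphone.fm/HSW7835889191.mp3')
#       -> 'https://traffic.megaphone.fm/HSW7835889191.mp3'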
