1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import unicode_literals
5
6 import base64
7 import binascii
8 import calendar
9 import codecs
10 import collections
11 import contextlib
12 import ctypes
13 import datetime
14 import email.utils
15 import email.header
16 import errno
17 import functools
18 import gzip
19 import io
20 import itertools
21 import json
22 import locale
23 import math
24 import operator
25 import os
26 import platform
27 import random
28 import re
29 import socket
30 import ssl
31 import subprocess
32 import sys
33 import tempfile
34 import time
35 import traceback
36 import unicodedata
37 import xml.etree.ElementTree
38 import zlib
39
40 from .compat import (
41 compat_HTMLParseError,
42 compat_HTMLParser,
43 compat_HTTPError,
44 compat_basestring,
45 compat_chr,
46 compat_cookiejar,
47 compat_ctypes_WINFUNCTYPE,
48 compat_etree_fromstring,
49 compat_expanduser,
50 compat_html_entities,
51 compat_html_entities_html5,
52 compat_http_client,
53 compat_integer_types,
54 compat_kwargs,
55 compat_os_name,
56 compat_parse_qs,
57 compat_shlex_quote,
58 compat_str,
59 compat_struct_pack,
60 compat_struct_unpack,
61 compat_urllib_error,
62 compat_urllib_parse,
63 compat_urllib_parse_urlencode,
64 compat_urllib_parse_urlparse,
65 compat_urllib_parse_unquote_plus,
66 compat_urllib_request,
67 compat_urlparse,
68 compat_xpath,
69 )
70
71 from .socks import (
72 ProxyType,
73 sockssocket,
74 )
75
76
def register_socks_protocols():
    """Teach urlparse that SOCKS URL schemes carry a network location.

    In Python < 2.6.5, urlsplit() suffers from
    https://bugs.python.org/issue7904: URLs whose scheme is missing from
    urlparse.uses_netloc are not split correctly, so each SOCKS scheme is
    appended to that registry (skipping ones already present).
    """
    registered = compat_urlparse.uses_netloc
    for socks_scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if socks_scheme in registered:
            continue
        registered.append(socks_scheme)
84
85
# This is not clearly defined otherwise
# The type object of a compiled regex; obtained by compiling a trivial
# pattern because the type has no stable public name on all supported
# Python versions. Intended for isinstance() checks on pattern arguments.
compiled_regex_type = type(re.compile(''))
88
89
def random_user_agent():
    """Return a User-Agent string imitating desktop Chrome on Windows 10,
    with the Chrome version picked at random from a fixed pool.
    """
    # Template; the %s placeholder receives one version from the pool below.
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # Pool of Chrome version numbers (majors 68-76) used to vary the UA.
    _CHROME_VERSIONS = (
        '74.0.3729.129',
        '76.0.3780.3',
        '76.0.3780.2',
        '74.0.3729.128',
        '76.0.3780.1',
        '76.0.3780.0',
        '75.0.3770.15',
        '74.0.3729.127',
        '74.0.3729.126',
        '76.0.3779.1',
        '76.0.3779.0',
        '75.0.3770.14',
        '74.0.3729.125',
        '76.0.3778.1',
        '76.0.3778.0',
        '75.0.3770.13',
        '74.0.3729.124',
        '74.0.3729.123',
        '73.0.3683.121',
        '76.0.3777.1',
        '76.0.3777.0',
        '75.0.3770.12',
        '74.0.3729.122',
        '76.0.3776.4',
        '75.0.3770.11',
        '74.0.3729.121',
        '76.0.3776.3',
        '76.0.3776.2',
        '73.0.3683.120',
        '74.0.3729.120',
        '74.0.3729.119',
        '74.0.3729.118',
        '76.0.3776.1',
        '76.0.3776.0',
        '76.0.3775.5',
        '75.0.3770.10',
        '74.0.3729.117',
        '76.0.3775.4',
        '76.0.3775.3',
        '74.0.3729.116',
        '75.0.3770.9',
        '76.0.3775.2',
        '76.0.3775.1',
        '76.0.3775.0',
        '75.0.3770.8',
        '74.0.3729.115',
        '74.0.3729.114',
        '76.0.3774.1',
        '76.0.3774.0',
        '75.0.3770.7',
        '74.0.3729.113',
        '74.0.3729.112',
        '74.0.3729.111',
        '76.0.3773.1',
        '76.0.3773.0',
        '75.0.3770.6',
        '74.0.3729.110',
        '74.0.3729.109',
        '76.0.3772.1',
        '76.0.3772.0',
        '75.0.3770.5',
        '74.0.3729.108',
        '74.0.3729.107',
        '76.0.3771.1',
        '76.0.3771.0',
        '75.0.3770.4',
        '74.0.3729.106',
        '74.0.3729.105',
        '75.0.3770.3',
        '74.0.3729.104',
        '74.0.3729.103',
        '74.0.3729.102',
        '75.0.3770.2',
        '74.0.3729.101',
        '75.0.3770.1',
        '75.0.3770.0',
        '74.0.3729.100',
        '75.0.3769.5',
        '75.0.3769.4',
        '74.0.3729.99',
        '75.0.3769.3',
        '75.0.3769.2',
        '75.0.3768.6',
        '74.0.3729.98',
        '75.0.3769.1',
        '75.0.3769.0',
        '74.0.3729.97',
        '73.0.3683.119',
        '73.0.3683.118',
        '74.0.3729.96',
        '75.0.3768.5',
        '75.0.3768.4',
        '75.0.3768.3',
        '75.0.3768.2',
        '74.0.3729.95',
        '74.0.3729.94',
        '75.0.3768.1',
        '75.0.3768.0',
        '74.0.3729.93',
        '74.0.3729.92',
        '73.0.3683.117',
        '74.0.3729.91',
        '75.0.3766.3',
        '74.0.3729.90',
        '75.0.3767.2',
        '75.0.3767.1',
        '75.0.3767.0',
        '74.0.3729.89',
        '73.0.3683.116',
        '75.0.3766.2',
        '74.0.3729.88',
        '75.0.3766.1',
        '75.0.3766.0',
        '74.0.3729.87',
        '73.0.3683.115',
        '74.0.3729.86',
        '75.0.3765.1',
        '75.0.3765.0',
        '74.0.3729.85',
        '73.0.3683.114',
        '74.0.3729.84',
        '75.0.3764.1',
        '75.0.3764.0',
        '74.0.3729.83',
        '73.0.3683.113',
        '75.0.3763.2',
        '75.0.3761.4',
        '74.0.3729.82',
        '75.0.3763.1',
        '75.0.3763.0',
        '74.0.3729.81',
        '73.0.3683.112',
        '75.0.3762.1',
        '75.0.3762.0',
        '74.0.3729.80',
        '75.0.3761.3',
        '74.0.3729.79',
        '73.0.3683.111',
        '75.0.3761.2',
        '74.0.3729.78',
        '74.0.3729.77',
        '75.0.3761.1',
        '75.0.3761.0',
        '73.0.3683.110',
        '74.0.3729.76',
        '74.0.3729.75',
        '75.0.3760.0',
        '74.0.3729.74',
        '75.0.3759.8',
        '75.0.3759.7',
        '75.0.3759.6',
        '74.0.3729.73',
        '75.0.3759.5',
        '74.0.3729.72',
        '73.0.3683.109',
        '75.0.3759.4',
        '75.0.3759.3',
        '74.0.3729.71',
        '75.0.3759.2',
        '74.0.3729.70',
        '73.0.3683.108',
        '74.0.3729.69',
        '75.0.3759.1',
        '75.0.3759.0',
        '74.0.3729.68',
        '73.0.3683.107',
        '74.0.3729.67',
        '75.0.3758.1',
        '75.0.3758.0',
        '74.0.3729.66',
        '73.0.3683.106',
        '74.0.3729.65',
        '75.0.3757.1',
        '75.0.3757.0',
        '74.0.3729.64',
        '73.0.3683.105',
        '74.0.3729.63',
        '75.0.3756.1',
        '75.0.3756.0',
        '74.0.3729.62',
        '73.0.3683.104',
        '75.0.3755.3',
        '75.0.3755.2',
        '73.0.3683.103',
        '75.0.3755.1',
        '75.0.3755.0',
        '74.0.3729.61',
        '73.0.3683.102',
        '74.0.3729.60',
        '75.0.3754.2',
        '74.0.3729.59',
        '75.0.3753.4',
        '74.0.3729.58',
        '75.0.3754.1',
        '75.0.3754.0',
        '74.0.3729.57',
        '73.0.3683.101',
        '75.0.3753.3',
        '75.0.3752.2',
        '75.0.3753.2',
        '74.0.3729.56',
        '75.0.3753.1',
        '75.0.3753.0',
        '74.0.3729.55',
        '73.0.3683.100',
        '74.0.3729.54',
        '75.0.3752.1',
        '75.0.3752.0',
        '74.0.3729.53',
        '73.0.3683.99',
        '74.0.3729.52',
        '75.0.3751.1',
        '75.0.3751.0',
        '74.0.3729.51',
        '73.0.3683.98',
        '74.0.3729.50',
        '75.0.3750.0',
        '74.0.3729.49',
        '74.0.3729.48',
        '74.0.3729.47',
        '75.0.3749.3',
        '74.0.3729.46',
        '73.0.3683.97',
        '75.0.3749.2',
        '74.0.3729.45',
        '75.0.3749.1',
        '75.0.3749.0',
        '74.0.3729.44',
        '73.0.3683.96',
        '74.0.3729.43',
        '74.0.3729.42',
        '75.0.3748.1',
        '75.0.3748.0',
        '74.0.3729.41',
        '75.0.3747.1',
        '73.0.3683.95',
        '75.0.3746.4',
        '74.0.3729.40',
        '74.0.3729.39',
        '75.0.3747.0',
        '75.0.3746.3',
        '75.0.3746.2',
        '74.0.3729.38',
        '75.0.3746.1',
        '75.0.3746.0',
        '74.0.3729.37',
        '73.0.3683.94',
        '75.0.3745.5',
        '75.0.3745.4',
        '75.0.3745.3',
        '75.0.3745.2',
        '74.0.3729.36',
        '75.0.3745.1',
        '75.0.3745.0',
        '75.0.3744.2',
        '74.0.3729.35',
        '73.0.3683.93',
        '74.0.3729.34',
        '75.0.3744.1',
        '75.0.3744.0',
        '74.0.3729.33',
        '73.0.3683.92',
        '74.0.3729.32',
        '74.0.3729.31',
        '73.0.3683.91',
        '75.0.3741.2',
        '75.0.3740.5',
        '74.0.3729.30',
        '75.0.3741.1',
        '75.0.3741.0',
        '74.0.3729.29',
        '75.0.3740.4',
        '73.0.3683.90',
        '74.0.3729.28',
        '75.0.3740.3',
        '73.0.3683.89',
        '75.0.3740.2',
        '74.0.3729.27',
        '75.0.3740.1',
        '75.0.3740.0',
        '74.0.3729.26',
        '73.0.3683.88',
        '73.0.3683.87',
        '74.0.3729.25',
        '75.0.3739.1',
        '75.0.3739.0',
        '73.0.3683.86',
        '74.0.3729.24',
        '73.0.3683.85',
        '75.0.3738.4',
        '75.0.3738.3',
        '75.0.3738.2',
        '75.0.3738.1',
        '75.0.3738.0',
        '74.0.3729.23',
        '73.0.3683.84',
        '74.0.3729.22',
        '74.0.3729.21',
        '75.0.3737.1',
        '75.0.3737.0',
        '74.0.3729.20',
        '73.0.3683.83',
        '74.0.3729.19',
        '75.0.3736.1',
        '75.0.3736.0',
        '74.0.3729.18',
        '73.0.3683.82',
        '74.0.3729.17',
        '75.0.3735.1',
        '75.0.3735.0',
        '74.0.3729.16',
        '73.0.3683.81',
        '75.0.3734.1',
        '75.0.3734.0',
        '74.0.3729.15',
        '73.0.3683.80',
        '74.0.3729.14',
        '75.0.3733.1',
        '75.0.3733.0',
        '75.0.3732.1',
        '74.0.3729.13',
        '74.0.3729.12',
        '73.0.3683.79',
        '74.0.3729.11',
        '75.0.3732.0',
        '74.0.3729.10',
        '73.0.3683.78',
        '74.0.3729.9',
        '74.0.3729.8',
        '74.0.3729.7',
        '75.0.3731.3',
        '75.0.3731.2',
        '75.0.3731.0',
        '74.0.3729.6',
        '73.0.3683.77',
        '73.0.3683.76',
        '75.0.3730.5',
        '75.0.3730.4',
        '73.0.3683.75',
        '74.0.3729.5',
        '73.0.3683.74',
        '75.0.3730.3',
        '75.0.3730.2',
        '74.0.3729.4',
        '73.0.3683.73',
        '73.0.3683.72',
        '75.0.3730.1',
        '75.0.3730.0',
        '74.0.3729.3',
        '73.0.3683.71',
        '74.0.3729.2',
        '73.0.3683.70',
        '74.0.3729.1',
        '74.0.3729.0',
        '74.0.3726.4',
        '73.0.3683.69',
        '74.0.3726.3',
        '74.0.3728.0',
        '74.0.3726.2',
        '73.0.3683.68',
        '74.0.3726.1',
        '74.0.3726.0',
        '74.0.3725.4',
        '73.0.3683.67',
        '73.0.3683.66',
        '74.0.3725.3',
        '74.0.3725.2',
        '74.0.3725.1',
        '74.0.3724.8',
        '74.0.3725.0',
        '73.0.3683.65',
        '74.0.3724.7',
        '74.0.3724.6',
        '74.0.3724.5',
        '74.0.3724.4',
        '74.0.3724.3',
        '74.0.3724.2',
        '74.0.3724.1',
        '74.0.3724.0',
        '73.0.3683.64',
        '74.0.3723.1',
        '74.0.3723.0',
        '73.0.3683.63',
        '74.0.3722.1',
        '74.0.3722.0',
        '73.0.3683.62',
        '74.0.3718.9',
        '74.0.3702.3',
        '74.0.3721.3',
        '74.0.3721.2',
        '74.0.3721.1',
        '74.0.3721.0',
        '74.0.3720.6',
        '73.0.3683.61',
        '72.0.3626.122',
        '73.0.3683.60',
        '74.0.3720.5',
        '72.0.3626.121',
        '74.0.3718.8',
        '74.0.3720.4',
        '74.0.3720.3',
        '74.0.3718.7',
        '74.0.3720.2',
        '74.0.3720.1',
        '74.0.3720.0',
        '74.0.3718.6',
        '74.0.3719.5',
        '73.0.3683.59',
        '74.0.3718.5',
        '74.0.3718.4',
        '74.0.3719.4',
        '74.0.3719.3',
        '74.0.3719.2',
        '74.0.3719.1',
        '73.0.3683.58',
        '74.0.3719.0',
        '73.0.3683.57',
        '73.0.3683.56',
        '74.0.3718.3',
        '73.0.3683.55',
        '74.0.3718.2',
        '74.0.3718.1',
        '74.0.3718.0',
        '73.0.3683.54',
        '74.0.3717.2',
        '73.0.3683.53',
        '74.0.3717.1',
        '74.0.3717.0',
        '73.0.3683.52',
        '74.0.3716.1',
        '74.0.3716.0',
        '73.0.3683.51',
        '74.0.3715.1',
        '74.0.3715.0',
        '73.0.3683.50',
        '74.0.3711.2',
        '74.0.3714.2',
        '74.0.3713.3',
        '74.0.3714.1',
        '74.0.3714.0',
        '73.0.3683.49',
        '74.0.3713.1',
        '74.0.3713.0',
        '72.0.3626.120',
        '73.0.3683.48',
        '74.0.3712.2',
        '74.0.3712.1',
        '74.0.3712.0',
        '73.0.3683.47',
        '72.0.3626.119',
        '73.0.3683.46',
        '74.0.3710.2',
        '72.0.3626.118',
        '74.0.3711.1',
        '74.0.3711.0',
        '73.0.3683.45',
        '72.0.3626.117',
        '74.0.3710.1',
        '74.0.3710.0',
        '73.0.3683.44',
        '72.0.3626.116',
        '74.0.3709.1',
        '74.0.3709.0',
        '74.0.3704.9',
        '73.0.3683.43',
        '72.0.3626.115',
        '74.0.3704.8',
        '74.0.3704.7',
        '74.0.3708.0',
        '74.0.3706.7',
        '74.0.3704.6',
        '73.0.3683.42',
        '72.0.3626.114',
        '74.0.3706.6',
        '72.0.3626.113',
        '74.0.3704.5',
        '74.0.3706.5',
        '74.0.3706.4',
        '74.0.3706.3',
        '74.0.3706.2',
        '74.0.3706.1',
        '74.0.3706.0',
        '73.0.3683.41',
        '72.0.3626.112',
        '74.0.3705.1',
        '74.0.3705.0',
        '73.0.3683.40',
        '72.0.3626.111',
        '73.0.3683.39',
        '74.0.3704.4',
        '73.0.3683.38',
        '74.0.3704.3',
        '74.0.3704.2',
        '74.0.3704.1',
        '74.0.3704.0',
        '73.0.3683.37',
        '72.0.3626.110',
        '72.0.3626.109',
        '74.0.3703.3',
        '74.0.3703.2',
        '73.0.3683.36',
        '74.0.3703.1',
        '74.0.3703.0',
        '73.0.3683.35',
        '72.0.3626.108',
        '74.0.3702.2',
        '74.0.3699.3',
        '74.0.3702.1',
        '74.0.3702.0',
        '73.0.3683.34',
        '72.0.3626.107',
        '73.0.3683.33',
        '74.0.3701.1',
        '74.0.3701.0',
        '73.0.3683.32',
        '73.0.3683.31',
        '72.0.3626.105',
        '74.0.3700.1',
        '74.0.3700.0',
        '73.0.3683.29',
        '72.0.3626.103',
        '74.0.3699.2',
        '74.0.3699.1',
        '74.0.3699.0',
        '73.0.3683.28',
        '72.0.3626.102',
        '73.0.3683.27',
        '73.0.3683.26',
        '74.0.3698.0',
        '74.0.3696.2',
        '72.0.3626.101',
        '73.0.3683.25',
        '74.0.3696.1',
        '74.0.3696.0',
        '74.0.3694.8',
        '72.0.3626.100',
        '74.0.3694.7',
        '74.0.3694.6',
        '74.0.3694.5',
        '74.0.3694.4',
        '72.0.3626.99',
        '72.0.3626.98',
        '74.0.3694.3',
        '73.0.3683.24',
        '72.0.3626.97',
        '72.0.3626.96',
        '72.0.3626.95',
        '73.0.3683.23',
        '72.0.3626.94',
        '73.0.3683.22',
        '73.0.3683.21',
        '72.0.3626.93',
        '74.0.3694.2',
        '72.0.3626.92',
        '74.0.3694.1',
        '74.0.3694.0',
        '74.0.3693.6',
        '73.0.3683.20',
        '72.0.3626.91',
        '74.0.3693.5',
        '74.0.3693.4',
        '74.0.3693.3',
        '74.0.3693.2',
        '73.0.3683.19',
        '74.0.3693.1',
        '74.0.3693.0',
        '73.0.3683.18',
        '72.0.3626.90',
        '74.0.3692.1',
        '74.0.3692.0',
        '73.0.3683.17',
        '72.0.3626.89',
        '74.0.3687.3',
        '74.0.3691.1',
        '74.0.3691.0',
        '73.0.3683.16',
        '72.0.3626.88',
        '72.0.3626.87',
        '73.0.3683.15',
        '74.0.3690.1',
        '74.0.3690.0',
        '73.0.3683.14',
        '72.0.3626.86',
        '73.0.3683.13',
        '73.0.3683.12',
        '74.0.3689.1',
        '74.0.3689.0',
        '73.0.3683.11',
        '72.0.3626.85',
        '73.0.3683.10',
        '72.0.3626.84',
        '73.0.3683.9',
        '74.0.3688.1',
        '74.0.3688.0',
        '73.0.3683.8',
        '72.0.3626.83',
        '74.0.3687.2',
        '74.0.3687.1',
        '74.0.3687.0',
        '73.0.3683.7',
        '72.0.3626.82',
        '74.0.3686.4',
        '72.0.3626.81',
        '74.0.3686.3',
        '74.0.3686.2',
        '74.0.3686.1',
        '74.0.3686.0',
        '73.0.3683.6',
        '72.0.3626.80',
        '74.0.3685.1',
        '74.0.3685.0',
        '73.0.3683.5',
        '72.0.3626.79',
        '74.0.3684.1',
        '74.0.3684.0',
        '73.0.3683.4',
        '72.0.3626.78',
        '72.0.3626.77',
        '73.0.3683.3',
        '73.0.3683.2',
        '72.0.3626.76',
        '73.0.3683.1',
        '73.0.3683.0',
        '72.0.3626.75',
        '71.0.3578.141',
        '73.0.3682.1',
        '73.0.3682.0',
        '72.0.3626.74',
        '71.0.3578.140',
        '73.0.3681.4',
        '73.0.3681.3',
        '73.0.3681.2',
        '73.0.3681.1',
        '73.0.3681.0',
        '72.0.3626.73',
        '71.0.3578.139',
        '72.0.3626.72',
        '72.0.3626.71',
        '73.0.3680.1',
        '73.0.3680.0',
        '72.0.3626.70',
        '71.0.3578.138',
        '73.0.3678.2',
        '73.0.3679.1',
        '73.0.3679.0',
        '72.0.3626.69',
        '71.0.3578.137',
        '73.0.3678.1',
        '73.0.3678.0',
        '71.0.3578.136',
        '73.0.3677.1',
        '73.0.3677.0',
        '72.0.3626.68',
        '72.0.3626.67',
        '71.0.3578.135',
        '73.0.3676.1',
        '73.0.3676.0',
        '73.0.3674.2',
        '72.0.3626.66',
        '71.0.3578.134',
        '73.0.3674.1',
        '73.0.3674.0',
        '72.0.3626.65',
        '71.0.3578.133',
        '73.0.3673.2',
        '73.0.3673.1',
        '73.0.3673.0',
        '72.0.3626.64',
        '71.0.3578.132',
        '72.0.3626.63',
        '72.0.3626.62',
        '72.0.3626.61',
        '72.0.3626.60',
        '73.0.3672.1',
        '73.0.3672.0',
        '72.0.3626.59',
        '71.0.3578.131',
        '73.0.3671.3',
        '73.0.3671.2',
        '73.0.3671.1',
        '73.0.3671.0',
        '72.0.3626.58',
        '71.0.3578.130',
        '73.0.3670.1',
        '73.0.3670.0',
        '72.0.3626.57',
        '71.0.3578.129',
        '73.0.3669.1',
        '73.0.3669.0',
        '72.0.3626.56',
        '71.0.3578.128',
        '73.0.3668.2',
        '73.0.3668.1',
        '73.0.3668.0',
        '72.0.3626.55',
        '71.0.3578.127',
        '73.0.3667.2',
        '73.0.3667.1',
        '73.0.3667.0',
        '72.0.3626.54',
        '71.0.3578.126',
        '73.0.3666.1',
        '73.0.3666.0',
        '72.0.3626.53',
        '71.0.3578.125',
        '73.0.3665.4',
        '73.0.3665.3',
        '72.0.3626.52',
        '73.0.3665.2',
        '73.0.3664.4',
        '73.0.3665.1',
        '73.0.3665.0',
        '72.0.3626.51',
        '71.0.3578.124',
        '72.0.3626.50',
        '73.0.3664.3',
        '73.0.3664.2',
        '73.0.3664.1',
        '73.0.3664.0',
        '73.0.3663.2',
        '72.0.3626.49',
        '71.0.3578.123',
        '73.0.3663.1',
        '73.0.3663.0',
        '72.0.3626.48',
        '71.0.3578.122',
        '73.0.3662.1',
        '73.0.3662.0',
        '72.0.3626.47',
        '71.0.3578.121',
        '73.0.3661.1',
        '72.0.3626.46',
        '73.0.3661.0',
        '72.0.3626.45',
        '71.0.3578.120',
        '73.0.3660.2',
        '73.0.3660.1',
        '73.0.3660.0',
        '72.0.3626.44',
        '71.0.3578.119',
        '73.0.3659.1',
        '73.0.3659.0',
        '72.0.3626.43',
        '71.0.3578.118',
        '73.0.3658.1',
        '73.0.3658.0',
        '72.0.3626.42',
        '71.0.3578.117',
        '73.0.3657.1',
        '73.0.3657.0',
        '72.0.3626.41',
        '71.0.3578.116',
        '73.0.3656.1',
        '73.0.3656.0',
        '72.0.3626.40',
        '71.0.3578.115',
        '73.0.3655.1',
        '73.0.3655.0',
        '72.0.3626.39',
        '71.0.3578.114',
        '73.0.3654.1',
        '73.0.3654.0',
        '72.0.3626.38',
        '71.0.3578.113',
        '73.0.3653.1',
        '73.0.3653.0',
        '72.0.3626.37',
        '71.0.3578.112',
        '73.0.3652.1',
        '73.0.3652.0',
        '72.0.3626.36',
        '71.0.3578.111',
        '73.0.3651.1',
        '73.0.3651.0',
        '72.0.3626.35',
        '71.0.3578.110',
        '73.0.3650.1',
        '73.0.3650.0',
        '72.0.3626.34',
        '71.0.3578.109',
        '73.0.3649.1',
        '73.0.3649.0',
        '72.0.3626.33',
        '71.0.3578.108',
        '73.0.3648.2',
        '73.0.3648.1',
        '73.0.3648.0',
        '72.0.3626.32',
        '71.0.3578.107',
        '73.0.3647.2',
        '73.0.3647.1',
        '73.0.3647.0',
        '72.0.3626.31',
        '71.0.3578.106',
        '73.0.3635.3',
        '73.0.3646.2',
        '73.0.3646.1',
        '73.0.3646.0',
        '72.0.3626.30',
        '71.0.3578.105',
        '72.0.3626.29',
        '73.0.3645.2',
        '73.0.3645.1',
        '73.0.3645.0',
        '72.0.3626.28',
        '71.0.3578.104',
        '72.0.3626.27',
        '72.0.3626.26',
        '72.0.3626.25',
        '72.0.3626.24',
        '73.0.3644.0',
        '73.0.3643.2',
        '72.0.3626.23',
        '71.0.3578.103',
        '73.0.3643.1',
        '73.0.3643.0',
        '72.0.3626.22',
        '71.0.3578.102',
        '73.0.3642.1',
        '73.0.3642.0',
        '72.0.3626.21',
        '71.0.3578.101',
        '73.0.3641.1',
        '73.0.3641.0',
        '72.0.3626.20',
        '71.0.3578.100',
        '72.0.3626.19',
        '73.0.3640.1',
        '73.0.3640.0',
        '72.0.3626.18',
        '73.0.3639.1',
        '71.0.3578.99',
        '73.0.3639.0',
        '72.0.3626.17',
        '73.0.3638.2',
        '72.0.3626.16',
        '73.0.3638.1',
        '73.0.3638.0',
        '72.0.3626.15',
        '71.0.3578.98',
        '73.0.3635.2',
        '71.0.3578.97',
        '73.0.3637.1',
        '73.0.3637.0',
        '72.0.3626.14',
        '71.0.3578.96',
        '71.0.3578.95',
        '72.0.3626.13',
        '71.0.3578.94',
        '73.0.3636.2',
        '71.0.3578.93',
        '73.0.3636.1',
        '73.0.3636.0',
        '72.0.3626.12',
        '71.0.3578.92',
        '73.0.3635.1',
        '73.0.3635.0',
        '72.0.3626.11',
        '71.0.3578.91',
        '73.0.3634.2',
        '73.0.3634.1',
        '73.0.3634.0',
        '72.0.3626.10',
        '71.0.3578.90',
        '71.0.3578.89',
        '73.0.3633.2',
        '73.0.3633.1',
        '73.0.3633.0',
        '72.0.3610.4',
        '72.0.3626.9',
        '71.0.3578.88',
        '73.0.3632.5',
        '73.0.3632.4',
        '73.0.3632.3',
        '73.0.3632.2',
        '73.0.3632.1',
        '73.0.3632.0',
        '72.0.3626.8',
        '71.0.3578.87',
        '73.0.3631.2',
        '73.0.3631.1',
        '73.0.3631.0',
        '72.0.3626.7',
        '71.0.3578.86',
        '72.0.3626.6',
        '73.0.3630.1',
        '73.0.3630.0',
        '72.0.3626.5',
        '71.0.3578.85',
        '72.0.3626.4',
        '73.0.3628.3',
        '73.0.3628.2',
        '73.0.3629.1',
        '73.0.3629.0',
        '72.0.3626.3',
        '71.0.3578.84',
        '73.0.3628.1',
        '73.0.3628.0',
        '71.0.3578.83',
        '73.0.3627.1',
        '73.0.3627.0',
        '72.0.3626.2',
        '71.0.3578.82',
        '71.0.3578.81',
        '71.0.3578.80',
        '72.0.3626.1',
        '72.0.3626.0',
        '71.0.3578.79',
        '70.0.3538.124',
        '71.0.3578.78',
        '72.0.3623.4',
        '72.0.3625.2',
        '72.0.3625.1',
        '72.0.3625.0',
        '71.0.3578.77',
        '70.0.3538.123',
        '72.0.3624.4',
        '72.0.3624.3',
        '72.0.3624.2',
        '71.0.3578.76',
        '72.0.3624.1',
        '72.0.3624.0',
        '72.0.3623.3',
        '71.0.3578.75',
        '70.0.3538.122',
        '71.0.3578.74',
        '72.0.3623.2',
        '72.0.3610.3',
        '72.0.3623.1',
        '72.0.3623.0',
        '72.0.3622.3',
        '72.0.3622.2',
        '71.0.3578.73',
        '70.0.3538.121',
        '72.0.3622.1',
        '72.0.3622.0',
        '71.0.3578.72',
        '70.0.3538.120',
        '72.0.3621.1',
        '72.0.3621.0',
        '71.0.3578.71',
        '70.0.3538.119',
        '72.0.3620.1',
        '72.0.3620.0',
        '71.0.3578.70',
        '70.0.3538.118',
        '71.0.3578.69',
        '72.0.3619.1',
        '72.0.3619.0',
        '71.0.3578.68',
        '70.0.3538.117',
        '71.0.3578.67',
        '72.0.3618.1',
        '72.0.3618.0',
        '71.0.3578.66',
        '70.0.3538.116',
        '72.0.3617.1',
        '72.0.3617.0',
        '71.0.3578.65',
        '70.0.3538.115',
        '72.0.3602.3',
        '71.0.3578.64',
        '72.0.3616.1',
        '72.0.3616.0',
        '71.0.3578.63',
        '70.0.3538.114',
        '71.0.3578.62',
        '72.0.3615.1',
        '72.0.3615.0',
        '71.0.3578.61',
        '70.0.3538.113',
        '72.0.3614.1',
        '72.0.3614.0',
        '71.0.3578.60',
        '70.0.3538.112',
        '72.0.3613.1',
        '72.0.3613.0',
        '71.0.3578.59',
        '70.0.3538.111',
        '72.0.3612.2',
        '72.0.3612.1',
        '72.0.3612.0',
        '70.0.3538.110',
        '71.0.3578.58',
        '70.0.3538.109',
        '72.0.3611.2',
        '72.0.3611.1',
        '72.0.3611.0',
        '71.0.3578.57',
        '70.0.3538.108',
        '72.0.3610.2',
        '71.0.3578.56',
        '71.0.3578.55',
        '72.0.3610.1',
        '72.0.3610.0',
        '71.0.3578.54',
        '70.0.3538.107',
        '71.0.3578.53',
        '72.0.3609.3',
        '71.0.3578.52',
        '72.0.3609.2',
        '71.0.3578.51',
        '72.0.3608.5',
        '72.0.3609.1',
        '72.0.3609.0',
        '71.0.3578.50',
        '70.0.3538.106',
        '72.0.3608.4',
        '72.0.3608.3',
        '72.0.3608.2',
        '71.0.3578.49',
        '72.0.3608.1',
        '72.0.3608.0',
        '70.0.3538.105',
        '71.0.3578.48',
        '72.0.3607.1',
        '72.0.3607.0',
        '71.0.3578.47',
        '70.0.3538.104',
        '72.0.3606.2',
        '72.0.3606.1',
        '72.0.3606.0',
        '71.0.3578.46',
        '70.0.3538.103',
        '70.0.3538.102',
        '72.0.3605.3',
        '72.0.3605.2',
        '72.0.3605.1',
        '72.0.3605.0',
        '71.0.3578.45',
        '70.0.3538.101',
        '71.0.3578.44',
        '71.0.3578.43',
        '70.0.3538.100',
        '70.0.3538.99',
        '71.0.3578.42',
        '72.0.3604.1',
        '72.0.3604.0',
        '71.0.3578.41',
        '70.0.3538.98',
        '71.0.3578.40',
        '72.0.3603.2',
        '72.0.3603.1',
        '72.0.3603.0',
        '71.0.3578.39',
        '70.0.3538.97',
        '72.0.3602.2',
        '71.0.3578.38',
        '71.0.3578.37',
        '72.0.3602.1',
        '72.0.3602.0',
        '71.0.3578.36',
        '70.0.3538.96',
        '72.0.3601.1',
        '72.0.3601.0',
        '71.0.3578.35',
        '70.0.3538.95',
        '72.0.3600.1',
        '72.0.3600.0',
        '71.0.3578.34',
        '70.0.3538.94',
        '72.0.3599.3',
        '72.0.3599.2',
        '72.0.3599.1',
        '72.0.3599.0',
        '71.0.3578.33',
        '70.0.3538.93',
        '72.0.3598.1',
        '72.0.3598.0',
        '71.0.3578.32',
        '70.0.3538.87',
        '72.0.3597.1',
        '72.0.3597.0',
        '72.0.3596.2',
        '71.0.3578.31',
        '70.0.3538.86',
        '71.0.3578.30',
        '71.0.3578.29',
        '72.0.3596.1',
        '72.0.3596.0',
        '71.0.3578.28',
        '70.0.3538.85',
        '72.0.3595.2',
        '72.0.3591.3',
        '72.0.3595.1',
        '72.0.3595.0',
        '71.0.3578.27',
        '70.0.3538.84',
        '72.0.3594.1',
        '72.0.3594.0',
        '71.0.3578.26',
        '70.0.3538.83',
        '72.0.3593.2',
        '72.0.3593.1',
        '72.0.3593.0',
        '71.0.3578.25',
        '70.0.3538.82',
        '72.0.3589.3',
        '72.0.3592.2',
        '72.0.3592.1',
        '72.0.3592.0',
        '71.0.3578.24',
        '72.0.3589.2',
        '70.0.3538.81',
        '70.0.3538.80',
        '72.0.3591.2',
        '72.0.3591.1',
        '72.0.3591.0',
        '71.0.3578.23',
        '70.0.3538.79',
        '71.0.3578.22',
        '72.0.3590.1',
        '72.0.3590.0',
        '71.0.3578.21',
        '70.0.3538.78',
        '70.0.3538.77',
        '72.0.3589.1',
        '72.0.3589.0',
        '71.0.3578.20',
        '70.0.3538.76',
        '71.0.3578.19',
        '70.0.3538.75',
        '72.0.3588.1',
        '72.0.3588.0',
        '71.0.3578.18',
        '70.0.3538.74',
        '72.0.3586.2',
        '72.0.3587.0',
        '71.0.3578.17',
        '70.0.3538.73',
        '72.0.3586.1',
        '72.0.3586.0',
        '71.0.3578.16',
        '70.0.3538.72',
        '72.0.3585.1',
        '72.0.3585.0',
        '71.0.3578.15',
        '70.0.3538.71',
        '71.0.3578.14',
        '72.0.3584.1',
        '72.0.3584.0',
        '71.0.3578.13',
        '70.0.3538.70',
        '72.0.3583.2',
        '71.0.3578.12',
        '72.0.3583.1',
        '72.0.3583.0',
        '71.0.3578.11',
        '70.0.3538.69',
        '71.0.3578.10',
        '72.0.3582.0',
        '72.0.3581.4',
        '71.0.3578.9',
        '70.0.3538.67',
        '72.0.3581.3',
        '72.0.3581.2',
        '72.0.3581.1',
        '72.0.3581.0',
        '71.0.3578.8',
        '70.0.3538.66',
        '72.0.3580.1',
        '72.0.3580.0',
        '71.0.3578.7',
        '70.0.3538.65',
        '71.0.3578.6',
        '72.0.3579.1',
        '72.0.3579.0',
        '71.0.3578.5',
        '70.0.3538.64',
        '71.0.3578.4',
        '71.0.3578.3',
        '71.0.3578.2',
        '71.0.3578.1',
        '71.0.3578.0',
        '70.0.3538.63',
        '69.0.3497.128',
        '70.0.3538.62',
        '70.0.3538.61',
        '70.0.3538.60',
        '70.0.3538.59',
        '71.0.3577.1',
        '71.0.3577.0',
        '70.0.3538.58',
        '69.0.3497.127',
        '71.0.3576.2',
        '71.0.3576.1',
        '71.0.3576.0',
        '70.0.3538.57',
        '70.0.3538.56',
        '71.0.3575.2',
        '70.0.3538.55',
        '69.0.3497.126',
        '70.0.3538.54',
        '71.0.3575.1',
        '71.0.3575.0',
        '71.0.3574.1',
        '71.0.3574.0',
        '70.0.3538.53',
        '69.0.3497.125',
        '70.0.3538.52',
        '71.0.3573.1',
        '71.0.3573.0',
        '70.0.3538.51',
        '69.0.3497.124',
        '71.0.3572.1',
        '71.0.3572.0',
        '70.0.3538.50',
        '69.0.3497.123',
        '71.0.3571.2',
        '70.0.3538.49',
        '69.0.3497.122',
        '71.0.3571.1',
        '71.0.3571.0',
        '70.0.3538.48',
        '69.0.3497.121',
        '71.0.3570.1',
        '71.0.3570.0',
        '70.0.3538.47',
        '69.0.3497.120',
        '71.0.3568.2',
        '71.0.3569.1',
        '71.0.3569.0',
        '70.0.3538.46',
        '69.0.3497.119',
        '70.0.3538.45',
        '71.0.3568.1',
        '71.0.3568.0',
        '70.0.3538.44',
        '69.0.3497.118',
        '70.0.3538.43',
        '70.0.3538.42',
        '71.0.3567.1',
        '71.0.3567.0',
        '70.0.3538.41',
        '69.0.3497.117',
        '71.0.3566.1',
        '71.0.3566.0',
        '70.0.3538.40',
        '69.0.3497.116',
        '71.0.3565.1',
        '71.0.3565.0',
        '70.0.3538.39',
        '69.0.3497.115',
        '71.0.3564.1',
        '71.0.3564.0',
        '70.0.3538.38',
        '69.0.3497.114',
        '71.0.3563.0',
        '71.0.3562.2',
        '70.0.3538.37',
        '69.0.3497.113',
        '70.0.3538.36',
        '70.0.3538.35',
        '71.0.3562.1',
        '71.0.3562.0',
        '70.0.3538.34',
        '69.0.3497.112',
        '70.0.3538.33',
        '71.0.3561.1',
        '71.0.3561.0',
        '70.0.3538.32',
        '69.0.3497.111',
        '71.0.3559.6',
        '71.0.3560.1',
        '71.0.3560.0',
        '71.0.3559.5',
        '71.0.3559.4',
        '70.0.3538.31',
        '69.0.3497.110',
        '71.0.3559.3',
        '70.0.3538.30',
        '69.0.3497.109',
        '71.0.3559.2',
        '71.0.3559.1',
        '71.0.3559.0',
        '70.0.3538.29',
        '69.0.3497.108',
        '71.0.3558.2',
        '71.0.3558.1',
        '71.0.3558.0',
        '70.0.3538.28',
        '69.0.3497.107',
        '71.0.3557.2',
        '71.0.3557.1',
        '71.0.3557.0',
        '70.0.3538.27',
        '69.0.3497.106',
        '71.0.3554.4',
        '70.0.3538.26',
        '71.0.3556.1',
        '71.0.3556.0',
        '70.0.3538.25',
        '71.0.3554.3',
        '69.0.3497.105',
        '71.0.3554.2',
        '70.0.3538.24',
        '69.0.3497.104',
        '71.0.3555.2',
        '70.0.3538.23',
        '71.0.3555.1',
        '71.0.3555.0',
        '70.0.3538.22',
        '69.0.3497.103',
        '71.0.3554.1',
        '71.0.3554.0',
        '70.0.3538.21',
        '69.0.3497.102',
        '71.0.3553.3',
        '70.0.3538.20',
        '69.0.3497.101',
        '71.0.3553.2',
        '69.0.3497.100',
        '71.0.3553.1',
        '71.0.3553.0',
        '70.0.3538.19',
        '69.0.3497.99',
        '69.0.3497.98',
        '69.0.3497.97',
        '71.0.3552.6',
        '71.0.3552.5',
        '71.0.3552.4',
        '71.0.3552.3',
        '71.0.3552.2',
        '71.0.3552.1',
        '71.0.3552.0',
        '70.0.3538.18',
        '69.0.3497.96',
        '71.0.3551.3',
        '71.0.3551.2',
        '71.0.3551.1',
        '71.0.3551.0',
        '70.0.3538.17',
        '69.0.3497.95',
        '71.0.3550.3',
        '71.0.3550.2',
        '71.0.3550.1',
        '71.0.3550.0',
        '70.0.3538.16',
        '69.0.3497.94',
        '71.0.3549.1',
        '71.0.3549.0',
        '70.0.3538.15',
        '69.0.3497.93',
        '69.0.3497.92',
        '71.0.3548.1',
        '71.0.3548.0',
        '70.0.3538.14',
        '69.0.3497.91',
        '71.0.3547.1',
        '71.0.3547.0',
        '70.0.3538.13',
        '69.0.3497.90',
        '71.0.3546.2',
        '69.0.3497.89',
        '71.0.3546.1',
        '71.0.3546.0',
        '70.0.3538.12',
        '69.0.3497.88',
        '71.0.3545.4',
        '71.0.3545.3',
        '71.0.3545.2',
        '71.0.3545.1',
        '71.0.3545.0',
        '70.0.3538.11',
        '69.0.3497.87',
        '71.0.3544.5',
        '71.0.3544.4',
        '71.0.3544.3',
        '71.0.3544.2',
        '71.0.3544.1',
        '71.0.3544.0',
        '69.0.3497.86',
        '70.0.3538.10',
        '69.0.3497.85',
        '70.0.3538.9',
        '69.0.3497.84',
        '71.0.3543.4',
        '70.0.3538.8',
        '71.0.3543.3',
        '71.0.3543.2',
        '71.0.3543.1',
        '71.0.3543.0',
        '70.0.3538.7',
        '69.0.3497.83',
        '71.0.3542.2',
        '71.0.3542.1',
        '71.0.3542.0',
        '70.0.3538.6',
        '69.0.3497.82',
        '69.0.3497.81',
        '71.0.3541.1',
        '71.0.3541.0',
        '70.0.3538.5',
        '69.0.3497.80',
        '71.0.3540.1',
        '71.0.3540.0',
        '70.0.3538.4',
        '69.0.3497.79',
        '70.0.3538.3',
        '71.0.3539.1',
        '71.0.3539.0',
        '69.0.3497.78',
        '68.0.3440.134',
        '69.0.3497.77',
        '70.0.3538.2',
        '70.0.3538.1',
        '70.0.3538.0',
        '69.0.3497.76',
        '68.0.3440.133',
        '69.0.3497.75',
        '70.0.3537.2',
        '70.0.3537.1',
        '70.0.3537.0',
        '69.0.3497.74',
        '68.0.3440.132',
        '70.0.3536.0',
        '70.0.3535.5',
        '70.0.3535.4',
        '70.0.3535.3',
        '69.0.3497.73',
        '68.0.3440.131',
        '70.0.3532.8',
        '70.0.3532.7',
        '69.0.3497.72',
        '69.0.3497.71',
        '70.0.3535.2',
        '70.0.3535.1',
        '70.0.3535.0',
        '69.0.3497.70',
        '68.0.3440.130',
        '69.0.3497.69',
        '68.0.3440.129',
        '70.0.3534.4',
        '70.0.3534.3',
        '70.0.3534.2',
        '70.0.3534.1',
        '70.0.3534.0',
        '69.0.3497.68',
        '68.0.3440.128',
        '70.0.3533.2',
        '70.0.3533.1',
        '70.0.3533.0',
        '69.0.3497.67',
        '68.0.3440.127',
        '70.0.3532.6',
        '70.0.3532.5',
        '70.0.3532.4',
        '69.0.3497.66',
        '68.0.3440.126',
        '70.0.3532.3',
        '70.0.3532.2',
        '70.0.3532.1',
        '69.0.3497.60',
        '69.0.3497.65',
        '69.0.3497.64',
        '70.0.3532.0',
        '70.0.3531.0',
        '70.0.3530.4',
        '70.0.3530.3',
        '70.0.3530.2',
        '69.0.3497.58',
        '68.0.3440.125',
        '69.0.3497.57',
        '69.0.3497.56',
        '69.0.3497.55',
        '69.0.3497.54',
        '70.0.3530.1',
        '70.0.3530.0',
        '69.0.3497.53',
        '68.0.3440.124',
        '69.0.3497.52',
        '70.0.3529.3',
        '70.0.3529.2',
        '70.0.3529.1',
        '70.0.3529.0',
        '69.0.3497.51',
        '70.0.3528.4',
        '68.0.3440.123',
        '70.0.3528.3',
        '70.0.3528.2',
        '70.0.3528.1',
        '70.0.3528.0',
        '69.0.3497.50',
        '68.0.3440.122',
        '70.0.3527.1',
        '70.0.3527.0',
        '69.0.3497.49',
        '68.0.3440.121',
        '70.0.3526.1',
        '70.0.3526.0',
        '68.0.3440.120',
        '69.0.3497.48',
        '69.0.3497.47',
        '68.0.3440.119',
        '68.0.3440.118',
        '70.0.3525.5',
        '70.0.3525.4',
        '70.0.3525.3',
        '68.0.3440.117',
        '69.0.3497.46',
        '70.0.3525.2',
        '70.0.3525.1',
        '70.0.3525.0',
        '69.0.3497.45',
        '68.0.3440.116',
        '70.0.3524.4',
        '70.0.3524.3',
        '69.0.3497.44',
        '70.0.3524.2',
        '70.0.3524.1',
        '70.0.3524.0',
        '70.0.3523.2',
        '69.0.3497.43',
        '68.0.3440.115',
        '70.0.3505.9',
        '69.0.3497.42',
        '70.0.3505.8',
        '70.0.3523.1',
        '70.0.3523.0',
        '69.0.3497.41',
        '68.0.3440.114',
        '70.0.3505.7',
        '69.0.3497.40',
        '70.0.3522.1',
        '70.0.3522.0',
        '70.0.3521.2',
        '69.0.3497.39',
        '68.0.3440.113',
        '70.0.3505.6',
        '70.0.3521.1',
        '70.0.3521.0',
        '69.0.3497.38',
        '68.0.3440.112',
        '70.0.3520.1',
        '70.0.3520.0',
        '69.0.3497.37',
        '68.0.3440.111',
        '70.0.3519.3',
        '70.0.3519.2',
        '70.0.3519.1',
        '70.0.3519.0',
        '69.0.3497.36',
        '68.0.3440.110',
        '70.0.3518.1',
        '70.0.3518.0',
        '69.0.3497.35',
        '69.0.3497.34',
        '68.0.3440.109',
        '70.0.3517.1',
        '70.0.3517.0',
        '69.0.3497.33',
        '68.0.3440.108',
        '69.0.3497.32',
        '70.0.3516.3',
        '70.0.3516.2',
        '70.0.3516.1',
        '70.0.3516.0',
        '69.0.3497.31',
        '68.0.3440.107',
        '70.0.3515.4',
        '68.0.3440.106',
        '70.0.3515.3',
        '70.0.3515.2',
        '70.0.3515.1',
        '70.0.3515.0',
        '69.0.3497.30',
        '68.0.3440.105',
        '68.0.3440.104',
        '70.0.3514.2',
        '70.0.3514.1',
        '70.0.3514.0',
        '69.0.3497.29',
        '68.0.3440.103',
        '70.0.3513.1',
        '70.0.3513.0',
        '69.0.3497.28',
    )
    # Pick one version uniformly at random and splice it into the template.
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1671
1672
# Default HTTP headers attached to every request (see YoutubeDLHandler.http_request);
# the User-Agent is picked randomly once per process
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Alternative User-Agent strings for callers that need a specific browser
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Sentinel used to distinguish "no default supplied" from an explicit
# default of None (see xpath_element/xpath_text/xpath_attr)
NO_DEFAULT = object()

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names keyed by language code
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

# Timezone names for RFC2822 obs-zone
# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
TIMEZONE_NAMES = {
    'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
    'AST': -4, 'ADT': -3,  # Atlantic (used in Canada)
    'EST': -5, 'EDT': -4,  # Eastern
    'CST': -6, 'CDT': -5,  # Central
    'MST': -7, 'MDT': -6,  # Mountain
    'PST': -8, 'PDT': -7   # Pacific
}

# Media file extensions recognized elsewhere in the code base
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
# Maps each accented character to an ASCII transliteration
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

# strptime() formats tried in order when parsing dates
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y%m%d',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

# Additional formats for ambiguous dates interpreted day-first
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
    '%d-%m-%Y %H:%M',
])

# Additional formats for ambiguous dates interpreted month-first
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches the argument list of P.A.C.K.E.R.-style obfuscated JavaScript
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Captures inline JSON-LD metadata blocks in HTML pages
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1798
1799
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    encoding = 'UTF-8'  # safe fallback when the locale is unusable
    try:
        candidate = locale.getpreferredencoding()
        # Verify that the reported encoding actually works
        'TEST'.encode(candidate)
    except Exception:
        pass
    else:
        encoding = candidate
    return encoding
1813
1814
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        # (fix: the lambdas previously ignored their argument and closed
        # over fn, which only worked by accident because they happened to
        # always be called with fn itself)
        path_basename = lambda f: os.path.basename(f).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(f).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    # Create the temporary file next to the target so the final os.rename()
    # stays on one filesystem (atomic on POSIX)
    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # NamedTemporaryFile is created with mode 0600; widen it to the
            # umask default so the result looks like a normally-created file
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # On any failure remove the temporary file, then re-raise
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1873
1874
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z_-]+$', key)
        if val is None:
            predicate = '[@%s]' % key
        else:
            predicate = "[@%s='%s']" % (key, val)
        return node.find(xpath + predicate)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Fallback for Python < 2.7: no attribute predicates in find() """
        for candidate in node.findall(compat_xpath(xpath)):
            if key not in candidate.attrib:
                continue
            if val is None or candidate.attrib.get(key) == val:
                return candidate
        return None
1889
1890 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1891 # the namespace parameter
1892
1893
def xpath_with_ns(path, ns_map):
    """Expand 'ns:tag' steps in path to '{uri}tag' using the ns_map mapping."""
    def expand(component):
        parts = component.split(':')
        if len(parts) == 1:
            return parts[0]
        ns, tag = parts
        return '{%s}%s' % (ns_map[ns], tag)

    return '/'.join(expand(c) for c in path.split('/'))
1904
1905
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element matching xpath (a single expression, or an
    iterable of expressions tried in order).

    Returns the element; `default` (when given) if nothing matched; raises
    ExtractorError when nothing matched and fatal is set; otherwise None.
    """
    targets = [xpath] if isinstance(xpath, (str, compat_str)) else xpath
    for xp in targets:
        n = node.find(compat_xpath(xp))
        if n is not None:
            break

    if n is not None:
        return n
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element %s' % (name if name is not None else xpath))
    return None
1927
1928
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Like xpath_element(), but return the matched element's text."""
    n = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if n is None or n == default:
        return n
    if n.text is not None:
        return n.text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element\'s text %s' % (name if name is not None else xpath))
    return None
1942
1943
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Find an element carrying attribute `key` via find_xpath_attr() and
    return that attribute's value (default/None/raise like xpath_element)."""
    n = find_xpath_attr(node, xpath, key)
    if n is not None:
        return n.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        if name is None:
            name = '%s[@%s]' % (xpath, key)
        raise ExtractorError('Could not find XML attribute %s' % name)
    return None
1955
1956
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # Thin wrapper around get_element_by_attribute for the common id= case
    return get_element_by_attribute('id', id, html)
1960
1961
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_by_class(class_name, html)
    return matches[0] if matches else None
1966
1967
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag with the specified attribute/value pair."""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    return matches[0] if matches else None
1971
1972
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # Word-boundary match anywhere inside a (possibly multi-valued) class attribute
    class_pattern = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_pattern, html, escape_value=False)
1978
1979
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the tag with the specified attribute in the passed HTML document"""
    if escape_value:
        value = re.escape(value)

    pattern = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s+%s=['"]?%s['"]?
        (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
        ''' % (re.escape(attribute), value)

    results = []
    for match in re.finditer(pattern, html):
        content = match.group('content')

        # Drop surrounding quotes captured together with the content
        if content[:1] in ('"', "'"):
            content = content[1:-1]

        results.append(unescapeHTML(content))

    return results
2003
2004
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""
    def __init__(self):
        # attrs holds the attribute dict of the last start tag seen
        self.attrs = {}
        compat_HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        # attrs is a list of (name, value) pairs; value is None for
        # valueless attributes
        self.attrs = dict(attrs)
2013
2014
def extract_attributes(html_element):
    """Parse the opening tag in html_element and return its attributes as a
    dict: names lowercased, entity references decoded, valueless attributes
    mapped to None, e.g.
        <el a="foo" empty= noval>  ->  {'a': 'foo', 'empty': '', 'noval': None}
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML
        pass
    return parser.attrs
2039
2040
def clean_html(html):
    """Clean an HTML snippet into a readable string"""
    if html is None:  # Convenience for sanitizing descriptions etc.
        return None

    # HTML treats literal newlines as plain whitespace; real line breaks
    # come from <br> tags and paragraph boundaries
    text = html.replace('\n', ' ')
    text = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', text)
    text = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', text)
    # Strip html tags
    text = re.sub('<.*?>', '', text)
    # Replace html entities
    text = unescapeHTML(text)
    return text.strip()
2056
2057
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                # Switch stdout to binary mode so newlines are not translated
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        if err.errno in (errno.EACCES,):
            # A permission error will not be fixed by renaming; give up
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
2088
2089
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp
    (None when the string cannot be parsed)."""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        return None
    return email.utils.mktime_tz(parsed)
2097
2098
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def _sanitize_char(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        code = ord(char)
        if char == '?' or code < 32 or code == 127:
            return ''
        if char == '"':
            return '' if restricted else '\''
        if char == ':':
            return '_-' if restricted else ' -'
        if char in '\\/|*<>':
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            return '_'
        if restricted and code > 127:
            return '_'
        return char

    if restricted and not is_id:
        # Replace look-alike Unicode glyphs
        s = unicodedata.normalize('NFKC', s)
    # Keep timestamps readable: 12:34:56 -> 12_34_56
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(_sanitize_char(c) for c in s)
    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[1:]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
2141
2142
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows"""
    if sys.platform != 'win32':
        # Only Windows needs this treatment
        return s
    drive_or_unc, _ = os.path.splitdrive(s)
    if sys.version_info < (2, 7) and not drive_or_unc:
        drive_or_unc, _ = os.path.splitunc(s)
    parts = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        parts.pop(0)
    sanitized = []
    for part in parts:
        if part in ('.', '..'):
            sanitized.append(part)
        else:
            # Replace forbidden characters and trailing dots/spaces
            sanitized.append(re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', part))
    if drive_or_unc:
        sanitized.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized)
2159
2160
def sanitize_url(url):
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    for mistake, fixup in (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    ):
        fixed, count = re.subn(mistake, fixup, url)
        if count:
            return fixed
    return escape_url(url)
2177
2178
def sanitized_Request(url, *args, **kwargs):
    # Build a Request with the URL run through sanitize_url() first
    # (scheme fix-ups, escaping); extra args go to the Request constructor
    return compat_urllib_request.Request(sanitize_url(url), *args, **kwargs)
2181
2182
def expand_path(s):
    """Expand shell variables and ~"""
    # $VARS/%VARS% first, then the leading ~
    return os.path.expandvars(compat_expanduser(s))
2186
2187
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable, keeping first-seen order """
    # A linear membership test is kept on purpose: elements are not
    # required to be hashable
    result = []
    for item in iterable:
        if item in result:
            continue
        result.append(item)
    return result
2195
2196
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    codepoint = compat_html_entities.name2codepoint.get(entity)
    if codepoint is not None:
        return compat_chr(codepoint)

    # TODO: HTML5 allows entities without a semicolon. For example,
    # '&Eacute;ric' should be decoded as 'Éric'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(numstr, base))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2226
2227
def unescapeHTML(s):
    """Replace HTML entity references in s with the characters they denote."""
    if s is None:
        return None
    assert type(s) == compat_str

    def _replace(m):
        return _htmlentity_transform(m.group(1))

    return re.sub(r'&([^&;]+;)', _replace, s)
2235
2236
def process_communicate_or_kill(p, *args, **kwargs):
    """communicate() with the process; if anything interrupts that
    (including KeyboardInterrupt), kill the process before re-raising."""
    try:
        result = p.communicate(*args, **kwargs)
    except BaseException:
        p.kill()
        p.wait()
        raise
    return result
2244
2245
def get_subprocess_encoding():
    """Return the encoding used for data exchanged with subprocesses."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()
    encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
2256
2257
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """

    assert type(s) == compat_str

    if sys.version_info >= (3, 0):
        # Python 3 has a Unicode API
        return s

    if sys.platform.startswith('java'):
        # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
        return s

    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # Pass '' directly to use Unicode APIs on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2280
2281
def decodeFilename(b, for_subprocess=False):
    """Inverse of encodeFilename: turn a byte filename back into text on
    Python 2; pass everything through unchanged on Python 3."""
    if sys.version_info >= (3, 0) or not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2291
2292
def encodeArgument(s):
    # Encode a command-line argument for subprocess use; byte strings from
    # legacy callers are first decoded as ASCII
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)
2300
2301
def decodeArgument(b):
    # Inverse of encodeArgument: decode a subprocess argument (Python 2)
    return decodeFilename(b, True)
2304
2305
def decodeOption(optval):
    """Decode a command-line option value to text; None passes through."""
    if optval is None:
        return None
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, compat_str)
    return optval
2314
2315
def formatSeconds(secs):
    """Format a duration in seconds as H:MM:SS, M:SS, or plain seconds.

    Fixes an off-by-one at the boundaries: exactly 3600 s used to render as
    '60:00' and exactly 60 s as '60' because of strict '>' comparisons.
    """
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
2323
2324
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler with an SSL context suited to the
    running Python version, honouring the 'nocheckcertificate' option."""

    # https://www.rfc-editor.org/info/rfc7301
    ALPN_PROTOCOLS = ['http/1.1']

    def set_alpn_protocols(ctx):
        # From https://github.com/yt-dlp/yt-dlp/commit/2c6dcb65fb612fc5bc5c61937bf438d3c473d8d0
        # Thanks @coletdjnz
        # Some servers may (wrongly) reject requests if ALPN extension is not sent. See:
        # https://github.com/python/cpython/issues/85140
        # https://github.com/yt-dlp/yt-dlp/issues/3878
        try:
            ctx.set_alpn_protocols(ALPN_PROTOCOLS)
        except (AttributeError, NotImplementedError):
            # Python < 2.7.10, not ssl.HAS_ALPN
            pass

    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        set_alpn_protocols(context)
        if opts_no_check_certificate:
            # check_hostname must be disabled before CERT_NONE can be set
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE

        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        set_alpn_protocols(context)
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2367
2368
def bug_reports_message():
    """Build the boilerplate appended to unexpected-error messages."""
    if ytdl_is_updateable():
        update_cmd = 'type youtube-dl -U to update'
    else:
        update_cmd = 'see https://yt-dl.org/update on how to update'
    return ''.join([
        '; please report this issue on https://yt-dl.org/bug .',
        ' Make sure you are using the latest version; %s.' % update_cmd,
        ' Be sure to call youtube-dl with the --verbose flag and include its complete output.',
    ])
2378
2379
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
    # All custom exceptions below derive from this so callers can catch
    # every youtube-dl failure with a single except clause
    pass
2383
2384
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """

        # Network-level failures are always treated as "expected"
        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            # Unexpected errors get the bug-report boilerplate appended
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        # Render the stored traceback as a printable string (None if absent)
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
2412
2413
class UnsupportedError(ExtractorError):
    """Raised for URLs that are not supported; always 'expected'."""
    def __init__(self, url):
        super(UnsupportedError, self).__init__(
            'Unsupported URL: %s' % url, expected=True)
        # Keep the offending URL for callers that want to inspect it
        self.url = url
2419
2420
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass
2424
2425
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """
    def __init__(self, msg, countries=None):
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.msg = msg
        # Countries from which the video is reported to be available, if known
        self.countries = countries
2436
2437
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        # Original sys.exc_info() triple, if supplied by the caller
        self.exc_info = exc_info
2450
2451
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    pass
2459
2460
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """

    def __init__(self, msg):
        super(PostProcessingError, self).__init__(msg)
        # Keep the raw message accessible to callers
        self.msg = msg
2471
2472
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
    pass
2476
2477
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    This exception will be thrown when a video is requested
    in a format that is not available for that video.
    """
    pass
2485
2486
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        super(ContentTooShortError, self).__init__(
            'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        )
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
2502
2503
class XAttrMetadataError(YoutubeDLError):
    """Raised when writing extended file attributes fails; classifies the
    failure into a machine-readable `reason` attribute."""
    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        # OS error code, if one was available
        self.code = code
        self.msg = msg

        # Parsing code and msg
        if (self.code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2518
2519
class XAttrUnavailableError(YoutubeDLError):
    # Raised when no usable xattr implementation is available at all
    pass
2522
2523
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Create an HTTP(S) connection object, honouring the 'source_address'
    setting from the handler's params."""
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # Pick the address family matching the source address notation
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2587
2588
def handle_youtubedl_headers(headers):
    """Strip the internal Youtubedl-no-compression pseudo-header, removing
    Accept-Encoding (any case) along with it; other headers pass through."""
    if 'Youtubedl-no-compression' not in headers:
        return headers
    filtered = dict((k, v) for k, v in headers.items() if k.lower() != 'accept-encoding')
    del filtered['Youtubedl-no-compression']
    return filtered
2597
2598
2599 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
2600 """Handler for HTTP requests and responses.
2601
2602 This class, when installed with an OpenerDirector, automatically adds
2603 the standard headers to every HTTP request and handles gzipped and
2604 deflated responses from web servers. If compression is to be avoided in
2605 a particular request, the original request in the program code only has
2606 to include the HTTP header "Youtubedl-no-compression", which will be
2607 removed before making the real request.
2608
2609 Part of this code was copied from:
2610
2611 http://techknack.net/python-urllib2-handlers/
2612
2613 Andrew Rowls, the author of that code, agreed to release it to the
2614 public domain.
2615 """
2616
    def __init__(self, params, *args, **kwargs):
        # Keep a reference to the YoutubeDL params dict; it is read by
        # _create_http_connection (e.g. the 'source_address' option)
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params
2620
2621 def http_open(self, req):
2622 conn_class = compat_http_client.HTTPConnection
2623
2624 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2625 if socks_proxy:
2626 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2627 del req.headers['Ytdl-socks-proxy']
2628
2629 return self.do_open(functools.partial(
2630 _create_http_connection, self, conn_class, False),
2631 req)
2632
2633 @staticmethod
2634 def deflate(data):
2635 try:
2636 return zlib.decompress(data, -zlib.MAX_WBITS)
2637 except zlib.error:
2638 return zlib.decompress(data)
2639
2640 def http_request(self, req):
2641 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
2642 # always respected by websites, some tend to give out URLs with non percent-encoded
2643 # non-ASCII characters (see telemb.py, ard.py [#3412])
2644 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2645 # To work around aforementioned issue we will replace request's original URL with
2646 # percent-encoded one
2647 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2648 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2649 url = req.get_full_url()
2650 url_escaped = escape_url(url)
2651
2652 # Substitute URL if any change after escaping
2653 if url != url_escaped:
2654 req = update_Request(req, url=url_escaped)
2655
2656 for h, v in std_headers.items():
2657 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2658 # The dict keys are capitalized because of this bug by urllib
2659 if h.capitalize() not in req.headers:
2660 req.add_header(h, v)
2661
2662 req.headers = handle_youtubedl_headers(req.headers)
2663
2664 if sys.version_info < (2, 7) and '#' in req.get_full_url():
2665 # Python 2.6 is brain-dead when it comes to fragments
2666 req._Request__original = req._Request__original.partition('#')[0]
2667 req._Request__r_type = req._Request__r_type.partition('#')[0]
2668
2669 return req
2670
2671 def http_response(self, req, resp):
2672 old_resp = resp
2673 # gzip
2674 if resp.headers.get('Content-encoding', '') == 'gzip':
2675 content = resp.read()
2676 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2677 try:
2678 uncompressed = io.BytesIO(gz.read())
2679 except IOError as original_ioerror:
2680 # There may be junk add the end of the file
2681 # See http://stackoverflow.com/q/4928560/35070 for details
2682 for i in range(1, 1024):
2683 try:
2684 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2685 uncompressed = io.BytesIO(gz.read())
2686 except IOError:
2687 continue
2688 break
2689 else:
2690 raise original_ioerror
2691 resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
2692 resp.msg = old_resp.msg
2693 del resp.headers['Content-encoding']
2694 # deflate
2695 if resp.headers.get('Content-encoding', '') == 'deflate':
2696 gz = io.BytesIO(self.deflate(resp.read()))
2697 resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
2698 resp.msg = old_resp.msg
2699 del resp.headers['Content-encoding']
2700 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
2701 # https://github.com/ytdl-org/youtube-dl/issues/6457).
2702 if 300 <= resp.code < 400:
2703 location = resp.headers.get('Location')
2704 if location:
2705 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2706 if sys.version_info >= (3, 0):
2707 location = location.encode('iso-8859-1').decode('utf-8')
2708 else:
2709 location = location.decode('utf-8')
2710 location_escaped = escape_url(location)
2711 if location != location_escaped:
2712 del resp.headers['Location']
2713 if sys.version_info < (3, 0):
2714 location_escaped = location_escaped.encode('utf-8')
2715 resp.headers['Location'] = location_escaped
2716 return resp
2717
2718 https_request = http_request
2719 https_response = http_response
2720
2721
def make_socks_conn_class(base_class, socks_proxy):
    """Build a connection class that tunnels through a SOCKS proxy.

    base_class must be HTTPConnection or HTTPSConnection (or a subclass);
    socks_proxy is a URL like 'socks5://user:pass@host:port'. Supported
    schemes: socks/socks4 (SOCKS4), socks4a (SOCKS4A), socks5 (SOCKS5).

    Raises ValueError on an unsupported scheme (previously this fell
    through and crashed later with UnboundLocalError on socks_type).
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        raise ValueError('Unsupported SOCKS scheme: %s' % url_components.scheme)

    def unquote_if_non_empty(s):
        # Credentials may be percent-encoded in the proxy URL
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            # For HTTPS, wrap the tunneled socket in TLS
            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2763
2764
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler with a pluggable connection class and SOCKS support."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._params = params
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection

    def https_open(self, req):
        conn_class = self._https_conn_class

        # Forward TLS context / hostname checking options when the base
        # handler exposes them (availability differs across Python versions).
        extra_kwargs = {}
        if hasattr(self, '_context'):  # python > 2.6
            extra_kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            extra_kwargs['check_hostname'] = self._check_hostname

        # Internal pseudo-header selecting a SOCKS proxy; strip before sending.
        proxy_url = req.headers.get('Ytdl-socks-proxy')
        if proxy_url:
            conn_class = make_socks_conn_class(conn_class, proxy_url)
            del req.headers['Ytdl-socks-proxy']

        return self.do_open(
            functools.partial(_create_http_connection, self, conn_class, True),
            req, **extra_kwargs)
2788
2789
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix marking HttpOnly cookies in Netscape-format cookie files
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a valid cookie file entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by youtube-dl. Do not edit.

'''
    # Structured view of one cookie file line (tab-separated fields)
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                # Honor discard/expiry flags unless the caller overrides them
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Strip the #HttpOnly_ prefix so the entry parses as a normal
            # cookie line (the base class would otherwise treat it as a
            # comment and drop it).
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    # Skip malformed entries instead of failing the whole load
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2906
2907
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that applies the same cookie handling to HTTPS."""

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # A percent-encoding workaround for the Set-Cookie header used to
        # live here; it is currently disabled. NOTE(review): confirm the
        # workaround is no longer needed before removing this note.
        return compat_urllib_request.HTTPCookieProcessor.http_response(
            self, request, response)

    # HTTPS traffic shares the plain-HTTP cookie logic
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2930
2931
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    # Handle 301/303/307/308 exactly like 302 (adds 308 support, which the
    # base class does not know about)
    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        # Only GET/HEAD may follow any 30x; POST may only follow 301-303.
        # NOTE(review): req.full_url is the Python 3 attribute name; on
        # Python 2 this raise path would need req.get_full_url() — confirm.
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # On python 2 urlh.geturl() may sometimes return redirect URL
        # as byte string instead of unicode. This workaround allows
        # to force it always return unicode.
        if sys.version_info[0] < 3:
            newurl = compat_str(newurl)

        # Be conciliant with URIs containing a space.  This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # Drop body-describing headers: the redirected request is sent
        # without the original request body.
        CONTENT_HEADERS = ("content-length", "content-type")
        # NB: don't use dict comprehension for python 2.6 compatibility
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True)
2987
2988
def extract_timezone(date_str):
    """Split a date string into (timezone, remaining date string).

    Returns a datetime.timedelta for the timezone offset — always a
    timedelta, zero when no (or an unrecognized) timezone is present —
    and the date string with the recognized timezone portion removed.
    """
    m = re.search(
        r'''(?x)
            ^.{8,}?                                              # >=8 char non-TZ prefix, if present
            (?P<tz>Z|                                            # just the UTC Z, or
                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or
                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits
                [ ]?                                             # optional space
                (?P<sign>\+|-)                                   # +/-
                (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})       # hh[:]mm
            $)
        ''', date_str)
    if not m:
        # Fall back to a trailing timezone *name* (e.g. 'EST') looked up
        # in TIMEZONE_NAMES.
        m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
        timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
        if timezone is not None:
            date_str = date_str[:-len(m.group('tz'))]
        # Always coerce to a timedelta: an unknown/missing name counts as
        # UTC. Previously a failed lookup could leak `None` out of this
        # function and break `dt - timezone` arithmetic in callers.
        timezone = datetime.timedelta(hours=timezone or 0)
    else:
        date_str = date_str[:-len(m.group('tz'))]
        if not m.group('sign'):
            # Matched the bare 'Z' (UTC) alternative
            timezone = datetime.timedelta()
        else:
            sign = 1 if m.group('sign') == '+' else -1
            timezone = datetime.timedelta(
                hours=sign * int(m.group('hours')),
                minutes=sign * int(m.group('minutes')))
    return timezone, date_str
3017
3018
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """
    if date_str is None:
        return None

    # strptime's %S cannot parse fractional seconds; drop them.
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    fmt = '%Y-%m-%d' + delimiter + '%H:%M:%S'
    try:
        dt = datetime.datetime.strptime(date_str, fmt)
    except ValueError:
        return None
    return calendar.timegm((dt - timezone).timetuple())
3036
3037
def date_formats(day_first=True):
    """Return the strptime format list to try: day-first or month-first."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
3040
3041
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""
    if date_str is None:
        return None

    upload_date = None
    # Commas carry no information here; treat them as spaces.
    date_str = date_str.replace(',', ' ')
    # Drop AM/PM markers plus a trailing timezone abbreviation, then any
    # recognized timezone offset.
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    # Deliberately try *every* format (no break): the last one that
    # parses wins.
    for fmt in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, fmt).strftime('%Y%m%d')
        except ValueError:
            pass

    if upload_date is None:
        # Last resort: RFC 2822-style parsing
        parsed = email.utils.parsedate_tz(date_str)
        if parsed:
            try:
                upload_date = datetime.datetime(*parsed[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    if upload_date is not None:
        return compat_str(upload_date)
3068
3069
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp for a free-form date string, or None.

    day_first selects whether ambiguous numeric dates are tried as
    day-first (True) or month-first (False) formats.
    """
    if date_str is None:
        return None

    # Strip commas/pipes and (abbreviated) weekday names, then collapse
    # whitespace; none of these affect the actual date value.
    date_str = re.sub(r'\s+', ' ', re.sub(
        r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))

    # Remember a PM marker before it is stripped below; 12 hours are added
    # back after parsing.
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    # Last resort: RFC 2822-style parsing
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
3102
3103
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from a URL, falling back to default_ext."""
    if url is None or '.' not in url:
        return default_ext
    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    # URLs like http://example.com/foo/bar.mp4/?download keep a trailing
    # slash after the extension; accept it only for known extensions.
    stripped = candidate.rstrip('/')
    if stripped in KNOWN_EXTENSIONS:
        return stripped
    return default_ext
3115
3116
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Derive a subtitle file name of the form <base>.<lang>.<format>."""
    sub_ext = '%s.%s' % (sub_lang, sub_format)
    return replace_extension(filename, sub_ext, expected_real_ext)
3119
3120
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is None:
        # Plain absolute date
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()
    amount = int(match.group('time'))
    if match.group('sign') == '-':
        amount = -amount
    unit = match.group('unit')
    # Months and years have no fixed length; approximate with 30/365 days.
    if unit == 'month':
        unit, amount = 'day', amount * 30
    elif unit == 'year':
        unit, amount = 'day', amount * 365
    return today + datetime.timedelta(**{unit + 's': amount})
3148
3149
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    # Anything that is not exactly 8 digits is passed through unchanged.
    return '-'.join(match.groups()) if match else date_str
3158
3159
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3189
3190
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # Python 2 may hand back bytes; normalize to text first.
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
3199
3200
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070
    # Writes Unicode to a real Windows console via WriteConsoleW, which
    # handles characters the console codepage cannot represent.

    import ctypes
    import ctypes.wintypes

    # Map C file descriptors to the Win32 standard-handle IDs
    # (STD_OUTPUT_HANDLE = -11, STD_ERROR_HANDLE = -12)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # WriteConsoleW only works on an actual console; redirected
        # handles (files, pipes) must fall back to the normal write path.
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual
        # Plane (needs a UTF-16 surrogate pair), or len(s) if none.
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write BMP characters in chunks of up to 1024; a non-BMP
        # character is written alone as its two UTF-16 code units.
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3274
3275
def write_string(s, out=None, encoding=None):
    """Write the text *s* to *out* (stderr by default), coping with
    byte streams, Windows consoles and Python 2/3 differences."""
    out = sys.stderr if out is None else out
    assert type(s) == compat_str

    # On Windows, try the console-specific Unicode path first.
    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '')
            or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        out.write(s.encode(encoding or preferredencoding(), 'ignore'))
    elif hasattr(out, 'buffer'):
        # Text stream with an underlying binary buffer: encode ourselves
        # so we can ignore unencodable characters.
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        out.buffer.write(s.encode(enc, 'ignore'))
    else:
        out.write(s)
    out.flush()
3296
3297
def bytes_to_intlist(bs):
    """Turn a bytes (or Python 2 str) sequence into a list of byte values."""
    if not bs:
        return []
    # Python 3 bytes index to ints already; Python 2 str indexes to
    # one-character strings that need ord().
    if isinstance(bs[0], int):
        return list(bs)
    return [ord(c) for c in bs]
3305
3306
def intlist_to_bytes(xs):
    """Pack a list of byte values (0-255) back into a bytes object."""
    return compat_struct_pack('%dB' % len(xs), *xs) if xs else b''
3311
3312
# Cross-platform file locking
# Defines _lock_file(f, exclusive) / _unlock_file(f) using LockFileEx on
# Windows, fcntl.flock elsewhere, and raising stubs where neither exists.
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Mirrors the Win32 OVERLAPPED structure required by
        # LockFileEx/UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Lock the maximum possible byte range (low/high dword halves)
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Keep the OVERLAPPED pointer alive on the file object; it is
        # reused by _unlock_file
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        # 0x2 = LOCKFILE_EXCLUSIVE_LOCK
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, is missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3386
3387
class locked_file(object):
    """File wrapper holding an advisory lock for the duration of a
    ``with`` block: shared for 'r', exclusive for 'a'/'w'."""

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        try:
            # Reading only needs a shared lock; writing needs exclusivity.
            _lock_file(self.f, self.mode != 'r')
        except IOError:
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3417
3418
def get_filesystem_encoding():
    """Return sys.getfilesystemencoding(), defaulting to 'utf-8' when unset."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
3422
3423
def shell_quote(args):
    """Join *args* into one shell-safe command line string."""
    fs_encoding = get_filesystem_encoding()

    def _as_text(a):
        # We may get a filename encoded with 'encodeFilename'
        return a.decode(fs_encoding) if isinstance(a, bytes) else a

    return ' '.join(compat_shlex_quote(_as_text(a)) for a in args)
3433
3434
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    # Merge with any data already smuggled into the URL's fragment.
    url, existing = unsmuggle_url(url, {})
    data.update(existing)
    smuggled = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return url + '#' + smuggled
3443
3444
def unsmuggle_url(smug_url, default=None):
    """Inverse of smuggle_url(): return (clean_url, data), or
    (smug_url, default) when nothing was smuggled."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, fragment = smug_url.rpartition('#')
    payload = compat_parse_qs(fragment)['__youtubedl_smuggle'][0]
    return url, json.loads(payload)
3452
3453
def format_bytes(bytes):
    """Format a byte count human-readably (e.g. '1.50KiB'); 'N/A' for None."""
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    exponent = 0 if bytes == 0.0 else int(math.log(bytes, 1024.0))
    suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
    return '%.2f%s' % (float(bytes) / float(1024 ** exponent), suffix)
3466
3467
def lookup_unit_table(unit_table, s):
    """Parse '<number><unit>' using *unit_table* (unit -> multiplier).

    Returns the scaled value as an int, or None when *s* doesn't match.
    """
    units_re = '|'.join(re.escape(unit) for unit in unit_table)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if m is None:
        return None
    # Accept a decimal comma as well as a decimal point.
    number = float(m.group('num').replace(',', '.'))
    return int(number * unit_table[m.group('unit')])
3477
3478
def parse_filesize(s):
    """Parse a human-readable file size string into a number of bytes.

    Both decimal (KB/MB/...) and binary (KiB/MiB/...) multipliers are in
    the table below; returns None when s is None or unparsable.
    """
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'kb': 1000,
        'kilobytes': 1000,
        'kibibytes': 1024,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'mb': 1000 ** 2,
        'megabytes': 1000 ** 2,
        'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3,
        'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'tb': 1000 ** 4,
        'terabytes': 1000 ** 4,
        'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'pb': 1000 ** 5,
        'petabytes': 1000 ** 5,
        'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'eb': 1000 ** 6,
        'exabytes': 1000 ** 6,
        'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7,
        'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
        'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8,
        'yobibytes': 1024 ** 8,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
3548
3549
def parse_count(s):
    """Parse a human-readable count like '1.2M' into an int, or None."""
    if s is None:
        return None

    s = s.strip()

    # Purely numeric (possibly with thousands separators): parse directly.
    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    return lookup_unit_table({
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }, s)
3569
3570
def parse_resolution(s):
    """Extract width/height hints from a resolution-like string.

    Understands '1920x1080' style, '720p'/'1080i' style and '4k'/'8k';
    returns a (possibly empty) dict with 'width' and/or 'height'.
    """
    if s is None:
        return {}

    m = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if m:
        return {'width': int(m.group('w')), 'height': int(m.group('h'))}

    m = re.search(r'\b(\d+)[pPiI]\b', s)
    if m:
        return {'height': int(m.group(1))}

    # '4k' -> 2160, '8k' -> 4320
    m = re.search(r'\b([48])[kK]\b', s)
    if m:
        return {'height': int(m.group(1)) * 540}

    return {}
3591
3592
def parse_bitrate(s):
    """Extract an integer bitrate in kbps from a string, or None."""
    if not isinstance(s, compat_str):
        return
    mobj = re.search(r'\b(\d+)\s*kbps', s)
    return int(mobj.group(1)) if mobj else None
3599
3600
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """

    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])

    # list.index raises ValueError for unknown names; check membership first.
    if name in month_names:
        return month_names.index(name) + 1
    return None
3610
3611
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
    abbreviations """

    abbreviations = [full_name[:3] for full_name in ENGLISH_MONTH_NAMES]
    try:
        return abbreviations.index(abbrev) + 1
    except ValueError:
        return None
3620
3621
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    # A bare '&' is invalid XML.  Escape it as '&amp;' unless it already
    # starts an entity or character reference.  The previous replacement
    # string was a plain '&', which made the substitution a no-op.
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        '&amp;',
        xml_str)
3628
3629
def setproctitle(title):
    # Set the process name shown by tools like ps/top via glibc's
    # prctl(PR_SET_NAME); silently does nothing where unsupported.
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        # No glibc with that soname (e.g. Windows, musl): skip quietly.
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # 15 is PR_SET_NAME on Linux.
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3654
3655
def remove_start(s, start):
    """Strip a leading `start` from `s` if present; None passes through."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
3658
3659
def remove_end(s, end):
    """Strip a trailing `end` from `s` if present; None passes through."""
    if s is None or not s.endswith(end):
        return s
    return s[:-len(end)]
3662
3663
def remove_quotes(s):
    """Strip one matching pair of surrounding single or double quotes."""
    if s is None or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
3671
3672
def url_basename(url):
    """Return the last path component of a URL (query/fragment ignored)."""
    path = compat_urlparse.urlparse(url).path
    # Drop trailing slashes, then take everything after the last '/'.
    return path.rstrip('/').rpartition('/')[2]
3676
3677
def base_url(url):
    """Return the URL up to and including the last '/' before any query."""
    # .group() on a failed match raises AttributeError; callers expect a match.
    prefix = re.match(r'https?://[^?#&]+/', url)
    return prefix.group(0)
3680
3681
def urljoin(base, path):
    """Join base and path like urlparse.urljoin, returning None on bad input."""
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not path or not isinstance(path, compat_str):
        return None
    # Already absolute (has a scheme) or protocol-relative: use as-is.
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not (isinstance(base, compat_str)
            and re.match(r'^(?:https?:)?//', base)):
        return None
    return compat_urlparse.urljoin(base, path)
3695
3696
class HEADRequest(compat_urllib_request.Request):
    """A urllib Request whose HTTP method is forced to HEAD."""
    def get_method(self):
        return 'HEAD'
3700
3701
class PUTRequest(compat_urllib_request.Request):
    """A urllib Request whose HTTP method is forced to PUT."""
    def get_method(self):
        return 'PUT'
3705
3706
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Coerce v to int (scaled by invscale/scale), returning default on failure.

    When get_attr is given, v is first replaced by getattr(v, get_attr).
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v in (None, ''):
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        return default
3717
3718
def str_or_none(v, default=None):
    """Coerce v to compat_str, or return default when v is None."""
    if v is None:
        return default
    return compat_str(v)
3721
3722
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        # Drop thousands separators and stray '+' signs before parsing.
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
3730
3731
def float_or_none(v, scale=1, invscale=1, default=None):
    """Coerce v to float (scaled by invscale/scale), returning default on failure."""
    if v is None:
        return default
    try:
        result = float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
    return result
3739
3740
def bool_or_none(v, default=None):
    """Return v when it is a real bool, otherwise default."""
    if isinstance(v, bool):
        return v
    return default
3743
3744
def strip_or_none(v, default=None):
    """Return v.strip() for strings, otherwise default."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3747
3748
def url_or_none(url):
    """Return the stripped URL when it looks like a URL we handle, else None."""
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url):
        return url
    return None
3754
3755
def parse_duration(s):
    """Parse a duration string into seconds (float), or return None.

    Accepts colon notation ('1:23:45'), unit-suffixed / ISO 8601-ish forms
    ('PT1H23M', '1 hour 23 mins') and bare fractional hours/minutes.
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # 1) Colon-separated clock notation: [[[DD:]HH:]MM:]SS[.ms]
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # 2) Unit-suffixed notation (loosely covers ISO 8601 durations).
        # Years, months and weeks are matched but not captured, so they do
        # not contribute to the result.
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # 3) Bare fractional 'X hours' or 'X minutes'.
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    # Sum whatever components were captured (all still strings here).
    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        duration += float(ms)
    return duration
3812
3813
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert ext before the real extension: 'a.mp4' -> 'a.ext.mp4'.

    If expected_real_ext is given and does not match the actual extension,
    ext is appended after the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
3820
3821
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the file extension with ext: 'a.mp4' -> 'a.ext'.

    If expected_real_ext is given and does not match the actual extension,
    ext is appended to the whole filename instead of replacing anything.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        name = filename
    return '{0}.{1}'.format(name, ext)
3827
3828
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    try:
        proc = subprocess.Popen(
            [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process_communicate_or_kill(proc)
    except OSError:
        return False
    return exe
3838
3839
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dl is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        proc = subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        out = process_communicate_or_kill(proc)[0]
    except OSError:
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    return detect_exe_version(out, version_re, unrecognized)
3857
3858
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from program output, or return `unrecognized`."""
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    m = re.search(version_re, output)
    return m.group(1) if m else unrecognized
3868
3869
class PagedList(object):
    """Abstract base for lazily paged lists; subclasses implement getslice()."""
    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())
3874
3875
class OnDemandPagedList(PagedList):
    """PagedList that fetches pages on demand via pagefunc(pagenum)."""
    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            # Maps page number -> list of that page's results.
            self._cache = {}

    def getslice(self, start=0, end=None):
        # Collect items in [start, end), fetching only overlapping pages.
        res = []
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = None
            if self._use_cache:
                page_results = self._cache.get(pagenum)
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = page_results

            # Offsets of the requested slice within this page.
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
3926
3927
class InAdvancePagedList(PagedList):
    """PagedList whose total page count is known in advance."""
    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        res = []
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        # Leading items of the first page that fall before `start`.
        skip_elems = start - start_page * self._pagesize
        # Remaining number of wanted items, or None for "all".
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page completes the request; trim and stop.
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
3955
3956
def uppercase_escape(s):
    """Decode \\UXXXXXXXX (8 hex digit) escape sequences found in s."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda m: decode(m.group(0))[0],
        s)
3963
3964
def lowercase_escape(s):
    """Decode \\uXXXX (4 hex digit) escape sequences found in s."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda m: decode(m.group(0))[0],
        s)
3971
3972
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # Reserved characters (plus '%') that must survive unescaped.
    safe_chars = b"%/;:@&=+$,!~*'()?#[]"
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    # ensure unicode: after quoting, it can always be converted
    return compat_str(compat_urllib_parse.quote(s, safe_chars))
3979
3980
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parts = compat_urllib_parse_urlparse(url)
    # The host is IDNA-encoded; every other component is percent-escaped.
    return parts._replace(
        netloc=parts.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parts.path),
        params=escape_rfc3986(parts.params),
        query=escape_rfc3986(parts.query),
        fragment=escape_rfc3986(parts.fragment),
    ).geturl()
3991
3992
def read_batch_urls(batch_fd):
    """Read URLs from a file-like object, skipping comments and a UTF-8 BOM."""
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        BOM_UTF8 = '\xef\xbb\xbf'
        if url.startswith(BOM_UTF8):
            url = url[len(BOM_UTF8):]
        url = url.strip()
        # Lines starting with '#', ';' or ']' are treated as comments.
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        urls = []
        for line in fd:
            cleaned = fixup(line)
            if cleaned:
                urls.append(cleaned)
        return urls
4007
4008
def urlencode_postdata(*args, **kargs):
    # URL-encode the given query and return ASCII bytes suitable for use
    # as an HTTP POST body.
    return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
4011
4012
def update_url_query(url, query):
    """Return url with the items of `query` merged into its query string."""
    if not query:
        return url
    parsed = compat_urlparse.urlparse(url)
    merged_qs = compat_parse_qs(parsed.query)
    merged_qs.update(query)
    return compat_urlparse.urlunparse(parsed._replace(
        query=compat_urllib_parse_urlencode(merged_qs, True)))
4021
4022
def update_Request(req, url=None, data=None, headers={}, query={}):
    # Clone a urllib Request, optionally overriding URL, body, headers and
    # query parameters, while preserving the HTTP method and timeout.
    # (The mutable defaults are only read, never mutated.)
    req_headers = req.headers.copy()
    req_headers.update(headers)
    req_data = data or req.data
    req_url = update_url_query(url or req.get_full_url(), query)
    req_get_method = req.get_method()
    # A plain Request would lose a HEAD/PUT method, so pick the matching
    # Request subclass for those.
    if req_get_method == 'HEAD':
        req_type = HEADRequest
    elif req_get_method == 'PUT':
        req_type = PUTRequest
    else:
        req_type = compat_urllib_request.Request
    new_req = req_type(
        req_url, data=req_data, headers=req_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
4041
4042
def _multipart_encode_impl(data, boundary):
    """Serialize `data` as multipart/form-data bytes with the given boundary.

    Raises ValueError when the boundary occurs inside any encoded part.
    """
    content_type = 'multipart/form-data; boundary=%s' % boundary

    boundary_bytes = boundary.encode('ascii')
    parts = []
    for field, value in data.items():
        if isinstance(field, compat_str):
            field = field.encode('utf-8')
        if isinstance(value, compat_str):
            value = value.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = (b'Content-Disposition: form-data; name="' + field
                   + b'"\r\n\r\n' + value + b'\r\n')
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        parts.append(b'--' + boundary_bytes + b'\r\n' + content)

    parts.append(b'--' + boundary_bytes + b'--\r\n')

    return b''.join(parts), content_type
4063
4064
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    if boundary is not None:
        # A caller-supplied boundary must work; a ValueError propagates.
        return _multipart_encode_impl(data, boundary)

    while True:
        candidate = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, candidate)
        except ValueError:
            # Random boundary collided with the payload; try another one.
            continue
4093
4094
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Like dict.get, but key_or_keys may be a list/tuple of keys tried in order.

    By default falsy values ('', 0, [] ...) are skipped like None is.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        if key in d and d[key] is not None and (d[key] or not skip_false_values):
            return d[key]
    return default
4103
4104
def try_get(src, getter, expected_type=None):
    """Apply each getter to src and return the first usable result.

    Getters raising Attribute/Key/Type/IndexError are skipped, as are
    results not matching expected_type (when given). Returns None otherwise.
    """
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for fn in getters:
        try:
            value = fn(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
4116
4117
def merge_dicts(*dicts):
    """Merge dicts left-to-right; earlier non-None values win.

    A later value only replaces an existing one when both are strings and
    the existing value is empty while the new one is not.
    """
    merged = {}
    for a_dict in dicts:
        for k, v in a_dict.items():
            if v is None:
                continue
            replaces_empty_str = (
                isinstance(v, compat_str) and v
                and isinstance(merged.get(k), compat_str)
                and not merged[k])
            if k not in merged or replaces_empty_str:
                merged[k] = v
    return merged
4130
4131
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    # Convert bytes (or any non-text object) to compat_str; text passes
    # through unchanged.  NB: the default encoding is evaluated once, at
    # module import time.
    return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4134
4135
# US movie ratings mapped to a numeric age limit.
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
4143
4144
# US TV Parental Guidelines ratings mapped to a numeric age limit.
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4153
4154
def parse_age_limit(s):
    """Normalize an age limit (18, '18+', 'PG-13', 'TV-MA') to an int or None."""
    # Exact type check: bool instances fall through to the string handling
    # below (and yield None).
    if type(s) == int:
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if m:
        return int(m.group('age'))
    us_limit = US_RATINGS.get(s)
    if us_limit is not None:
        return us_limit
    m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)] if m else None
4169
4170
def strip_jsonp(code):
    """Strip a JSONP wrapper like 'callback(...)' leaving only the payload."""
    jsonp_wrapper = re.compile(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''')
    return jsonp_wrapper.sub(r'\g<callback_data>', code)
4179
4180
def js_to_json(code):
    """Convert a JavaScript object literal into valid JSON text."""
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # Hex and octal integer literals (optionally used as object keys).
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Rewrite a single token matched by the big regex below.
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            # Comments, '!'-prefixes and trailing commas are dropped.
            return ""

        if v[0] in ("'", '"'):
            # Re-quote strings with double quotes, translating JS-only escapes.
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    # Integer keys need quoting; bare integer values do not.
                    return '"%d":' % i if v.endswith(':') else '%d' % i

        # Anything else (identifiers, unquoted keys) gets double-quoted.
        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4221
4222
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        # Position in the list is the quality rank; unknown ids rank lowest.
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
4231
4232
# Default output filename template.
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
4234
4235
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ELLIPSES)] + ELLIPSES
4244
4245
def version_tuple(v):
    """Split a version string on '.'/'-' into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
4248
4249
def is_outdated_version(version, limit, assume_new=True):
    """Return True when `version` is older than `limit`.

    Unparseable or missing versions yield `not assume_new`.
    """
    if not version:
        return not assume_new
    try:
        outdated = version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new
    return outdated
4257
4258
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    # Updatable deployments: the zip bundle or a frozen (e.g. py2exe) build.
    if hasattr(sys, 'frozen'):
        return True
    return isinstance(globals().get('__loader__'), zipimporter)
4264
4265
def args_to_str(args):
    # Get a short string representation for a subprocess command
    quoted = [compat_shlex_quote(a) for a in args]
    return ' '.join(quoted)
4269
4270
def error_to_compat_str(err):
    """Stringify an exception, decoding the byte string on Python 2."""
    message = str(err)
    if sys.version_info[0] < 3:
        # On python 2 error byte string must be decoded with proper
        # encoding rather than ascii
        message = message.decode(preferredencoding())
    return message
4278
4279
def mimetype2ext(mt):
    """Map a MIME type to a file extension, defaulting to the bare subtype."""
    if mt is None:
        return None

    # A few full MIME types map specially before looking at the subtype.
    FULL_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
    }
    if mt in FULL_MAP:
        return FULL_MAP[mt]

    # Otherwise use the lower-cased subtype, minus any ';'-parameters.
    subtype = mt.rpartition('/')[2].split(';')[0].strip().lower()

    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }
    return SUBTYPE_MAP.get(subtype, subtype)
4316
4317
def parse_codecs(codecs_str):
    """Split an RFC 6381 codecs attribute into vcodec/acodec fields.

    Reference: http://tools.ietf.org/html/rfc6381
    """
    if not codecs_str:
        return {}
    split_codecs = [c.strip() for c in codecs_str.strip().strip(',').split(',') if c.strip()]
    vcodec = acodec = None
    for full_codec in split_codecs:
        codec = full_codec.split('.')[0]
        if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora'):
            vcodec = vcodec or full_codec
        elif codec in ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            acodec = acodec or full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    # Nothing recognized: with exactly two entries, assume video + audio.
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
4347
4348
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from a response's headers."""
    get_header = url_handle.headers.get

    # Prefer an explicit filename from Content-Disposition, when parseable.
    content_disposition = get_header('Content-Disposition')
    if content_disposition:
        filename_m = re.match(
            r'attachment;\s*filename="(?P<filename>[^"]+)"', content_disposition)
        if filename_m:
            ext = determine_ext(filename_m.group('filename'), default_ext=None)
            if ext:
                return ext

    # Fall back to mapping the MIME type.
    return mimetype2ext(get_header('Content-Type'))
4361
4362
def encode_data_uri(data, mime_type):
    """Build a base64 'data:' URI from raw bytes and a MIME type."""
    payload = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, payload)
4365
4366
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None or content_limit is None:
        # No viewer limit set, or content available for everyone.
        return False
    return age_limit < content_limit
4375
4376
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # Longer BOMs are listed first so prefixes are not misidentified.
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    decoded = None
    for bom, enc in BOMS:
        if first_bytes.startswith(bom):
            decoded = first_bytes[len(bom):].decode(enc, 'replace')
            break
    if decoded is None:
        decoded = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', decoded)
4395
4396
def determine_protocol(info_dict):
    """Infer the download protocol for an info dict from its url/ext."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for scheme_prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(scheme_prefix):
            return scheme_prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4417
4418
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    table = [header_row] + data
    widths = [max(len(compat_str(cell)) for cell in column) for column in zip(*table)]
    # Left-pad every column but the last to its widest entry plus one space.
    fmt = ' '.join('%-' + compat_str(w + 1) + 's' for w in widths[:-1]) + '%s'
    lines = [fmt % tuple(row) for row in table]
    return '\n'.join(lines)
4425
4426
def _match_one(filter_part, dct):
    """Evaluate one filter expression (no '&') against the dict dct."""
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    # Binary form: key <op>[?] value.  A '?' after the operator makes the
    # filter pass when the key is missing/None.
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
                or m.group('strval') is not None
                # If the original field is a string and matching comparisonvalue is
                # a number we should respect the origin of the original field
                # and process comparison value as a string (see
                # https://github.com/ytdl-org/youtube-dl/issues/11082).
                or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Unescape quote characters escaped inside the quoted value.
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Allow suffixed sizes like '500K' or '1.2MiB'.
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    # Unary form: 'key' (present/True) or '!key' (absent/False).
    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4495
4496
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """
    for filter_part in filter_str.split('&'):
        if not _match_one(filter_part, dct):
            return False
    return True
4502
4503
def match_filter_func(filter_str):
    """Build a match-filter callback: None accepts, a message means skip."""
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
4512
4513
def parse_dfxp_time_expr(time_expr):
    """Parse a TTML/DFXP time expression into seconds (float), or None."""
    if not time_expr:
        return

    # Plain offset in seconds, e.g. '12.3' or '12.3s'.
    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    # Clock time 'HH:MM:SS' with an optional '.'- or ':'-separated fraction
    # (a ':' fraction is treated as a decimal fraction of a second).
    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        hours, minutes, seconds = mobj.groups()
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
4525
4526
def srt_subtitles_timecode(seconds):
    """Format a second count as an SRT timecode 'HH:MM:SS,mmm'."""
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    secs = seconds % 60
    millis = (seconds % 1) * 1000
    # '%02d' truncates each float component toward zero.
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
4529
4530
def dfxp2srt(dfxp_data):
    '''
    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    '''
    # Older TTML namespace URIs are rewritten to the current ones so a
    # single set of XPath expressions works for all inputs.
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}
    default_style = {}

    class TTMLPElementParser(object):
        # Converts one TTML <p> element into SRT-flavoured markup,
        # translating supported tts:* styling into <font>/<b>/<i>/<u> tags.
        # NOTE(review): these are class-level (shared) attributes; each parse
        # pushes and pops symmetrically, which appears to keep them balanced
        # across instances -- confirm before refactoring.
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip styling already in effect from an ancestor.
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Serialize one <p> node through TTMLPElementParser.
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance; loop again whenever a parent style is
    # defined after a style that references it.
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # Styling on body/div becomes the default for all paragraphs.
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4693
4694
def cli_option(params, command_option, param):
    """Return [command_option, value] for a CLI invocation, or [].

    Looks up *param* in *params*; when present (not None) the value is
    converted to a string with compat_str. Previously a falsy non-None
    value (e.g. 0) skipped the conversion and a non-string leaked into
    the returned argument list.
    """
    param = params.get(param)
    return [command_option, compat_str(param)] if param is not None else []
4700
4701
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Render a boolean setting as CLI arguments.

    Returns [] when the setting is absent; otherwise either
    ['--opt', 'true'/'false'] or, with a separator, ['--opt=true'].
    """
    value = params.get(param)
    if value is None:
        return []
    assert isinstance(value, bool)
    rendered = true_value if value else false_value
    if separator:
        return [command_option + separator + rendered]
    return [command_option, rendered]
4710
4711
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when params[param] equals expected_value, else []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
4715
4716
def cli_configuration_args(params, param, default=None):
    """Return the list of extra CLI args stored under *param*, or *default*.

    The previous signature used a mutable default ([]); a caller that
    mutated the returned list would have corrupted every later call.
    None is used as the sentinel instead and a fresh empty list is
    returned, which is backward compatible for all callers.
    """
    ex_args = params.get(param)
    if ex_args is None:
        return [] if default is None else default
    assert isinstance(ex_args, list)
    return ex_args
4723
4724
class ISO639Utils(object):
    """Bidirectional mapping between ISO 639-1 (two-letter) and
    ISO 639-2/T (three-letter) language codes."""
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T."""
        # Only the first two characters are considered, so variants
        # like 'en-US' resolve the same way as 'en'.
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1."""
        # Reverse lookup over the table; first match in insertion order
        # wins, and None is returned for unknown codes.
        return next(
            (short for short, long_code in cls._lang_map.items()
             if long_code == code),
            None)
4929
class ISO3166Utils(object):
    """Lookup of full country names from ISO 3166-1 alpha-2 codes."""
    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Return the full country name for an alpha-2 country code.

        The lookup is case-insensitive; unknown codes yield None.
        """
        key = code.upper()
        return cls._country_map.get(key)
5188
5189
class GeoUtils(object):
    """Generate random IPv4 addresses, optionally from a per-country block."""
    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (string) from the given CIDR
        block, or from the country's major block when given a
        two-letter country code. Returns None for unknown codes."""
        if len(code_or_block) != 2:
            # Anything that is not a 2-letter code is treated as a
            # CIDR block directly.
            block = code_or_block
        else:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        base, prefix_len = block.split('/')
        lo = compat_struct_unpack('!L', socket.inet_aton(base))[0]
        # Highest address in the block: set all host bits.
        hi = lo | (0xffffffff >> int(prefix_len))
        picked = random.randint(lo, hi)
        return compat_str(socket.inet_ntoa(compat_struct_pack('!L', picked)))
5448
5449
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler variant that lets individual requests override the
    proxy via an internal 'Ytdl-request-proxy' header."""

    def __init__(self, proxies=None):
        # Set default handlers: http/https requests with no per-handler
        # proxy fall through to proxy_open with the '__noproxy__' marker.
        # The keyword defaults (type=type, meth=self.proxy_open) bind the
        # loop variable eagerly, avoiding the late-binding closure pitfall.
        # NOTE: 'type' deliberately shadows the builtin here, matching the
        # parent class's parameter naming.
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                        meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request proxy (internal 'Ytdl-request-proxy' header)
        # overrides the handler-level proxy; the header is removed so it
        # is never sent to the server.
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            # SOCKS proxying is handled elsewhere: tag the request and let
            # youtube-dl's http/https handlers wrap the socket with socks.
            req.add_header('Ytdl-socks-proxy', proxy)
            return None
        # Plain HTTP(S) proxy: defer to the standard implementation.
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5473
5474
5475 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5476 # released into Public Domain
5477 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5478
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a big-endian byte string.

    If optional blocksize is given and greater than zero, pad the front
    of the byte string with binary zeros so that the length is a
    multiple of blocksize.
    """
    n = int(n)
    # Emit the integer 32 bits at a time, most significant word first.
    s = b''
    while n > 0:
        s = compat_struct_pack('>I', n & 0xffffffff) + s
        n >>= 32
    # Strip leading zero bytes; n == 0 produced an empty string, which
    # must still be represented as a single zero byte.
    s = s.lstrip(b'\000') or b'\000'
    # Front-pad with zero bytes up to the next multiple of blocksize.
    if blocksize > 0 and len(s) % blocksize:
        s = (blocksize - len(s) % blocksize) * b'\000' + s
    return s
5507
5508
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a big-endian byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Left-pad with zero bytes to a whole number of 32-bit words.
    remainder = len(s) % 4
    if remainder:
        s = b'\000' * (4 - remainder) + s
    # Fold in one big-endian word per iteration.
    acc = 0
    for off in range(0, len(s), 4):
        acc = (acc << 32) + compat_struct_unpack('>I', s[off:off + 4])[0]
    return acc
5524
5525
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # The payload is interpreted little-endian, hence the reversal of
    # the input bytes before the hex conversion.
    payload = int(binascii.hexlify(data[::-1]), 16)
    return '%x' % pow(payload, exponent, modulus)
5541
5542
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data input data
    @param {int} length target length
    @returns {int[]} padded data
    @raises ValueError when data does not fit in length - 11 bytes
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # EME-PKCS1-v1_5 (RFC 8017, section 7.2.1) requires the padding
    # string PS to consist of NONZERO pseudo-random octets: a zero byte
    # would be mistaken for the 0x00 delimiter when the padding is
    # stripped after decryption. The previous randint(0, 254) could
    # emit 0 and never emitted 255.
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
5556
5557
def encode_base_n(num, n, table=None):
    """Encode the non-negative integer *num* in base *n*.

    table: optional digit alphabet; defaults to 0-9a-zA-Z truncated to
    the first n characters.
    Raises ValueError when the base exceeds the table length, or when
    num is negative (the previous implementation looped forever on
    negative input, since -1 // n == -1 in Python).
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num < 0:
        raise ValueError('num must be non-negative')

    if num == 0:
        return table[0]

    ret = ''
    while num:
        ret = table[num % n] + ret
        num = num // n
    return ret
5574
5575
def decode_packed_codes(code):
    """Decode JavaScript obfuscated with the p.a.c.k.e.r. scheme."""
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    count = int(count)
    words = symbols.split('|')

    # Map every base-n encoded index back to its original identifier;
    # an empty entry keeps the encoded token itself.
    symbol_table = {}
    for idx in reversed(range(count)):
        encoded = encode_base_n(idx, base)
        symbol_table[encoded] = words[idx] or encoded

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfuscated_code)
5592
5593
def caesar(s, alphabet, shift):
    """Shift every character of *s* that occurs in *alphabet* by
    *shift* positions (wrapping around); other characters are kept."""
    if shift == 0:
        return s
    size = len(alphabet)
    # Precompute the substitution once instead of calling .index() per
    # character; setdefault keeps the first occurrence, matching the
    # semantics of str.index for duplicate alphabet entries.
    mapping = {}
    for pos, ch in enumerate(alphabet):
        mapping.setdefault(ch, alphabet[(pos + shift) % size])
    return ''.join(mapping.get(ch, ch) for ch in s)
5601
5602
def rot47(s):
    """Apply the ROT47 substitution cipher: rotate the 94 printable
    ASCII characters ('!' through '~') by 47 positions."""
    printable = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, printable, 47)
5605
5606
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list string into a dict; double quotes
    around values are stripped."""
    attrs = {}
    for match in re.finditer(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
        value = match.group('val')
        if value[0] == '"':
            value = value[1:-1]
        attrs[match.group('key')] = value
    return attrs
5614
5615
def urshift(val, n):
    """Unsigned (logical) right shift of a 32-bit value, mirroring
    JavaScript's >>> operator for negative numbers."""
    if val < 0:
        val += 0x100000000
    return val >> n
5618
5619
5620 # Based on png2str() written by @gdkchan and improved by @yokrysty
5621 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode a PNG image into (width, height, pixels).

    pixels is a list of rows, each a flat list of reconstructed byte
    values, 3 per pixel. NOTE(review): the IHDR colour type / bit depth
    are not checked; this presumably assumes 8-bit RGB without alpha --
    confirm against callers.
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]

    # Validate the 8-byte PNG signature and that IHDR comes first.
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Unpack a 1/2/4-byte big-endian unsigned integer.
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Split the stream into chunks: 4-byte length, 4-byte type, payload,
    # 4-byte CRC (the CRC is not verified).
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR is the first chunk; width and height are its first two fields.
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''

    # The compressed image data may be split across several IDAT chunks.
    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # 3 bytes per pixel; each scanline is prefixed by one filter-type byte.
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        # Fetch an already-reconstructed byte by its flat (row-major) index.
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # 'left' is the corresponding byte of the previous pixel
            # (3 bytes back), which only exists from the second pixel on;
            # 'up' is the same byte in the previous row.
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            # Undo the per-scanline filter (PNG spec, filter types 0-4).
            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Pick the predictor closest to p (Paeth predictor).
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
5725
5726
def write_xattr(path, key, value):
    """Set the extended attribute *key* to *value* (bytes) on *path*.

    Tries, in order: the pyxattr/xattr Python modules, NTFS Alternate
    Data Streams on Windows, and the setfattr/xattr command line tools.
    Raises XAttrUnavailableError when no usable implementation exists
    and XAttrMetadataError when setting the attribute fails.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        # Both the 'pyxattr' and 'xattr' PyPI packages install a module
        # named 'xattr' but expose different APIs; distinguish by the
        # presence of the 'set' attribute.
        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dl requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            # Fall back to the setfattr/xattr command line tools.
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The CLI tools take the value as text, not bytes.
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = process_communicate_or_kill(p)
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
5809
5810
def random_birthday(year_field, month_field, day_field):
    """Return a dict mapping the given field names to the components of
    a random date between 1950-01-01 and 1995-12-31 (as strings)."""
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    chosen = earliest + datetime.timedelta(days=random.randint(0, span_days))
    return {
        year_field: str(chosen.year),
        month_field: str(chosen.month),
        day_field: str(chosen.day),
    }
5821
5822
def clean_podcast_url(url):
    """Remove well-known podcast tracking/measurement redirect prefixes
    (podtrac, chartable, blubrry, acast, podcorn, podsights) from *url*."""
    tracking_prefix_re = r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/'''
    return re.sub(tracking_prefix_re, '', url)
|