1 #!/usr/bin/env python
2 # coding: utf-8
3
4 from __future__ import unicode_literals
5
6 import base64
7 import binascii
8 import calendar
9 import codecs
10 import collections
11 import contextlib
12 import ctypes
13 import datetime
14 import email.utils
15 import email.header
16 import errno
17 import functools
18 import gzip
19 import io
20 import itertools
21 import json
22 import locale
23 import math
24 import operator
25 import os
26 import platform
27 import random
28 import re
29 import socket
30 import ssl
31 import subprocess
32 import sys
33 import tempfile
34 import time
35 import traceback
36 import unicodedata
37 import xml.etree.ElementTree
38 import zlib
39
40 from .compat import (
41 compat_HTMLParseError,
42 compat_HTMLParser,
43 compat_HTTPError,
44 compat_basestring,
45 compat_chr,
46 compat_collections_abc,
47 compat_cookiejar,
48 compat_ctypes_WINFUNCTYPE,
49 compat_etree_fromstring,
50 compat_expanduser,
51 compat_html_entities,
52 compat_html_entities_html5,
53 compat_http_client,
54 compat_integer_types,
55 compat_kwargs,
56 compat_os_name,
57 compat_parse_qs,
58 compat_shlex_quote,
59 compat_str,
60 compat_struct_pack,
61 compat_struct_unpack,
62 compat_urllib_error,
63 compat_urllib_parse,
64 compat_urllib_parse_urlencode,
65 compat_urllib_parse_urlparse,
66 compat_urllib_parse_unquote_plus,
67 compat_urllib_request,
68 compat_urlparse,
69 compat_xpath,
70 )
71
72 from .socks import (
73 ProxyType,
74 sockssocket,
75 )
76
77
def register_socks_protocols():
    """Make urlparse treat SOCKS proxy URLs as having a netloc component.

    In Python < 2.6.5, urlsplit() suffers from the bug
    https://bugs.python.org/issue7904: URLs whose scheme is not listed in
    urlparse.uses_netloc are not handled correctly, so each SOCKS scheme
    is appended here (idempotently) before any proxy URL is parsed.
    """
    known_schemes = compat_urlparse.uses_netloc
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in known_schemes:
            known_schemes.append(scheme)
85
86
# Type of a compiled regular expression object. It is not exposed under a
# stable public name on all Python versions this file supports, so it is
# derived from an actual compiled pattern instead.
compiled_regex_type = type(re.compile(''))
89
90
def random_user_agent():
    """Return a plausible Chrome-on-Windows-10 User-Agent string.

    The Chrome version is picked uniformly at random (via random.choice)
    from a fixed, hard-coded list of real Chrome release version numbers
    and substituted into the User-Agent template.
    """
    # %s is replaced by one entry from _CHROME_VERSIONS below.
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    # Literal Chrome version strings; the list is data, not generated.
    _CHROME_VERSIONS = (
        '74.0.3729.129',
        '76.0.3780.3',
        '76.0.3780.2',
        '74.0.3729.128',
        '76.0.3780.1',
        '76.0.3780.0',
        '75.0.3770.15',
        '74.0.3729.127',
        '74.0.3729.126',
        '76.0.3779.1',
        '76.0.3779.0',
        '75.0.3770.14',
        '74.0.3729.125',
        '76.0.3778.1',
        '76.0.3778.0',
        '75.0.3770.13',
        '74.0.3729.124',
        '74.0.3729.123',
        '73.0.3683.121',
        '76.0.3777.1',
        '76.0.3777.0',
        '75.0.3770.12',
        '74.0.3729.122',
        '76.0.3776.4',
        '75.0.3770.11',
        '74.0.3729.121',
        '76.0.3776.3',
        '76.0.3776.2',
        '73.0.3683.120',
        '74.0.3729.120',
        '74.0.3729.119',
        '74.0.3729.118',
        '76.0.3776.1',
        '76.0.3776.0',
        '76.0.3775.5',
        '75.0.3770.10',
        '74.0.3729.117',
        '76.0.3775.4',
        '76.0.3775.3',
        '74.0.3729.116',
        '75.0.3770.9',
        '76.0.3775.2',
        '76.0.3775.1',
        '76.0.3775.0',
        '75.0.3770.8',
        '74.0.3729.115',
        '74.0.3729.114',
        '76.0.3774.1',
        '76.0.3774.0',
        '75.0.3770.7',
        '74.0.3729.113',
        '74.0.3729.112',
        '74.0.3729.111',
        '76.0.3773.1',
        '76.0.3773.0',
        '75.0.3770.6',
        '74.0.3729.110',
        '74.0.3729.109',
        '76.0.3772.1',
        '76.0.3772.0',
        '75.0.3770.5',
        '74.0.3729.108',
        '74.0.3729.107',
        '76.0.3771.1',
        '76.0.3771.0',
        '75.0.3770.4',
        '74.0.3729.106',
        '74.0.3729.105',
        '75.0.3770.3',
        '74.0.3729.104',
        '74.0.3729.103',
        '74.0.3729.102',
        '75.0.3770.2',
        '74.0.3729.101',
        '75.0.3770.1',
        '75.0.3770.0',
        '74.0.3729.100',
        '75.0.3769.5',
        '75.0.3769.4',
        '74.0.3729.99',
        '75.0.3769.3',
        '75.0.3769.2',
        '75.0.3768.6',
        '74.0.3729.98',
        '75.0.3769.1',
        '75.0.3769.0',
        '74.0.3729.97',
        '73.0.3683.119',
        '73.0.3683.118',
        '74.0.3729.96',
        '75.0.3768.5',
        '75.0.3768.4',
        '75.0.3768.3',
        '75.0.3768.2',
        '74.0.3729.95',
        '74.0.3729.94',
        '75.0.3768.1',
        '75.0.3768.0',
        '74.0.3729.93',
        '74.0.3729.92',
        '73.0.3683.117',
        '74.0.3729.91',
        '75.0.3766.3',
        '74.0.3729.90',
        '75.0.3767.2',
        '75.0.3767.1',
        '75.0.3767.0',
        '74.0.3729.89',
        '73.0.3683.116',
        '75.0.3766.2',
        '74.0.3729.88',
        '75.0.3766.1',
        '75.0.3766.0',
        '74.0.3729.87',
        '73.0.3683.115',
        '74.0.3729.86',
        '75.0.3765.1',
        '75.0.3765.0',
        '74.0.3729.85',
        '73.0.3683.114',
        '74.0.3729.84',
        '75.0.3764.1',
        '75.0.3764.0',
        '74.0.3729.83',
        '73.0.3683.113',
        '75.0.3763.2',
        '75.0.3761.4',
        '74.0.3729.82',
        '75.0.3763.1',
        '75.0.3763.0',
        '74.0.3729.81',
        '73.0.3683.112',
        '75.0.3762.1',
        '75.0.3762.0',
        '74.0.3729.80',
        '75.0.3761.3',
        '74.0.3729.79',
        '73.0.3683.111',
        '75.0.3761.2',
        '74.0.3729.78',
        '74.0.3729.77',
        '75.0.3761.1',
        '75.0.3761.0',
        '73.0.3683.110',
        '74.0.3729.76',
        '74.0.3729.75',
        '75.0.3760.0',
        '74.0.3729.74',
        '75.0.3759.8',
        '75.0.3759.7',
        '75.0.3759.6',
        '74.0.3729.73',
        '75.0.3759.5',
        '74.0.3729.72',
        '73.0.3683.109',
        '75.0.3759.4',
        '75.0.3759.3',
        '74.0.3729.71',
        '75.0.3759.2',
        '74.0.3729.70',
        '73.0.3683.108',
        '74.0.3729.69',
        '75.0.3759.1',
        '75.0.3759.0',
        '74.0.3729.68',
        '73.0.3683.107',
        '74.0.3729.67',
        '75.0.3758.1',
        '75.0.3758.0',
        '74.0.3729.66',
        '73.0.3683.106',
        '74.0.3729.65',
        '75.0.3757.1',
        '75.0.3757.0',
        '74.0.3729.64',
        '73.0.3683.105',
        '74.0.3729.63',
        '75.0.3756.1',
        '75.0.3756.0',
        '74.0.3729.62',
        '73.0.3683.104',
        '75.0.3755.3',
        '75.0.3755.2',
        '73.0.3683.103',
        '75.0.3755.1',
        '75.0.3755.0',
        '74.0.3729.61',
        '73.0.3683.102',
        '74.0.3729.60',
        '75.0.3754.2',
        '74.0.3729.59',
        '75.0.3753.4',
        '74.0.3729.58',
        '75.0.3754.1',
        '75.0.3754.0',
        '74.0.3729.57',
        '73.0.3683.101',
        '75.0.3753.3',
        '75.0.3752.2',
        '75.0.3753.2',
        '74.0.3729.56',
        '75.0.3753.1',
        '75.0.3753.0',
        '74.0.3729.55',
        '73.0.3683.100',
        '74.0.3729.54',
        '75.0.3752.1',
        '75.0.3752.0',
        '74.0.3729.53',
        '73.0.3683.99',
        '74.0.3729.52',
        '75.0.3751.1',
        '75.0.3751.0',
        '74.0.3729.51',
        '73.0.3683.98',
        '74.0.3729.50',
        '75.0.3750.0',
        '74.0.3729.49',
        '74.0.3729.48',
        '74.0.3729.47',
        '75.0.3749.3',
        '74.0.3729.46',
        '73.0.3683.97',
        '75.0.3749.2',
        '74.0.3729.45',
        '75.0.3749.1',
        '75.0.3749.0',
        '74.0.3729.44',
        '73.0.3683.96',
        '74.0.3729.43',
        '74.0.3729.42',
        '75.0.3748.1',
        '75.0.3748.0',
        '74.0.3729.41',
        '75.0.3747.1',
        '73.0.3683.95',
        '75.0.3746.4',
        '74.0.3729.40',
        '74.0.3729.39',
        '75.0.3747.0',
        '75.0.3746.3',
        '75.0.3746.2',
        '74.0.3729.38',
        '75.0.3746.1',
        '75.0.3746.0',
        '74.0.3729.37',
        '73.0.3683.94',
        '75.0.3745.5',
        '75.0.3745.4',
        '75.0.3745.3',
        '75.0.3745.2',
        '74.0.3729.36',
        '75.0.3745.1',
        '75.0.3745.0',
        '75.0.3744.2',
        '74.0.3729.35',
        '73.0.3683.93',
        '74.0.3729.34',
        '75.0.3744.1',
        '75.0.3744.0',
        '74.0.3729.33',
        '73.0.3683.92',
        '74.0.3729.32',
        '74.0.3729.31',
        '73.0.3683.91',
        '75.0.3741.2',
        '75.0.3740.5',
        '74.0.3729.30',
        '75.0.3741.1',
        '75.0.3741.0',
        '74.0.3729.29',
        '75.0.3740.4',
        '73.0.3683.90',
        '74.0.3729.28',
        '75.0.3740.3',
        '73.0.3683.89',
        '75.0.3740.2',
        '74.0.3729.27',
        '75.0.3740.1',
        '75.0.3740.0',
        '74.0.3729.26',
        '73.0.3683.88',
        '73.0.3683.87',
        '74.0.3729.25',
        '75.0.3739.1',
        '75.0.3739.0',
        '73.0.3683.86',
        '74.0.3729.24',
        '73.0.3683.85',
        '75.0.3738.4',
        '75.0.3738.3',
        '75.0.3738.2',
        '75.0.3738.1',
        '75.0.3738.0',
        '74.0.3729.23',
        '73.0.3683.84',
        '74.0.3729.22',
        '74.0.3729.21',
        '75.0.3737.1',
        '75.0.3737.0',
        '74.0.3729.20',
        '73.0.3683.83',
        '74.0.3729.19',
        '75.0.3736.1',
        '75.0.3736.0',
        '74.0.3729.18',
        '73.0.3683.82',
        '74.0.3729.17',
        '75.0.3735.1',
        '75.0.3735.0',
        '74.0.3729.16',
        '73.0.3683.81',
        '75.0.3734.1',
        '75.0.3734.0',
        '74.0.3729.15',
        '73.0.3683.80',
        '74.0.3729.14',
        '75.0.3733.1',
        '75.0.3733.0',
        '75.0.3732.1',
        '74.0.3729.13',
        '74.0.3729.12',
        '73.0.3683.79',
        '74.0.3729.11',
        '75.0.3732.0',
        '74.0.3729.10',
        '73.0.3683.78',
        '74.0.3729.9',
        '74.0.3729.8',
        '74.0.3729.7',
        '75.0.3731.3',
        '75.0.3731.2',
        '75.0.3731.0',
        '74.0.3729.6',
        '73.0.3683.77',
        '73.0.3683.76',
        '75.0.3730.5',
        '75.0.3730.4',
        '73.0.3683.75',
        '74.0.3729.5',
        '73.0.3683.74',
        '75.0.3730.3',
        '75.0.3730.2',
        '74.0.3729.4',
        '73.0.3683.73',
        '73.0.3683.72',
        '75.0.3730.1',
        '75.0.3730.0',
        '74.0.3729.3',
        '73.0.3683.71',
        '74.0.3729.2',
        '73.0.3683.70',
        '74.0.3729.1',
        '74.0.3729.0',
        '74.0.3726.4',
        '73.0.3683.69',
        '74.0.3726.3',
        '74.0.3728.0',
        '74.0.3726.2',
        '73.0.3683.68',
        '74.0.3726.1',
        '74.0.3726.0',
        '74.0.3725.4',
        '73.0.3683.67',
        '73.0.3683.66',
        '74.0.3725.3',
        '74.0.3725.2',
        '74.0.3725.1',
        '74.0.3724.8',
        '74.0.3725.0',
        '73.0.3683.65',
        '74.0.3724.7',
        '74.0.3724.6',
        '74.0.3724.5',
        '74.0.3724.4',
        '74.0.3724.3',
        '74.0.3724.2',
        '74.0.3724.1',
        '74.0.3724.0',
        '73.0.3683.64',
        '74.0.3723.1',
        '74.0.3723.0',
        '73.0.3683.63',
        '74.0.3722.1',
        '74.0.3722.0',
        '73.0.3683.62',
        '74.0.3718.9',
        '74.0.3702.3',
        '74.0.3721.3',
        '74.0.3721.2',
        '74.0.3721.1',
        '74.0.3721.0',
        '74.0.3720.6',
        '73.0.3683.61',
        '72.0.3626.122',
        '73.0.3683.60',
        '74.0.3720.5',
        '72.0.3626.121',
        '74.0.3718.8',
        '74.0.3720.4',
        '74.0.3720.3',
        '74.0.3718.7',
        '74.0.3720.2',
        '74.0.3720.1',
        '74.0.3720.0',
        '74.0.3718.6',
        '74.0.3719.5',
        '73.0.3683.59',
        '74.0.3718.5',
        '74.0.3718.4',
        '74.0.3719.4',
        '74.0.3719.3',
        '74.0.3719.2',
        '74.0.3719.1',
        '73.0.3683.58',
        '74.0.3719.0',
        '73.0.3683.57',
        '73.0.3683.56',
        '74.0.3718.3',
        '73.0.3683.55',
        '74.0.3718.2',
        '74.0.3718.1',
        '74.0.3718.0',
        '73.0.3683.54',
        '74.0.3717.2',
        '73.0.3683.53',
        '74.0.3717.1',
        '74.0.3717.0',
        '73.0.3683.52',
        '74.0.3716.1',
        '74.0.3716.0',
        '73.0.3683.51',
        '74.0.3715.1',
        '74.0.3715.0',
        '73.0.3683.50',
        '74.0.3711.2',
        '74.0.3714.2',
        '74.0.3713.3',
        '74.0.3714.1',
        '74.0.3714.0',
        '73.0.3683.49',
        '74.0.3713.1',
        '74.0.3713.0',
        '72.0.3626.120',
        '73.0.3683.48',
        '74.0.3712.2',
        '74.0.3712.1',
        '74.0.3712.0',
        '73.0.3683.47',
        '72.0.3626.119',
        '73.0.3683.46',
        '74.0.3710.2',
        '72.0.3626.118',
        '74.0.3711.1',
        '74.0.3711.0',
        '73.0.3683.45',
        '72.0.3626.117',
        '74.0.3710.1',
        '74.0.3710.0',
        '73.0.3683.44',
        '72.0.3626.116',
        '74.0.3709.1',
        '74.0.3709.0',
        '74.0.3704.9',
        '73.0.3683.43',
        '72.0.3626.115',
        '74.0.3704.8',
        '74.0.3704.7',
        '74.0.3708.0',
        '74.0.3706.7',
        '74.0.3704.6',
        '73.0.3683.42',
        '72.0.3626.114',
        '74.0.3706.6',
        '72.0.3626.113',
        '74.0.3704.5',
        '74.0.3706.5',
        '74.0.3706.4',
        '74.0.3706.3',
        '74.0.3706.2',
        '74.0.3706.1',
        '74.0.3706.0',
        '73.0.3683.41',
        '72.0.3626.112',
        '74.0.3705.1',
        '74.0.3705.0',
        '73.0.3683.40',
        '72.0.3626.111',
        '73.0.3683.39',
        '74.0.3704.4',
        '73.0.3683.38',
        '74.0.3704.3',
        '74.0.3704.2',
        '74.0.3704.1',
        '74.0.3704.0',
        '73.0.3683.37',
        '72.0.3626.110',
        '72.0.3626.109',
        '74.0.3703.3',
        '74.0.3703.2',
        '73.0.3683.36',
        '74.0.3703.1',
        '74.0.3703.0',
        '73.0.3683.35',
        '72.0.3626.108',
        '74.0.3702.2',
        '74.0.3699.3',
        '74.0.3702.1',
        '74.0.3702.0',
        '73.0.3683.34',
        '72.0.3626.107',
        '73.0.3683.33',
        '74.0.3701.1',
        '74.0.3701.0',
        '73.0.3683.32',
        '73.0.3683.31',
        '72.0.3626.105',
        '74.0.3700.1',
        '74.0.3700.0',
        '73.0.3683.29',
        '72.0.3626.103',
        '74.0.3699.2',
        '74.0.3699.1',
        '74.0.3699.0',
        '73.0.3683.28',
        '72.0.3626.102',
        '73.0.3683.27',
        '73.0.3683.26',
        '74.0.3698.0',
        '74.0.3696.2',
        '72.0.3626.101',
        '73.0.3683.25',
        '74.0.3696.1',
        '74.0.3696.0',
        '74.0.3694.8',
        '72.0.3626.100',
        '74.0.3694.7',
        '74.0.3694.6',
        '74.0.3694.5',
        '74.0.3694.4',
        '72.0.3626.99',
        '72.0.3626.98',
        '74.0.3694.3',
        '73.0.3683.24',
        '72.0.3626.97',
        '72.0.3626.96',
        '72.0.3626.95',
        '73.0.3683.23',
        '72.0.3626.94',
        '73.0.3683.22',
        '73.0.3683.21',
        '72.0.3626.93',
        '74.0.3694.2',
        '72.0.3626.92',
        '74.0.3694.1',
        '74.0.3694.0',
        '74.0.3693.6',
        '73.0.3683.20',
        '72.0.3626.91',
        '74.0.3693.5',
        '74.0.3693.4',
        '74.0.3693.3',
        '74.0.3693.2',
        '73.0.3683.19',
        '74.0.3693.1',
        '74.0.3693.0',
        '73.0.3683.18',
        '72.0.3626.90',
        '74.0.3692.1',
        '74.0.3692.0',
        '73.0.3683.17',
        '72.0.3626.89',
        '74.0.3687.3',
        '74.0.3691.1',
        '74.0.3691.0',
        '73.0.3683.16',
        '72.0.3626.88',
        '72.0.3626.87',
        '73.0.3683.15',
        '74.0.3690.1',
        '74.0.3690.0',
        '73.0.3683.14',
        '72.0.3626.86',
        '73.0.3683.13',
        '73.0.3683.12',
        '74.0.3689.1',
        '74.0.3689.0',
        '73.0.3683.11',
        '72.0.3626.85',
        '73.0.3683.10',
        '72.0.3626.84',
        '73.0.3683.9',
        '74.0.3688.1',
        '74.0.3688.0',
        '73.0.3683.8',
        '72.0.3626.83',
        '74.0.3687.2',
        '74.0.3687.1',
        '74.0.3687.0',
        '73.0.3683.7',
        '72.0.3626.82',
        '74.0.3686.4',
        '72.0.3626.81',
        '74.0.3686.3',
        '74.0.3686.2',
        '74.0.3686.1',
        '74.0.3686.0',
        '73.0.3683.6',
        '72.0.3626.80',
        '74.0.3685.1',
        '74.0.3685.0',
        '73.0.3683.5',
        '72.0.3626.79',
        '74.0.3684.1',
        '74.0.3684.0',
        '73.0.3683.4',
        '72.0.3626.78',
        '72.0.3626.77',
        '73.0.3683.3',
        '73.0.3683.2',
        '72.0.3626.76',
        '73.0.3683.1',
        '73.0.3683.0',
        '72.0.3626.75',
        '71.0.3578.141',
        '73.0.3682.1',
        '73.0.3682.0',
        '72.0.3626.74',
        '71.0.3578.140',
        '73.0.3681.4',
        '73.0.3681.3',
        '73.0.3681.2',
        '73.0.3681.1',
        '73.0.3681.0',
        '72.0.3626.73',
        '71.0.3578.139',
        '72.0.3626.72',
        '72.0.3626.71',
        '73.0.3680.1',
        '73.0.3680.0',
        '72.0.3626.70',
        '71.0.3578.138',
        '73.0.3678.2',
        '73.0.3679.1',
        '73.0.3679.0',
        '72.0.3626.69',
        '71.0.3578.137',
        '73.0.3678.1',
        '73.0.3678.0',
        '71.0.3578.136',
        '73.0.3677.1',
        '73.0.3677.0',
        '72.0.3626.68',
        '72.0.3626.67',
        '71.0.3578.135',
        '73.0.3676.1',
        '73.0.3676.0',
        '73.0.3674.2',
        '72.0.3626.66',
        '71.0.3578.134',
        '73.0.3674.1',
        '73.0.3674.0',
        '72.0.3626.65',
        '71.0.3578.133',
        '73.0.3673.2',
        '73.0.3673.1',
        '73.0.3673.0',
        '72.0.3626.64',
        '71.0.3578.132',
        '72.0.3626.63',
        '72.0.3626.62',
        '72.0.3626.61',
        '72.0.3626.60',
        '73.0.3672.1',
        '73.0.3672.0',
        '72.0.3626.59',
        '71.0.3578.131',
        '73.0.3671.3',
        '73.0.3671.2',
        '73.0.3671.1',
        '73.0.3671.0',
        '72.0.3626.58',
        '71.0.3578.130',
        '73.0.3670.1',
        '73.0.3670.0',
        '72.0.3626.57',
        '71.0.3578.129',
        '73.0.3669.1',
        '73.0.3669.0',
        '72.0.3626.56',
        '71.0.3578.128',
        '73.0.3668.2',
        '73.0.3668.1',
        '73.0.3668.0',
        '72.0.3626.55',
        '71.0.3578.127',
        '73.0.3667.2',
        '73.0.3667.1',
        '73.0.3667.0',
        '72.0.3626.54',
        '71.0.3578.126',
        '73.0.3666.1',
        '73.0.3666.0',
        '72.0.3626.53',
        '71.0.3578.125',
        '73.0.3665.4',
        '73.0.3665.3',
        '72.0.3626.52',
        '73.0.3665.2',
        '73.0.3664.4',
        '73.0.3665.1',
        '73.0.3665.0',
        '72.0.3626.51',
        '71.0.3578.124',
        '72.0.3626.50',
        '73.0.3664.3',
        '73.0.3664.2',
        '73.0.3664.1',
        '73.0.3664.0',
        '73.0.3663.2',
        '72.0.3626.49',
        '71.0.3578.123',
        '73.0.3663.1',
        '73.0.3663.0',
        '72.0.3626.48',
        '71.0.3578.122',
        '73.0.3662.1',
        '73.0.3662.0',
        '72.0.3626.47',
        '71.0.3578.121',
        '73.0.3661.1',
        '72.0.3626.46',
        '73.0.3661.0',
        '72.0.3626.45',
        '71.0.3578.120',
        '73.0.3660.2',
        '73.0.3660.1',
        '73.0.3660.0',
        '72.0.3626.44',
        '71.0.3578.119',
        '73.0.3659.1',
        '73.0.3659.0',
        '72.0.3626.43',
        '71.0.3578.118',
        '73.0.3658.1',
        '73.0.3658.0',
        '72.0.3626.42',
        '71.0.3578.117',
        '73.0.3657.1',
        '73.0.3657.0',
        '72.0.3626.41',
        '71.0.3578.116',
        '73.0.3656.1',
        '73.0.3656.0',
        '72.0.3626.40',
        '71.0.3578.115',
        '73.0.3655.1',
        '73.0.3655.0',
        '72.0.3626.39',
        '71.0.3578.114',
        '73.0.3654.1',
        '73.0.3654.0',
        '72.0.3626.38',
        '71.0.3578.113',
        '73.0.3653.1',
        '73.0.3653.0',
        '72.0.3626.37',
        '71.0.3578.112',
        '73.0.3652.1',
        '73.0.3652.0',
        '72.0.3626.36',
        '71.0.3578.111',
        '73.0.3651.1',
        '73.0.3651.0',
        '72.0.3626.35',
        '71.0.3578.110',
        '73.0.3650.1',
        '73.0.3650.0',
        '72.0.3626.34',
        '71.0.3578.109',
        '73.0.3649.1',
        '73.0.3649.0',
        '72.0.3626.33',
        '71.0.3578.108',
        '73.0.3648.2',
        '73.0.3648.1',
        '73.0.3648.0',
        '72.0.3626.32',
        '71.0.3578.107',
        '73.0.3647.2',
        '73.0.3647.1',
        '73.0.3647.0',
        '72.0.3626.31',
        '71.0.3578.106',
        '73.0.3635.3',
        '73.0.3646.2',
        '73.0.3646.1',
        '73.0.3646.0',
        '72.0.3626.30',
        '71.0.3578.105',
        '72.0.3626.29',
        '73.0.3645.2',
        '73.0.3645.1',
        '73.0.3645.0',
        '72.0.3626.28',
        '71.0.3578.104',
        '72.0.3626.27',
        '72.0.3626.26',
        '72.0.3626.25',
        '72.0.3626.24',
        '73.0.3644.0',
        '73.0.3643.2',
        '72.0.3626.23',
        '71.0.3578.103',
        '73.0.3643.1',
        '73.0.3643.0',
        '72.0.3626.22',
        '71.0.3578.102',
        '73.0.3642.1',
        '73.0.3642.0',
        '72.0.3626.21',
        '71.0.3578.101',
        '73.0.3641.1',
        '73.0.3641.0',
        '72.0.3626.20',
        '71.0.3578.100',
        '72.0.3626.19',
        '73.0.3640.1',
        '73.0.3640.0',
        '72.0.3626.18',
        '73.0.3639.1',
        '71.0.3578.99',
        '73.0.3639.0',
        '72.0.3626.17',
        '73.0.3638.2',
        '72.0.3626.16',
        '73.0.3638.1',
        '73.0.3638.0',
        '72.0.3626.15',
        '71.0.3578.98',
        '73.0.3635.2',
        '71.0.3578.97',
        '73.0.3637.1',
        '73.0.3637.0',
        '72.0.3626.14',
        '71.0.3578.96',
        '71.0.3578.95',
        '72.0.3626.13',
        '71.0.3578.94',
        '73.0.3636.2',
        '71.0.3578.93',
        '73.0.3636.1',
        '73.0.3636.0',
        '72.0.3626.12',
        '71.0.3578.92',
        '73.0.3635.1',
        '73.0.3635.0',
        '72.0.3626.11',
        '71.0.3578.91',
        '73.0.3634.2',
        '73.0.3634.1',
        '73.0.3634.0',
        '72.0.3626.10',
        '71.0.3578.90',
        '71.0.3578.89',
        '73.0.3633.2',
        '73.0.3633.1',
        '73.0.3633.0',
        '72.0.3610.4',
        '72.0.3626.9',
        '71.0.3578.88',
        '73.0.3632.5',
        '73.0.3632.4',
        '73.0.3632.3',
        '73.0.3632.2',
        '73.0.3632.1',
        '73.0.3632.0',
        '72.0.3626.8',
        '71.0.3578.87',
        '73.0.3631.2',
        '73.0.3631.1',
        '73.0.3631.0',
        '72.0.3626.7',
        '71.0.3578.86',
        '72.0.3626.6',
        '73.0.3630.1',
        '73.0.3630.0',
        '72.0.3626.5',
        '71.0.3578.85',
        '72.0.3626.4',
        '73.0.3628.3',
        '73.0.3628.2',
        '73.0.3629.1',
        '73.0.3629.0',
        '72.0.3626.3',
        '71.0.3578.84',
        '73.0.3628.1',
        '73.0.3628.0',
        '71.0.3578.83',
        '73.0.3627.1',
        '73.0.3627.0',
        '72.0.3626.2',
        '71.0.3578.82',
        '71.0.3578.81',
        '71.0.3578.80',
        '72.0.3626.1',
        '72.0.3626.0',
        '71.0.3578.79',
        '70.0.3538.124',
        '71.0.3578.78',
        '72.0.3623.4',
        '72.0.3625.2',
        '72.0.3625.1',
        '72.0.3625.0',
        '71.0.3578.77',
        '70.0.3538.123',
        '72.0.3624.4',
        '72.0.3624.3',
        '72.0.3624.2',
        '71.0.3578.76',
        '72.0.3624.1',
        '72.0.3624.0',
        '72.0.3623.3',
        '71.0.3578.75',
        '70.0.3538.122',
        '71.0.3578.74',
        '72.0.3623.2',
        '72.0.3610.3',
        '72.0.3623.1',
        '72.0.3623.0',
        '72.0.3622.3',
        '72.0.3622.2',
        '71.0.3578.73',
        '70.0.3538.121',
        '72.0.3622.1',
        '72.0.3622.0',
        '71.0.3578.72',
        '70.0.3538.120',
        '72.0.3621.1',
        '72.0.3621.0',
        '71.0.3578.71',
        '70.0.3538.119',
        '72.0.3620.1',
        '72.0.3620.0',
        '71.0.3578.70',
        '70.0.3538.118',
        '71.0.3578.69',
        '72.0.3619.1',
        '72.0.3619.0',
        '71.0.3578.68',
        '70.0.3538.117',
        '71.0.3578.67',
        '72.0.3618.1',
        '72.0.3618.0',
        '71.0.3578.66',
        '70.0.3538.116',
        '72.0.3617.1',
        '72.0.3617.0',
        '71.0.3578.65',
        '70.0.3538.115',
        '72.0.3602.3',
        '71.0.3578.64',
        '72.0.3616.1',
        '72.0.3616.0',
        '71.0.3578.63',
        '70.0.3538.114',
        '71.0.3578.62',
        '72.0.3615.1',
        '72.0.3615.0',
        '71.0.3578.61',
        '70.0.3538.113',
        '72.0.3614.1',
        '72.0.3614.0',
        '71.0.3578.60',
        '70.0.3538.112',
        '72.0.3613.1',
        '72.0.3613.0',
        '71.0.3578.59',
        '70.0.3538.111',
        '72.0.3612.2',
        '72.0.3612.1',
        '72.0.3612.0',
        '70.0.3538.110',
        '71.0.3578.58',
        '70.0.3538.109',
        '72.0.3611.2',
        '72.0.3611.1',
        '72.0.3611.0',
        '71.0.3578.57',
        '70.0.3538.108',
        '72.0.3610.2',
        '71.0.3578.56',
        '71.0.3578.55',
        '72.0.3610.1',
        '72.0.3610.0',
        '71.0.3578.54',
        '70.0.3538.107',
        '71.0.3578.53',
        '72.0.3609.3',
        '71.0.3578.52',
        '72.0.3609.2',
        '71.0.3578.51',
        '72.0.3608.5',
        '72.0.3609.1',
        '72.0.3609.0',
        '71.0.3578.50',
        '70.0.3538.106',
        '72.0.3608.4',
        '72.0.3608.3',
        '72.0.3608.2',
        '71.0.3578.49',
        '72.0.3608.1',
        '72.0.3608.0',
        '70.0.3538.105',
        '71.0.3578.48',
        '72.0.3607.1',
        '72.0.3607.0',
        '71.0.3578.47',
        '70.0.3538.104',
        '72.0.3606.2',
        '72.0.3606.1',
        '72.0.3606.0',
        '71.0.3578.46',
        '70.0.3538.103',
        '70.0.3538.102',
        '72.0.3605.3',
        '72.0.3605.2',
        '72.0.3605.1',
        '72.0.3605.0',
        '71.0.3578.45',
        '70.0.3538.101',
        '71.0.3578.44',
        '71.0.3578.43',
        '70.0.3538.100',
        '70.0.3538.99',
        '71.0.3578.42',
        '72.0.3604.1',
        '72.0.3604.0',
        '71.0.3578.41',
        '70.0.3538.98',
        '71.0.3578.40',
        '72.0.3603.2',
        '72.0.3603.1',
        '72.0.3603.0',
        '71.0.3578.39',
        '70.0.3538.97',
        '72.0.3602.2',
        '71.0.3578.38',
        '71.0.3578.37',
        '72.0.3602.1',
        '72.0.3602.0',
        '71.0.3578.36',
        '70.0.3538.96',
        '72.0.3601.1',
        '72.0.3601.0',
        '71.0.3578.35',
        '70.0.3538.95',
        '72.0.3600.1',
        '72.0.3600.0',
        '71.0.3578.34',
        '70.0.3538.94',
        '72.0.3599.3',
        '72.0.3599.2',
        '72.0.3599.1',
        '72.0.3599.0',
        '71.0.3578.33',
        '70.0.3538.93',
        '72.0.3598.1',
        '72.0.3598.0',
        '71.0.3578.32',
        '70.0.3538.87',
        '72.0.3597.1',
        '72.0.3597.0',
        '72.0.3596.2',
        '71.0.3578.31',
        '70.0.3538.86',
        '71.0.3578.30',
        '71.0.3578.29',
        '72.0.3596.1',
        '72.0.3596.0',
        '71.0.3578.28',
        '70.0.3538.85',
        '72.0.3595.2',
        '72.0.3591.3',
        '72.0.3595.1',
        '72.0.3595.0',
        '71.0.3578.27',
        '70.0.3538.84',
        '72.0.3594.1',
        '72.0.3594.0',
        '71.0.3578.26',
        '70.0.3538.83',
        '72.0.3593.2',
        '72.0.3593.1',
        '72.0.3593.0',
        '71.0.3578.25',
        '70.0.3538.82',
        '72.0.3589.3',
        '72.0.3592.2',
        '72.0.3592.1',
        '72.0.3592.0',
        '71.0.3578.24',
        '72.0.3589.2',
        '70.0.3538.81',
        '70.0.3538.80',
        '72.0.3591.2',
        '72.0.3591.1',
        '72.0.3591.0',
        '71.0.3578.23',
        '70.0.3538.79',
        '71.0.3578.22',
        '72.0.3590.1',
        '72.0.3590.0',
        '71.0.3578.21',
        '70.0.3538.78',
        '70.0.3538.77',
        '72.0.3589.1',
        '72.0.3589.0',
        '71.0.3578.20',
        '70.0.3538.76',
        '71.0.3578.19',
        '70.0.3538.75',
        '72.0.3588.1',
        '72.0.3588.0',
        '71.0.3578.18',
        '70.0.3538.74',
        '72.0.3586.2',
        '72.0.3587.0',
        '71.0.3578.17',
        '70.0.3538.73',
        '72.0.3586.1',
        '72.0.3586.0',
        '71.0.3578.16',
        '70.0.3538.72',
        '72.0.3585.1',
        '72.0.3585.0',
        '71.0.3578.15',
        '70.0.3538.71',
        '71.0.3578.14',
        '72.0.3584.1',
        '72.0.3584.0',
        '71.0.3578.13',
        '70.0.3538.70',
        '72.0.3583.2',
        '71.0.3578.12',
        '72.0.3583.1',
        '72.0.3583.0',
        '71.0.3578.11',
        '70.0.3538.69',
        '71.0.3578.10',
        '72.0.3582.0',
        '72.0.3581.4',
        '71.0.3578.9',
        '70.0.3538.67',
        '72.0.3581.3',
        '72.0.3581.2',
        '72.0.3581.1',
        '72.0.3581.0',
        '71.0.3578.8',
        '70.0.3538.66',
        '72.0.3580.1',
        '72.0.3580.0',
        '71.0.3578.7',
        '70.0.3538.65',
        '71.0.3578.6',
        '72.0.3579.1',
        '72.0.3579.0',
        '71.0.3578.5',
        '70.0.3538.64',
        '71.0.3578.4',
        '71.0.3578.3',
        '71.0.3578.2',
        '71.0.3578.1',
        '71.0.3578.0',
        '70.0.3538.63',
        '69.0.3497.128',
        '70.0.3538.62',
        '70.0.3538.61',
        '70.0.3538.60',
        '70.0.3538.59',
        '71.0.3577.1',
        '71.0.3577.0',
        '70.0.3538.58',
        '69.0.3497.127',
        '71.0.3576.2',
        '71.0.3576.1',
        '71.0.3576.0',
        '70.0.3538.57',
        '70.0.3538.56',
        '71.0.3575.2',
        '70.0.3538.55',
        '69.0.3497.126',
        '70.0.3538.54',
        '71.0.3575.1',
        '71.0.3575.0',
        '71.0.3574.1',
        '71.0.3574.0',
        '70.0.3538.53',
        '69.0.3497.125',
        '70.0.3538.52',
        '71.0.3573.1',
        '71.0.3573.0',
        '70.0.3538.51',
        '69.0.3497.124',
        '71.0.3572.1',
        '71.0.3572.0',
        '70.0.3538.50',
        '69.0.3497.123',
        '71.0.3571.2',
        '70.0.3538.49',
        '69.0.3497.122',
        '71.0.3571.1',
        '71.0.3571.0',
        '70.0.3538.48',
        '69.0.3497.121',
        '71.0.3570.1',
        '71.0.3570.0',
        '70.0.3538.47',
        '69.0.3497.120',
        '71.0.3568.2',
        '71.0.3569.1',
        '71.0.3569.0',
        '70.0.3538.46',
        '69.0.3497.119',
        '70.0.3538.45',
        '71.0.3568.1',
        '71.0.3568.0',
        '70.0.3538.44',
        '69.0.3497.118',
        '70.0.3538.43',
        '70.0.3538.42',
        '71.0.3567.1',
        '71.0.3567.0',
        '70.0.3538.41',
        '69.0.3497.117',
        '71.0.3566.1',
        '71.0.3566.0',
        '70.0.3538.40',
        '69.0.3497.116',
        '71.0.3565.1',
        '71.0.3565.0',
        '70.0.3538.39',
        '69.0.3497.115',
        '71.0.3564.1',
        '71.0.3564.0',
        '70.0.3538.38',
        '69.0.3497.114',
        '71.0.3563.0',
        '71.0.3562.2',
        '70.0.3538.37',
        '69.0.3497.113',
        '70.0.3538.36',
        '70.0.3538.35',
        '71.0.3562.1',
        '71.0.3562.0',
        '70.0.3538.34',
        '69.0.3497.112',
        '70.0.3538.33',
        '71.0.3561.1',
        '71.0.3561.0',
        '70.0.3538.32',
        '69.0.3497.111',
        '71.0.3559.6',
        '71.0.3560.1',
        '71.0.3560.0',
        '71.0.3559.5',
        '71.0.3559.4',
        '70.0.3538.31',
        '69.0.3497.110',
        '71.0.3559.3',
        '70.0.3538.30',
        '69.0.3497.109',
        '71.0.3559.2',
        '71.0.3559.1',
        '71.0.3559.0',
        '70.0.3538.29',
        '69.0.3497.108',
        '71.0.3558.2',
        '71.0.3558.1',
        '71.0.3558.0',
        '70.0.3538.28',
        '69.0.3497.107',
        '71.0.3557.2',
        '71.0.3557.1',
        '71.0.3557.0',
        '70.0.3538.27',
        '69.0.3497.106',
        '71.0.3554.4',
        '70.0.3538.26',
        '71.0.3556.1',
        '71.0.3556.0',
        '70.0.3538.25',
        '71.0.3554.3',
        '69.0.3497.105',
        '71.0.3554.2',
        '70.0.3538.24',
        '69.0.3497.104',
        '71.0.3555.2',
        '70.0.3538.23',
        '71.0.3555.1',
        '71.0.3555.0',
        '70.0.3538.22',
        '69.0.3497.103',
        '71.0.3554.1',
        '71.0.3554.0',
        '70.0.3538.21',
        '69.0.3497.102',
        '71.0.3553.3',
        '70.0.3538.20',
        '69.0.3497.101',
        '71.0.3553.2',
        '69.0.3497.100',
        '71.0.3553.1',
        '71.0.3553.0',
        '70.0.3538.19',
        '69.0.3497.99',
        '69.0.3497.98',
        '69.0.3497.97',
        '71.0.3552.6',
        '71.0.3552.5',
        '71.0.3552.4',
        '71.0.3552.3',
        '71.0.3552.2',
        '71.0.3552.1',
        '71.0.3552.0',
        '70.0.3538.18',
        '69.0.3497.96',
        '71.0.3551.3',
        '71.0.3551.2',
        '71.0.3551.1',
        '71.0.3551.0',
        '70.0.3538.17',
        '69.0.3497.95',
        '71.0.3550.3',
        '71.0.3550.2',
        '71.0.3550.1',
        '71.0.3550.0',
        '70.0.3538.16',
        '69.0.3497.94',
        '71.0.3549.1',
        '71.0.3549.0',
        '70.0.3538.15',
        '69.0.3497.93',
        '69.0.3497.92',
        '71.0.3548.1',
        '71.0.3548.0',
        '70.0.3538.14',
        '69.0.3497.91',
        '71.0.3547.1',
        '71.0.3547.0',
        '70.0.3538.13',
        '69.0.3497.90',
        '71.0.3546.2',
        '69.0.3497.89',
        '71.0.3546.1',
        '71.0.3546.0',
        '70.0.3538.12',
        '69.0.3497.88',
        '71.0.3545.4',
        '71.0.3545.3',
        '71.0.3545.2',
        '71.0.3545.1',
        '71.0.3545.0',
        '70.0.3538.11',
        '69.0.3497.87',
        '71.0.3544.5',
        '71.0.3544.4',
        '71.0.3544.3',
        '71.0.3544.2',
        '71.0.3544.1',
        '71.0.3544.0',
        '69.0.3497.86',
        '70.0.3538.10',
        '69.0.3497.85',
        '70.0.3538.9',
        '69.0.3497.84',
        '71.0.3543.4',
        '70.0.3538.8',
        '71.0.3543.3',
        '71.0.3543.2',
        '71.0.3543.1',
        '71.0.3543.0',
        '70.0.3538.7',
        '69.0.3497.83',
        '71.0.3542.2',
        '71.0.3542.1',
        '71.0.3542.0',
        '70.0.3538.6',
        '69.0.3497.82',
        '69.0.3497.81',
        '71.0.3541.1',
        '71.0.3541.0',
        '70.0.3538.5',
        '69.0.3497.80',
        '71.0.3540.1',
        '71.0.3540.0',
        '70.0.3538.4',
        '69.0.3497.79',
        '70.0.3538.3',
        '71.0.3539.1',
        '71.0.3539.0',
        '69.0.3497.78',
        '68.0.3440.134',
        '69.0.3497.77',
        '70.0.3538.2',
        '70.0.3538.1',
        '70.0.3538.0',
        '69.0.3497.76',
        '68.0.3440.133',
        '69.0.3497.75',
        '70.0.3537.2',
        '70.0.3537.1',
        '70.0.3537.0',
        '69.0.3497.74',
        '68.0.3440.132',
        '70.0.3536.0',
        '70.0.3535.5',
        '70.0.3535.4',
        '70.0.3535.3',
        '69.0.3497.73',
        '68.0.3440.131',
        '70.0.3532.8',
        '70.0.3532.7',
        '69.0.3497.72',
        '69.0.3497.71',
        '70.0.3535.2',
        '70.0.3535.1',
        '70.0.3535.0',
        '69.0.3497.70',
        '68.0.3440.130',
        '69.0.3497.69',
        '68.0.3440.129',
        '70.0.3534.4',
        '70.0.3534.3',
        '70.0.3534.2',
        '70.0.3534.1',
        '70.0.3534.0',
        '69.0.3497.68',
        '68.0.3440.128',
        '70.0.3533.2',
        '70.0.3533.1',
        '70.0.3533.0',
        '69.0.3497.67',
        '68.0.3440.127',
        '70.0.3532.6',
        '70.0.3532.5',
        '70.0.3532.4',
        '69.0.3497.66',
        '68.0.3440.126',
        '70.0.3532.3',
        '70.0.3532.2',
        '70.0.3532.1',
        '69.0.3497.60',
        '69.0.3497.65',
        '69.0.3497.64',
        '70.0.3532.0',
        '70.0.3531.0',
        '70.0.3530.4',
        '70.0.3530.3',
        '70.0.3530.2',
        '69.0.3497.58',
        '68.0.3440.125',
        '69.0.3497.57',
        '69.0.3497.56',
        '69.0.3497.55',
        '69.0.3497.54',
        '70.0.3530.1',
        '70.0.3530.0',
        '69.0.3497.53',
        '68.0.3440.124',
        '69.0.3497.52',
        '70.0.3529.3',
        '70.0.3529.2',
        '70.0.3529.1',
        '70.0.3529.0',
        '69.0.3497.51',
        '70.0.3528.4',
        '68.0.3440.123',
        '70.0.3528.3',
        '70.0.3528.2',
        '70.0.3528.1',
        '70.0.3528.0',
        '69.0.3497.50',
        '68.0.3440.122',
        '70.0.3527.1',
        '70.0.3527.0',
        '69.0.3497.49',
        '68.0.3440.121',
        '70.0.3526.1',
        '70.0.3526.0',
        '68.0.3440.120',
        '69.0.3497.48',
        '69.0.3497.47',
        '68.0.3440.119',
        '68.0.3440.118',
        '70.0.3525.5',
        '70.0.3525.4',
        '70.0.3525.3',
        '68.0.3440.117',
        '69.0.3497.46',
        '70.0.3525.2',
        '70.0.3525.1',
        '70.0.3525.0',
        '69.0.3497.45',
        '68.0.3440.116',
        '70.0.3524.4',
        '70.0.3524.3',
        '69.0.3497.44',
        '70.0.3524.2',
        '70.0.3524.1',
        '70.0.3524.0',
        '70.0.3523.2',
        '69.0.3497.43',
        '68.0.3440.115',
        '70.0.3505.9',
        '69.0.3497.42',
        '70.0.3505.8',
        '70.0.3523.1',
        '70.0.3523.0',
        '69.0.3497.41',
        '68.0.3440.114',
        '70.0.3505.7',
        '69.0.3497.40',
        '70.0.3522.1',
        '70.0.3522.0',
        '70.0.3521.2',
        '69.0.3497.39',
        '68.0.3440.113',
        '70.0.3505.6',
        '70.0.3521.1',
        '70.0.3521.0',
        '69.0.3497.38',
        '68.0.3440.112',
        '70.0.3520.1',
        '70.0.3520.0',
        '69.0.3497.37',
        '68.0.3440.111',
        '70.0.3519.3',
        '70.0.3519.2',
        '70.0.3519.1',
        '70.0.3519.0',
        '69.0.3497.36',
        '68.0.3440.110',
        '70.0.3518.1',
        '70.0.3518.0',
        '69.0.3497.35',
        '69.0.3497.34',
        '68.0.3440.109',
        '70.0.3517.1',
        '70.0.3517.0',
        '69.0.3497.33',
        '68.0.3440.108',
        '69.0.3497.32',
        '70.0.3516.3',
        '70.0.3516.2',
        '70.0.3516.1',
        '70.0.3516.0',
        '69.0.3497.31',
        '68.0.3440.107',
        '70.0.3515.4',
        '68.0.3440.106',
        '70.0.3515.3',
        '70.0.3515.2',
        '70.0.3515.1',
        '70.0.3515.0',
        '69.0.3497.30',
        '68.0.3440.105',
        '68.0.3440.104',
        '70.0.3514.2',
        '70.0.3514.1',
        '70.0.3514.0',
        '69.0.3497.29',
        '68.0.3440.103',
        '70.0.3513.1',
        '70.0.3513.0',
        '69.0.3497.28',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
1672
1673
# Default HTTP headers sent with every request; the User-Agent is randomized
# once per process start via random_user_agent()
std_headers = {
    'User-Agent': random_user_agent(),
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'en-us,en;q=0.5',
}


# Alternative User-Agent strings for sites that need a specific browser
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


# Unique sentinel meaning "no default supplied" (None can be a valid default)
NO_DEFAULT = object()
# Identity function, usable as a default transform callback
IDENTITY = lambda x: x
1690
# Month names used when parsing textual dates (index 0 == January)
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Per-language month-name tables, keyed by ISO 639-1 language code
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}

# Timezone names for RFC2822 obs-zone
# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
# Values are UTC offsets in hours
TIMEZONE_NAMES = {
    'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
    'AST': -4, 'ADT': -3,  # Atlantic (used in Canada)
    'EST': -5, 'EDT': -4,  # Eastern
    'CST': -6, 'CDT': -5,  # Central
    'MST': -7, 'MDT': -6,  # Mountain
    'PST': -8, 'PDT': -7   # Pacific
}
1712
# Media file extensions recognized when guessing formats from URLs/filenames
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')

# needed for sanitizing filenames in restricted mode
# Maps each accented character to its ASCII transliteration
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
1732
# strptime() formats tried (in order) when parsing free-form dates
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y%m%d',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

# Additional formats for locales that write day before month (e.g. 31/12/2018)
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
    '%d-%m-%Y %H:%M',
])

# Additional formats for locales that write month before day (e.g. 12/31/2018)
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches P.A.C.K.E.R.-style packed JavaScript (payload, radix, count, symtab)
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches <script type="application/ld+json"> blocks; JSON body in group 'json_ld'
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
1800
1801
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    encoding = 'UTF-8'
    try:
        candidate = locale.getpreferredencoding()
        # Verify the reported encoding actually works before trusting it
        'TEST'.encode(candidate)
        encoding = candidate
    except Exception:
        pass
    return encoding
1815
1816
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    fn = encodeFilename(fn)
    if sys.version_info < (3, 0) and sys.platform != 'win32':
        encoding = get_filesystem_encoding()
        # os.path.basename returns a bytes object, but NamedTemporaryFile
        # will fail if the filename contains non ascii characters unless we
        # use a unicode object
        path_basename = lambda f: os.path.basename(fn).decode(encoding)
        # the same for os.path.dirname
        path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
    else:
        path_basename = os.path.basename
        path_dirname = os.path.dirname

    # Write to a temporary file in the destination directory so the final
    # os.rename() below is an atomic replace on POSIX filesystems
    args = {
        'suffix': '.tmp',
        'prefix': path_basename(fn) + '.',
        'dir': path_dirname(fn),
        'delete': False,
    }

    # In Python 2.x, json.dump expects a bytestream.
    # In Python 3.x, it writes to a character stream
    if sys.version_info < (3, 0):
        args['mode'] = 'wb'
    else:
        args.update({
            'mode': 'w',
            'encoding': 'utf-8',
        })

    tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))

    try:
        with tf:
            json.dump(obj, tf)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass
        try:
            # NamedTemporaryFile creates files with mode 0o600; widen to the
            # default permissions the current umask would give a new file
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        except OSError:
            pass
        os.rename(tf.name, fn)
    except Exception:
        # Best effort cleanup of the temporary file, then re-raise
        try:
            os.remove(tf.name)
        except OSError:
            pass
        raise
1875
1876
if sys.version_info >= (2, 7):
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] """
        # ElementTree supports attribute predicates natively from Python 2.7
        assert re.match(r'^[a-zA-Z_-]+$', key)
        expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
        return node.find(expr)
else:
    def find_xpath_attr(node, xpath, key, val=None):
        """ Find the xpath xpath[@key=val] (manual fallback for Python 2.6) """
        for f in node.findall(compat_xpath(xpath)):
            if key not in f.attrib:
                continue
            if val is None or f.attrib.get(key) == val:
                return f
        return None
1891
1892 # On python2.6 the xml.etree.ElementTree.Element methods don't support
1893 # the namespace parameter
1894
1895
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' steps of an xpath into ElementTree's '{uri}tag'
    form, resolving each prefix through ns_map."""
    def expand(step):
        parts = step.split(':')
        if len(parts) == 1:
            return parts[0]
        prefix, tag = parts
        return '{%s}%s' % (ns_map[prefix], tag)

    return '/'.join(expand(step) for step in path.split('/'))
1906
1907
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element matching xpath (a string or an iterable of
    candidate xpaths). Missing elements yield `default` if given, raise
    ExtractorError if `fatal`, else return None."""
    if isinstance(xpath, (str, compat_str)):
        candidates = [xpath]
    else:
        candidates = xpath

    found = None
    for xp in candidates:
        found = node.find(compat_xpath(xp))
        if found is not None:
            break

    if found is not None:
        return found
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError(
            'Could not find XML element %s' % (name if name is not None else xpath))
    return None
1929
1930
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Like xpath_element(), but return the matched element's text content."""
    el = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if el is None or el == default:
        return el
    if el.text is not None:
        return el.text
    # Element exists but carries no text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError(
            'Could not find XML element\'s text %s' % (name if name is not None else xpath))
    return None
1944
1945
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Return the value of attribute `key` on the element matching
    xpath[@key], honouring default/fatal like the other xpath_* helpers."""
    el = find_xpath_attr(node, xpath, key)
    if el is not None:
        return el.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError(
            'Could not find XML attribute %s'
            % (name if name is not None else '%s[@%s]' % (xpath, key)))
    return None
1957
1958
def get_element_by_id(id, html):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # Specialization of the generic attribute lookup
    content = get_element_by_attribute('id', id, html)
    return content
1962
1963
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_by_class(class_name, html)
    if not matches:
        return None
    return matches[0]
1968
1969
def get_element_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the first tag carrying attribute=value, or None."""
    matches = get_elements_by_attribute(attribute, value, html, escape_value)
    if not matches:
        return None
    return matches[0]
1973
1974
def get_elements_by_class(class_name, html):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # The class attribute is a whitespace-separated list, so match the class
    # name as a whole word anywhere inside the (quoted) attribute value
    value_re = r'[^\'"]*\b%s\b[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', value_re, html, escape_value=False)
1980
1981
def get_elements_by_attribute(attribute, value, html, escape_value=True):
    """Return the content of the tag with the specified attribute in the passed HTML document"""
    if escape_value:
        value = re.escape(value)

    pattern = r'''(?xs)
        <([a-zA-Z0-9:._-]+)
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
         \s+%s=['"]?%s['"]?
         (?:\s+[a-zA-Z0-9:._-]+(?:=[a-zA-Z0-9:._-]*|="[^"]*"|='[^']*'|))*?
        \s*>
        (?P<content>.*?)
        </\1>
    ''' % (re.escape(attribute), value)

    results = []
    for match in re.finditer(pattern, html):
        content = match.group('content')
        # Drop surrounding quotes when the content itself is quoted
        if content.startswith('"') or content.startswith("'"):
            content = content[1:-1]
        results.append(unescapeHTML(content))
    return results
2005
2006
class HTMLAttributeParser(compat_HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""
    def __init__(self):
        # Old-style base-class call for Python 2 compatibility
        compat_HTMLParser.__init__(self)
        self.attrs = {}

    def handle_starttag(self, tag, attrs):
        # Only the last start tag seen is recorded
        self.attrs = dict(attrs)
2015
2016
def extract_attributes(html_element):
    """Decode the attributes of a single HTML element into a dictionary.

    For example, given the string
        <el
             a="foo" B="bar" c="&98;az" d=boz
             empty= noval entity="&amp;"
             sq='"' dq="'"
        >
    the result is
        {
            'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
            'empty': '', 'noval': None, 'entity': '&',
            'sq': '"', 'dq': '\''
        }
    NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
    but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    except compat_HTMLParseError:
        # Older Python may throw HTMLParseError in case of malformed HTML;
        # return whatever was gathered before the failure
        pass
    return parser.attrs
2041
2042
def clean_html(html):
    """Clean an HTML snippet into a readable string"""
    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Turn <br> and </p><p> into newlines, dropping original newlines
    html = html.replace('\n', ' ')
    html = re.sub(r'(?u)\s*<\s*br\s*/?\s*>\s*', '\n', html)
    html = re.sub(r'(?u)<\s*/\s*p\s*>\s*<\s*p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities and trim surrounding whitespace
    return unescapeHTML(html).strip()
2058
2059
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename == '-':
            if sys.platform == 'win32':
                import msvcrt
                # Put stdout into binary mode so written bytes are not mangled
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
        stream = open(encodeFilename(filename), open_mode)
        return (stream, filename)
    except (IOError, OSError) as err:
        # Permission errors cannot be fixed by renaming; re-raise immediately
        if err.errno in (errno.EACCES,):
            raise

        # In case of error, try to remove win32 forbidden chars
        alt_filename = sanitize_path(filename)
        if alt_filename == filename:
            raise
        else:
            # An exception here should be caught in the caller
            stream = open(encodeFilename(alt_filename), open_mode)
            return (stream, alt_filename)
2090
2091
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        # Unparseable date string
        return None
    return email.utils.mktime_tz(parsed)
2099
2100
def sanitize_filename(s, restricted=False, is_id=False):
    """Sanitizes a string so it could be used as part of a filename.
    If restricted is set, use a stricter subset of allowed characters.
    Set is_id if this is not an arbitrary string, but an ID that should be kept
    if possible.
    """
    def replace_insane(char):
        # Transliterate accented characters to ASCII in restricted mode
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        if char == '?' or ord(char) < 32 or ord(char) == 127:
            # Drop '?' and control characters entirely
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '_-' if restricted else ' -'
        elif char in '\\/|*<>':
            # Characters forbidden in Windows filenames
            return '_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
            # Shell metacharacters and whitespace in restricted mode
            return '_'
        if restricted and ord(char) > 127:
            # Non-ASCII characters in restricted mode
            return '_'
        return char

    # Replace look-alike Unicode glyphs
    if restricted and not is_id:
        s = unicodedata.normalize('NFKC', s)
    # Handle timestamps
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
    result = ''.join(map(replace_insane, s))
    if not is_id:
        # Collapse underscore runs introduced by the substitutions above
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        # A leading dot would make the file hidden on POSIX systems
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
2143
2144
def sanitize_path(s):
    """Sanitizes and normalizes path on Windows"""
    # No-op on non-Windows platforms
    if sys.platform != 'win32':
        return s
    drive_or_unc, _ = os.path.splitdrive(s)
    if sys.version_info < (2, 7) and not drive_or_unc:
        # splitdrive() does not handle UNC paths before Python 2.7
        drive_or_unc, _ = os.path.splitunc(s)
    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    # Replace characters invalid in Windows path components (and trailing
    # spaces/dots) with '#', keeping '.'/'..' components intact
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    return os.path.join(*sanitized_path)
2161
2162
def sanitize_url(url):
    """Normalize a URL: add a scheme to protocol-relative URLs, repair a few
    common scheme typos, and percent-escape the rest."""
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url.startswith('//'):
        return 'http:%s' % url
    # Fix some common typos seen so far
    common_typos = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in common_typos:
        if re.match(mistake, url):
            return re.sub(mistake, fixup, url)
    return escape_url(url)
2179
2180
def sanitized_Request(url, *args, **kwargs):
    """Build a urllib Request after passing the URL through sanitize_url()."""
    clean_url = sanitize_url(url)
    return compat_urllib_request.Request(clean_url, *args, **kwargs)
2183
2184
def expand_path(s):
    """Expand shell variables and ~"""
    expanded = compat_expanduser(s)
    return os.path.expandvars(expanded)
2188
2189
def orderedSet(iterable):
    """ Remove all duplicates from the input iterable """
    # Membership is tested against the result list itself (O(n^2)) so that
    # unhashable elements remain supported; first occurrence wins.
    result = []
    append = result.append
    for item in iterable:
        if item not in result:
            append(item)
    return result
2197
2198
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character."""
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in compat_html_entities.name2codepoint:
        return compat_chr(compat_html_entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon. For example,
    # 'Eacute' should be decoded as 'É'.
    if entity_with_semicolon in compat_html_entities_html5:
        return compat_html_entities_html5[entity_with_semicolon]

    numeric = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if numeric is not None:
        digits = numeric.group(1)
        if digits.startswith('x'):
            radix = 16
            digits = '0%s' % digits
        else:
            radix = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        try:
            return compat_chr(int(digits, radix))
        except ValueError:
            pass

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity
2228
2229
def unescapeHTML(s):
    """Replace all HTML entities in s with the characters they represent."""
    if s is None:
        return None
    assert type(s) == compat_str

    def replace_entity(mobj):
        return _htmlentity_transform(mobj.group(1))

    return re.sub(r'&([^&;]+;)', replace_entity, s)
2237
2238
def process_communicate_or_kill(p, *args, **kwargs):
    """p.communicate(...), but kill the process if the wait is interrupted
    (including by KeyboardInterrupt) so no orphan process is left behind."""
    try:
        return p.communicate(*args, **kwargs)
    except BaseException:  # Including KeyboardInterrupt
        p.kill()
        p.wait()
        raise
2246
2247
def get_subprocess_encoding():
    """Return the encoding to use for subprocess arguments and output."""
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        return preferredencoding()
    encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
2258
2259
def encodeFilename(s, for_subprocess=False):
    """
    @param s The name of the file
    """

    assert type(s) == compat_str

    if sys.version_info >= (3, 0):
        # Python 3 has a Unicode API
        return s

    if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # Pass '' directly to use Unicode APIs on Windows 2000 and up
        # (Detecting Windows NT 4 is tricky because 'major >= 4' would
        # match Windows 9x series as well. Besides, NT 4 is obsolete.)
        return s

    if sys.platform.startswith('java'):
        # Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
        return s

    return s.encode(get_subprocess_encoding(), 'ignore')
2282
2283
def decodeFilename(b, for_subprocess=False):
    """Decode a byte filename to text on Python 2; pass through otherwise."""
    # Python 3 filenames are already text
    if sys.version_info >= (3, 0):
        return b
    if not isinstance(b, bytes):
        return b
    return b.decode(get_subprocess_encoding(), 'ignore')
2293
2294
def encodeArgument(s):
    """Encode a command-line argument for subprocess use."""
    if not isinstance(s, compat_str):
        # Legacy code that uses byte strings
        # Uncomment the following line after fixing all post processors
        # assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
        s = s.decode('ascii')
    return encodeFilename(s, True)
2302
2303
def decodeArgument(b):
    """Decode a command-line argument coming from a subprocess."""
    return decodeFilename(b, for_subprocess=True)
2306
2307
def decodeOption(optval):
    """Decode a CLI option value to text; None passes through unchanged."""
    if optval is None:
        return None
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())
    assert isinstance(optval, compat_str)
    return optval
2316
2317
def formatSeconds(secs):
    """Format a duration in seconds as 'H:MM:SS', 'M:SS' or 'S'.

    Uses >= comparisons so that exact boundaries format correctly:
    3600 -> '1:00:00' (not '60:00') and 60 -> '1:00' (not '60').
    """
    if secs >= 3600:
        return '%d:%02d:%02d' % (secs // 3600, (secs % 3600) // 60, secs % 60)
    elif secs >= 60:
        return '%d:%02d' % (secs // 60, secs % 60)
    else:
        return '%d' % secs
2325
2326
def make_HTTPS_handler(params, **kwargs):
    """Build a YoutubeDLHTTPSHandler with an SSL context appropriate for the
    running Python version, honouring the 'nocheckcertificate' option."""

    # https://www.rfc-editor.org/info/rfc7301
    ALPN_PROTOCOLS = ['http/1.1']

    def set_alpn_protocols(ctx):
        # From https://github.com/yt-dlp/yt-dlp/commit/2c6dcb65fb612fc5bc5c61937bf438d3c473d8d0
        # Thanks @coletdjnz
        # Some servers may (wrongly) reject requests if ALPN extension is not sent. See:
        # https://github.com/python/cpython/issues/85140
        # https://github.com/yt-dlp/yt-dlp/issues/3878
        try:
            ctx.set_alpn_protocols(ALPN_PROTOCOLS)
        except (AttributeError, NotImplementedError):
            # Python < 2.7.10, not ssl.HAS_ALPN
            pass

    opts_no_check_certificate = params.get('nocheckcertificate', False)
    if hasattr(ssl, 'create_default_context'):  # Python >= 3.4 or 2.7.9
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        set_alpn_protocols(context)
        if opts_no_check_certificate:
            # Disable all certificate verification when requested
            context.check_hostname = False
            context.verify_mode = ssl.CERT_NONE

        try:
            return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
        except TypeError:
            # Python 2.7.8
            # (create_default_context present but HTTPSHandler has no context=)
            pass

    if sys.version_info < (3, 2):
        return YoutubeDLHTTPSHandler(params, **kwargs)
    else:  # Python < 3.4
        # Build a context manually for Pythons without create_default_context
        context = ssl.SSLContext(ssl.PROTOCOL_TLSv1)
        context.verify_mode = (ssl.CERT_NONE
                               if opts_no_check_certificate
                               else ssl.CERT_REQUIRED)
        context.set_default_verify_paths()
        set_alpn_protocols(context)
        return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
2369
2370
def bug_reports_message():
    """Return the standard bug-report blurb appended to unexpected errors."""
    if ytdl_is_updateable():
        update_cmd = 'type youtube-dl -U to update'
    else:
        update_cmd = 'see https://yt-dl.org/update on how to update'
    return (
        '; please report this issue on https://yt-dl.org/bug .'
        ' Make sure you are using the latest version; %s.'
        ' Be sure to call youtube-dl with the --verbose flag and include its complete output.'
        % update_cmd)
2380
2381
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
2385
2386
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in youtube-dl.
        """

        # Network-related failures are always treated as "expected"
        # (i.e. not a youtube-dl bug)
        if sys.exc_info()[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError):
            expected = True
        if video_id is not None:
            msg = video_id + ': ' + msg
        if cause:
            msg += ' (caused by %r)' % cause
        if not expected:
            # Unexpected errors get the bug-report blurb appended
            msg += bug_reports_message()
        super(ExtractorError, self).__init__(msg)

        self.traceback = tb
        self.exc_info = sys.exc_info()  # preserve original exception
        self.cause = cause
        self.video_id = video_id

    def format_traceback(self):
        """Return the formatted original traceback, or None if none was given."""
        if self.traceback is None:
            return None
        return ''.join(traceback.format_tb(self.traceback))
2414
2415
class UnsupportedError(ExtractorError):
    """Raised when no extractor can handle the given URL."""
    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super(UnsupportedError, self).__init__(message, expected=True)
        self.url = url
2421
2422
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
2426
2427
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    Raised when a video is unavailable from the current location because of
    geographic restrictions imposed by a website.
    """
    def __init__(self, msg, countries=None):
        # Geo restrictions are a site property, not a youtube-dl bug
        super(GeoRestrictedError, self).__init__(msg, expected=True)
        self.msg = msg
        # Optional list of country codes from which the video is available
        self.countries = countries
2438
2439
class DownloadError(YoutubeDLError):
    """Download Error exception.

    Raised by FileDownloader objects when they are not configured to
    continue on errors; carries the error message and optionally the
    original exception info.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        super(DownloadError, self).__init__(msg)
        self.exc_info = exc_info
2452
2453
class SameFileError(YoutubeDLError):
    """Same File exception.

    Raised by FileDownloader objects when they detect that multiple files
    would have to be downloaded to the same file on disk.
    """
2461
2462
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    Raised by a PostProcessor's .run() method to indicate an error in the
    postprocessing task.
    """

    def __init__(self, msg):
        self.msg = msg
        super(PostProcessingError, self).__init__(msg)
2473
2474
class MaxDownloadsReached(YoutubeDLError):
    """ --max-downloads limit has been reached. """
2478
2479
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    Raised when a video is requested in a format that is not available
    for that video.
    """
2487
2488
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    Raised by FileDownloader objects when a downloaded file is smaller than
    what the server announced, indicating the connection was probably
    interrupted.
    """

    def __init__(self, downloaded, expected):
        message = 'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
        super(ContentTooShortError, self).__init__(message)
        # Both in bytes
        self.downloaded = downloaded
        self.expected = expected
2504
2505
class XAttrMetadataError(YoutubeDLError):
    """Raised when writing an extended file attribute fails; classifies the
    failure into a coarse `reason` for the caller."""
    def __init__(self, code=None, msg='Unknown error'):
        super(XAttrMetadataError, self).__init__(msg)
        self.code = code
        self.msg = msg

        # Derive a coarse reason from the error code and message text
        out_of_space = (
            self.code in (errno.ENOSPC, errno.EDQUOT)
            or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg)
        if out_of_space:
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
2520
2521
class XAttrUnavailableError(YoutubeDLError):
    """Raised when no means of setting extended attributes is available."""
2524
2525
def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs):
    """Create an HTTP(S) connection object, honouring the handler's
    'source_address' option and applying Python-2 compatibility fixes."""
    # Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
    # expected HTTP responses to meet HTTP/1.0 or later (see also
    # https://github.com/ytdl-org/youtube-dl/issues/6727)
    if sys.version_info < (3, 0):
        kwargs['strict'] = True
    hc = http_class(*args, **compat_kwargs(kwargs))
    source_address = ydl_handler._params.get('source_address')

    if source_address is not None:
        # This is to workaround _create_connection() from socket where it will try all
        # address data from getaddrinfo() including IPv6. This filters the result from
        # getaddrinfo() based on the source_address value.
        # This is based on the cpython socket.create_connection() function.
        # https://github.com/python/cpython/blob/master/Lib/socket.py#L691
        def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
            host, port = address
            err = None
            addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
            # Keep only addresses of the same family as the source address
            af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
            ip_addrs = [addr for addr in addrs if addr[0] == af]
            if addrs and not ip_addrs:
                ip_version = 'v4' if af == socket.AF_INET else 'v6'
                raise socket.error(
                    "No remote IP%s addresses available for connect, can't use '%s' as source address"
                    % (ip_version, source_address[0]))
            for res in ip_addrs:
                af, socktype, proto, canonname, sa = res
                sock = None
                try:
                    sock = socket.socket(af, socktype, proto)
                    if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                        sock.settimeout(timeout)
                    sock.bind(source_address)
                    sock.connect(sa)
                    err = None  # Explicitly break reference cycle
                    return sock
                except socket.error as _:
                    err = _
                    if sock is not None:
                        sock.close()
            # All candidate addresses failed: re-raise the last error
            if err is not None:
                raise err
            else:
                raise socket.error('getaddrinfo returns an empty list')
        if hasattr(hc, '_create_connection'):
            hc._create_connection = _create_connection
        sa = (source_address, 0)
        if hasattr(hc, 'source_address'):  # Python 2.7+
            hc.source_address = sa
        else:  # Python 2.6
            # No source_address support: patch connect() to use our helper
            def _hc_connect(self, *args, **kwargs):
                sock = _create_connection(
                    (self.host, self.port), self.timeout, sa)
                if is_https:
                    self.sock = ssl.wrap_socket(
                        sock, self.key_file, self.cert_file,
                        ssl_version=ssl.PROTOCOL_TLSv1)
                else:
                    self.sock = sock
            hc.connect = functools.partial(_hc_connect, hc)

    return hc
2589
2590
def handle_youtubedl_headers(headers):
    """Strip the internal 'Youtubedl-no-compression' marker header; when it is
    present, also drop any Accept-Encoding header so no compression is requested."""
    if 'Youtubedl-no-compression' not in headers:
        return headers

    filtered = dict(
        (k, v) for k, v in headers.items()
        if k.lower() != 'accept-encoding')
    del filtered['Youtubedl-no-compression']
    return filtered
2599
2600
2601 class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
2602 """Handler for HTTP requests and responses.
2603
2604 This class, when installed with an OpenerDirector, automatically adds
2605 the standard headers to every HTTP request and handles gzipped and
2606 deflated responses from web servers. If compression is to be avoided in
2607 a particular request, the original request in the program code only has
2608 to include the HTTP header "Youtubedl-no-compression", which will be
2609 removed before making the real request.
2610
2611 Part of this code was copied from:
2612
2613 http://techknack.net/python-urllib2-handlers/
2614
2615 Andrew Rowls, the author of that code, agreed to release it to the
2616 public domain.
2617 """
2618
    def __init__(self, params, *args, **kwargs):
        # params: YoutubeDL options dict, kept for the connection factory
        # (e.g. to honour 'source_address')
        compat_urllib_request.HTTPHandler.__init__(self, *args, **kwargs)
        self._params = params
2622
2623 def http_open(self, req):
2624 conn_class = compat_http_client.HTTPConnection
2625
2626 socks_proxy = req.headers.get('Ytdl-socks-proxy')
2627 if socks_proxy:
2628 conn_class = make_socks_conn_class(conn_class, socks_proxy)
2629 del req.headers['Ytdl-socks-proxy']
2630
2631 return self.do_open(functools.partial(
2632 _create_http_connection, self, conn_class, False),
2633 req)
2634
2635 @staticmethod
2636 def deflate(data):
2637 try:
2638 return zlib.decompress(data, -zlib.MAX_WBITS)
2639 except zlib.error:
2640 return zlib.decompress(data)
2641
2642 def http_request(self, req):
2643 # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
2644 # always respected by websites, some tend to give out URLs with non percent-encoded
2645 # non-ASCII characters (see telemb.py, ard.py [#3412])
2646 # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
2647 # To work around aforementioned issue we will replace request's original URL with
2648 # percent-encoded one
2649 # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
2650 # the code of this workaround has been moved here from YoutubeDL.urlopen()
2651 url = req.get_full_url()
2652 url_escaped = escape_url(url)
2653
2654 # Substitute URL if any change after escaping
2655 if url != url_escaped:
2656 req = update_Request(req, url=url_escaped)
2657
2658 for h, v in std_headers.items():
2659 # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
2660 # The dict keys are capitalized because of this bug by urllib
2661 if h.capitalize() not in req.headers:
2662 req.add_header(h, v)
2663
2664 req.headers = handle_youtubedl_headers(req.headers)
2665
2666 if sys.version_info < (2, 7) and '#' in req.get_full_url():
2667 # Python 2.6 is brain-dead when it comes to fragments
2668 req._Request__original = req._Request__original.partition('#')[0]
2669 req._Request__r_type = req._Request__r_type.partition('#')[0]
2670
2671 return req
2672
2673 def http_response(self, req, resp):
2674 old_resp = resp
2675 # gzip
2676 if resp.headers.get('Content-encoding', '') == 'gzip':
2677 content = resp.read()
2678 gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
2679 try:
2680 uncompressed = io.BytesIO(gz.read())
2681 except IOError as original_ioerror:
2682 # There may be junk add the end of the file
2683 # See http://stackoverflow.com/q/4928560/35070 for details
2684 for i in range(1, 1024):
2685 try:
2686 gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
2687 uncompressed = io.BytesIO(gz.read())
2688 except IOError:
2689 continue
2690 break
2691 else:
2692 raise original_ioerror
2693 resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
2694 resp.msg = old_resp.msg
2695 del resp.headers['Content-encoding']
2696 # deflate
2697 if resp.headers.get('Content-encoding', '') == 'deflate':
2698 gz = io.BytesIO(self.deflate(resp.read()))
2699 resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
2700 resp.msg = old_resp.msg
2701 del resp.headers['Content-encoding']
2702 # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
2703 # https://github.com/ytdl-org/youtube-dl/issues/6457).
2704 if 300 <= resp.code < 400:
2705 location = resp.headers.get('Location')
2706 if location:
2707 # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
2708 if sys.version_info >= (3, 0):
2709 location = location.encode('iso-8859-1').decode('utf-8')
2710 else:
2711 location = location.decode('utf-8')
2712 location_escaped = escape_url(location)
2713 if location != location_escaped:
2714 del resp.headers['Location']
2715 if sys.version_info < (3, 0):
2716 location_escaped = location_escaped.encode('utf-8')
2717 resp.headers['Location'] = location_escaped
2718 return resp
2719
2720 https_request = http_request
2721 https_response = http_response
2722
2723
def make_socks_conn_class(base_class, socks_proxy):
    """Return a subclass of base_class that connects through a SOCKS proxy.

    base_class must be HTTPConnection or HTTPSConnection; socks_proxy is a
    proxy URL such as socks5://user:pass@host:port.

    Raises ValueError for an unsupported proxy scheme (previously an unknown
    scheme fell through all branches and surfaced later as an opaque
    NameError when socks_type was first referenced).
    """
    assert issubclass(base_class, (
        compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))

    url_components = compat_urlparse.urlparse(socks_proxy)
    scheme = url_components.scheme.lower()
    if scheme == 'socks5':
        socks_type = ProxyType.SOCKS5
    elif scheme in ('socks', 'socks4'):
        socks_type = ProxyType.SOCKS4
    elif scheme == 'socks4a':
        socks_type = ProxyType.SOCKS4A
    else:
        raise ValueError('Unsupported SOCKS proxy scheme: %s' % url_components.scheme)

    def unquote_if_non_empty(s):
        # Percent-decode credentials; pass through None/empty unchanged
        if not s:
            return s
        return compat_urllib_parse_unquote_plus(s)

    proxy_args = (
        socks_type,
        url_components.hostname, url_components.port or 1080,
        True,  # Remote DNS
        unquote_if_non_empty(url_components.username),
        unquote_if_non_empty(url_components.password),
    )

    class SocksConnection(base_class):
        def connect(self):
            self.sock = sockssocket()
            self.sock.setproxy(*proxy_args)
            if type(self.timeout) in (int, float):
                self.sock.settimeout(self.timeout)
            self.sock.connect((self.host, self.port))

            if isinstance(self, compat_http_client.HTTPSConnection):
                if hasattr(self, '_context'):  # Python > 2.6
                    self.sock = self._context.wrap_socket(
                        self.sock, server_hostname=self.host)
                else:
                    self.sock = ssl.wrap_socket(self.sock)

    return SocksConnection
2765
2766
class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
    """HTTPS handler supporting a custom connection class and SOCKS proxies."""

    def __init__(self, params, https_conn_class=None, *args, **kwargs):
        compat_urllib_request.HTTPSHandler.__init__(self, *args, **kwargs)
        self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
        self._params = params

    def https_open(self, req):
        connection_class = self._https_conn_class
        extra_kwargs = {}

        # Forward SSL context / hostname checking where this Python supports them
        if hasattr(self, '_context'):  # python > 2.6
            extra_kwargs['context'] = self._context
        if hasattr(self, '_check_hostname'):  # python 3.x
            extra_kwargs['check_hostname'] = self._check_hostname

        # Internal Ytdl-socks-proxy header selects a SOCKS proxy; strip it
        # so it never reaches the server
        proxy_url = req.headers.get('Ytdl-socks-proxy')
        if proxy_url:
            connection_class = make_socks_conn_class(connection_class, proxy_url)
            del req.headers['Ytdl-socks-proxy']

        conn_factory = functools.partial(
            _create_http_connection, self, connection_class, True)
        return self.do_open(conn_factory, req, **extra_kwargs)
2790
2791
class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    # Prefix curl/Mozilla use to mark HttpOnly cookies in cookies.txt
    _HTTPONLY_PREFIX = '#HttpOnly_'
    # Number of tab-separated fields in a valid cookies.txt entry
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by youtube-dl. Do not edit.

'''
    # Field layout of one cookies.txt line, in file order
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """
        Save cookies to a file.

        Most of the code is taken from CPython 3.8 and slightly adapted
        to support cookie files with UTF-8 in both python 2 and 3.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty
        # string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with io.open(filename, 'w', encoding='utf-8') as f:
            f.write(self._HEADER)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                # Serialize each field per the Netscape cookies.txt format
                if cookie.secure:
                    secure = 'TRUE'
                else:
                    secure = 'FALSE'
                if cookie.domain.startswith('.'):
                    initial_dot = 'TRUE'
                else:
                    initial_dot = 'FALSE'
                if cookie.expires is not None:
                    expires = compat_str(cookie.expires)
                else:
                    expires = ''
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ''
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    '\t'.join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value]) + '\n')

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Validate one cookies.txt line; raises LoadError on malformed
            # entries so the caller can skip them with a warning.
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
            return line

        cf = io.StringIO()
        with io.open(filename, encoding='utf-8') as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except compat_cookiejar.LoadError as e:
                    # Skip malformed entries instead of aborting the whole load
                    write_string(
                        'WARNING: skipping cookie file entry due to %s: %r\n'
                        % (e, line), sys.stderr)
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True
2908
2909
class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
    """Cookie processor that mirrors HTTPCookieProcessor.

    Kept as a separate class so cookie handling quirks can be patched in one
    place; the percent-encoding workaround below is currently disabled.
    """

    def __init__(self, cookiejar=None):
        compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)

    def http_response(self, request, response):
        # Python 2 will choke on next HTTP request in row if there are non-ASCII
        # characters in Set-Cookie HTTP header of last response (see
        # https://github.com/ytdl-org/youtube-dl/issues/6769).
        # In order to at least prevent crashing we will percent encode Set-Cookie
        # header before HTTPCookieProcessor starts processing it.
        # NOTE: this workaround is deliberately disabled (kept for reference):
        # if sys.version_info < (3, 0) and response.headers:
        #     for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
        #         set_cookie = response.headers.get(set_cookie_header)
        #         if set_cookie:
        #             set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
        #             if set_cookie != set_cookie_escaped:
        #                 del response.headers[set_cookie_header]
        #                 response.headers[set_cookie_header] = set_cookie_escaped
        return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)

    # HTTPS traffic gets the same cookie handling as plain HTTP
    https_request = compat_urllib_request.HTTPCookieProcessor.http_request
    https_response = http_response
2932
2933
class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
    """YoutubeDL redirect handler

    The code is based on HTTPRedirectHandler implementation from CPython [1].

    This redirect handler solves two issues:
     - ensures redirect URL is always unicode under python 2
     - introduces support for experimental HTTP response status code
       308 Permanent Redirect [2] used by some sites [3]

    1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
    2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
    3. https://github.com/ytdl-org/youtube-dl/issues/28768
    """

    # 302 handling covers all the other redirect codes, including 308
    http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        # Only GET/HEAD may follow any redirect; POST only 301/302/303
        if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
                 or code in (301, 302, 303) and m == "POST")):
            raise compat_HTTPError(req.full_url, code, msg, headers, fp)
        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # On python 2 urlh.geturl() may sometimes return redirect URL
        # as byte string instead of unicode. This workaround allows
        # to force it always return unicode.
        if sys.version_info[0] < 3:
            newurl = compat_str(newurl)

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # Drop body-describing headers: the redirected request carries no body
        CONTENT_HEADERS = ("content-length", "content-type")
        # NB: don't use dict comprehension for python 2.6 compatibility
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return compat_urllib_request.Request(
            newurl, headers=newheaders, origin_req_host=req.origin_req_host,
            unverifiable=True)
2989
2990
def extract_timezone(date_str):
    """Split a trailing UTC offset or timezone name off date_str.

    Returns (timezone, date_str): timezone is a datetime.timedelta and
    date_str has any recognized timezone suffix removed.
    """
    m = re.search(
        r'''(?x)
            ^.{8,}?                                            # >=8 char non-TZ prefix, if present
            (?P<tz>Z|                                          # just the UTC Z, or
                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                 # preceded by 4 digits or hh:mm or
                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))   # not preceded by 3 alpha word or >= 4 alpha or 2 digits
                [ ]?                                           # optional space
                (?P<sign>\+|-)                                 # +/-
                (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})     # hh[:]mm
            $)
        ''', date_str)
    if not m:
        # No numeric offset: try a trailing alphabetic zone name (e.g. "EST")
        m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
        timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
        if timezone is not None:
            date_str = date_str[:-len(m.group('tz'))]
        # Unrecognized names fall back to a zero offset
        return datetime.timedelta(hours=timezone or 0), date_str

    date_str = date_str[:-len(m.group('tz'))]
    if not m.group('sign'):
        # A plain 'Z' suffix means UTC
        return datetime.timedelta(), date_str
    direction = 1 if m.group('sign') == '+' else -1
    offset = datetime.timedelta(
        hours=direction * int(m.group('hours')),
        minutes=direction * int(m.group('minutes')))
    return offset, date_str
3019
3020
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """

    if date_str is None:
        return None

    # strptime's %S cannot handle fractional seconds, so drop them
    date_str = re.sub(r'\.[0-9]+', '', date_str)

    if timezone is None:
        timezone, date_str = extract_timezone(date_str)

    fmt = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
    try:
        dt = datetime.datetime.strptime(date_str, fmt) - timezone
    except ValueError:
        # Unparseable date: mirror the original behaviour of returning None
        return None
    return calendar.timegm(dt.timetuple())
3038
3039
def date_formats(day_first=True):
    """Return the tuple of candidate date formats for the given ordering."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
3042
3043
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""

    if date_str is None:
        return None

    upload_date = None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    # Discard any trailing timezone offset/name
    _, date_str = extract_timezone(date_str)

    # Try every known format; a later matching format intentionally wins
    for fmt in date_formats(day_first):
        try:
            upload_date = datetime.datetime.strptime(date_str, fmt).strftime('%Y%m%d')
        except ValueError:
            pass

    if upload_date is None:
        # Last resort: RFC 2822 style dates
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            try:
                upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
            except ValueError:
                pass

    if upload_date is not None:
        return compat_str(upload_date)
3070
3071
def unified_timestamp(date_str, day_first=True):
    """Return a POSIX timestamp parsed from a free-form date string.

    day_first selects day-first (DD.MM) over month-first (MM.DD)
    interpretation of ambiguous numeric dates. Returns None when nothing
    could be parsed.
    """
    if date_str is None:
        return None

    # Drop commas/pipes and (possibly abbreviated) weekday names,
    # then collapse runs of whitespace
    date_str = re.sub(r'\s+', ' ', re.sub(
        r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))

    # 12-hour clock: remember to add 12 hours if "PM" appears anywhere
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        try:
            dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
            return calendar.timegm(dt.timetuple())
        except ValueError:
            pass
    # Fall back to RFC 2822 style dates
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
3104
3105
def determine_ext(url, default_ext='unknown_video'):
    """Guess a media file extension from a URL, or return default_ext."""
    if url is None or '.' not in url:
        return default_ext
    # Take whatever follows the last '.' before any query string
    candidate = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    stripped = candidate.rstrip('/')
    if stripped in KNOWN_EXTENSIONS:
        return stripped
    return default_ext
3117
3118
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build a subtitle file name, e.g. video.mp4 + en/vtt -> video.en.vtt."""
    sub_ext = '%s.%s' % (sub_lang, sub_format)
    return replace_extension(filename, sub_ext, expected_real_ext)
3121
3122
def date_from_str(date_str):
    """
    Return a datetime object from a string in the format YYYYMMDD or
    (now|today)[+-][0-9](day|week|month|year)(s)?"""
    today = datetime.date.today()
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - datetime.timedelta(days=1)
    match = re.match(r'(now|today)(?P<sign>[+-])(?P<time>\d+)(?P<unit>day|week|month|year)(s)?', date_str)
    if match is None:
        # Plain absolute date
        return datetime.datetime.strptime(date_str, '%Y%m%d').date()
    amount = int(match.group('time'))
    if match.group('sign') == '-':
        amount = -amount
    unit = match.group('unit')
    # A bad approximation: months and years become fixed day counts
    if unit == 'month':
        unit, amount = 'day', amount * 30
    elif unit == 'year':
        unit, amount = 'day', amount * 365
    return today + datetime.timedelta(**{unit + 's': amount})
3150
3151
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    # Anything that is not exactly eight digits passes through unchanged
    return '-'.join(match.groups()) if match else date_str
3160
3161
class DateRange(object):
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        self.start = date_from_str(start) if start is not None else datetime.datetime.min.date()
        self.end = date_from_str(end) if end is not None else datetime.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, datetime.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __str__(self):
        return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
3191
3192
def platform_name():
    """ Returns the platform name as a compat_str """
    name = platform.platform()
    # On Python 2 this may come back as a byte string
    if isinstance(name, bytes):
        name = name.decode(preferredencoding())

    assert isinstance(name, compat_str)
    return name
3201
3202
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes
    import ctypes.wintypes

    # Map stdout/stderr file descriptors to Windows standard handle IDs
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # WriteConsoleW only works on real console handles, not files/pipes
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character outside the Basic Multilingual Plane
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write BMP characters in chunks; non-BMP characters one at a time
        # (they occupy two UTF-16 code units)
        count = min(next_nonbmp_pos(s), 1024)

        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            s = s[written.value:]
    return True
3276
3277
def write_string(s, out=None, encoding=None):
    """Write the text string s to stream out (default: sys.stderr).

    Handles Windows consoles, byte-mode streams and Python 2/3 encoding
    differences; encoding overrides the stream's own encoding when given.
    """
    if out is None:
        out = sys.stderr
    assert type(s) == compat_str

    if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
        # Use WriteConsoleW for correct Unicode output on Windows consoles
        if _windows_write_string(s, out):
            return

    if ('b' in getattr(out, 'mode', '')
            or sys.version_info[0] < 3):  # Python 2 lies about mode of sys.stderr
        # Byte stream: encode ourselves, dropping unencodable characters
        byt = s.encode(encoding or preferredencoding(), 'ignore')
        out.write(byt)
    elif hasattr(out, 'buffer'):
        # Text stream with an underlying binary buffer (Python 3)
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
        byt = s.encode(enc, 'ignore')
        out.buffer.write(byt)
    else:
        out.write(s)
    out.flush()
3298
3299
def bytes_to_intlist(bs):
    """Convert a bytes/str value into a list of integer byte values."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # Python 3: indexing bytes yields ints
        return list(bs)
    # Python 2: indexing a str yields 1-char strings
    return [ord(ch) for ch in bs]
3307
3308
def intlist_to_bytes(xs):
    """Convert an iterable of byte values (ints in 0-255) into bytes.

    Uses bytearray instead of building a per-call struct format string
    ('%dB' % len(xs)); works identically on Python 2 and 3 and also accepts
    arbitrary iterables, not just sized sequences.
    """
    if not xs:
        return b''
    return bytes(bytearray(xs))
3313
3314
# Cross-platform file locking
if sys.platform == 'win32':
    import ctypes.wintypes
    import msvcrt

    # OVERLAPPED structure required by LockFileEx/UnlockFileEx
    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.windll.kernel32
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Lock the largest possible byte range
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive):
        # Lock the whole file via LockFileEx; 0x2 = LOCKFILE_EXCLUSIVE_LOCK
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)
        handle = msvcrt.get_osfhandle(f.fileno())
        if not LockFileEx(handle, 0x2 if exclusive else 0x0, 0,
                          whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Locking file failed: %r' % ctypes.FormatError())

    def _unlock_file(f):
        # Release the lock taken by _lock_file (reuses the stored OVERLAPPED)
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0,
                            whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    # Some platforms, such as Jython, are missing fcntl
    try:
        import fcntl

        def _lock_file(f, exclusive):
            # POSIX advisory lock: exclusive (write) or shared (read)
            fcntl.flock(f, fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH)

        def _unlock_file(f):
            fcntl.flock(f, fcntl.LOCK_UN)
    except ImportError:
        UNSUPPORTED_MSG = 'file locking is not supported on this platform'

        def _lock_file(f, exclusive):
            raise IOError(UNSUPPORTED_MSG)

        def _unlock_file(f):
            raise IOError(UNSUPPORTED_MSG)
3388
3389
class locked_file(object):
    """File wrapper taking an advisory lock for the duration of a with-block.

    Read mode acquires a shared lock, write/append modes an exclusive one.
    """

    def __init__(self, filename, mode, encoding=None):
        assert mode in ['r', 'a', 'w']
        self.f = io.open(filename, mode, encoding=encoding)
        self.mode = mode

    def __enter__(self):
        want_exclusive = self.mode != 'r'
        try:
            _lock_file(self.f, want_exclusive)
        except IOError:
            # Never leak the file handle if locking fails
            self.f.close()
            raise
        return self

    def __exit__(self, etype, value, traceback):
        try:
            _unlock_file(self.f)
        finally:
            self.f.close()

    def __iter__(self):
        return iter(self.f)

    def write(self, *args):
        return self.f.write(*args)

    def read(self, *args):
        return self.f.read(*args)
3419
3420
def get_filesystem_encoding():
    """Return the file system encoding, defaulting to utf-8 when unknown."""
    encoding = sys.getfilesystemencoding()
    if encoding is None:
        return 'utf-8'
    return encoding
3424
3425
def shell_quote(args):
    """Return a shell-escaped command line built from args.

    Byte-string arguments (e.g. produced by encodeFilename) are decoded
    with the file system encoding first.
    """
    fs_encoding = get_filesystem_encoding()

    def _as_text(arg):
        # We may get a filename encoded with 'encodeFilename'
        if isinstance(arg, bytes):
            return arg.decode(fs_encoding)
        return arg

    return ' '.join(compat_shlex_quote(_as_text(arg)) for arg in args)
3435
3436
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """

    # Merge any data already smuggled into this URL
    url, existing = unsmuggle_url(url, {})
    data.update(existing)
    smuggled = compat_urllib_parse_urlencode(
        {'__youtubedl_smuggle': json.dumps(data)})
    return '%s#%s' % (url, smuggled)
3445
3446
def unsmuggle_url(smug_url, default=None):
    """Extract data embedded by smuggle_url; return (url, data_or_default)."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, payload = smug_url.rpartition('#')
    encoded = compat_parse_qs(payload)['__youtubedl_smuggle'][0]
    return url, json.loads(encoded)
3454
3455
def format_bytes(bytes):
    """Format a byte count as a human readable string, e.g. '1.50KiB'."""
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    # Pick the largest binary unit that keeps the value >= 1
    exponent = 0 if bytes == 0.0 else int(math.log(bytes, 1024.0))
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
3468
3469
def lookup_unit_table(unit_table, s):
    """Parse a leading "<number> <unit>" prefix of s using unit_table.

    Returns the quantity in base units as an int, or None when s does not
    start with a recognized number/unit pair.
    """
    units_re = '|'.join(re.escape(u) for u in unit_table)
    m = re.match(
        r'(?P<num>[0-9]+(?:[,.][0-9]*)?)\s*(?P<unit>%s)\b' % units_re, s)
    if m is None:
        return None
    # Accept a decimal comma as well as a decimal point
    number = float(m.group('num').replace(',', '.'))
    return int(number * unit_table[m.group('unit')])
3479
3480
def parse_filesize(s):
    """Parse a human readable file size (e.g. '5 MiB', '1,5GB') into bytes.

    Returns an int, or None when s is None or unparseable. Decimal (KB=1000)
    and binary (KiB=1024) units are both understood; note the deliberate
    quirk that lower-case-letter + 'B' forms (kB, mB, ...) map to binary
    multiples.
    """
    if s is None:
        return None

    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too
    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
        'KiB': 1024,
        'KB': 1000,
        'kB': 1024,
        'Kb': 1000,
        'kb': 1000,
        'kilobytes': 1000,
        'kibibytes': 1024,
        'MiB': 1024 ** 2,
        'MB': 1000 ** 2,
        'mB': 1024 ** 2,
        'Mb': 1000 ** 2,
        'mb': 1000 ** 2,
        'megabytes': 1000 ** 2,
        'mebibytes': 1024 ** 2,
        'GiB': 1024 ** 3,
        'GB': 1000 ** 3,
        'gB': 1024 ** 3,
        'Gb': 1000 ** 3,
        'gb': 1000 ** 3,
        'gigabytes': 1000 ** 3,
        'gibibytes': 1024 ** 3,
        'TiB': 1024 ** 4,
        'TB': 1000 ** 4,
        'tB': 1024 ** 4,
        'Tb': 1000 ** 4,
        'tb': 1000 ** 4,
        'terabytes': 1000 ** 4,
        'tebibytes': 1024 ** 4,
        'PiB': 1024 ** 5,
        'PB': 1000 ** 5,
        'pB': 1024 ** 5,
        'Pb': 1000 ** 5,
        'pb': 1000 ** 5,
        'petabytes': 1000 ** 5,
        'pebibytes': 1024 ** 5,
        'EiB': 1024 ** 6,
        'EB': 1000 ** 6,
        'eB': 1024 ** 6,
        'Eb': 1000 ** 6,
        'eb': 1000 ** 6,
        'exabytes': 1000 ** 6,
        'exbibytes': 1024 ** 6,
        'ZiB': 1024 ** 7,
        'ZB': 1000 ** 7,
        'zB': 1024 ** 7,
        'Zb': 1000 ** 7,
        'zb': 1000 ** 7,
        'zettabytes': 1000 ** 7,
        'zebibytes': 1024 ** 7,
        'YiB': 1024 ** 8,
        'YB': 1000 ** 8,
        'yB': 1024 ** 8,
        'Yb': 1000 ** 8,
        'yb': 1000 ** 8,
        'yottabytes': 1000 ** 8,
        'yobibytes': 1024 ** 8,
    }

    return lookup_unit_table(_UNIT_TABLE, s)
3550
3551
def parse_count(s):
    """Parse a human readable count such as '1.5M' or '1,000' into an int."""
    if s is None:
        return None

    s = s.strip()

    # Plain numbers, possibly with thousands separators
    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    multipliers = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
    }
    return lookup_unit_table(multipliers, s)
3571
3572
def parse_resolution(s):
    """Extract video dimensions from strings like '1920x1080', '720p' or '4K'."""
    if s is None:
        return {}

    match = re.search(r'\b(?P<w>\d+)\s*[xX×]\s*(?P<h>\d+)\b', s)
    if match is not None:
        return {
            'width': int(match.group('w')),
            'height': int(match.group('h')),
        }

    match = re.search(r'\b(\d+)[pPiI]\b', s)
    if match is not None:
        return {'height': int(match.group(1))}

    match = re.search(r'\b([48])[kK]\b', s)
    if match is not None:
        # 4K -> 2160 lines, 8K -> 4320 lines (multiplier * 540)
        return {'height': int(match.group(1)) * 540}

    return {}
3593
3594
def parse_bitrate(s):
    """Return the integer bitrate parsed from a '... kbps' string, else None."""
    if not isinstance(s, compat_str):
        return
    match = re.search(r'\b(\d+)\s*kbps', s)
    return int(match.group(1)) if match else None
3601
3602
def month_by_name(name, lang='en'):
    """Return the 1-based month number for a (locale-independent) month name,
    looked up in the given language (falling back to English), or None."""
    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    if name not in names:
        return None
    return names.index(name) + 1
3612
3613
def month_by_abbreviation(abbrev):
    """Return the 1-based month number for an English three-letter month
    abbreviation, or None if it is not recognized."""
    abbreviations = [month[:3] for month in ENGLISH_MONTH_NAMES]
    try:
        return abbreviations.index(abbrev) + 1
    except ValueError:
        return None
3622
3623
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML.

    Only ampersands that do not already start a character or entity
    reference are escaped, so valid XML passes through unchanged.
    """
    # BUG FIX: the replacement string was '&' (a no-op); it must be '&amp;'.
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        '&amp;',
        xml_str)
3630
3631
def setproctitle(title):
    """Best-effort: set the process name via libc prctl().

    Silently does nothing on platforms without a loadable 'libc.so.6'
    (Jython, Windows, non-glibc systems) or whose libc lacks prctl.
    """
    assert isinstance(title, compat_str)

    # ctypes in Jython is not complete
    # http://bugs.jython.org/issue2148
    if sys.platform.startswith('java'):
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode('utf-8')
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # 15 is PR_SET_NAME on Linux
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
3656
3657
def remove_start(s, start):
    """Strip the prefix `start` from `s` when present; tolerates s=None."""
    if s is None or not s.startswith(start):
        return s
    return s[len(start):]
3660
3661
def remove_end(s, end):
    """Strip the suffix `end` from `s` when present; tolerates s=None."""
    if s is None or not s.endswith(end):
        return s
    return s[:-len(end)]
3664
3665
def remove_quotes(s):
    """Remove one pair of matching single or double quotes surrounding `s`."""
    if s is None or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
3673
3674
def url_basename(url):
    """Return the last path component of a URL, ignoring query and fragment."""
    path = compat_urlparse.urlparse(url).path
    return path.strip('/').rpartition('/')[2]
3678
3679
def base_url(url):
    """Return the URL truncated after its last '/' before any query/fragment.

    Raises AttributeError (on .group() of None) if url is not http(s).
    """
    mobj = re.match(r'https?://[^?#&]+/', url)
    return mobj.group()
3682
3683
def urljoin(base, path):
    """Join `base` and `path` into an absolute URL, returning None when
    either part is unusable."""
    if isinstance(path, bytes):
        path = path.decode('utf-8')
    if not path or not isinstance(path, compat_str):
        return None
    # An already absolute (possibly scheme-relative) path wins outright
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path
    if isinstance(base, bytes):
        base = base.decode('utf-8')
    if not isinstance(base, compat_str):
        return None
    if not re.match(r'^(?:https?:)?//', base):
        return None
    return compat_urlparse.urljoin(base, path)
3697
3698
class HEADRequest(compat_urllib_request.Request):
    """A urllib Request that always uses the HTTP HEAD method."""
    def get_method(self):
        return 'HEAD'
3702
3703
class PUTRequest(compat_urllib_request.Request):
    """A urllib Request that always uses the HTTP PUT method."""
    def get_method(self):
        return 'PUT'
3707
3708
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert v to int (scaled by invscale, floor-divided by scale);
    return `default` for None/empty/unconvertible input.

    If get_attr is given, v is replaced by getattr(v, get_attr, None) first.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    if v in (None, ''):
        return default
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        return default
3719
3720
def str_or_none(v, default=None):
    """Stringify v, or return `default` when v is None."""
    if v is None:
        return default
    return compat_str(v)
3723
3724
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, compat_integer_types):
        return int_str
    if isinstance(int_str, compat_str):
        # Drop thousands separators and a leading '+' before conversion
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
3732
3733
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert v to float (scaled by invscale/scale), or return `default`."""
    if v is None:
        return default
    try:
        result = float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
    return result
3741
3742
def bool_or_none(v, default=None):
    """Return v unchanged when it is a genuine bool, otherwise `default`."""
    if isinstance(v, bool):
        return v
    return default
3745
3746
def strip_or_none(v, default=None):
    """Return v.strip() for string input, otherwise `default`."""
    if isinstance(v, compat_str):
        return v.strip()
    return default
3749
3750
def url_or_none(url):
    """Return the stripped URL if it has a supported scheme (http(s), rtmp
    variants, rtsp, mms, ftp(s)) or is scheme-relative; otherwise None."""
    if not url or not isinstance(url, compat_str):
        return None
    url = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url):
        return url
    return None
3756
3757
def parse_duration(s):
    """Parse a duration string into a float number of seconds, or None.

    Accepts colon-separated forms ('1:23:45'), unit-suffixed/ISO 8601 forms
    ('2h 3m 4s', 'PT1H2M3S') and decimal forms ('2.5 hours', '11.5 mins').
    """
    if not isinstance(s, compat_basestring):
        return None

    s = s.strip()

    days, hours, mins, secs, ms = [None] * 5
    # Colon-separated form: [[[DD:]HH:]MM:]SS[.ms]
    m = re.match(r'(?:(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?Z?$', s)
    if m:
        days, hours, mins, secs, ms = m.groups()
    else:
        # Unit-suffixed / ISO 8601 form. Years, months and weeks are
        # accepted by the pattern but discarded (no capturing groups).
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:ours?)?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # Last resort: decimal '11.5 hours' / '2.5 mins'
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    # Sum whichever components matched; `ms` still contains its leading dot
    duration = 0
    if secs:
        duration += float(secs)
    if mins:
        duration += float(mins) * 60
    if hours:
        duration += float(hours) * 60 * 60
    if days:
        duration += float(days) * 24 * 60 * 60
    if ms:
        duration += float(ms)
    return duration
3814
3815
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert `ext` before the file's extension (a.mp4 -> a.temp.mp4).

    When expected_real_ext is given and the actual extension differs,
    `ext` is appended after the whole filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        return '{0}.{1}'.format(filename, ext)
    return '{0}.{1}{2}'.format(name, ext, real_ext)
3822
3823
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the file's extension with `ext`; when expected_real_ext is
    given and differs from the actual one, append instead of replacing."""
    name, real_ext = os.path.splitext(filename)
    base = name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename
    return '{0}.{1}'.format(base, ext)
3829
3830
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    try:
        proc = subprocess.Popen(
            [exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        process_communicate_or_kill(proc)
    except OSError:
        return False
    return exe
3840
3841
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized='present'):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if youtube-dl is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        out, _ = process_communicate_or_kill(subprocess.Popen(
            [encodeArgument(exe)] + args,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT))
    except OSError:
        # Executable not found (or not startable)
        return False
    if isinstance(out, bytes):  # Python 2.x
        out = out.decode('ascii', 'ignore')
    # Delegate the actual pattern matching on the captured output
    return detect_exe_version(out, version_re, unrecognized)
3859
3860
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from an executable's output using version_re
    (or a generic 'version X' pattern); return `unrecognized` on no match."""
    assert isinstance(output, compat_str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    mobj = re.search(version_re, output)
    return mobj.group(1) if mobj else unrecognized
3870
3871
class LazyList(compat_collections_abc.Sequence):
    """Lazy immutable list from an iterable
    Note that slices of a LazyList are lists and not LazyList"""

    class IndexError(IndexError):
        # Subclass of the builtin IndexError so it can be caught either way;
        # optionally carries the original exception as __cause__.
        def __init__(self, cause=None):
            if cause:
                # reproduce `raise from`
                self.__cause__ = cause
            super(IndexError, self).__init__()

    def __init__(self, iterable, **kwargs):
        # kwarg-only
        reverse = kwargs.get('reverse', False)
        _cache = kwargs.get('_cache')

        self._iterable = iter(iterable)
        # _cache holds the items consumed so far; it may be shared with
        # a reversed/copied view created from this instance
        self._cache = [] if _cache is None else _cache
        self._reversed = reverse

    def __iter__(self):
        if self._reversed:
            # We need to consume the entire iterable to iterate in reverse
            for item in self.exhaust():
                yield item
            return
        # Replay cached items first, then keep consuming (and caching)
        for item in self._cache:
            yield item
        for item in self._iterable:
            self._cache.append(item)
            yield item

    def _exhaust(self):
        self._cache.extend(self._iterable)
        self._iterable = []  # Discard the emptied iterable to make it pickle-able
        return self._cache

    def exhaust(self):
        """Evaluate the entire iterable"""
        return self._exhaust()[::-1 if self._reversed else 1]

    @staticmethod
    def _reverse_index(x):
        # ~x == -(x + 1): maps a forward index to the equivalent
        # negative index counted from the end
        return None if x is None else ~x

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            if self._reversed:
                idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self._reversed:
                idx = self._reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')
        if ((start or 0) < 0 or (stop or 0) < 0
                or (start is None and step < 0)
                or (stop is None and step > 0)):
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            self._exhaust()
            try:
                return self._cache[idx]
            except IndexError as e:
                raise self.IndexError(e)
        # Consume only as many additional items as the largest index needs
        n = max(start or 0, stop or 0) - len(self._cache) + 1
        if n > 0:
            self._cache.extend(itertools.islice(self._iterable, n))
        try:
            return self._cache[idx]
        except IndexError as e:
            raise self.IndexError(e)

    def __bool__(self):
        try:
            # Fetch the first logical element; raises when empty
            self[-1] if self._reversed else self[0]
        except self.IndexError:
            return False
        return True

    def __len__(self):
        self._exhaust()
        return len(self._cache)

    def __reversed__(self):
        # The reversed view shares the iterable and the cache
        return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)

    def __copy__(self):
        return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
3969
3970
class PagedList(object):
    """Base class for paged result lists; subclasses implement getslice()."""
    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())
3975
3976
class OnDemandPagedList(PagedList):
    """PagedList that fetches pages on demand via pagefunc(pagenum),
    optionally caching every fetched page."""

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._use_cache = use_cache
        if use_cache:
            # pagenum -> list of page results
            self._cache = {}

    def getslice(self, start=0, end=None):
        """Return items with indices [start, end) as a plain list."""
        res = []
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = pagenum * self._pagesize + self._pagesize
            if start >= nextfirstid:
                continue

            page_results = None
            if self._use_cache:
                page_results = self._cache.get(pagenum)
            if page_results is None:
                page_results = list(self._pagefunc(pagenum))
            if self._use_cache:
                self._cache[pagenum] = page_results

            # Offset of the requested range inside the current page
            startv = (
                start % self._pagesize
                if firstid <= start < nextfirstid
                else 0)

            endv = (
                ((end - 1) % self._pagesize) + 1
                if (end is not None and firstid <= end <= nextfirstid)
                else None)

            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            res.extend(page_results)

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
        return res
4027
4028
class InAdvancePagedList(PagedList):
    """PagedList for sources where the total page count is known up front."""

    def __init__(self, pagefunc, pagecount, pagesize):
        self._pagefunc = pagefunc
        self._pagecount = pagecount
        self._pagesize = pagesize

    def getslice(self, start=0, end=None):
        """Return items with indices [start, end) as a plain list."""
        res = []
        start_page = start // self._pagesize
        end_page = (
            self._pagecount if end is None else (end // self._pagesize + 1))
        # Number of leading items to drop from the first fetched page
        skip_elems = start - start_page * self._pagesize
        # Remaining number of items still wanted (None = unbounded)
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page = list(self._pagefunc(pagenum))
            if skip_elems:
                page = page[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page) < only_more:
                    only_more -= len(page)
                else:
                    # This page satisfies the request; truncate and stop
                    page = page[:only_more]
                    res.extend(page)
                    break
            res.extend(page)
        return res
4056
4057
def uppercase_escape(s):
    """Decode literal \\UXXXXXXXX escape sequences embedded in `s`."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\U[0-9a-fA-F]{8}',
        lambda match: decode(match.group(0))[0],
        s)
4064
4065
def lowercase_escape(s):
    """Decode literal \\uXXXX escape sequences embedded in `s`."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(
        r'\\u[0-9a-fA-F]{4}',
        lambda match: decode(match.group(0))[0],
        s)
4072
4073
def escape_rfc3986(s):
    """Escape non-ASCII characters as suggested by RFC 3986"""
    # Python 2 quote() chokes on unicode input, so pre-encode to UTF-8 bytes
    if sys.version_info < (3, 0) and isinstance(s, compat_str):
        s = s.encode('utf-8')
    # ensure unicode: after quoting, it can always be converted
    # (the safe-characters set keeps all RFC 3986 reserved characters intact)
    return compat_str(compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]"))
4080
4081
def escape_url(url):
    """Escape URL as suggested by RFC 3986"""
    parsed = compat_urllib_parse_urlparse(url)
    # Host goes through IDNA; every other component is percent-escaped
    return parsed._replace(
        netloc=parsed.netloc.encode('idna').decode('ascii'),
        path=escape_rfc3986(parsed.path),
        params=escape_rfc3986(parsed.params),
        query=escape_rfc3986(parsed.query),
        fragment=escape_rfc3986(parsed.fragment),
    ).geturl()
4092
4093
def read_batch_urls(batch_fd):
    """Read URLs from a file-like object, dropping BOMs, blank lines and
    comment lines; closes the file object when done."""
    def fixup(url):
        if not isinstance(url, compat_str):
            url = url.decode('utf-8', 'replace')
        BOM_UTF8 = '\xef\xbb\xbf'
        if url.startswith(BOM_UTF8):
            url = url[len(BOM_UTF8):]
        url = url.strip()
        # Lines starting with '#', ';' or ']' are treated as comments
        if url.startswith(('#', ';', ']')):
            return False
        return url

    with contextlib.closing(batch_fd) as fd:
        urls = []
        for line in fd:
            url = fixup(line)
            if url:
                urls.append(url)
        return urls
4108
4109
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ASCII bytes."""
    return compat_urllib_parse_urlencode(*args, **kargs).encode('ascii')
4112
4113
def update_url_query(url, query):
    """Merge the key/value pairs in `query` into the URL's query string
    (existing keys are overwritten)."""
    if not query:
        return url
    parsed = compat_urlparse.urlparse(url)
    params = compat_parse_qs(parsed.query)
    params.update(query)
    encoded = compat_urllib_parse_urlencode(params, True)
    return compat_urlparse.urlunparse(parsed._replace(query=encoded))
4122
4123
def update_Request(req, url=None, data=None, headers={}, query={}):
    """Clone a urllib Request, optionally overriding URL, data, headers or
    query parameters while preserving the HTTP method and timeout."""
    merged_headers = req.headers.copy()
    merged_headers.update(headers)
    # Preserve non-default methods by picking the matching Request subclass
    req_type = {
        'HEAD': HEADRequest,
        'PUT': PUTRequest,
    }.get(req.get_method(), compat_urllib_request.Request)
    new_req = req_type(
        update_url_query(url or req.get_full_url(), query),
        data=data or req.data, headers=merged_headers,
        origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
    if hasattr(req, 'timeout'):
        new_req.timeout = req.timeout
    return new_req
4142
4143
def _multipart_encode_impl(data, boundary):
    """Serialize `data` as multipart/form-data bytes using `boundary`;
    raises ValueError if the boundary occurs inside the payload."""
    content_type = 'multipart/form-data; boundary=%s' % boundary
    boundary_bytes = boundary.encode('ascii')

    parts = []
    for name, value in data.items():
        parts.append(b'--' + boundary_bytes + b'\r\n')
        if isinstance(name, compat_str):
            name = name.encode('utf-8')
        if isinstance(value, compat_str):
            value = value.encode('utf-8')
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        content = b'Content-Disposition: form-data; name="' + name + b'"\r\n\r\n' + value + b'\r\n'
        if boundary_bytes in content:
            raise ValueError('Boundary overlaps with data')
        parts.append(content)
    parts.append(b'--' + boundary_bytes + b'--\r\n')

    return b''.join(parts), content_type
4164
4165
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    has_specified_boundary = boundary is not None

    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            # The boundary happened to occur inside the data: retry with a
            # fresh random one, unless the caller pinned it explicitly
            if has_specified_boundary:
                raise
            boundary = None
4194
4195
def variadic(x, allowed_types=(compat_str, bytes, dict)):
    """Return x unchanged if it is a non-atomic iterable, else wrap it in a
    tuple; types in allowed_types count as atomic even though iterable."""
    return x if isinstance(x, compat_collections_abc.Iterable) and not isinstance(x, allowed_types) else (x,)
4198
4199
def dict_get(d, key_or_keys, default=None, skip_false_values=True):
    """Look up the first usable value among one or several keys in `d`.

    None values are always skipped; other falsy values are skipped too
    unless skip_false_values is False.
    """
    if not isinstance(key_or_keys, (list, tuple)):
        return d.get(key_or_keys, default)
    for key in key_or_keys:
        value = d.get(key)
        if value is None:
            continue
        if skip_false_values and not value:
            continue
        return value
    return default
4208
4209
def try_call(*funcs, **kwargs):
    """Call each function in turn with args/kwargs, returning the first
    result that neither raises a common lookup/type error nor fails the
    optional expected_type check; otherwise None."""
    # kwarg-only parameters (Python 2-compatible spelling)
    expected_type = kwargs.get('expected_type')
    fargs = kwargs.get('args', [])
    fkwargs = kwargs.get('kwargs', {})

    for fn in funcs:
        try:
            result = fn(*fargs, **fkwargs)
        except (AttributeError, KeyError, TypeError, IndexError, ZeroDivisionError):
            continue
        if expected_type is None or isinstance(result, expected_type):
            return result
4225
4226
def try_get(src, getter, expected_type=None):
    """Apply one or several getter callables to `src`, returning the first
    result that doesn't raise a common lookup/type error (and matches
    expected_type when given); otherwise None."""
    getters = getter if isinstance(getter, (list, tuple)) else [getter]
    for g in getters:
        try:
            value = g(src)
        except (AttributeError, KeyError, TypeError, IndexError):
            continue
        if expected_type is None or isinstance(value, expected_type):
            return value
4238
4239
def merge_dicts(*dicts):
    """Merge dicts left to right: earlier values win, except that an empty
    string may later be replaced by a non-empty string; None is skipped."""
    merged = {}
    for current in dicts:
        for key, value in current.items():
            if value is None:
                continue
            if key not in merged:
                merged[key] = value
            elif (isinstance(value, compat_str) and value
                    and isinstance(merged[key], compat_str)
                    and not merged[key]):
                merged[key] = value
    return merged
4252
4253
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Decode `string` to the unicode type unless it already is one.

    NOTE: the default `encoding` is evaluated once, at function
    definition time.
    """
    return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
4256
4257
# US movie ratings mapped to the age limits used throughout this module
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}
4265
4266
# US TV Parental Guidelines ratings mapped to age limits
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
4275
4276
def parse_age_limit(s):
    """Normalize an age limit (int, 'NN+' string, US movie rating or US TV
    rating) to an int, or None when it cannot be interpreted."""
    if type(s) == int:
        # exact type check: bool is a subclass of int and must not match here
        return s if 0 <= s <= 21 else None
    if not isinstance(s, compat_basestring):
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if m:
        return int(m.group('age'))
    if s in US_RATINGS:
        return US_RATINGS[s]
    # Accept 'TV14', 'TV-14' and 'TV_14' style spellings
    m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if m:
        return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
    return None
4291
4292
def strip_jsonp(code):
    """Strip a JSONP callback wrapper and return the bare JSON payload."""
    wrapper_re = r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$'''
    return re.sub(wrapper_re, r'\g<callback_data>', code)
4301
4302
def js_to_json(code):
    """Convert a JavaScript object/value literal into valid JSON text."""
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*'
    SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE)
    # Hex and octal integer literals (optionally used as object keys)
    INTEGER_TABLE = (
        (r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
        (r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
    )

    def fix_kv(m):
        # Rewrite one matched token into its JSON equivalent
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            # Comments, '!' prefixes and trailing commas are simply dropped
            return ""

        if v[0] in ("'", '"'):
            # Normalize string escape sequences to their JSON spellings
            v = re.sub(r'(?s)\\.|"', lambda m: {
                '"': '\\"',
                "\\'": "'",
                '\\\n': '',
                '\\x': '\\u00',
            }.get(m.group(0), m.group(0)), v[1:-1])
        else:
            for regex, base in INTEGER_TABLE:
                im = re.match(regex, v)
                if im:
                    i = int(im.group(1), base)
                    # Integer object keys must become JSON strings
                    return '"%d":' % i if v.endswith(':') else '%d' % i

        # Bare identifiers / single-quoted strings become double-quoted
        return '"%s"' % v

    return re.sub(r'''(?sx)
        "(?:[^"\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^"\\]*"|
        '(?:[^'\\]*(?:\\\\|\\['"nurtbfx/\n]))*[^'\\]*'|
        {comment}|,(?={skip}[\]}}])|
        (?:(?<![0-9])[eE]|[a-df-zA-DF-Z_])[.a-zA-Z_0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{skip}:)?|
        [0-9]+(?={skip}:)|
        !+
        '''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
4343
4344
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        # Position in the list is the quality rank; unknown ids rank lowest
        if qid in quality_ids:
            return quality_ids.index(qid)
        return -1
    return q
4353
4354
# Default output filename template: "<title>-<id>.<ext>"
DEFAULT_OUTTMPL = '%(title)s-%(id)s.%(ext)s'
4356
4357
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    # Truncate so that the result, ellipses included, fits in `length`
    return s[:length - len(ELLIPSES)] + ELLIPSES
4366
4367
def version_tuple(v):
    """Split a dotted/dashed version string into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
4370
4371
def is_outdated_version(version, limit, assume_new=True):
    """Compare two version strings; on missing/unparsable input the answer
    falls back to `not assume_new`."""
    if not version:
        return not assume_new
    try:
        outdated = version_tuple(version) < version_tuple(limit)
    except ValueError:
        return not assume_new
    return outdated
4379
4380
def ytdl_is_updateable():
    """ Returns if youtube-dl can be updated with -U """
    from zipimport import zipimporter

    # True when running from a zip bundle or a frozen (e.g. py2exe) build
    if hasattr(sys, 'frozen'):
        return True
    return isinstance(globals().get('__loader__'), zipimporter)
4386
4387
def args_to_str(args):
    """Shell-quote and join a subprocess argument list for display."""
    # Get a short string representation for a subprocess command
    return ' '.join(compat_shlex_quote(a) for a in args)
4391
4392
def error_to_compat_str(err):
    """Stringify an exception, decoding byte messages on Python 2."""
    err_str = str(err)
    if sys.version_info[0] < 3:
        # On python 2 error byte string must be decoded with proper
        # encoding rather than ascii
        err_str = err_str.decode(preferredencoding())
    return err_str
4400
4401
def mimetype2ext(mt):
    """Map a MIME type to a file extension, falling back to the subtype."""
    if mt is None:
        return None

    # Overrides that need the full type (subtype alone is ambiguous)
    FULL_TYPE_MAP = {
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
        # it's the most popular one
        'audio/mpeg': 'mp3',
    }
    if mt in FULL_TYPE_MAP:
        return FULL_TYPE_MAP[mt]

    # Normalize the subtype: drop parameters ('; charset=...'), lowercase
    subtype = mt.rpartition('/')[2].split(';')[0].strip().lower()

    SUBTYPE_MAP = {
        '3gpp': '3gp',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-flv': 'flv',
        'x-mp4-fragmented': 'mp4',
        'x-ms-sami': 'sami',
        'x-ms-wmv': 'wmv',
        'mpegurl': 'm3u8',
        'x-mpegurl': 'm3u8',
        'vnd.apple.mpegurl': 'm3u8',
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.ms-sstr+xml': 'ism',
        'quicktime': 'mov',
        'mp2t': 'ts',
        'x-wav': 'wav',
    }
    return SUBTYPE_MAP.get(subtype, subtype)
4438
4439
def parse_codecs(codecs_str):
    """Split an RFC 6381 codecs string into 'vcodec'/'acodec' entries.

    http://tools.ietf.org/html/rfc6381
    """
    if not codecs_str:
        return {}
    split_codecs = [c.strip() for c in codecs_str.strip().strip(',').split(',') if c.strip()]
    VIDEO_FAMILIES = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', 'h263', 'h264', 'mp4v', 'hvc1', 'av01', 'theora')
    AUDIO_FAMILIES = ('mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')
    vcodec = acodec = None
    for full_codec in split_codecs:
        family = full_codec.split('.')[0]
        if family in VIDEO_FAMILIES:
            if not vcodec:
                vcodec = full_codec
        elif family in AUDIO_FAMILIES:
            if not acodec:
                acodec = full_codec
        else:
            write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
    if vcodec or acodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
        }
    # Unrecognized pair: assume video,audio order
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
4469
4470
def urlhandle_detect_ext(url_handle):
    """Guess a file extension from a response's headers: Content-Disposition
    filename first, then the Content-Type MIME type."""
    getheader = url_handle.headers.get

    content_disposition = getheader('Content-Disposition')
    if content_disposition:
        mobj = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', content_disposition)
        if mobj:
            ext = determine_ext(mobj.group('filename'), default_ext=None)
            if ext:
                return ext

    return mimetype2ext(getheader('Content-Type'))
4483
4484
def encode_data_uri(data, mime_type):
    """Build a base64 'data:' URI from raw bytes and a MIME type."""
    encoded = base64.b64encode(data).decode('ascii')
    return 'data:%s;base64,%s' % (mime_type, encoded)
4487
4488
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None or content_limit is None:
        # No viewer limit set, or content available for everyone
        return False
    return age_limit < content_limit
4497
4498
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # Known byte-order marks and their encodings, longest-prefix first
    BOMS = [
        (b'\xef\xbb\xbf', 'utf-8'),
        (b'\x00\x00\xfe\xff', 'utf-32-be'),
        (b'\xff\xfe\x00\x00', 'utf-32-le'),
        (b'\xff\xfe', 'utf-16-le'),
        (b'\xfe\xff', 'utf-16-be'),
    ]
    decoded = None
    for bom, encoding in BOMS:
        if first_bytes.startswith(bom):
            decoded = first_bytes[len(bom):].decode(encoding, 'replace')
            break
    if decoded is None:
        decoded = first_bytes.decode('utf-8', 'replace')

    return re.match(r'^\s*<', decoded)
4517
4518
def determine_protocol(info_dict):
    """Infer the download protocol of an info dict from its explicit
    'protocol' field, URL scheme prefix, or file extension."""
    protocol = info_dict.get('protocol')
    if protocol is not None:
        return protocol

    url = info_dict['url']
    for prefix in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(prefix):
            return prefix

    ext = determine_ext(url)
    if ext in ('m3u8', 'f4m'):
        return ext

    return compat_urllib_parse_urlparse(url).scheme
4539
4540
def render_table(header_row, data):
    """ Render a list of rows, each as a list of values """
    table = [header_row] + data
    # Each column is padded to its widest cell plus one space,
    # except the last column, which is left unpadded
    max_lens = [max(len(compat_str(v)) for v in col) for col in zip(*table)]
    format_str = ' '.join('%-' + compat_str(ml + 1) + 's' for ml in max_lens[:-1]) + '%s'
    return '\n'.join(format_str % tuple(row) for row in table)
4547
4548
def _match_one(filter_part, dct):
    """Evaluate one --match-filter clause (e.g. 'duration > 30', '!is_live')
    against the dict `dct`; raises ValueError on an unparsable clause."""
    COMPARISON_OPERATORS = {
        '<': operator.lt,
        '<=': operator.le,
        '>': operator.gt,
        '>=': operator.ge,
        '=': operator.eq,
        '!=': operator.ne,
    }
    # key <op>[?] value, where value is a number (with optional size
    # suffix), a quoted string, or a bare word
    operator_rex = re.compile(r'''(?x)\s*
        (?P<key>[a-z_]+)
        \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<intval>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)|
            (?P<quote>["\'])(?P<quotedstrval>(?:\\.|(?!(?P=quote)|\\).)+?)(?P=quote)|
            (?P<strval>(?![0-9.])[a-z0-9A-Z]*)
        )
        \s*$
        ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = COMPARISON_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if (m.group('quotedstrval') is not None
                or m.group('strval') is not None
                # If the original field is a string and matching comparisonvalue is
                # a number we should respect the origin of the original field
                # and process comparison value as a string (see
                # https://github.com/ytdl-org/youtube-dl/issues/11082).
                or actual_value is not None and m.group('intval') is not None
                and isinstance(actual_value, compat_str)):
            if m.group('op') not in ('=', '!='):
                raise ValueError(
                    'Operator %s does not support string values!' % m.group('op'))
            comparison_value = m.group('quotedstrval') or m.group('strval') or m.group('intval')
            quote = m.group('quote')
            if quote is not None:
                # Un-escape quote characters inside the quoted value
                comparison_value = comparison_value.replace(r'\%s' % quote, quote)
        else:
            try:
                comparison_value = int(m.group('intval'))
            except ValueError:
                # Fall back to human-readable sizes, with or without 'B'
                comparison_value = parse_filesize(m.group('intval'))
                if comparison_value is None:
                    comparison_value = parse_filesize(m.group('intval') + 'B')
                if comparison_value is None:
                    raise ValueError(
                        'Invalid integer value %r in filter part %r' % (
                            m.group('intval'), filter_part))
        if actual_value is None:
            # A '?' after the operator lets missing fields pass the filter
            return m.group('none_inclusive')
        return op(actual_value, comparison_value)

    # Unary clauses: 'key' (present/True) and '!key' (absent/False)
    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)\s*
        (?P<op>%s)\s*(?P<key>[a-z_]+)
        \s*$
        ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
    m = operator_rex.search(filter_part)
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        return op(actual_value)

    raise ValueError('Invalid filter part %r' % filter_part)
4617
4618
def match_str(filter_str, dct):
    """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """

    # '&'-separated clauses must all match (logical AND)
    return all(
        _match_one(filter_part, dct) for filter_part in filter_str.split('&'))
4624
4625
def match_filter_func(filter_str):
    """Build a match-filter callback: it returns None when an info dict
    passes `filter_str`, or a human-readable skip message otherwise."""
    def _match_func(info_dict):
        if match_str(filter_str, info_dict):
            return None
        video_title = info_dict.get('title', info_dict.get('id', 'video'))
        return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
    return _match_func
4634
4635
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression ('12.3s' or 'HH:MM:SS[.f]' with an
    optional frame-style ':fff' suffix) into float seconds, or None."""
    if not time_expr:
        return None

    mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        hours, mins, secs = mobj.groups()
        # A ':fff' fraction separator is treated like a decimal point
        return 3600 * int(hours) + 60 * int(mins) + float(secs.replace(':', '.'))
    return None
4647
4648
def srt_subtitles_timecode(seconds):
    """Format a duration in seconds as an SRT timecode 'HH:MM:SS,mmm'."""
    hours = seconds / 3600
    minutes = (seconds % 3600) / 60
    secs = seconds % 60
    millis = (seconds % 1) * 1000
    return '%02d:%02d:%02d,%03d' % (hours, minutes, secs, millis)
4651
4652
def dfxp2srt(dfxp_data):
    '''
    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    '''
    # Legacy TTAF namespaces are rewritten (as raw bytes, before parsing)
    # to their modern TTML equivalents so a single set of XPaths works.
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # TTML styling attributes that are translated into SRT markup below.
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    # Populated further down; read by the closures in TTMLPElementParser.
    styles = {}
    default_style = {}

    class TTMLPElementParser(object):
        # NOTE(review): these are class-level attributes; the two lists are
        # shared between instances and mutated in place.  This works only
        # because every paragraph parse pushes and pops in balance, leaving
        # them empty again when parsing finishes.
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            # <br> becomes a plain newline; any other element may open
            # styling markup that is tracked for closing in end().
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                # Effective style: document default, then the referenced
                # style, then inline tts:* attributes (highest priority).
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect from an
                        # enclosing element.
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            # Close the markup opened by the matching start() call, in
            # reverse order so tags nest correctly.
            if tag not in (_x('ttml:br'), 'br'):
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    def parse_node(node):
        # Serialize the paragraph and re-parse it through the SRT-emitting
        # target above, yielding the paragraph's SRT text.
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    # Normalize legacy namespaces before XML parsing.
    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve <style> elements; keep iterating while a style references a
    # parent style that has not been resolved yet (forward reference).
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # A style referenced from <body> or <div> becomes the document default.
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    # Emit one SRT cue per paragraph that has usable timing.
    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            # No end time: derive it from the duration, or skip the cue.
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
4815
4816
def cli_option(params, command_option, param):
    """Return [command_option, value] for the params entry named *param*,
    or [] when the entry is unset (None / missing).

    The value is always coerced to a string so the result is safe to pass
    to subprocess, which requires string arguments.
    """
    param = params.get(param)
    if param is None:
        return []
    # Coerce every set value; the previous code only converted truthy
    # values, so falsy non-None values (e.g. int 0) leaked through
    # unconverted.
    return [command_option, compat_str(param)]
4822
4823
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Map a boolean params entry onto CLI arguments.

    Returns [] when the entry is unset; otherwise either
    [command_option, value] or, with a separator, the single string
    'command_option<separator>value'.
    """
    param = params.get(param)
    if param is None:
        return []
    assert isinstance(param, bool)
    value = true_value if param else false_value
    if separator:
        return [command_option + separator + value]
    return [command_option, value]
4832
4833
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] when the params entry equals
    expected_value, otherwise []."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
4837
4838
def cli_configuration_args(params, param, default=[]):
    """Return the list of extra CLI arguments stored under *param*,
    or *default* when unset.

    NOTE: the default list object is shared across calls (and returned
    as-is); callers must not mutate the result when the entry is unset.
    """
    extra_args = params.get(param)
    if extra_args is None:
        return default
    assert isinstance(extra_args, list)
    return extra_args
4845
4846
class ISO639Utils(object):
    """Bidirectional mapping between ISO 639-1 (two-letter) and
    ISO 639-2/T (three-letter) language codes."""
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two characters are significant (so e.g. 'en-US'
        # resolves via 'en'); returns None for unknown codes.
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        # Linear scan over the map; implicitly returns None when no
        # two-letter equivalent is known.
        for short_name, long_name in cls._lang_map.items():
            if long_name == code:
                return short_name
5050
5051
class ISO3166Utils(object):
    """Lookup of full country names from ISO 3166-1 alpha-2 codes."""
    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-2 country code to the corresponding full name"""
        # Case-insensitive lookup; returns None for unknown codes.
        return cls._country_map.get(code.upper())
5310
5311
class GeoUtils(object):
    """Helpers for faking a geographic location via IP address."""
    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (as a text string) from the given
        CIDR block, or from the major block of the given two-letter
        country code; returns None for an unknown country code."""
        # Two characters means a country code, anything else is taken to
        # be a CIDR block already.
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None
        else:
            block = code_or_block
        addr, preflen = block.split('/')
        addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
        # Highest address in the block: set all host bits to 1.
        addr_max = addr_min | (0xffffffff >> int(preflen))
        return compat_str(socket.inet_ntoa(
            compat_struct_pack('!L', random.randint(addr_min, addr_max))))
5570
5571
class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
    """ProxyHandler variant that lets individual requests override the
    proxy via a 'Ytdl-request-proxy' header."""

    def __init__(self, proxies=None):
        # Set default handlers
        # Default arguments bind the current values of `type` and the
        # bound method eagerly, so each lambda keeps its own scheme
        # (avoids the late-binding closure pitfall).
        for type in ('http', 'https'):
            setattr(self, '%s_open' % type,
                    lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
                    meth(r, proxy, type))
        compat_urllib_request.ProxyHandler.__init__(self, proxies)

    def proxy_open(self, req, proxy, type):
        # A per-request proxy takes precedence over the handler default;
        # the marker header is consumed so it never reaches the server.
        req_proxy = req.headers.get('Ytdl-request-proxy')
        if req_proxy is not None:
            proxy = req_proxy
            del req.headers['Ytdl-request-proxy']

        if proxy == '__noproxy__':
            return None  # No Proxy
        # SOCKS proxies are signalled to the http/https handlers via a
        # header; they wrap the socket themselves.
        if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
            req.add_header('Ytdl-socks-proxy', proxy)
            # youtube-dl's http/https handlers do wrapping the socket with socks
            return None
        return compat_urllib_request.ProxyHandler.proxy_open(
            self, req, proxy, type)
5595
5596
5597 # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
5598 # released into Public Domain
5599 # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
5600
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    n = int(n)
    # Emit the value 32 bits at a time, most significant word first.
    packed = b''
    while n > 0:
        packed = compat_struct_pack('>I', n & 0xffffffff) + packed
        n >>= 32
    # Drop the leading NUL bytes introduced by whole-word packing.
    zero_byte = b'\000'[0]
    start = 0
    while start < len(packed) and packed[start] == zero_byte:
        start += 1
    if start == len(packed):
        # Nothing was packed, i.e. n == 0: canonical single NUL byte.
        packed = b'\000'
        start = 0
    packed = packed[start:]
    # Left-pad with NULs up to a multiple of blocksize, if requested.
    if blocksize > 0 and len(packed) % blocksize:
        packed = (blocksize - len(packed) % blocksize) * b'\000' + packed
    return packed
5629
5630
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # Left-pad with NULs so the length is a whole number of 32-bit words.
    pad = (4 - len(s) % 4) % 4
    if pad:
        s = b'\000' * pad + s
    result = 0
    for offset in range(0, len(s), 4):
        result = (result << 32) | compat_struct_unpack('>I', s[offset:offset + 4])[0]
    return result
5646
5647
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # The payload is interpreted little-endian, hence the byte reversal
    # before converting the hex representation to an integer.
    payload = int(binascii.hexlify(data[::-1]), 16)
    return '%x' % pow(payload, exponent, modulus)
5663
5664
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data input data
    @param {int} length target length
    @returns {int[]} padded data
    @raises ValueError when data does not fit in length with 11 bytes
            of padding overhead
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # RFC 8017 (PKCS #1 v2.2, EME-PKCS1-v1_5) requires the PS filler
    # octets to be non-zero: a zero byte would be misread as the
    # end-of-padding marker on decryption.  The previous randint(0, 254)
    # could emit zeros.
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
5678
5679
def encode_base_n(num, n, table=None):
    """Encode the non-negative integer *num* in base *n*.

    *table* supplies the digit alphabet; it defaults to 0-9a-zA-Z
    truncated to *n* characters.  Raises ValueError when the table has
    fewer than *n* entries.
    """
    FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if not table:
        table = FULL_TABLE[:n]

    if n > len(table):
        raise ValueError('base %d exceeds table length %d' % (n, len(table)))

    if num == 0:
        return table[0]

    digits = []
    while num:
        num, remainder = divmod(num, n)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
5696
5697
def decode_packed_codes(code):
    """Expand word-substitution-obfuscated ('packed') code.

    Builds a symbol table keyed by the base-N encoding of each symbol
    index, then replaces every word in the obfuscated source with its
    table entry (or itself when the table slot is empty).
    """
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base = int(base)
    symbols = symbols.split('|')

    symbol_table = {}
    for idx in range(int(count) - 1, -1, -1):
        key = encode_base_n(idx, base)
        symbol_table[key] = symbols[idx] or key

    return re.sub(
        r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
        obfuscated_code)
5714
5715
def caesar(s, alphabet, shift):
    """Shift every character of *s* that occurs in *alphabet* by *shift*
    positions (wrapping around); other characters pass through unchanged.
    """
    if shift == 0:
        return s
    size = len(alphabet)

    def rotate(ch):
        pos = alphabet.find(ch)
        return ch if pos < 0 else alphabet[(pos + shift) % size]

    return ''.join(rotate(ch) for ch in s)
5723
5724
def rot47(s):
    """Apply the ROT47 cipher: rotate the 94 printable ASCII characters
    ('!' through '~') by 47 positions.  The transformation is its own
    inverse.
    """
    printable_ascii = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, printable_ascii, 47)
5727
5728
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list (KEY=value pairs, comma separated,
    values optionally double-quoted) into a dict of strings."""
    attrs = {}
    for key, raw_val in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
        # Quoted values may contain commas; strip the surrounding quotes.
        attrs[key] = raw_val[1:-1] if raw_val.startswith('"') else raw_val
    return attrs
5736
5737
def urshift(val, n):
    """Unsigned 32-bit right shift: emulate JavaScript's `>>>` operator
    for Python ints that may be negative."""
    if val >= 0:
        return val >> n
    # Map the negative value into the unsigned 32-bit range first.
    return (val + 0x100000000) >> n
5740
5741
5742 # Based on png2str() written by @gdkchan and improved by @yokrysty
5743 # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    # Reference: https://www.w3.org/TR/PNG/
    # Decode a PNG into (width, height, pixels) where pixels is a list of
    # rows of raw byte values.
    # NOTE(review): the stride below assumes 8-bit RGB (3 bytes/pixel)
    # with no alpha, palette or interlacing — confirm against the
    # producers of these PNGs.
    header = png_data[8:]

    # Validate the 8-byte PNG signature and that the first chunk is IHDR.
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Big-endian unsigned int of 1, 2 or 4 bytes.
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Walk the chunk stream: 4-byte length, 4-byte type, data, 4-byte CRC.
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # IHDR is guaranteed to be first by the signature check above.
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''

    # Image data may be split over several IDAT chunks; concatenate them
    # before decompressing.
    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # Bytes per scanline (3 bytes per pixel).
    stride = width * 3
    pixels = []

    # Look up an already-reconstructed byte by its flat index.
    def _get_pixel(idx):
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    # Undo the per-scanline filters (https://www.w3.org/TR/PNG/#9Filters).
    for y in range(height):
        # Each scanline is prefixed with one filter-type byte.
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # 'left' is the same channel of the previous pixel (3 bytes
            # back); the first pixel of a row has no left neighbour.
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                # 'c' is the upper-left neighbour when it exists.
                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Pick the predictor closest to p (ties favour a, then b).
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
5847
5848
def write_xattr(path, key, value):
    """Write the extended attribute `key` with bytes `value` to the file at `path`.

    Tries implementations in this order:
      1. the `xattr` Python module (either python-pyxattr >= 0.5.0 or xattr);
      2. on Windows, an NTFS Alternate Data Stream named `path:key`;
      3. the `setfattr` or `xattr` command-line tools.

    Raises XAttrMetadataError when a write attempt fails, and
    XAttrUnavailableError when no usable implementation can be found.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'youtube-dl requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            # normalize OS-level failures to the project's exception type
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            # ':' separates the stream name, so it must not appear in `key`
            assert ':' not in key
            assert os.path.exists(path)

            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            # probe for CLI tools (GNU attr's setfattr, or the xattr binary)
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:
                # the CLI tools take the value as a text argument
                value = value.decode('utf-8')
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = subprocess.Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = process_communicate_or_kill(p)
                stderr = stderr.decode('utf-8', 'replace')
                if p.returncode != 0:
                    # the tool ran but reported failure; surface its stderr
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
5931
5932
def random_birthday(year_field, month_field, day_field):
    """Pick a uniformly random date between 1950-01-01 and 1995-12-31.

    Returns a dict that maps the supplied field names to the stringified
    year, month and day of the chosen date (useful for filling age-gate
    forms).
    """
    earliest = datetime.date(1950, 1, 1)
    latest = datetime.date(1995, 12, 31)
    span_days = (latest - earliest).days
    picked = earliest + datetime.timedelta(random.randint(0, span_days))
    return {
        year_field: str(picked.year),
        month_field: str(picked.month),
        day_field: str(picked.day),
    }
5943
5944
def clean_podcast_url(url):
    """Strip known podcast-analytics redirector prefixes from `url`.

    Tracking hosts (chartable, blubrry, podtrac, acast, podcorn,
    podsights) wrap the real media URL; removing the matched prefix
    leaves the direct download URL.
    """
    # Verbose-mode pattern: whitespace/comments inside it are ignored.
    tracking_prefix = r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com
            )/[^/]+|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e
        )/'''
    return re.sub(tracking_prefix, '', url)
5960
5961
def traverse_obj(obj, *paths, **kwargs):
    """
    Safely traverse nested `dict`s and `Sequence`s

    >>> obj = [{}, {"key": "value"}]
    >>> traverse_obj(obj, (1, "key"))
    "value"

    Each of the provided `paths` is tested and the first producing a valid result will be returned.
    The next path will also be tested if the path branched but no results could be found.
    Supported values for traversal are `Mapping`, `Sequence` and `re.Match`.
    A value of None is treated as the absence of a value.

    The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`.

    The keys in the path can be one of:
        - `None`:           Return the current object.
        - `str`/`int`:      Return `obj[key]`. For `re.Match`, return `obj.group(key)`.
        - `slice`:          Branch out and return all values in `obj[key]`.
        - `Ellipsis`:       Branch out and return a list of all values.
        - `tuple`/`list`:   Branch out and return a list of all matching values.
                            Read as: `[traverse_obj(obj, branch) for branch in branches]`.
        - `function`:       Branch out and return values filtered by the function.
                            Read as: `[value for key, value in obj if function(key, value)]`.
                            For `Sequence`s, `key` is the index of the value.
        - `dict`            Transform the current object and return a matching dict.
                            Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`.

    `tuple`, `list`, and `dict` all support nested paths and branches.

    @params paths           Paths which to traverse by.
    Keyword arguments:
    @param default          Value to return if the paths do not match.
    @param expected_type    If a `type`, only accept final values of this type.
                            If any other callable, try to call the function on each result.
    @param get_all          If `False`, return the first matching result, otherwise all matching ones.
    @param casesense        If `False`, consider string dictionary keys as case insensitive.

    The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API

    @param _is_user_input    Whether the keys are generated from user input.
                             If `True` strings get converted to `int`/`slice` if needed.
    @param _traverse_string  Whether to traverse into objects as strings.
                             If `True`, any non-compatible object will first be
                             converted into a string and then traversed into.


    @returns                The result of the object traversal.
                            If successful, `get_all=True`, and the path branches at least once,
                            then a list of results is returned instead.
                            A list is always returned if the last path branches and no `default` is given.
    """

    # parameter defaults (kwargs-based for Py2-compatible keyword-only args)
    default = kwargs.get('default', NO_DEFAULT)
    expected_type = kwargs.get('expected_type')
    get_all = kwargs.get('get_all', True)
    casesense = kwargs.get('casesense', True)
    _is_user_input = kwargs.get('_is_user_input', False)
    _traverse_string = kwargs.get('_traverse_string', False)

    # instant compat: shadow the builtin so `str` means the text type on Py2 too
    str = compat_str

    # a "sequence" for traversal purposes excludes text/bytes
    is_sequence = lambda x: isinstance(x, compat_collections_abc.Sequence) and not isinstance(x, (str, bytes))
    # stand-in until compat_re_Match is added
    compat_re_Match = type(re.match('a', 'a'))
    # stand-in until casefold.py is added
    try:
        ''.casefold()
        compat_casefold = lambda s: s.casefold()
    except AttributeError:
        compat_casefold = lambda s: s.lower()
    # casefold only string keys; leave everything else untouched
    casefold = lambda k: compat_casefold(k) if isinstance(k, str) else k

    # type_test filters/transforms each candidate result; non-matching -> None
    if isinstance(expected_type, type):
        type_test = lambda val: val if isinstance(val, expected_type) else None
    else:
        type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,))

    def from_iterable(iterables):
        # chain.from_iterable(['ABC', 'DEF']) --> A B C D E F
        for it in iterables:
            for item in it:
                yield item

    def apply_key(key, obj):
        # Apply a single path component `key` to `obj`, yielding 0..n results.
        if obj is None:
            return

        elif key is None:
            # identity: yield the object itself
            yield obj

        elif isinstance(key, (list, tuple)):
            # alternatives: traverse each branch and yield all its results
            for branch in key:
                _, result = apply_path(obj, branch)
                for item in result:
                    yield item

        elif key is Ellipsis:
            # branch into every value of the current object
            result = []
            if isinstance(obj, compat_collections_abc.Mapping):
                result = obj.values()
            elif is_sequence(obj):
                result = obj
            elif isinstance(obj, compat_re_Match):
                result = obj.groups()
            elif _traverse_string:
                result = str(obj)
            for item in result:
                yield item

        elif callable(key):
            # filter: yield values for which key(k, v) is truthy
            if is_sequence(obj):
                iter_obj = enumerate(obj)
            elif isinstance(obj, compat_collections_abc.Mapping):
                iter_obj = obj.items()
            elif isinstance(obj, compat_re_Match):
                # group 0 (whole match) followed by the numbered groups
                iter_obj = enumerate(itertools.chain([obj.group()], obj.groups()))
            elif _traverse_string:
                iter_obj = enumerate(str(obj))
            else:
                return
            for item in (v for k, v in iter_obj if try_call(key, args=(k, v))):
                yield item

        elif isinstance(key, dict):
            # transform: build a dict of sub-traversals, dropping misses
            # unless a `default` was supplied
            iter_obj = ((k, _traverse_obj(obj, v)) for k, v in key.items())
            yield dict((k, v if v is not None else default) for k, v in iter_obj
                       if v is not None or default is not NO_DEFAULT)

        elif isinstance(obj, compat_collections_abc.Mapping):
            # plain dict lookup (case-insensitive fallback when casesense=False)
            yield (obj.get(key) if casesense or (key in obj)
                   else next((v for k, v in obj.items() if casefold(k) == key), None))

        elif isinstance(obj, compat_re_Match):
            # int or casesense str: direct group access first
            if isinstance(key, int) or casesense:
                try:
                    yield obj.group(key)
                    return
                except IndexError:
                    pass
            if not isinstance(key, str):
                return

            # case-insensitive named-group lookup
            yield next((v for k, v in obj.groupdict().items() if casefold(k) == key), None)

        else:
            if _is_user_input:
                # user-supplied keys: 'n' -> int, 'a:b' -> slice
                key = (int_or_none(key) if ':' not in key
                       else slice(*map(int_or_none, key.split(':'))))

            if not isinstance(key, (int, slice)):
                return

            if not is_sequence(obj):
                if not _traverse_string:
                    return
                obj = str(obj)

            try:
                yield obj[key]
            except IndexError:
                pass

    def apply_path(start_obj, path):
        # Apply every component of `path` in turn; returns
        # (has_branched, iterator-of-results).
        objs = (start_obj,)
        has_branched = False

        for key in variadic(path):
            if _is_user_input and key == ':':
                # user shorthand for "all values"
                key = Ellipsis

            if not casesense and isinstance(key, str):
                key = compat_casefold(key)

            if key is Ellipsis or isinstance(key, (list, tuple)) or callable(key):
                has_branched = True

            key_func = functools.partial(apply_key, key)
            objs = from_iterable(map(key_func, objs))

        return has_branched, objs

    def _traverse_obj(obj, path, use_list=True):
        # Traverse one path; returns a list when it branched (and get_all),
        # the first hit otherwise, or None when nothing matched.
        has_branched, results = apply_path(obj, path)
        results = LazyList(x for x in map(type_test, results) if x is not None)

        if get_all and has_branched:
            return results.exhaust() if results or use_list else None

        return results[0] if results else None

    for index, path in enumerate(paths, 1):
        # only the last path may produce an empty list result (use_list)
        use_list = default is NO_DEFAULT and index == len(paths)
        result = _traverse_obj(obj, path, use_list)
        if result is not None:
            return result

    return None if default is NO_DEFAULT else default
6162
6163
def get_first(obj, keys, **kwargs):
    """Return the first non-None value of `keys` across all items of `obj`.

    Convenience wrapper: equivalent to
    `traverse_obj(obj, (Ellipsis,) + variadic(keys), get_all=False, **kwargs)`.
    """
    path = (Ellipsis,) + tuple(variadic(keys))
    return traverse_obj(obj, path, get_all=False, **kwargs)
6166
6167
def join_nonempty(*values, **kwargs):
    """Join the truthy `values`, stringified, with `delim` (default '-').

    If `from_dict` is supplied, each value is first treated as a
    `traverse_obj` path into `from_dict` and resolved before joining.
    """
    # parameter defaults (kwargs-based for Py2-compatible keyword-only args)
    delim = kwargs.get('delim', '-')
    from_dict = kwargs.get('from_dict')

    if from_dict is not None:
        values = (traverse_obj(from_dict, variadic(v)) for v in values)
    return delim.join(compat_str(v) for v in values if v)
|