summary | refs | log | tree | commit | diff
path: root/test/test_InfoExtractor.py
blob: 437c7270ee6aeaa8eba588badfb3bf26d79ea37d (plain)
    1 #!/usr/bin/env python
    2 
    3 from __future__ import unicode_literals
    4 
    5 # Allow direct execution
    6 import os
    7 import sys
    8 import unittest
    9 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
   10 
   11 from test.helper import FakeYDL
   12 from youtube_dl.extractor.common import InfoExtractor
   13 from youtube_dl.extractor import YoutubeIE, get_info_extractor
   14 from youtube_dl.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
   15 
   16 
   17 class TestIE(InfoExtractor):
   18     pass
   19 
   20 
   21 class TestInfoExtractor(unittest.TestCase):
   22     def setUp(self):
   23         self.ie = TestIE(FakeYDL())
   24 
   25     def test_ie_key(self):
   26         self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)
   27 
   28     def test_html_search_regex(self):
   29         html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>'
   30         search = lambda re, *args: self.ie._html_search_regex(re, html, *args)
   31         self.assertEqual(search(r'<p id="foo">(.+?)</p>', 'foo'), 'Watch this video')
   32 
   33     def test_opengraph(self):
   34         ie = self.ie
   35         html = '''
   36             <meta name="og:title" content='Foo'/>
   37             <meta content="Some video's description " name="og:description"/>
   38             <meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&amp;key2=val2'/>
   39             <meta content='application/x-shockwave-flash' property='og:video:type'>
   40             <meta content='Foo' property=og:foobar>
   41             <meta name="og:test1" content='foo > < bar'/>
   42             <meta name="og:test2" content="foo >//< bar"/>
   43             '''
   44         self.assertEqual(ie._og_search_title(html), 'Foo')
   45         self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
   46         self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
   47         self.assertEqual(ie._og_search_video_url(html, default=None), None)
   48         self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
   49         self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
   50         self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
   51         self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
   52         self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
   53         self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
   54 
   55     def test_html_search_meta(self):
   56         ie = self.ie
   57         html = '''
   58             <meta name="a" content="1" />
   59             <meta name='b' content='2'>
   60             <meta name="c" content='3'>
   61             <meta name=d content='4'>
   62             <meta property="e" content='5' >
   63             <meta content="6" name="f">
   64         '''
   65 
   66         self.assertEqual(ie._html_search_meta('a', html), '1')
   67         self.assertEqual(ie._html_search_meta('b', html), '2')
   68         self.assertEqual(ie._html_search_meta('c', html), '3')
   69         self.assertEqual(ie._html_search_meta('d', html), '4')
   70         self.assertEqual(ie._html_search_meta('e', html), '5')
   71         self.assertEqual(ie._html_search_meta('f', html), '6')
   72         self.assertEqual(ie._html_search_meta(('a', 'b', 'c'), html), '1')
   73         self.assertEqual(ie._html_search_meta(('c', 'b', 'a'), html), '3')
   74         self.assertEqual(ie._html_search_meta(('z', 'x', 'c'), html), '3')
   75         self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)
   76         self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
   77 
   78     def test_download_json(self):
   79         uri = encode_data_uri(b'{"foo": "blah"}', 'application/json')
   80         self.assertEqual(self.ie._download_json(uri, None), {'foo': 'blah'})
   81         uri = encode_data_uri(b'callback({"foo": "blah"})', 'application/javascript')
   82         self.assertEqual(self.ie._download_json(uri, None, transform_source=strip_jsonp), {'foo': 'blah'})
   83         uri = encode_data_uri(b'{"foo": invalid}', 'application/json')
   84         self.assertRaises(ExtractorError, self.ie._download_json, uri, None)
   85         self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
   86 
   87 
   88 if __name__ == '__main__':
   89     unittest.main()

Generated by cgit