# -*- coding: utf-8 -*-
from collections import defaultdict
import mock
from searx.engines import wikipedia
from searx.testing import SearxTestCase


class TestWikipediaEngine(SearxTestCase):

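    # These tests call the engine's request()/response() hooks directly:
    # request() only builds the URL/params dict, and response() is fed
    # mock.Mock objects whose `content` holds canned MediaWiki API JSON.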
    def test_request(self):
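        # With 'fr_FR' the region suffix should be dropped (fr.wikipedia.org),
        # and the query should appear in the URL both as typed and capitalized.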
        query = 'test_query'
        dicto = defaultdict(dict)
        dicto['language'] = 'fr_FR'
        params = wikipedia.request(query, dicto)
        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        self.assertIn('test_query', params['url'])
        self.assertIn('Test_Query', params['url'])
        self.assertIn('fr.wikipedia.org', params['url'])

        query = 'Test_Query'
        params = wikipedia.request(query, dicto)
        self.assertIn('Test_Query', params['url'])
        self.assertNotIn('test_query', params['url'])

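        # 'all' is not a real subdomain; the engine is expected to fall back
        # to an English ('en') URL in that case.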
        dicto['language'] = 'all'
        params = wikipedia.request(query, dicto)
        self.assertIn('en', params['url'])

    def test_response(self):
        dicto = defaultdict(dict)
        dicto['language'] = 'fr'

        self.assertRaises(AttributeError, wikipedia.response, None)
        self.assertRaises(AttributeError, wikipedia.response, [])
        self.assertRaises(AttributeError, wikipedia.response, '')
        self.assertRaises(AttributeError, wikipedia.response, '[]')

        # page not found
        json = """
        {
            "batchcomplete": "",
            "query": {
                "normalized": [],
                "pages": {
                    "-1": {
                        "ns": 0,
                        "title": "",
                        "missing": ""
                    }
                }
            }
        }"""
        response = mock.Mock(content=json, search_params=dicto)
        self.assertEqual(wikipedia.response(response), [])

        # normal case
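        # A successful lookup should produce exactly two results: a plain link
        # result (url/title) followed by an infobox result
        # (infobox/id/content/img_src), which the assertions below verify.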
        json = """
        {
            "batchcomplete": "",
            "query": {
                "normalized": [],
                "pages": {
                    "12345": {
                        "pageid": 12345,
                        "ns": 0,
                        "title": "The Title",
                        "extract": "The Title is...",
                        "thumbnail": {
                            "source": "img_src.jpg"
                        },
                        "pageimage": "img_name.jpg"
                    }
                }
            }
        }"""
        response = mock.Mock(content=json, search_params=dicto)
        results = wikipedia.response(response)
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 2)
        self.assertEqual(results[0]['title'], u'The Title')
        self.assertIn('fr.wikipedia.org/wiki/The_Title', results[0]['url'])
        self.assertEqual(results[1]['infobox'], u'The Title')
        self.assertIn('fr.wikipedia.org/wiki/The_Title', results[1]['id'])
        self.assertIn('The Title is...', results[1]['content'])
        self.assertEqual(results[1]['img_src'], 'img_src.jpg')

        # disambiguation page
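        # The extract mimics a disambiguation page: newline-separated
        # alternatives instead of a prose summary. The engine is expected to
        # return no results for such pages (checked below).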
        json = """
        {
            "batchcomplete": "",
            "query": {
                "normalized": [],
                "pages": {
                    "12345": {
                        "pageid": 12345,
                        "ns": 0,
                        "title": "The Title",
                        "extract": "The Title can be:\\nThe Title 1\\nThe Title 2\\nThe Title 3\\nThe Title 4......................................................................................................................................." """  # noqa
        json += """
                    }
                }
            }
        }"""
        response = mock.Mock(content=json, search_params=dicto)
        results = wikipedia.response(response)
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 0)

        # no image
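        # Same title as the normal case but without a "thumbnail" entry; the
        # long run of dots presumably just keeps the extract long enough to be
        # accepted as a summary. The infobox's img_src should then be None.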
        json = """
        {
            "batchcomplete": "",
            "query": {
                "normalized": [],
                "pages": {
                    "12345": {
                        "pageid": 12345,
                        "ns": 0,
                        "title": "The Title",
                        "extract": "The Title is......................................................................................................................................................................................." """  # noqa
        json += """
                    }
                }
            }
        }"""
        response = mock.Mock(content=json, search_params=dicto)
        results = wikipedia.response(response)
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 2)
        self.assertIn('The Title is...', results[1]['content'])
        self.assertEqual(results[1]['img_src'], None)

        # title not in first paragraph
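        # The page title (traditional Chinese, 披頭四樂隊) only appears in the
        # second paragraph of the extract; the first, longer paragraph uses the
        # simplified form (披头士乐队). An infobox should still be built, with
        # that first paragraph as its content.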
        json = u"""
        {
            "batchcomplete": "",
            "query": {
                "normalized": [],
                "pages": {
                    "12345": {
                        "pageid": 12345,
                        "ns": 0,
                        "title": "披頭四樂隊",
                        "extract": "披头士乐队....................................................................................................................................................................................................\\n披頭四樂隊...", """  # noqa
        json += """
                        "thumbnail": {
                            "source": "img_src.jpg"
                        },
                        "pageimage": "img_name.jpg"
                    }
                }
            }
        }"""
        response = mock.Mock(content=json, search_params=dicto)
        results = wikipedia.response(response)
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 2)
        self.assertEqual(results[1]['infobox'], u'披頭四樂隊')
        self.assertIn(u'披头士乐队...', results[1]['content'])