# test_google.py (7.3KB)
# -*- coding: utf-8 -*-
from collections import defaultdict

import lxml
import lxml.html
import mock

from searx.engines import google
from searx.testing import SearxTestCase


  7. class TestGoogleEngine(SearxTestCase):
  8. def mock_response(self, text):
  9. response = mock.Mock(text=text, url='https://www.google.com/search?q=test&start=0&gbv=1')
  10. response.search_params = mock.Mock()
  11. response.search_params.get = mock.Mock(return_value='www.google.com')
  12. return response
  13. def test_request(self):
  14. query = 'test_query'
  15. dicto = defaultdict(dict)
  16. dicto['pageno'] = 1
  17. dicto['language'] = 'fr_FR'
  18. params = google.request(query, dicto)
  19. self.assertIn('url', params)
  20. self.assertIn(query, params['url'])
  21. self.assertIn('google.fr', params['url'])
  22. self.assertNotIn('PREF', params['cookies'])
  23. self.assertIn('NID', params['cookies'])
  24. self.assertIn('fr', params['headers']['Accept-Language'])
  25. dicto['language'] = 'all'
  26. params = google.request(query, dicto)
  27. self.assertIn('google.com', params['url'])
  28. self.assertIn('en', params['headers']['Accept-Language'])
  29. self.assertIn('PREF', params['cookies'])
  30. self.assertIn('NID', params['cookies'])
  31. def test_response(self):
  32. self.assertRaises(AttributeError, google.response, None)
  33. self.assertRaises(AttributeError, google.response, [])
  34. self.assertRaises(AttributeError, google.response, '')
  35. self.assertRaises(AttributeError, google.response, '[]')
  36. response = self.mock_response('<html></html>')
  37. self.assertEqual(google.response(response), [])
  38. html = """
  39. <li class="g">
  40. <h3 class="r">
  41. <a href="http://this.should.be.the.link/">
  42. <b>This</b> is <b>the</b> title
  43. </a>
  44. </h3>
  45. <div class="s">
  46. <div class="kv" style="margin-bottom:2px">
  47. <cite>
  48. <b>test</b>.psychologies.com/
  49. </cite>
  50. <div class="_nBb">‎
  51. <div style="display:inline" onclick="google.sham(this);" aria-expanded="false"
  52. aria-haspopup="true" tabindex="0" data-ved="0CBUQ7B0wAA">
  53. <span class="_O0">
  54. </span>
  55. </div>
  56. <div style="display:none" class="am-dropdown-menu" role="menu" tabindex="-1">
  57. <ul>
  58. <li class="_Ykb">
  59. <a class="_Zkb" href="http://www.google.fr/url?url=http://webcache.googleusercontent
  60. .com/search%3Fcache:R1Z_4pGXjuIJ:http://test.psychologies.com/">
  61. En cache
  62. </a>
  63. </li>
  64. <li class="_Ykb">
  65. <a class="_Zkb" href="/search?safe=off&amp;q=related:test.psy.com/">
  66. Pages similaires
  67. </a>
  68. </li>
  69. </ul>
  70. </div>
  71. </div>
  72. </div>
  73. <span class="st">
  74. This should be the content.
  75. </span>
  76. <br>
  77. <div class="osl">‎
  78. <a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/">
  79. Test Personnalité
  80. </a> - ‎
  81. <a href="http://www.google.fr/url?url=http://test.psychologies.com/test/">
  82. Tests - Moi
  83. </a> - ‎
  84. <a href="http://www.google.fr/url?url=http://test.psychologies.com/test/tests-couple">
  85. Test Couple
  86. </a>
  87. - ‎
  88. <a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/tests-amour">
  89. Test Amour
  90. </a>
  91. </div>
  92. </div>
  93. </li>
  94. <li class="g">
  95. <h3 class="r">
  96. <a href="http://www.google.com/images?q=toto">
  97. <b>This</b>
  98. </a>
  99. </h3>
  100. </li>
  101. <li class="g">
  102. <h3 class="r">
  103. <a href="http://www.google.com/search?q=toto">
  104. <b>This</b> is
  105. </a>
  106. </h3>
  107. </li>
  108. <li class="g">
  109. <h3 class="r">
  110. <a href="€">
  111. <b>This</b> is <b>the</b>
  112. </a>
  113. </h3>
  114. </li>
  115. <li class="g">
  116. <h3 class="r">
  117. <a href="/url?q=url">
  118. <b>This</b> is <b>the</b>
  119. </a>
  120. </h3>
  121. </li>
  122. <p class="_Bmc" style="margin:3px 8px">
  123. <a href="/search?num=20&amp;safe=off&amp;q=t&amp;revid=1754833769&amp;sa=X&amp;ei=-&amp;ved=">
  124. suggestion <b>title</b>
  125. </a>
  126. </p>
  127. """
  128. response = self.mock_response(html)
  129. results = google.response(response)
  130. self.assertEqual(type(results), list)
  131. self.assertEqual(len(results), 2)
  132. self.assertEqual(results[0]['title'], 'This is the title')
  133. self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
  134. self.assertEqual(results[0]['content'], 'This should be the content.')
  135. self.assertEqual(results[1]['suggestion'], 'suggestion title')
  136. html = """
  137. <li class="b_algo" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO">
  138. </li>
  139. """
  140. response = self.mock_response(html)
  141. results = google.response(response)
  142. self.assertEqual(type(results), list)
  143. self.assertEqual(len(results), 0)
  144. response = mock.Mock(text='<html></html>', url='https://sorry.google.com')
  145. response.search_params = mock.Mock()
  146. response.search_params.get = mock.Mock(return_value='www.google.com')
  147. self.assertRaises(RuntimeWarning, google.response, response)
  148. response = mock.Mock(text='<html></html>', url='https://www.google.com/sorry/IndexRedirect')
  149. response.search_params = mock.Mock()
  150. response.search_params.get = mock.Mock(return_value='www.google.com')
  151. self.assertRaises(RuntimeWarning, google.response, response)
  152. def test_parse_images(self):
  153. html = """
  154. <li>
  155. <div>
  156. <a href="http://www.google.com/url?q=http://this.is.the.url/">
  157. <img style="margin:3px 0;margin-right:6px;padding:0" height="90"
  158. src="https://this.is.the.image/image.jpg" width="60" align="middle" alt="" border="0">
  159. </a>
  160. </div>
  161. </li>
  162. """
  163. dom = lxml.html.fromstring(html)
  164. results = google.parse_images(dom, 'www.google.com')
  165. self.assertEqual(type(results), list)
  166. self.assertEqual(len(results), 1)
  167. self.assertEqual(results[0]['url'], 'http://this.is.the.url/')
  168. self.assertEqual(results[0]['title'], '')
  169. self.assertEqual(results[0]['content'], '')
  170. self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg')