Bläddra i källkod

Merge remote-tracking branch 'asciimoo/master'

Thomas Pointhuber 11 år sedan
förälder
incheckning
07f83cab22
4 ändrade filer med 8 tillägg och 5 borttagningar
  1. 1
    1
      searx/engines/google_news.py
  2. 1
    1
      searx/engines/yahoo.py
  3. 1
    1
      searx/engines/yahoo_news.py
  4. 5
    2
      searx/utils.py

+ 1
- 1
searx/engines/google_news.py Visa fil

6
 categories = ['news']
6
 categories = ['news']
7
 
7
 
8
 url = 'https://ajax.googleapis.com/'
8
 url = 'https://ajax.googleapis.com/'
9
-search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}' # noqa
9
+search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}'  # noqa
10
 
10
 
11
 paging = True
11
 paging = True
12
 language_support = True
12
 language_support = True

+ 1
- 1
searx/engines/yahoo.py Visa fil

35
 
35
 
36
     for result in dom.xpath(results_xpath):
36
     for result in dom.xpath(results_xpath):
37
         url_string = extract_url(result.xpath(url_xpath), search_url)
37
         url_string = extract_url(result.xpath(url_xpath), search_url)
38
-        start = url_string.find('/RU=')+4
38
+        start = url_string.find('http', url_string.find('/RU=')+1)
39
         end = url_string.rfind('/RS')
39
         end = url_string.rfind('/RS')
40
         url = unquote(url_string[start:end])
40
         url = unquote(url_string[start:end])
41
         title = extract_text(result.xpath(title_xpath)[0])
41
         title = extract_text(result.xpath(title_xpath)[0])

+ 1
- 1
searx/engines/yahoo_news.py Visa fil

35
 
35
 
36
     for result in dom.xpath(results_xpath):
36
     for result in dom.xpath(results_xpath):
37
         url_string = extract_url(result.xpath(url_xpath), search_url)
37
         url_string = extract_url(result.xpath(url_xpath), search_url)
38
-        start = url_string.find('/RU=')+4
38
+        start = url_string.find('http', url_string.find('/RU=')+1)
39
         end = url_string.rfind('/RS')
39
         end = url_string.rfind('/RS')
40
         url = unquote(url_string[start:end])
40
         url = unquote(url_string[start:end])
41
         title = extract_text(result.xpath(title_xpath)[0])
41
         title = extract_text(result.xpath(title_xpath)[0])

+ 5
- 2
searx/utils.py Visa fil

4
 from codecs import getincrementalencoder
4
 from codecs import getincrementalencoder
5
 import cStringIO
5
 import cStringIO
6
 import re
6
 import re
7
+from random import choice
7
 
8
 
9
+ua_versions = ('26.0', '27.0', '28.0')
10
+ua_os = ('Windows NT 6.3; WOW64', 'X11; Linux x86_64; rv:26.0')
11
+ua = "Mozilla/5.0 ({os}) Gecko/20100101 Firefox/{version}"
8
 
12
 
9
 def gen_useragent():
13
 def gen_useragent():
10
     # TODO
14
     # TODO
11
-    ua = "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0"
12
-    return ua
15
+    return ua.format(os=choice(ua_os), version=choice(ua_versions))
13
 
16
 
14
 
17
 
15
 def highlight_content(content, query):
18
 def highlight_content(content, query):