|
@@ -1,6 +1,7 @@
|
1
|
1
|
import re
|
2
|
2
|
from urlparse import urljoin
|
3
|
3
|
from lxml import html
|
|
4
|
+from cgi import escape
|
4
|
5
|
from searx.engines.xpath import extract_text
|
5
|
6
|
from searx.languages import language_codes
|
6
|
7
|
|
|
@@ -12,6 +13,19 @@ parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) (.+)', re.I)
|
12
|
13
|
results_xpath = './/table[@id="r"]/tr'
|
13
|
14
|
|
14
|
15
|
|
|
16
|
def is_valid_lang(lang):
    """Look up ``lang`` in ``language_codes`` and report whether it is known.

    A two-character ``lang`` is compared (lower-cased) against the first two
    characters of each entry's element 0; any other length is compared
    case-insensitively against each entry's element 1.

    Returns ``(True, name)`` for the first matching entry, where ``name`` is
    that entry's element 1 lower-cased, or plain ``False`` when nothing
    matches.  Callers rely on the falsy ``False`` to detect "not found" and
    on index 1 of the tuple to get the name, so the mixed return type is
    intentional.
    """
    if len(lang) == 2:
        # Abbreviation: match on the leading two characters of element 0.
        candidates = (entry for entry in language_codes
                      if entry[0][:2] == lang.lower())
    else:
        # Anything else is treated as a name and matched against element 1.
        candidates = (entry for entry in language_codes
                      if entry[1].lower() == lang.lower())

    # Only the first match matters; the generators stop scanning there.
    hit = next(candidates, None)
    if hit is None:
        return False
    return (True, hit[1].lower())
|
|
28
|
+
|
15
|
29
|
def request(query, params):
|
16
|
30
|
m = parser_re.match(unicode(query, 'utf8'))
|
17
|
31
|
if not m:
|
|
@@ -19,28 +33,15 @@ def request(query, params):
|
19
|
33
|
|
20
|
34
|
from_lang, to_lang, query = m.groups()
|
21
|
35
|
|
22
|
|
- if len(from_lang) == 2:
|
23
|
|
- lan = filter(lambda x: x[0][:2] == from_lang, language_codes)
|
24
|
|
- if lan:
|
25
|
|
- from_lang = lan[0][1].lower()
|
26
|
|
- else:
|
27
|
|
- return params
|
28
|
|
- elif from_lang.lower() not in [x[1].lower() for x in language_codes]:
|
29
|
|
- return params
|
30
|
|
-
|
|
36
|
+ from_lang = is_valid_lang(from_lang)
|
|
37
|
+ to_lang = is_valid_lang(to_lang)
|
31
|
38
|
|
32
|
|
- if len(to_lang) == 2:
|
33
|
|
- lan = filter(lambda x: x[0][:2] == to_lang, language_codes)
|
34
|
|
- if lan:
|
35
|
|
- to_lang = lan[0][1].lower()
|
36
|
|
- else:
|
37
|
|
- return params
|
38
|
|
- elif to_lang.lower() not in [x[1].lower() for x in language_codes]:
|
|
39
|
+ if not from_lang or not to_lang:
|
39
|
40
|
return params
|
40
|
41
|
|
41
|
|
- params['url'] = url.format(from_lang=from_lang, to_lang=to_lang,query=query)
|
42
|
|
- params['from_lang'] = from_lang
|
43
|
|
- params['to_lang'] = to_lang
|
|
42
|
+ params['url'] = url.format(from_lang=from_lang[1], to_lang=to_lang[1],query=query)
|
|
43
|
+ params['from_lang'] = from_lang[1]
|
|
44
|
+ params['to_lang'] = to_lang[1]
|
44
|
45
|
params['query'] = query
|
45
|
46
|
|
46
|
47
|
return params
|
|
@@ -64,8 +65,8 @@ def response(resp):
|
64
|
65
|
|
65
|
66
|
results.append({
|
66
|
67
|
'url': urljoin(resp.url, '?%d' % k),
|
67
|
|
- 'title': from_result.text_content(),
|
68
|
|
- 'content': '; '.join(to_results)
|
|
68
|
+ 'title': escape(from_result.text_content()),
|
|
69
|
+ 'content': escape('; '.join(to_results))
|
69
|
70
|
})
|
70
|
71
|
|
71
|
72
|
return results
|