Bläddra i källkod

Merge pull request #678 from potato/master

[engine] dictzone + mymemory.translated engine
Adam Tauber 8 år sedan
förälder
incheckning
596c6b6c93
4 ändrade filer med 167 tillägg och 0 borttagningar
  1. 70
    0
      searx/engines/dictzone.py
  2. 69
    0
      searx/engines/translated.py
  3. 13
    0
      searx/settings.yml
  4. 15
    0
      searx/utils.py

+ 70
- 0
searx/engines/dictzone.py Visa fil

@@ -0,0 +1,70 @@
1
+"""
2
+ Dictzone
3
+
4
+ @website     https://dictzone.com/
5
+ @provide-api no
6
+ @using-api   no
7
+ @results     HTML (using search portal)
8
+ @stable      no (HTML can change)
9
+ @parse       url, title, content
10
+"""
11
+
12
+import re
13
+from urlparse import urljoin
14
+from lxml import html
15
+from cgi import escape
16
+from searx.engines.xpath import extract_text
17
+from searx.utils import is_valid_lang
18
+
19
# searx categories this engine is listed under
categories = ['general']
# Search URL template; request() fills in the two language names and the
# word to look up.  HTTPS is used so that queries are not sent in clear text
# (the site is served from https://dictzone.com/).
url = 'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'
weight = 100

# Matches "<from>-<to> <word>" at the end of the query; the word itself must
# not contain a space.
parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)
# Rows of the result table on the Dictzone page
results_xpath = './/table[@id="r"]/tr'
25
+
26
+
27
def request(query, params):
    """Set ``params['url']`` for a "<from>-<to> <word>" dictionary query.

    The raw query is decoded as UTF-8 and matched against ``parser_re``.
    Both language tokens are resolved through ``is_valid_lang``.  When the
    query does not match, or either language is not recognised, ``params``
    is returned without a URL being set.
    """
    match = parser_re.match(unicode(query, 'utf8'))
    if match is None:
        return params

    source, target, word = match.groups()
    source_lang = is_valid_lang(source)
    target_lang = is_valid_lang(target)

    if source_lang and target_lang:
        # item 2 of the is_valid_lang() tuple is the lower-cased language
        # name, which is what the Dictzone URL path is built from
        params['url'] = url.format(from_lang=source_lang[2],
                                   to_lang=target_lang[2],
                                   query=word)

    return params
45
+
46
+
47
def response(resp):
    """Turn a Dictzone result page into a list of searx result dicts.

    Every row matched by ``results_xpath`` except the first one is
    inspected.  Rows that do not consist of exactly two ``td`` cells are
    skipped.  For the remaining rows a dict is produced with:

    * ``url``     -- the response URL joined with ``?<row index>``
    * ``title``   -- escaped text of the first cell
    * ``content`` -- escaped, ``'; '``-joined texts of the ``./p/a`` links
      in the second cell (links with whitespace-only text are left out)
    """
    results = []

    dom = html.fromstring(resp.text)

    # k keeps counting over skipped rows as well, so the generated URLs stay
    # tied to the row position in the table
    for k, result in enumerate(dom.xpath(results_xpath)[1:]):
        cells = result.xpath('./td')
        # an explicit length check replaces the former bare ``except:``
        # around the tuple unpacking, which hid every kind of error
        if len(cells) != 2:
            continue
        from_result, to_results_raw = cells

        texts = (link.text_content()
                 for link in to_results_raw.xpath('./p/a'))
        to_results = [text for text in texts if text.strip()]

        results.append({
            'url': urljoin(resp.url, '?%d' % k),
            'title': escape(from_result.text_content()),
            'content': escape('; '.join(to_results))
        })

    return results

+ 69
- 0
searx/engines/translated.py Visa fil

@@ -0,0 +1,69 @@
1
+"""
2
+ MyMemory Translated
3
+
4
+ @website     https://mymemory.translated.net/
5
+ @provide-api yes (https://mymemory.translated.net/doc/spec.php)
6
+ @using-api   yes
7
+ @results     JSON
8
+ @stable      yes
9
+ @parse       url, title, content
10
+"""
11
+import re
12
+from urlparse import urljoin
13
+from lxml import html
14
+from cgi import escape
15
+from searx.engines.xpath import extract_text
16
+from searx.utils import is_valid_lang
17
+
18
# searx categories this engine is listed under
categories = ['general']
# API endpoint template; ``key`` is either empty or a complete "&key=..."
# suffix built in request().  HTTPS keeps the query text and the API key
# from being sent in clear text.
url = 'https://api.mymemory.translated.net/get?q={query}' \
      '&langpair={from_lang}|{to_lang}{key}'
# Human readable page that the result links to
web_url = 'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'
weight = 100

# Matches "<from>-<to> <text>" at the end of the query; the text must be at
# least two characters long and may contain spaces.
parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I)
# Optional MyMemory API key; left empty when no key is configured
api_key = ''
26
+
27
+
28
def request(query, params):
    """Prepare the MyMemory API request for a "<from>-<to> <text>" query.

    The raw query is decoded as UTF-8 and matched against ``parser_re``;
    both language tokens are resolved through ``is_valid_lang``.  When the
    query does not match, or a language is not recognised, ``params`` is
    returned without a URL being set.

    On success ``params['url']`` holds the API URL, and ``params['query']``,
    ``params['from_lang']`` and ``params['to_lang']`` carry the unquoted
    text and the resolved language tuples for use by ``response``.
    """
    # Python 2 home of quote(); imported locally so that this function does
    # not depend on a change to the module level imports.
    from urllib import quote

    m = parser_re.match(unicode(query, 'utf8'))
    if not m:
        return params

    from_lang, to_lang, query = m.groups()

    from_lang = is_valid_lang(from_lang)
    to_lang = is_valid_lang(to_lang)

    if not from_lang or not to_lang:
        return params

    key_form = '&key=' + api_key if api_key else ''

    # The text to translate is free-form: without percent-encoding, characters
    # such as '&', '#' or '+' would be read as URL syntax and truncate or
    # alter the ``q`` parameter.  quote() needs bytes for non-ASCII input.
    params['url'] = url.format(from_lang=from_lang[1],
                               to_lang=to_lang[1],
                               query=quote(query.encode('utf8'), safe=''),
                               key=key_form)
    params['query'] = query
    params['from_lang'] = from_lang
    params['to_lang'] = to_lang

    return params
54
+
55
+
56
def response(resp):
    """Build the single result that carries the MyMemory translation.

    The link and title are assembled from the values ``request`` stored in
    the search parameters; the content is the ``translatedText`` field of
    the ``responseData`` object in the JSON reply.  All three values are
    passed through ``escape``.
    """
    search = resp.search_params
    source = search['from_lang']
    target = search['to_lang']
    text = search['query']

    # item 2 of a language tuple goes into the web link, item 1 into the title
    link = web_url.format(from_lang=source[2], to_lang=target[2], query=text)
    heading = '[{0}-{1}] {2}'.format(source[1], target[1], text)
    translation = resp.json()['responseData']['translatedText']

    return [{
        'url': escape(link),
        'title': escape(heading),
        'content': escape(translation)
    }]

+ 13
- 0
searx/settings.yml Visa fil

@@ -495,6 +495,19 @@ engines:
495 495
     timeout: 6.0
496 496
     categories : science
497 497
 
498
+  - name : dictzone
499
+    engine : dictzone
500
+    shortcut : dc
501
+
502
+  - name : mymemory translated
503
+    engine : translated
504
+    shortcut : tl
505
+    timeout : 5.0
506
+    disabled : True
507
    # The engine works without an API key, but is then limited to 1000 words/day
508
+    # See : http://mymemory.translated.net/doc/usagelimits.php
509
+    # api_key : ''
510
+
498 511
 #The blekko technology and team have joined IBM Watson! -> https://blekko.com/
499 512
 #  - name : blekko images
500 513
 #    engine : blekko_images

+ 15
- 0
searx/utils.py Visa fil

@@ -9,6 +9,7 @@ from HTMLParser import HTMLParser
9 9
 from random import choice
10 10
 
11 11
 from searx.version import VERSION_STRING
12
+from searx.languages import language_codes
12 13
 from searx import settings
13 14
 from searx import logger
14 15
 
@@ -255,3 +256,17 @@ def get_torrent_size(filesize, filesize_multiplier):
255 256
         filesize = None
256 257
 
257 258
     return filesize
259
+
260
+
261
def is_valid_lang(lang):
    """Look up ``lang`` in ``language_codes``.

    A two character ``lang`` is compared against the first two characters of
    each entry's code; anything else is compared against the lower-cased
    entry name.  The comparison ignores the case of ``lang``.

    Returns ``(True, <two letter code>, <lower-cased name>)`` for the first
    matching entry, or ``False`` when nothing matches.
    """
    wanted = lang.lower()
    by_code = len(lang) == 2

    for entry in language_codes:
        candidate = entry[0][:2] if by_code else entry[1].lower()
        if candidate == wanted:
            return (True, entry[0][:2], entry[1].lower())

    return False