[engine] dictzone + mymemory.translated engine

9 anos atrás · 596c6b6c93
--- a/searx/engines/dictzone.py
+++ b/searx/engines/dictzone.py
@@ -0,0 +1,70 @@
 
				+"""
			
 
				+ Dictzone
			
 
				+
			
 
				+ @website     https://dictzone.com/
			
 
				+ @provide-api no
			
 
				+ @using-api   no
			
 
				+ @results     HTML (using search portal)
			
 
				+ @stable      no (HTML can change)
			
 
				+ @parse       url, title, content
			
 
				+"""
			
 
				+
			
 
				+import re
			
 
				+from urlparse import urljoin
			
 
				+from lxml import html
			
 
				+from cgi import escape
			
 
				+from searx.engines.xpath import extract_text
			
 
				+from searx.utils import is_valid_lang
			
 
				+
			
 
				# Search categories this engine is registered under.
				categories = ['general']

				# Dictionary page template; request() substitutes the two language names
				# and the looked-up word.  HTTPS is used (the site is published at
				# https://dictzone.com/) so the word is not sent in clear text.
				url = 'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}'

				# Not read anywhere in this module -- presumably a ranking weight picked
				# up by the searx core; TODO confirm.
				weight = 100

				# Matches a query that ends in "<from>-<to> <word>", case-insensitively.
				# The word is a single token: it may not contain a space.
				parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I)

				# Rows of the table whose id is "r" on the fetched page.
				results_xpath = './/table[@id="r"]/tr'
			
 
				+
			
 
				+
			
 
				def request(query, params):
				    """Build the dictzone lookup URL for a "<from>-<to> <word>" query.

				    ``params`` is returned untouched when the query does not match
				    ``parser_re`` or when either language is rejected by
				    ``is_valid_lang``.  Otherwise ``params['url']`` is filled from the
				    ``url`` template, using the third element of each tuple returned by
				    ``is_valid_lang`` and the word captured from the query.
				    """
				    parsed = parser_re.match(unicode(query, 'utf8'))
				    if parsed is None:
				        return params

				    source, target, term = parsed.groups()

				    # Both lookups are always performed before the validity check.
				    source = is_valid_lang(source)
				    target = is_valid_lang(target)

				    if source and target:
				        params['url'] = url.format(from_lang=source[2],
				                                   to_lang=target[2],
				                                   query=term)

				    return params
			
 
				+
			
 
				+
			
 
				def response(resp):
				    """Turn the dictzone result table into a list of result dicts.

				    The first row matched by ``results_xpath`` is skipped.  Every
				    remaining row that has exactly two ``<td>`` cells yields one dict:

				    * ``url``     -- ``resp.url`` joined with ``?<k>``, where ``k`` is the
				      row's position after the skipped first row, so each row gets a
				      distinct URL
				    * ``title``   -- escaped text of the first cell
				    * ``content`` -- escaped, ``'; '``-joined texts of the non-blank
				      ``./p/a`` links in the second cell

				    Rows with any other number of cells are ignored.
				    """
				    results = []

				    dom = html.fromstring(resp.text)

				    for k, row in enumerate(dom.xpath(results_xpath)[1:]):
				        try:
				            from_result, to_results_raw = row.xpath('./td')
				        except ValueError:
				            # Unpacking failed: the row does not have exactly two cells.
				            continue

				        to_results = []
				        for link in to_results_raw.xpath('./p/a'):
				            text = link.text_content()
				            if text.strip():
				                to_results.append(text)

				        results.append({
				            'url': urljoin(resp.url, '?%d' % k),
				            'title': escape(from_result.text_content()),
				            'content': escape('; '.join(to_results))
				        })

				    return results
			
--- a/searx/engines/translated.py
+++ b/searx/engines/translated.py
@@ -0,0 +1,69 @@
 
				+"""
			
 
				+ MyMemory Translated
			
 
				+
			
 
				+ @website     https://mymemory.translated.net/
			
 
				+ @provide-api yes (https://mymemory.translated.net/doc/spec.php)
			
 
				+ @using-api   yes
			
 
				+ @results     JSON
			
 
				+ @stable      yes
			
 
				+ @parse       url, title, content
			
 
				+"""
			
 
				+import re
			
 
				+from urlparse import urljoin
			
 
				+from lxml import html
			
 
				+from cgi import escape
			
 
				+from searx.engines.xpath import extract_text
			
 
				+from searx.utils import is_valid_lang
			
 
				+
			
 
				# Search categories this engine is registered under.
				categories = ['general']

				# API endpoint template; request() fills in the text, the language pair
				# and the optional "&key=..." suffix.  HTTPS keeps the text and the API
				# key out of clear text.
				url = ('https://api.mymemory.translated.net/get?q={query}'
				       '&langpair={from_lang}|{to_lang}{key}')

				# Human-facing page that response() links to for the same translation.
				web_url = 'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}'

				# Not read anywhere in this module -- presumably a ranking weight picked
				# up by the searx core; TODO confirm.
				weight = 100

				# Matches a query that ends in "<from>-<to> <text>", case-insensitively.
				# The text must be at least two characters long and may contain spaces.
				parser_re = re.compile(u'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I)

				# Optional MyMemory API key; when empty, request() sends no key parameter.
				api_key = ''
			
 
				+
			
 
				+
			
 
				def request(query, params):
				    """Build the MyMemory API request for a "<from>-<to> <text>" query.

				    ``params`` is returned untouched when the query does not match
				    ``parser_re`` or when either language is rejected by
				    ``is_valid_lang``.  Otherwise ``params['url']`` is filled from the
				    ``url`` template, using the second element of each tuple returned by
				    ``is_valid_lang``.  The text and both language tuples are also stored
				    under ``'query'``, ``'from_lang'`` and ``'to_lang'`` for ``response``.
				    """
				    parsed = parser_re.match(unicode(query, 'utf8'))
				    if parsed is None:
				        return params

				    source, target, text = parsed.groups()

				    # Both lookups are always performed before the validity check.
				    source = is_valid_lang(source)
				    target = is_valid_lang(target)

				    if not (source and target):
				        return params

				    # The key parameter is only appended when an API key is configured.
				    key_form = '&key=' + api_key if api_key else ''

				    # NOTE(review): the text is inserted into the query string without
				    # URL-encoding.
				    params['url'] = url.format(from_lang=source[1],
				                               to_lang=target[1],
				                               query=text,
				                               key=key_form)
				    params['query'] = text
				    params['from_lang'] = source
				    params['to_lang'] = target

				    return params
			
 
				+
			
 
				+
			
 
				def response(resp):
				    """Wrap the MyMemory translation in a single-element result list.

				    Reads the ``'query'``, ``'from_lang'`` and ``'to_lang'`` entries of
				    ``resp.search_params`` (the values stored by ``request``) and the
				    ``responseData.translatedText`` field of the JSON body.  The result
				    dict carries the escaped web URL, a ``[from-to] text`` title and the
				    escaped translated text.
				    """
				    search = resp.search_params
				    source = search['from_lang']
				    target = search['to_lang']
				    text = search['query']

				    # The link uses the third tuple element, the title the second.
				    link = web_url.format(from_lang=source[2],
				                          to_lang=target[2],
				                          query=text)
				    title = '[{0}-{1}] {2}'.format(source[1], target[1], text)
				    translated = resp.json()['responseData']['translatedText']

				    return [{
				        'url': escape(link),
				        'title': escape(title),
				        'content': escape(translated)
				    }]
			
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -495,6 +495,19 @@ engines:
 
				     timeout: 6.0
			
 
				     categories : science
			
 
				 
			
 
				+  - name : dictzone
			
 
				+    engine : dictzone
			
 
				+    shortcut : dc
			
 
				+
			
 
				+  - name : mymemory translated
			
 
				+    engine : translated
			
 
				+    shortcut : tl
			
 
				+    timeout : 5.0
			
 
				+    disabled : True
			
 
				+    # The engine works without an API key, but usage is then limited to 1000 words/day
			
 
				+    # See : http://mymemory.translated.net/doc/usagelimits.php
			
 
				+    # api_key : ''
			
 
				+
			
 
				 #The blekko technology and team have joined IBM Watson! -> https://blekko.com/
			
 
				 #  - name : blekko images
			
 
				 #    engine : blekko_images
			
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -9,6 +9,7 @@ from HTMLParser import HTMLParser
 
				 from random import choice
			
 
				 
			
 
				 from searx.version import VERSION_STRING
			
 
				+from searx.languages import language_codes
			
 
				 from searx import settings
			
 
				 from searx import logger
			
 
				 
			
@@ -255,3 +256,17 @@ def get_torrent_size(filesize, filesize_multiplier):
 
				         filesize = None
			
 
				 
			
 
				     return filesize
			
 
				+
			
 
				+
			
 
				def is_valid_lang(lang):
				    """Look ``lang`` up in ``language_codes``.

				    A two-character ``lang`` is compared, lower-cased, against the first
				    two characters of each entry's first field.  Any other ``lang`` is
				    compared case-insensitively against each entry's second field.

				    Returns ``(True, <first two chars of field 0>, <lower-cased field 1>)``
				    for the first matching entry, or ``False`` when nothing matches.
				    """
				    if len(lang) == 2:
				        def matches(entry):
				            return entry[0][:2] == lang.lower()
				    else:
				        def matches(entry):
				            return entry[1].lower() == lang.lower()

				    for entry in language_codes:
				        if matches(entry):
				            return (True, entry[0][:2], entry[1].lower())

				    return False