add 1x.com engine, improve yacy-engine

10 年之前 · 03137eebd9
--- a/searx/engines/www1x.py
+++ b/searx/engines/www1x.py
@@ -0,0 +1,82 @@
 
				+## 1x (Images)
			
 
				+#
			
 
				+# @website     http://1x.com/
			
 
				+# @provide-api no
			
 
				+#
			
 
				+# @using-api   no
			
 
				+# @results     HTML
			
 
				+# @stable      no (HTML can change)
			
 
				+# @parse       url, title, thumbnail, img_src, content
			
 
				+
			
 
				+
			
 
				+from urllib import urlencode
			
 
				+from urlparse import urljoin
			
 
				+from lxml import html
			
 
				+import string
			
 
				+import re
			
 
				+
			
 
				+# engine dependent config
			
 
				+categories = ['images']
			
 
				+paging = False
			
 
				+
			
 
				+# search-url
			
 
				+base_url = 'http://1x.com'
			
 
				+search_url = base_url+'/backend/search.php?{query}'
			
 
				+
			
 
				+
			
 
				+# do search-request
			
 
				+def request(query, params):
			
 
				+    params['url'] = search_url.format(query=urlencode({'q': query}))
			
 
				+
			
 
				+    return params
			
 
				+
			
 
				+
			
 
				+# get response from search-request
			
 
				+def response(resp):
			
 
				+    results = []
			
 
				+
			
 
				+    # get links from result-text
			
 
				+    regex = re.compile('(</a>|<a)')
			
 
				+    results_parts = re.split(regex, resp.text)
			
 
				+
			
 
				+    cur_element = ''
			
 
				+
			
 
				+    # iterate over link parts
			
 
				+    for result_part in results_parts:
			
 
				+        # processed start and end of link
			
 
				+        if result_part == '<a':
			
 
				+            cur_element = result_part
			
 
				+            continue
			
 
				+        elif result_part != '</a>':
			
 
				+            cur_element += result_part
			
 
				+            continue
			
 
				+
			
 
				+        cur_element += result_part
			
 
				+
			
 
				+        # fix xml-error
			
 
				+        cur_element = string.replace(cur_element, '"></a>', '"/></a>')
			
 
				+
			
 
				+        dom = html.fromstring(cur_element)
			
 
				+        link = dom.xpath('//a')[0]
			
 
				+
			
 
				+        url = urljoin(base_url, link.attrib.get('href'))
			
 
				+        title = link.attrib.get('title', '')
			
 
				+
			
 
				+        thumbnail_src = urljoin(base_url, link.xpath('.//img')[0].attrib['src'])
			
 
				+        # TODO: get image with higher resolution
			
 
				+        img_src = thumbnail_src
			
 
				+
			
 
				+        # check if url is showing to a photo
			
 
				+        if '/photo/' not in url:
			
 
				+            continue
			
 
				+
			
 
				+        # append result
			
 
				+        results.append({'url': url,
			
 
				+                        'title': title,
			
 
				+                        'img_src': img_src,
			
 
				+                        'content': '',
			
 
				+                        'thumbnail_src': thumbnail_src,
			
 
				+                        'template': 'images.html'})
			
 
				+
			
 
				+    # return results
			
 
				+    return results
			
--- a/searx/engines/yacy.py
+++ b/searx/engines/yacy.py
@@ -68,9 +68,18 @@ def response(resp):
 
				 
			
 
				     search_results = raw_search_results.get('channels', {})[0].get('items', [])
			
 
				 
			
 
				-    if resp.search_params['category'] == 'general':
			
 
				+    for result in search_results:
			
 
				+        # parse image results
			
 
				+        if result.get('image'):
			
 
				+            # append result
			
 
				+            results.append({'url': result['url'],
			
 
				+                            'title': result['title'],
			
 
				+                            'content': '',
			
 
				+                            'img_src': result['image'],
			
 
				+                            'template': 'images.html'})
			
 
				+
			
 
				         # parse general results
			
 
				-        for result in search_results:
			
 
				+        else:
			
 
				             publishedDate = parser.parse(result['pubDate'])
			
 
				 
			
 
				             # append result
			
@@ -79,17 +88,7 @@ def response(resp):
 
				                             'content': result['description'],
			
 
				                             'publishedDate': publishedDate})
			
 
				 
			
 
				-    elif resp.search_params['category'] == 'images':
			
 
				-        # parse image results
			
 
				-        for result in search_results:
			
 
				-            # append result
			
 
				-            results.append({'url': result['url'],
			
 
				-                            'title': result['title'],
			
 
				-                            'content': '',
			
 
				-                            'img_src': result['image'],
			
 
				-                            'template': 'images.html'})
			
 
				-
			
 
				-    #TODO parse video, audio and file results
			
 
				+        #TODO parse video, audio and file results
			
 
				 
			
 
				     # return results
			
 
				     return results
			
--- a/searx/settings.yml
+++ b/searx/settings.yml
@@ -83,6 +83,11 @@ engines:
 
				     engine : www500px
			
 
				     shortcut : px
			
 
				 
			
 
				+  - name : 1x
			
 
				+    engine : www1x
			
 
				+    shortcut : 1x
			
 
				+    disabled : True
			
 
				+
			
 
				   - name : flickr
			
 
				     categories : images
			
 
				     shortcut : fl
			
--- a/searx/tests/engines/test_www1x.py
+++ b/searx/tests/engines/test_www1x.py
@@ -0,0 +1,57 @@
 
				+from collections import defaultdict
			
 
				+import mock
			
 
				+from searx.engines import www1x
			
 
				+from searx.testing import SearxTestCase
			
 
				+
			
 
				+
			
 
				+class TestWww1xEngine(SearxTestCase):
			
 
				+
			
 
				+    def test_request(self):
			
 
				+        query = 'test_query'
			
 
				+        params = www1x.request(query, defaultdict(dict))
			
 
				+        self.assertTrue('url' in params)
			
 
				+        self.assertTrue(query in params['url'])
			
 
				+        self.assertTrue('1x.com' in params['url'])
			
 
				+
			
 
				+    def test_response(self):
			
 
				+        self.assertRaises(AttributeError, www1x.response, None)
			
 
				+        self.assertRaises(AttributeError, www1x.response, [])
			
 
				+        self.assertRaises(AttributeError, www1x.response, '')
			
 
				+        self.assertRaises(AttributeError, www1x.response, '[]')
			
 
				+
			
 
				+        response = mock.Mock(text='<html></html>')
			
 
				+        self.assertEqual(www1x.response(response), [])
			
 
				+        html = """
			
 
				+        <?xml version="1.0" encoding="UTF-8"?><!DOCTYPE characters
			
 
				+        [
			
 
				+        <!ELEMENT characters (character*) >
			
 
				+        <!ELEMENT character  (#PCDATA   ) >
			
 
				+
			
 
				+        <!ENTITY iexcl   "&#161;" >
			
 
				+        <!ENTITY cent    "&#162;" >
			
 
				+        <!ENTITY pound   "&#163;" >
			
 
				+        ]
			
 
				+        ><root><searchresult><![CDATA[<table border="0" cellpadding="0" cellspacing="0" width="100%">
			
 
				+        <tr>
			
 
				+            <td style="min-width: 220px;" valign="top">
			
 
				+                <div style="font-size: 30px; margin: 0px 0px 20px 0px;">Photos</div>
			
 
				+                <div>
			
 
				+                    <a href="/photo/123456" class="dynamiclink">
			
 
				+<img border="0" class="searchresult" src="/images/user/testimage-123456.jpg" style="width: 125px; height: 120px;">
			
 
				+                    </a>
			
 
				+                    <a title="sjoerd lammers street photography" href="/member/sjoerdlammers" class="dynamiclink">
			
 
				+<img border="0" class="searchresult" src="/images/profile/60c48b394c677d2fa4d9e7d263aabf44-square.jpg">
			
 
				+                    </a>
			
 
				+                </div>
			
 
				+            </td>
			
 
				+        </table>
			
 
				+        ]]></searchresult></root>
			
 
				+        """
			
 
				+        response = mock.Mock(text=html)
			
 
				+        results = www1x.response(response)
			
 
				+        self.assertEqual(type(results), list)
			
 
				+        self.assertEqual(len(results), 1)
			
 
				+        self.assertEqual(results[0]['url'], 'http://1x.com/photo/123456')
			
 
				+        self.assertEqual(results[0]['thumbnail_src'], 'http://1x.com/images/user/testimage-123456.jpg')
			
 
				+        self.assertEqual(results[0]['content'], '')
			
 
				+        self.assertEqual(results[0]['template'], 'images.html')
			
--- a/searx/tests/test_engines.py
+++ b/searx/tests/test_engines.py
@@ -1,2 +1,3 @@
 
				 from searx.tests.engines.test_dummy import *  # noqa
			
 
				 from searx.tests.engines.test_github import *  # noqa
			
 
				+from searx.tests.engines.test_www1x import *  # noqa