Browse Source

[enh] add 1x.com engine

* Deactivated by default because of the large number of results
Thomas Pointhuber 10 years ago
parent
commit
6042f2bc53
2 changed files with 86 additions and 0 deletions
  1. 81
    0
      searx/engines/www1x.py
  2. 5
    0
      searx/settings.yml

+ 81
- 0
searx/engines/www1x.py View File

@@ -0,0 +1,81 @@
1
+## 1x (Images)
2
+#
3
+# @website     http://1x.com/
4
+# @provide-api no
5
+#
6
+# @using-api   no
7
+# @results     HTML
8
+# @stable      no (HTML can change)
9
+# @parse       url, title, thumbnail, img_src, content
10
+
11
+
12
+from urllib import urlencode
13
+from urlparse import urljoin
14
+from lxml import html
15
+import string
16
+import re
17
+
18
+# engine dependent config
19
+categories = ['images']
20
+paging = False
21
+
22
+# search-url
23
+base_url = 'http://1x.com'
24
+search_url = base_url+'/backend/search.php?{query}'
25
+
26
+
27
+# do search-request
28
def request(query, params):
    """Fill in the outgoing request with the 1x.com search URL.

    query  -- the search terms; sent URL-encoded as the ``q`` parameter
    params -- request dictionary; its ``'url'`` entry is overwritten

    Returns the same ``params`` dictionary that was passed in.
    """
    encoded_query = urlencode({'q': query})
    params['url'] = search_url.format(query=encoded_query)
    return params
32
+
33
+
34
+# get response from search-request
35
def response(resp):
    """Build image results from the markup returned by the search backend.

    The response text is cut into individual ``<a ...>...</a>`` fragments,
    each fragment is parsed on its own, and every anchor that links to a
    photo page and wraps an image becomes one result.

    resp -- response object; only its ``text`` attribute is read

    Returns a list of dictionaries with the keys ``url``, ``title``,
    ``img_src``, ``content``, ``thumbnail_src`` and ``template``.
    Fragments that contain no anchor, do not link to a ``/photo/`` URL, or
    have no image source are skipped instead of raising.
    """
    results = []

    # the capturing group makes re.split keep the '<a' and '</a>' delimiters
    # in the list, so anchors can be reassembled piece by piece
    results_parts = re.split(r'(</a>|<a)', resp.text)

    cur_element = ''

    # iterate over link parts
    for result_part in results_parts:
        # processed start and end of link
        if result_part == '<a':
            cur_element = result_part
            continue
        elif result_part != '</a>':
            cur_element += result_part
            continue

        # closing tag reached: take the collected markup and reset the
        # buffer so a stray second '</a>' cannot re-parse stale content
        anchor_markup = cur_element + result_part
        cur_element = ''

        # a closing tag without a preceding opening tag is not a link
        if not anchor_markup.startswith('<a'):
            continue

        # fix xml-error: unclosed <img> directly before the closing anchor.
        # str.replace is used because the string.replace() module function
        # no longer exists in Python 3
        anchor_markup = anchor_markup.replace('"></a>', '"/></a>')

        dom = html.fromstring(anchor_markup)
        links = dom.xpath('//a')
        if not links:
            # '<a' also matches tags such as '<abbr'; nothing to extract
            continue
        link = links[0]

        url = urljoin(base_url, link.attrib.get('href'))

        # check if url is pointing to a photo; done before touching the
        # image so that non-photo links without an <img> are simply ignored
        if '/photo/' not in url:
            continue

        title = link.attrib.get('title', '')

        images = link.xpath('.//img')
        if not images:
            continue
        src = images[0].attrib.get('src')
        if not src:
            continue

        thumbnail_src = urljoin(base_url, src)
        # TODO: get image with higher resolution
        img_src = thumbnail_src

        # append result
        results.append({'url': url,
                        'title': title,
                        'img_src': img_src,
                        'content': '',
                        'thumbnail_src': thumbnail_src,
                        'template': 'images.html'})

    # return results
    return results

+ 5
- 0
searx/settings.yml View File

@@ -83,6 +83,11 @@ engines:
83 83
     engine : www500px
84 84
     shortcut : px
85 85
 
86
+  - name : 1x
87
+    engine : www1x
88
+    shortcut : 1x
89
+    disabled : True
90
+
86 91
   - name : flickr
87 92
     categories : images
88 93
     shortcut : fl