浏览代码

[enh] add infoboxes and answers (clean up)

Dalf 10 年前
父节点
当前提交
0a71525ab6
共有 4 个文件被更改,包括 74 次插入和 20 次删除
  1. 26
    19
      searx/engines/wikidata.py
  2. 0
    1
      searx/search.py
  3. 4
    0
      searx/settings.yml
  4. 44
    0
      searx/templates/default/infobox.html

+ 26
- 19
searx/engines/wikidata.py 查看文件

@@ -1,13 +1,12 @@
1 1
 import json
2
-from datetime import datetime
3 2
 from requests import get
4 3
 from urllib import urlencode
4
+from datetime import datetime
5 5
 
6 6
 resultCount=2
7
-urlSearch = 'https://www.wikidata.org/w/api.php?action=query&list=search&format=json&srnamespace=0&srprop=sectionsnippet&{query}'
7
+urlSearch = 'https://www.wikidata.org/w/api.php?action=query&list=search&format=json&srnamespace=0&srprop=sectiontitle&{query}'
8 8
 urlDetail = 'https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&props=labels%7Cinfo%7Csitelinks%7Csitelinks%2Furls%7Cdescriptions%7Cclaims&{query}'
9
-# find the right URL for urlMap
10
-urlMap = 'http://www.openstreetmap.org/?lat={latitude}&lon={longitude}&zoom={zoom}&layers=M'
9
+urlMap = 'https://www.openstreetmap.org/?lat={latitude}&lon={longitude}&zoom={zoom}&layers=M'
11 10
 
12 11
 def request(query, params):
13 12
     params['url'] = urlSearch.format(query=urlencode({'srsearch': query, 'srlimit': resultCount}))
@@ -18,24 +17,27 @@ def request(query, params):
18 17
 def response(resp):
19 18
     results = []
20 19
     search_res = json.loads(resp.text)
21
-    # TODO parallel http queries
22
-    before = datetime.now()
20
+
21
+    wikidata_ids = set()
23 22
     for r in search_res.get('query', {}).get('search', {}):
24
-        wikidata_id = r.get('title', '')
25
-        results = results + getDetail(wikidata_id)
26
-    after = datetime.now()
27
-    print str(after - before) + " second(s)"
23
+        wikidata_ids.add(r.get('title', ''))
28 24
 
29
-    return results
25
+    language = resp.search_params['language'].split('_')[0]
26
+    if language == 'all':
27
+        language = 'en'
28
+    url = urlDetail.format(query=urlencode({'ids': '|'.join(wikidata_ids), 'languages': language + '|en'}))
29
+
30
+    before = datetime.now()
31
+    htmlresponse = get(url)
32
+    print datetime.now() - before
33
+    jsonresponse = json.loads(htmlresponse.content)
34
+    for wikidata_id in wikidata_ids:
35
+        results = results + getDetail(jsonresponse, wikidata_id, language)
30 36
 
31
-def getDetail(wikidata_id):
32
-    language = 'fr'
37
+    return results
33 38
 
34
-    url = urlDetail.format(query=urlencode({'ids': wikidata_id, 'languages': language + '|en'}))
35
-    print url
36
-    response = get(url)
37
-    result = json.loads(response.content)
38
-    result = result.get('entities', {}).get(wikidata_id, {})
39
+def getDetail(jsonresponse, wikidata_id, language):
40
+    result = jsonresponse.get('entities', {}).get(wikidata_id, {})
39 41
     
40 42
     title = result.get('labels', {}).get(language, {}).get('value', None)
41 43
     if title == None:
@@ -50,7 +52,6 @@ def getDetail(wikidata_id):
50 52
 
51 53
     claims = result.get('claims', {})
52 54
     official_website = get_string(claims, 'P856', None)
53
-    print official_website
54 55
     if official_website != None:
55 56
         urls.append({ 'title' : 'Official site', 'url': official_website })
56 57
         results.append({ 'title': title, 'url' : official_website })
@@ -98,10 +99,12 @@ def getDetail(wikidata_id):
98 99
 
99 100
     return results
100 101
 
102
+
101 103
 def add_url(urls, title, url):
102 104
     if url != None:
103 105
         urls.append({'title' : title, 'url' : url})
104 106
 
107
+
105 108
 def get_mainsnak(claims, propertyName):
106 109
     propValue = claims.get(propertyName, {})
107 110
     if len(propValue) == 0:
@@ -110,6 +113,7 @@ def get_mainsnak(claims, propertyName):
110 113
     propValue = propValue[0].get('mainsnak', None)
111 114
     return propValue
112 115
 
116
+
113 117
 def get_string(claims, propertyName, defaultValue=None):
114 118
     propValue = claims.get(propertyName, {})
115 119
     if len(propValue) == 0:
@@ -129,6 +133,7 @@ def get_string(claims, propertyName, defaultValue=None):
129 133
     else:
130 134
         return ', '.join(result)
131 135
 
136
+
132 137
 def get_time(claims, propertyName, defaultValue=None):
133 138
     propValue = claims.get(propertyName, {})
134 139
     if len(propValue) == 0:
@@ -149,6 +154,7 @@ def get_time(claims, propertyName, defaultValue=None):
149 154
     else:
150 155
         return ', '.join(result)
151 156
 
157
+
152 158
 def get_geolink(claims, propertyName, defaultValue=''):
153 159
     mainsnak = get_mainsnak(claims, propertyName)
154 160
 
@@ -182,6 +188,7 @@ def get_geolink(claims, propertyName, defaultValue=''):
182 188
 
183 189
     return url
184 190
 
191
+
185 192
 def get_wikilink(result, wikiid):
186 193
     url = result.get('sitelinks', {}).get(wikiid, {}).get('url', None)
187 194
     if url == None:

+ 0
- 1
searx/search.py 查看文件

@@ -76,7 +76,6 @@ def make_callback(engine_name, results, suggestions, answers, infoboxes, callbac
76 76
             # if it is an infobox, add it to list of infoboxes
77 77
             if 'infobox' in result:
78 78
                 infoboxes.append(result)
79
-                print result
80 79
                 continue
81 80
 
82 81
             # append result

+ 4
- 0
searx/settings.yml 查看文件

@@ -44,6 +44,10 @@ engines:
44 44
     engine : duckduckgo_definitions
45 45
     shortcut : ddd
46 46
 
47
+  - name : wikidata
48
+    engine : wikidata
49
+    shortcut : wd
50
+
47 51
   - name : duckduckgo
48 52
     engine : duckduckgo
49 53
     shortcut : ddg

+ 44
- 0
searx/templates/default/infobox.html 查看文件

@@ -0,0 +1,44 @@
1
+<div class="infobox">
2
+  <h2>{{ infobox.infobox }}</h2>
3
+  {% if infobox.img_src %}<img src="{{ infobox.img_src }}" />{% endif %}
4
+  <p>{{ infobox.entity }}</p>
5
+  <p>{{ infobox.content }}</p>
6
+  {% if infobox.attributes %}
7
+  <div class="attributes">
8
+    <table>
9
+      {% for attribute in infobox.attributes %}
10
+      <tr><td>{{ attribute.label }}</td><td>{{ attribute.value }}</td></tr>
11
+      {% endfor %}
12
+    </table>
13
+  </div>
14
+  {% endif %}
15
+
16
+  {% if infobox.urls %}
17
+  <div class="urls">
18
+    <ul>
19
+      {% for url in infobox.urls %}
20
+      <li class="url"><a href="{{ url.url }}">{{ url.title }}</a></li>
21
+      {% endfor %}
22
+    </ul>
23
+  </div>
24
+  {% endif %}
25
+
26
+  {% if infobox.relatedTopics %}
27
+  <div class="relatedTopics">
28
+      {% for topic in infobox.relatedTopics %}
29
+      <div>
30
+	<h3>{{ topic.name }}</h3>
31
+	{% for suggestion in topic.suggestions %}
32
+	<form method="{{ method or 'POST' }}" action="{{ url_for('index') }}">
33
+            <input type="hidden" name="q" value="{{ suggestion }}">
34
+            <input type="submit" value="{{ suggestion }}" />
35
+        </form>
36
+	{% endfor %}
37
+      </div>
38
+      {% endfor %}
39
+  </div>
40
+  {% endif %}
41
+
42
+  <br />
43
+  
44
+</div>