Browse Source

Merge pull request #104 from dalf/master

[enh] add infoboxes and answers, [fix] when two results are merged, really use the content with more text
Adam Tauber 10 years ago
parent
commit
67b69619ba

+ 5
- 6
searx/engines/currency_convert.py View File

38
     except:
38
     except:
39
         return results
39
         return results
40
 
40
 
41
-    title = '{0} {1} in {2} is {3}'.format(
41
+    answer = '{0} {1} = {2} {3} (1 {1} = {4} {3})'.format(
42
         resp.search_params['ammount'],
42
         resp.search_params['ammount'],
43
         resp.search_params['from'],
43
         resp.search_params['from'],
44
+        resp.search_params['ammount'] * conversion_rate,
44
         resp.search_params['to'],
45
         resp.search_params['to'],
45
-        resp.search_params['ammount'] * conversion_rate
46
+        conversion_rate
46
     )
47
     )
47
 
48
 
48
-    content = '1 {0} is {1} {2}'.format(resp.search_params['from'],
49
-                                        conversion_rate,
50
-                                        resp.search_params['to'])
51
     now_date = datetime.now().strftime('%Y%m%d')
49
     now_date = datetime.now().strftime('%Y%m%d')
52
     url = 'http://finance.yahoo.com/currency/converter-results/{0}/{1}-{2}-to-{3}.html'  # noqa
50
     url = 'http://finance.yahoo.com/currency/converter-results/{0}/{1}-{2}-to-{3}.html'  # noqa
53
     url = url.format(
51
     url = url.format(
56
         resp.search_params['from'].lower(),
54
         resp.search_params['from'].lower(),
57
         resp.search_params['to'].lower()
55
         resp.search_params['to'].lower()
58
     )
56
     )
59
-    results.append({'title': title, 'content': content, 'url': url})
57
+
58
+    results.append({'answer' : answer, 'url': url})
60
 
59
 
61
     return results
60
     return results

+ 114
- 7
searx/engines/duckduckgo_definitions.py View File

1
 import json
1
 import json
2
 from urllib import urlencode
2
 from urllib import urlencode
3
+from lxml import html
4
+from searx.engines.xpath import extract_text
3
 
5
 
4
-url = 'http://api.duckduckgo.com/?{query}&format=json&pretty=0&no_redirect=1'
6
+url = 'https://api.duckduckgo.com/?{query}&format=json&pretty=0&no_redirect=1&d=1'
5
 
7
 
8
def result_to_text(url, text, htmlResult):
    """Return a display text for a DuckDuckGo result entry.

    The text of the first ``<a>`` element found in ``htmlResult`` is
    preferred; ``text`` is returned when there is no usable HTML or no link.

    url -- URL of the entry (currently unused, kept for the callers)
    text -- plain-text fallback
    htmlResult -- HTML fragment of the entry, may be None or empty
    """
    # TODO : remove result ending with "Meaning" or "Category"

    # html.fromstring() raises on None / empty documents, and the caller
    # passes ddg_result.get('Result', None), so guard before parsing.
    if not htmlResult or not htmlResult.strip():
        return text

    links = html.fromstring(htmlResult).xpath('//a')
    if links:
        return extract_text(links[0])
    return text
16
+
17
def html_to_text(htmlFragment):
    """Parse an HTML fragment and return its text via extract_text."""
    return extract_text(html.fromstring(htmlFragment))
6
 
20
 
7
 def request(query, params):
21
 def request(query, params):
22
+    # TODO add kl={locale}
8
     params['url'] = url.format(query=urlencode({'q': query}))
23
     params['url'] = url.format(query=urlencode({'q': query}))
9
     return params
24
     return params
10
 
25
 
12
 def response(resp):
27
 def response(resp):
13
     search_res = json.loads(resp.text)
28
     search_res = json.loads(resp.text)
14
     results = []
29
     results = []
30
+
31
+    content = ''
32
+    heading = search_res.get('Heading', '')
33
+    attributes = []
34
+    urls = []
35
+    infobox_id = None
36
+    relatedTopics = []
37
+
38
+    # add answer if there is one
39
+    answer = search_res.get('Answer', '')
40
+    if answer != '':
41
+        results.append({ 'answer' : html_to_text(answer) })
42
+
43
+    # add infobox
15
     if 'Definition' in search_res:
44
     if 'Definition' in search_res:
16
-        if search_res.get('AbstractURL'):
17
-            res = {'title': search_res.get('Heading', ''),
18
-                   'content': search_res.get('Definition', ''),
19
-                   'url': search_res.get('AbstractURL', ''),
20
-                   'class': 'definition_result'}
21
-            results.append(res)
45
+        content = content + search_res.get('Definition', '') 
46
+
47
+    if 'Abstract' in search_res:
48
+        content = content + search_res.get('Abstract', '')
49
+
50
+
51
+    # image
52
+    image = search_res.get('Image', '')
53
+    image = None if image == '' else image
54
+
55
+    # attributes
56
+    if 'Infobox' in search_res:
57
+        infobox = search_res.get('Infobox', None)
58
+        if  'content' in infobox:
59
+            for info in infobox.get('content'):
60
+                attributes.append({'label': info.get('label'), 'value': info.get('value')})
61
+
62
+    # urls
63
+    for ddg_result in search_res.get('Results', []):
64
+        if 'FirstURL' in ddg_result:
65
+            firstURL = ddg_result.get('FirstURL', '')
66
+            text = ddg_result.get('Text', '')
67
+            urls.append({'title':text, 'url':firstURL})
68
+            results.append({'title':heading, 'url': firstURL})
69
+
70
+    # related topics
71
+    for ddg_result in search_res.get('RelatedTopics', None):
72
+        if 'FirstURL' in ddg_result:
73
+            suggestion = result_to_text(ddg_result.get('FirstURL', None), ddg_result.get('Text', None), ddg_result.get('Result', None))
74
+            if suggestion != heading:
75
+                results.append({'suggestion': suggestion})
76
+        elif 'Topics' in ddg_result:
77
+            suggestions = []
78
+            relatedTopics.append({ 'name' : ddg_result.get('Name', ''), 'suggestions': suggestions })
79
+            for topic_result in ddg_result.get('Topics', []):
80
+                suggestion = result_to_text(topic_result.get('FirstURL', None), topic_result.get('Text', None), topic_result.get('Result', None))
81
+                if suggestion != heading:
82
+                    suggestions.append(suggestion)
83
+
84
+    # abstract
85
+    abstractURL = search_res.get('AbstractURL', '')
86
+    if abstractURL != '':
87
+        # add as result ? problem always in english
88
+        infobox_id = abstractURL
89
+        urls.append({'title': search_res.get('AbstractSource'), 'url': abstractURL})
90
+
91
+    # definition
92
+    definitionURL = search_res.get('DefinitionURL', '')
93
+    if definitionURL != '':
94
+        # add as result ? as answer ? problem always in english
95
+        infobox_id = definitionURL
96
+        urls.append({'title': search_res.get('DefinitionSource'), 'url': definitionURL})
97
+
98
+    # entity
99
+    entity = search_res.get('Entity', None)
100
+    # TODO continent / country / department / location / waterfall / mountain range : link to map search, get weather, near by locations
101
+    # TODO musician : link to music search
102
+    # TODO concert tour : ??
103
+    # TODO film / actor / television  / media franchise : links to IMDB / rottentomatoes (or scrap result)
104
+    # TODO music : link tu musicbrainz / last.fm
105
+    # TODO book : ??
106
+    # TODO artist / playwright : ??
107
+    # TODO compagny : ??
108
+    # TODO software / os : ??
109
+    # TODO software engineer : ??
110
+    # TODO prepared food : ??
111
+    # TODO website : ??
112
+    # TODO performing art : ??
113
+    # TODO prepared food : ??
114
+    # TODO programming language : ??
115
+    # TODO file format : ??
116
+
117
+    if len(heading)>0:
118
+        # TODO get infobox.meta.value where .label='article_title'
119
+        results.append({
120
+               'infobox': heading,
121
+               'id': infobox_id,
122
+               'entity': entity,
123
+               'content': content,
124
+               'img_src' : image,
125
+               'attributes': attributes,
126
+               'urls': urls,
127
+               'relatedTopics': relatedTopics
128
+               })
22
 
129
 
23
     return results
130
     return results

+ 221
- 0
searx/engines/wikidata.py View File

1
+import json
2
+from requests import get
3
+from urllib import urlencode
4
+from datetime import datetime
5
+
6
+resultCount=2
7
+urlSearch = 'https://www.wikidata.org/w/api.php?action=query&list=search&format=json&srnamespace=0&srprop=sectiontitle&{query}'
8
+urlDetail = 'https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&props=labels%7Cinfo%7Csitelinks%7Csitelinks%2Furls%7Cdescriptions%7Cclaims&{query}'
9
+urlMap = 'https://www.openstreetmap.org/?lat={latitude}&lon={longitude}&zoom={zoom}&layers=M'
10
+
11
def request(query, params):
    """Fill ``params['url']`` with the Wikidata full-text search URL.

    query -- the user's search terms
    params -- request parameter dict; its 'url' entry is set in place

    Returns the same ``params`` dict.
    """
    search_args = urlencode({'srsearch': query, 'srlimit': resultCount})
    params['url'] = urlSearch.format(query=search_args)
    return params
15
+
16
+
17
def response(resp):
    """Turn a Wikidata search response into result and infobox dicts.

    The search response only carries entity ids (in the 'title' field), so a
    second request fetches the entity details, which getDetail() then
    converts into results.

    resp -- response object exposing ``text`` (JSON search result) and
            ``search_params`` (must contain 'language')

    Returns a list of result dicts (possibly empty).
    """
    search_res = json.loads(resp.text)

    # Keep the ranking order returned by the search while dropping
    # duplicates and entries without an id.
    wikidata_ids = []
    for hit in search_res.get('query', {}).get('search', []):
        wikidata_id = hit.get('title', '')
        if wikidata_id and wikidata_id not in wikidata_ids:
            wikidata_ids.append(wikidata_id)

    # Nothing found: skip the detail request entirely.
    if not wikidata_ids:
        return []

    language = resp.search_params['language'].split('_')[0]
    if language == 'all':
        language = 'en'

    detail_args = urlencode({'ids': '|'.join(wikidata_ids),
                             'languages': language + '|en'})
    url = urlDetail.format(query=detail_args)

    # NOTE(review): this is a synchronous request without a timeout made
    # while processing the response -- consider passing a timeout.
    htmlresponse = get(url)
    jsonresponse = json.loads(htmlresponse.content)

    results = []
    for wikidata_id in wikidata_ids:
        results.extend(getDetail(jsonresponse, wikidata_id, language))

    return results
38
+
39
def getDetail(jsonresponse, wikidata_id, language):
    """Build the result dicts for one Wikidata entity.

    jsonresponse -- decoded ``wbgetentities`` response
    wikidata_id -- id of the entity to describe
    language -- preferred language code; English is used as fallback

    Returns a list holding an optional regular result (the official website)
    followed by one infobox dict with 'infobox', 'id', 'content',
    'attributes' and 'urls' keys.
    """
    result = jsonresponse.get('entities', {}).get(wikidata_id, {})

    # label and description: preferred language first, then English
    labels = result.get('labels', {})
    title = labels.get(language, {}).get('value', None)
    if title is None:
        title = labels.get('en', {}).get('value', wikidata_id)

    descriptions = result.get('descriptions', {})
    description = descriptions.get(language, {}).get('value', '')
    if description == '':
        description = descriptions.get('en', {}).get('value', '')

    claims = result.get('claims', {})
    results = []
    urls = []
    attributes = []

    # Truthiness checks (rather than "is not None") also reject the empty
    # string, so no empty link or attribute is ever emitted.
    official_website = get_string(claims, 'P856', None)
    if official_website:
        urls.append({'title': 'Official site', 'url': official_website})
        results.append({'title': title, 'url': official_website})

    # sister projects: localized link first (if any), then the English one
    wikipedia_en_link = get_wikilink(result, 'enwiki')
    sister_projects = (('Wikipedia', 'wiki'),
                       ('Wiki voyage', 'wikivoyage'),
                       ('Wikiquote', 'wikiquote'))
    for site_name, site_suffix in sister_projects:
        if language != 'en':
            add_url(urls,
                    site_name + ' (' + language + ')',
                    get_wikilink(result, language + site_suffix))
        add_url(urls,
                site_name + ' (en)',
                get_wikilink(result, 'en' + site_suffix))

    add_url(urls, 'Commons wiki', get_wikilink(result, 'commonswiki'))

    add_url(urls, 'Location', get_geolink(claims, 'P625', None))

    add_url(urls,
            'Wikidata',
            'https://www.wikidata.org/wiki/' + wikidata_id
            + '?uselang=' + language)

    # MusicBrainz identifiers: (wikidata property, musicbrainz url path)
    # Not handled yet: P982 (area), P1004 (place), P1330 (instrument),
    # P1407 (series).
    musicbrainz_properties = (('P435', 'work'),
                              ('P434', 'artist'),
                              ('P436', 'release-group'),
                              ('P966', 'label'))
    for property_id, path in musicbrainz_properties:
        musicbrainz_id = get_string(claims, property_id)
        if musicbrainz_id:
            add_url(urls,
                    'MusicBrainz',
                    'http://musicbrainz.org/' + path + '/' + musicbrainz_id)

    postal_code = get_string(claims, 'P281', None)
    if postal_code:
        attributes.append({'label': 'Postal code(s)', 'value': postal_code})

    date_of_birth = get_time(claims, 'P569', None)
    if date_of_birth:
        attributes.append({'label': 'Date of birth', 'value': date_of_birth})

    date_of_death = get_time(claims, 'P570', None)
    if date_of_death:
        attributes.append({'label': 'Date of death', 'value': date_of_death})

    results.append({
        'infobox': title,
        'id': wikipedia_en_link,
        'content': description,
        'attributes': attributes,
        'urls': urls
    })

    return results
122
+
123
+
124
def add_url(urls, title, url):
    """Append ``{'title': title, 'url': url}`` to ``urls`` unless url is None."""
    if url is not None:
        urls.append({'title': title, 'url': url})
127
+
128
+
129
def get_mainsnak(claims, propertyName):
    """Return the 'mainsnak' of the first statement of a property.

    claims -- the 'claims' dict of an entity
    propertyName -- property id, e.g. 'P625'

    Returns None when the property is absent, has no statement, or the
    first statement has no 'mainsnak'.
    """
    statements = claims.get(propertyName)
    if not statements:
        return None

    return statements[0].get('mainsnak', None)
136
+
137
+
138
def get_string(claims, propertyName, defaultValue=None):
    """Return the values of a string property joined with ', '.

    claims -- the 'claims' dict of an entity
    propertyName -- property id, e.g. 'P856'
    defaultValue -- returned when the property yields no value

    Statements whose main snak carries no 'datavalue'/'value' are skipped,
    so a property consisting only of such statements yields defaultValue
    instead of an empty string.
    """
    statements = claims.get(propertyName)
    if not statements:
        return defaultValue

    values = []
    for statement in statements:
        datavalue = statement.get('mainsnak', {}).get('datavalue')
        if datavalue and 'value' in datavalue:
            values.append(datavalue['value'])

    if not values:
        return defaultValue
    return ', '.join(values)
156
+
157
+
158
def get_time(claims, propertyName, defaultValue=None):
    """Return the 'time' strings of a time property joined with ', '.

    claims -- the 'claims' dict of an entity
    propertyName -- property id, e.g. 'P569'
    defaultValue -- returned when the property yields no time value

    Statements without a usable time value are skipped instead of raising
    or contributing an empty string.
    """
    statements = claims.get(propertyName)
    if not statements:
        return defaultValue

    times = []
    for statement in statements:
        datavalue = statement.get('mainsnak', {}).get('datavalue') or {}
        value = datavalue.get('value')
        # the value of a time snak is a dict; anything else has no 'time'
        if isinstance(value, dict) and value.get('time'):
            times.append(value['time'])

    if not times:
        return defaultValue
    return ', '.join(times)
177
+
178
+
179
def get_geolink(claims, propertyName, defaultValue=''):
    """Return a map URL (built from urlMap) for a coordinate property.

    claims -- the 'claims' dict of an entity
    propertyName -- property id, e.g. 'P625'
    defaultValue -- returned when the property is absent, is not a
                    'globe-coordinate', or carries no latitude/longitude
    """
    mainsnak = get_mainsnak(claims, propertyName)

    if mainsnak is None:
        return defaultValue

    if mainsnak.get('datatype', '') != 'globe-coordinate':
        return defaultValue

    value = (mainsnak.get('datavalue') or {}).get('value') or {}

    latitude = value.get('latitude')
    longitude = value.get('longitude')
    if latitude is None or longitude is None:
        # no usable coordinate: do not link to 0,0
        return defaultValue

    # precision may be missing or null; fall back to the finest level
    precision = value.get('precision') or 0.0002

    # there is no zoom information, deduce from precision (error prone)
    # samples :
    # 13 --> 5
    # 1 --> 6
    # 0.016666666666667 --> 9
    # 0.00027777777777778 --> 19
    # wolframalpha : quadratic fit
    #   { {13, 5}, {1, 6}, {0.0166666, 9}, {0.0002777777,19}}
    #   14.1186-8.8322 x+0.625447 x^2
    # (the code uses 15 as constant term)
    if precision < 0.0003:
        zoom = 19
    else:
        zoom = int(15 - precision * 8.8322 + precision * precision * 0.625447)
        # The parabola dips below zero between the sample points and grows
        # without bound beyond them, so keep the zoom in a usable range.
        zoom = max(1, min(19, zoom))

    return urlMap.format(latitude=str(latitude),
                         longitude=str(longitude),
                         zoom=str(zoom))
211
+
212
+
213
def get_wikilink(result, wikiid):
    """Return the https URL of a sitelink of an entity, or None.

    result -- entity dict containing a 'sitelinks' mapping
    wikiid -- site id, e.g. 'enwiki' or 'commonswiki'

    'http://' and protocol-relative ('//') URLs are upgraded to https;
    only the scheme prefix is rewritten, the rest of the URL is untouched.
    """
    url = result.get('sitelinks', {}).get(wikiid, {}).get('url', None)
    if url is None:
        return None
    if url.startswith('http://'):
        return 'https://' + url[len('http://'):]
    if url.startswith('//'):
        return 'https:' + url
    return url

+ 103
- 10
searx/search.py View File

16
 '''
16
 '''
17
 
17
 
18
 import grequests
18
 import grequests
19
+import re
19
 from itertools import izip_longest, chain
20
 from itertools import izip_longest, chain
20
 from datetime import datetime
21
 from datetime import datetime
21
 from operator import itemgetter
22
 from operator import itemgetter
38
 
39
 
39
 
40
 
40
 # create a callback wrapper for the search engine results
41
 # create a callback wrapper for the search engine results
41
-def make_callback(engine_name, results, suggestions, callback, params):
42
+def make_callback(engine_name, results, suggestions, answers, infoboxes, callback, params):
42
 
43
 
43
     # creating a callback wrapper for the search engine results
44
     # creating a callback wrapper for the search engine results
44
     def process_callback(response, **kwargs):
45
     def process_callback(response, **kwargs):
45
         cb_res = []
46
         cb_res = []
46
         response.search_params = params
47
         response.search_params = params
47
 
48
 
48
-        # update stats with current page-load-time
49
-        engines[engine_name].stats['page_load_time'] += \
50
-            (datetime.now() - params['started']).total_seconds()
51
-
49
+        # callback
52
         try:
50
         try:
53
             search_results = callback(response)
51
             search_results = callback(response)
54
         except Exception, e:
52
         except Exception, e:
61
                 engine_name, str(e))
59
                 engine_name, str(e))
62
             return
60
             return
63
 
61
 
62
+        # add results
64
         for result in search_results:
63
         for result in search_results:
65
             result['engine'] = engine_name
64
             result['engine'] = engine_name
66
 
65
 
70
                 suggestions.add(result['suggestion'])
69
                 suggestions.add(result['suggestion'])
71
                 continue
70
                 continue
72
 
71
 
72
+            # if it is an answer, add it to list of answers
73
+            if 'answer' in result:
74
+                answers.add(result['answer'])
75
+                continue
76
+
77
+            # if it is an infobox, add it to list of infoboxes
78
+            if 'infobox' in result:
79
+                infoboxes.append(result)
80
+                continue
81
+
73
             # append result
82
             # append result
74
             cb_res.append(result)
83
             cb_res.append(result)
75
 
84
 
76
         results[engine_name] = cb_res
85
         results[engine_name] = cb_res
77
 
86
 
87
+        # update stats with current page-load-time
88
+        engines[engine_name].stats['page_load_time'] += \
89
+            (datetime.now() - params['started']).total_seconds()
90
+
78
     return process_callback
91
     return process_callback
79
 
92
 
80
 
93
 
94
+# return the meaningful length of the content for a result
95
try:
    _STRING_TYPES = basestring  # Python 2: str and unicode
except NameError:
    _STRING_TYPES = str  # Python 3

# Punctuation and spaces that do not count as meaningful content.
# The hyphen is placed last so that it is a literal character and not a
# range operator (')-_' would also match digits and uppercase letters).
_IGNORED_CHARS_RE = re.compile(r'[,;:!?./\\ ()_-]')


def content_result_len(content):
    """Return the number of meaningful characters in ``content``.

    Spaces and the punctuation matched by _IGNORED_CHARS_RE are not counted.
    Anything that is not a string (e.g. None) has a length of 0.
    """
    if not isinstance(content, _STRING_TYPES):
        return 0
    return len(_IGNORED_CHARS_RE.sub('', content))
101
+
102
+
81
 # score results and remove duplications
103
 # score results and remove duplications
82
 def score_results(results):
104
 def score_results(results):
83
     # calculate scoring parameters
105
     # calculate scoring parameters
99
             res['host'] = res['host'].replace('www.', '', 1)
121
             res['host'] = res['host'].replace('www.', '', 1)
100
 
122
 
101
         res['engines'] = [res['engine']]
123
         res['engines'] = [res['engine']]
124
+
102
         weight = 1.0
125
         weight = 1.0
103
 
126
 
127
+        # strip multiple spaces and cariage returns from content
128
+        if 'content' in res:
129
+            res['content'] = re.sub(' +', ' ', res['content'].strip().replace('\n', ''))
130
+
104
         # get weight of this engine if possible
131
         # get weight of this engine if possible
105
         if hasattr(engines[res['engine']], 'weight'):
132
         if hasattr(engines[res['engine']], 'weight'):
106
             weight = float(engines[res['engine']].weight)
133
             weight = float(engines[res['engine']].weight)
108
         # calculate score for that engine
135
         # calculate score for that engine
109
         score = int((flat_len - i) / engines_len) * weight + 1
136
         score = int((flat_len - i) / engines_len) * weight + 1
110
 
137
 
111
-        duplicated = False
112
-
113
         # check for duplicates
138
         # check for duplicates
139
+        duplicated = False
114
         for new_res in results:
140
         for new_res in results:
115
             # remove / from the end of the url if required
141
             # remove / from the end of the url if required
116
             p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path  # noqa
142
             p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path  # noqa
127
         # merge duplicates together
153
         # merge duplicates together
128
         if duplicated:
154
         if duplicated:
129
             # using content with more text
155
             # using content with more text
130
-            if res.get('content') > duplicated.get('content'):
156
+            if content_result_len(res.get('content', '')) > content_result_len(duplicated.get('content', '')):
131
                 duplicated['content'] = res['content']
157
                 duplicated['content'] = res['content']
132
 
158
 
133
             # increase result-score
159
             # increase result-score
186
     return gresults
212
     return gresults
187
 
213
 
188
 
214
 
215
def merge_two_infoboxes(infobox1, infobox2):
    """Merge the data of ``infobox2`` into ``infobox1`` (modified in place).

    - urls: entries of infobox2 whose 'url' is not yet present are appended
    - attributes: entries of infobox2 whose 'label' is not yet present are
      appended
    - content: the content with the larger meaningful length
      (content_result_len) is kept

    Returns None.
    """
    if 'urls' in infobox2:
        urls1 = infobox1.get('urls', None)
        if urls1 is None:
            urls1 = infobox1['urls'] = []

        known_urls = set(url.get('url', None) for url in urls1)
        for url in infobox2.get('urls', []):
            target = url.get('url', None)
            if target not in known_urls:
                known_urls.add(target)
                urls1.append(url)

    if 'attributes' in infobox2:
        attributes1 = infobox1.get('attributes', None)
        if attributes1 is None:
            attributes1 = infobox1['attributes'] = []

        known_labels = set(attribute.get('label', None)
                           for attribute in attributes1)
        for attribute in infobox2.get('attributes', []):
            label = attribute.get('label', None)
            if label not in known_labels:
                known_labels.add(label)
                attributes1.append(attribute)

    if 'content' in infobox2:
        content1 = infobox1.get('content', None)
        content2 = infobox2.get('content', '')
        if content1 is None:
            infobox1['content'] = content2
        elif content_result_len(content2) > content_result_len(content1):
            infobox1['content'] = content2
252
+
253
+
254
def merge_infoboxes(infoboxes):
    """Merge infoboxes sharing the same 'id' and return the merged list.

    The first infobox seen for an id is kept (in its original position) and
    later ones with the same id are merged into it with
    merge_two_infoboxes(). Infoboxes without an id are never merged.
    """
    results = []
    index_by_id = {}
    for infobox in infoboxes:
        infobox_id = infobox.get('id', None)
        if infobox_id is not None:
            existing_index = index_by_id.get(infobox_id, None)
            if existing_index is not None:
                merge_two_infoboxes(results[existing_index], infobox)
                continue
            # remember where the first infobox with this id is stored
            index_by_id[infobox_id] = len(results)

        results.append(infobox)

    return results
271
+
272
+
189
 class Search(object):
273
 class Search(object):
190
 
274
 
191
     """Search information container"""
275
     """Search information container"""
208
 
292
 
209
         self.results = []
293
         self.results = []
210
         self.suggestions = []
294
         self.suggestions = []
295
+        self.answers = []
296
+        self.infoboxes = []
211
         self.request_data = {}
297
         self.request_data = {}
212
 
298
 
213
         # set specific language if set
299
         # set specific language if set
293
         requests = []
379
         requests = []
294
         results = {}
380
         results = {}
295
         suggestions = set()
381
         suggestions = set()
382
+        answers = set()
383
+        infoboxes = []
296
 
384
 
297
         # increase number of searches
385
         # increase number of searches
298
         number_of_searches += 1
386
         number_of_searches += 1
337
                 selected_engine['name'],
425
                 selected_engine['name'],
338
                 results,
426
                 results,
339
                 suggestions,
427
                 suggestions,
428
+                answers,
429
+                infoboxes,
340
                 engine.response,
430
                 engine.response,
341
                 request_params
431
                 request_params
342
             )
432
             )
374
         # score results and remove duplications
464
         # score results and remove duplications
375
         results = score_results(results)
465
         results = score_results(results)
376
 
466
 
467
+        # merge infoboxes according to their ids
468
+        infoboxes = merge_infoboxes(infoboxes)
469
+
377
         # update engine stats, using calculated score
470
         # update engine stats, using calculated score
378
         for result in results:
471
         for result in results:
379
             for res_engine in result['engines']:
472
             for res_engine in result['engines']:
380
                 engines[result['engine']]\
473
                 engines[result['engine']]\
381
                     .stats['score_count'] += result['score']
474
                     .stats['score_count'] += result['score']
382
 
475
 
383
-        # return results and suggestions
384
-        return results, suggestions
476
+        # return results, suggestions, answers and infoboxes
477
+        return results, suggestions, answers, infoboxes

+ 5
- 1
searx/settings.yml View File

1
 server:
1
 server:
2
     port : 8888
2
     port : 8888
3
     secret_key : "ultrasecretkey" # change this!
3
     secret_key : "ultrasecretkey" # change this!
4
-    debug : False # Debug mode, only for development
4
+    debug : True # Debug mode, only for development
5
     request_timeout : 2.0 # seconds
5
     request_timeout : 2.0 # seconds
6
     base_url : False # Set custom base_url. Possible values: False or "https://your.custom.host/location/"
6
     base_url : False # Set custom base_url. Possible values: False or "https://your.custom.host/location/"
7
     themes_path : "" # Custom ui themes path
7
     themes_path : "" # Custom ui themes path
44
     engine : duckduckgo_definitions
44
     engine : duckduckgo_definitions
45
     shortcut : ddd
45
     shortcut : ddd
46
 
46
 
47
+  - name : wikidata
48
+    engine : wikidata
49
+    shortcut : wd
50
+
47
   - name : duckduckgo
51
   - name : duckduckgo
48
     engine : duckduckgo
52
     engine : duckduckgo
49
     shortcut : ddg
53
     shortcut : ddg

+ 1
- 75
searx/static/default/css/style.css
File diff suppressed because it is too large
View File


+ 96
- 28
searx/static/default/less/style.less View File

235
 		max-width: 54em;
235
 		max-width: 54em;
236
 		word-wrap:break-word;
236
 		word-wrap:break-word;
237
 		line-height: 1.24;
237
 		line-height: 1.24;
238
+
239
+		img {
240
+		    float: left;
241
+		    margin-right: 5px;
242
+		    max-width: 200px;
243
+		    max-height: 100px;
244
+		}
245
+		
246
+		br.last {
247
+		    clear: both;
248
+		}
238
 	}
249
 	}
239
 
250
 
240
 	.url {
251
 	.url {
384
     }
395
     }
385
 }
396
 }
386
 
397
 
387
-#suggestions {
398
+#suggestions, #answers {
388
 
399
 
389
-    margin-top: 20px;
400
+    	margin-top: 20px;
401
+
402
+}
403
+
404
+#suggestions, #answers, #infoboxes {
390
 
405
 
391
-	span {
392
-		display: inline;
393
-		margin: 0 2px 2px 2px;
394
-		padding: 0;
395
-	}
396
 	input {
406
 	input {
397
 		padding: 0;
407
 		padding: 0;
398
 		margin: 3px;
408
 		margin: 3px;
399
 		font-size: 0.8em;
409
 		font-size: 0.8em;
400
 		display: inline-block;
410
 		display: inline-block;
401
-        background: transparent;
402
-        color: @color-result-search-url-font;
411
+        	background: transparent;
412
+        	color: @color-result-search-url-font;
403
 		cursor: pointer;
413
 		cursor: pointer;
404
 	}
414
 	}
405
-    input[type="submit"] {
415
+
416
+    	input[type="submit"] {
406
 		text-decoration: underline;
417
 		text-decoration: underline;
407
-    }
418
+    	}
408
 
419
 
409
 	form {
420
 	form {
410
 		display: inline;
421
 		display: inline;
411
 	}
422
 	}
412
 }
423
 }
413
 
424
 
425
+
426
// Side column holding the infoboxes, placed to the right of the results.
#infoboxes {
	position: absolute;
	top: 220px;
	right: 20px;
	margin: 0px 2px 5px 5px;
	padding: 0px 2px 2px;
	max-width: 21em;

	.infobox {
		margin: 10px 0 10px;
		border: 1px solid #ddd;
		padding: 5px;
		font-size: 0.8em;

		img {
			max-width: 20em;
			max-height: 12em;
			display: block;
			margin: 5px;
			padding: 5px;
		}

		h2 {
			margin: 0;
		}

		table {
			width: auto;

			td {
				vertical-align: top;
			}
		}

		input {
			font-size: 1em;
		}

		br {
			clear: both;
		}
	}
}
471
+
414
 #search_url {
472
 #search_url {
415
 	margin-top: 8px;
473
 	margin-top: 8px;
416
 
474
 
453
 
511
 
454
 @media screen and (max-width: @results-width) {
512
 @media screen and (max-width: @results-width) {
455
 
513
 
456
-	#categories {
457
-		font-size: 90%;
458
-		clear: both;
459
-
460
-		.checkbox_container {
461
-			margin-top: 2px;
462
-			margin: auto; 
463
-		}
464
-	}
465
-
466
     #results {
514
     #results {
467
         margin: auto;
515
         margin: auto;
468
         padding: 0;
516
         padding: 0;
483
 	}
531
 	}
484
 }
532
 }
485
 
533
 
486
-@media screen and (max-width: 70em) {
534
+@media screen and (max-width: 75em) {
535
+
536
+       #infoboxes {
537
+	   position: inherit;
538
+	   max-width: inherit;
539
+	   
540
+	   .infobox {
541
+	   	    clear:both;
542
+	   
543
+	   	   img {
544
+	   	       float: left;
545
+	       	       max-width: 10em;
546
+	   	   }
547
+	   }
548
+
549
+       }
550
+
551
+	#categories {
552
+		font-size: 90%;
553
+		clear: both;
554
+
555
+		.checkbox_container {
556
+			margin-top: 2px;
557
+			margin: auto; 
558
+		}
559
+	}
560
+
487
 	.right {
561
 	.right {
488
 		display: none;
562
 		display: none;
489
 		postion: fixed !important;
563
 		postion: fixed !important;
515
 	.result {
589
 	.result {
516
 		border-top: 1px solid @color-result-top-border;
590
 		border-top: 1px solid @color-result-top-border;
517
 		margin: 7px 0 6px 0;
591
 		margin: 7px 0 6px 0;
518
-
519
-		img {
520
-			max-width: 90%;
521
-			width: auto;
522
-			height: auto
523
-		}
524
 	}
592
 	}
525
 }
593
 }
526
 
594
 

+ 44
- 0
searx/templates/default/infobox.html View File

1
{# Renders one infobox: title, optional image, entity, content, attributes, urls and related topics. #}
<div class="infobox">
  <h2>{{ infobox.infobox }}</h2>
  {% if infobox.img_src %}<img src="{{ infobox.img_src }}" alt="{{ infobox.infobox }}" />{% endif %}
  {# entity and content are optional: without the guards a missing value is rendered as "None" #}
  {% if infobox.entity %}<p>{{ infobox.entity }}</p>{% endif %}
  {% if infobox.content %}<p>{{ infobox.content }}</p>{% endif %}
  {% if infobox.attributes %}
  <div class="attributes">
    <table>
      {% for attribute in infobox.attributes %}
      <tr><td>{{ attribute.label }}</td><td>{{ attribute.value }}</td></tr>
      {% endfor %}
    </table>
  </div>
  {% endif %}

  {% if infobox.urls %}
  <div class="urls">
    <ul>
      {% for url in infobox.urls %}
      <li class="url"><a href="{{ url.url }}">{{ url.title }}</a></li>
      {% endfor %}
    </ul>
  </div>
  {% endif %}

  {% if infobox.relatedTopics %}
  <div class="relatedTopics">
      {% for topic in infobox.relatedTopics %}
      <div>
        <h3>{{ topic.name }}</h3>
        {% for suggestion in topic.suggestions %}
        <form method="{{ method or 'POST' }}" action="{{ url_for('index') }}">
            <input type="hidden" name="q" value="{{ suggestion }}">
            <input type="submit" value="{{ suggestion }}" />
        </form>
        {% endfor %}
      </div>
      {% endfor %}
  </div>
  {% endif %}

  <br />

</div>

+ 1
- 1
searx/templates/default/result_templates/default.html View File

8
     <h3 class="result_title"><a href="{{ result.url }}">{{ result.title|safe }}</a></h3>
8
     <h3 class="result_title"><a href="{{ result.url }}">{{ result.title|safe }}</a></h3>
9
     <p class="url">{{ result.pretty_url }} <a class="cache_link" href="https://web.archive.org/web/{{ result.url }}">cached</a></p>
9
     <p class="url">{{ result.pretty_url }} <a class="cache_link" href="https://web.archive.org/web/{{ result.url }}">cached</a></p>
10
 	{% if result.publishedDate %}<p class="published_date">{{ result.publishedDate }}</p>{% endif %}
10
 	{% if result.publishedDate %}<p class="published_date">{{ result.publishedDate }}</p>{% endif %}
11
-    <p class="content">{% if result.content %}{{ result.content|safe }}<br />{% endif %}</p>
11
+    <p class="content">{% if result.img_src %}<img src="{{ result.img_src|safe }}" class="image" />{% endif %}{% if result.content %}{{ result.content|safe }}<br class="last"/>{% endif %}</p>
12
   </div>
12
   </div>
13
 </div>
13
 </div>

+ 16
- 0
searx/templates/default/results.html View File

30
         </div>
30
         </div>
31
     </div>
31
     </div>
32
 
32
 
33
+    {% if answers %}
34
+    <div id="answers"><span>{{ _('Answers') }}</span>
35
+        {% for answer in answers %}
36
+        <span>{{ answer }}</span>
37
+        {% endfor %}
38
+    </div>
39
+    {% endif %}
40
+
33
     {% if suggestions %}
41
     {% if suggestions %}
34
     <div id="suggestions"><span>{{ _('Suggestions') }}</span>
42
     <div id="suggestions"><span>{{ _('Suggestions') }}</span>
35
         {% for suggestion in suggestions %}
43
         {% for suggestion in suggestions %}
41
     </div>
49
     </div>
42
     {% endif %}
50
     {% endif %}
43
 
51
 
52
+    {% if infoboxes %}
53
+    <div id="infoboxes">
54
+      {% for infobox in infoboxes %}
55
+         {% include 'default/infobox.html' %}
56
+      {% endfor %}
57
+    </div>
58
+    {% endif %}    
59
+
44
     {% for result in results %}
60
     {% for result in results %}
45
         {% if result['template'] %}
61
         {% if result['template'] %}
46
             {% include 'default/result_templates/'+result['template'] %}
62
             {% include 'default/result_templates/'+result['template'] %}

+ 9
- 1
searx/tests/test_webapp.py View File

43
     def test_index_html(self, search):
43
     def test_index_html(self, search):
44
         search.return_value = (
44
         search.return_value = (
45
             self.test_results,
45
             self.test_results,
46
+            set(),
47
+            set(),
46
             set()
48
             set()
47
         )
49
         )
48
         result = self.app.post('/', data={'q': 'test'})
50
         result = self.app.post('/', data={'q': 'test'})
51
             result.data
53
             result.data
52
         )
54
         )
53
         self.assertIn(
55
         self.assertIn(
54
-            '<p class="content">first <span class="highlight">test</span> content<br /></p>',  # noqa
56
+            '<p class="content">first <span class="highlight">test</span> content<br class="last"/></p>',  # noqa
55
             result.data
57
             result.data
56
         )
58
         )
57
 
59
 
59
     def test_index_json(self, search):
61
     def test_index_json(self, search):
60
         search.return_value = (
62
         search.return_value = (
61
             self.test_results,
63
             self.test_results,
64
+            set(),
65
+            set(),
62
             set()
66
             set()
63
         )
67
         )
64
         result = self.app.post('/', data={'q': 'test', 'format': 'json'})
68
         result = self.app.post('/', data={'q': 'test', 'format': 'json'})
75
     def test_index_csv(self, search):
79
     def test_index_csv(self, search):
76
         search.return_value = (
80
         search.return_value = (
77
             self.test_results,
81
             self.test_results,
82
+            set(),
83
+            set(),
78
             set()
84
             set()
79
         )
85
         )
80
         result = self.app.post('/', data={'q': 'test', 'format': 'csv'})
86
         result = self.app.post('/', data={'q': 'test', 'format': 'csv'})
90
     def test_index_rss(self, search):
96
     def test_index_rss(self, search):
91
         search.return_value = (
97
         search.return_value = (
92
             self.test_results,
98
             self.test_results,
99
+            set(),
100
+            set(),
93
             set()
101
             set()
94
         )
102
         )
95
         result = self.app.post('/', data={'q': 'test', 'format': 'rss'})
103
         result = self.app.post('/', data={'q': 'test', 'format': 'rss'})

+ 3
- 1
searx/webapp.py View File

199
             'index.html',
199
             'index.html',
200
         )
200
         )
201
 
201
 
202
-    search.results, search.suggestions = search.search(request)
202
+    search.results, search.suggestions, search.answers, search.infoboxes = search.search(request)
203
 
203
 
204
     for result in search.results:
204
     for result in search.results:
205
 
205
 
292
         pageno=search.pageno,
292
         pageno=search.pageno,
293
         base_url=get_base_url(),
293
         base_url=get_base_url(),
294
         suggestions=search.suggestions,
294
         suggestions=search.suggestions,
295
+        answers=search.answers,
296
+        infoboxes=search.infoboxes,
295
         theme=get_current_theme_name()
297
         theme=get_current_theme_name()
296
     )
298
     )
297
 
299