Ver código fonte

implement query parser and use it inside autocompletion

Thomas Pointhuber 10 anos atrás
pai
commit
510aba5e66
2 arquivos alterados com 152 adições e 3 exclusões
  1. 125
    0
      searx/query.py
  2. 27
    3
      searx/webapp.py

+ 125
- 0
searx/query.py Ver arquivo

@@ -0,0 +1,125 @@
1
+#!/usr/bin/env python
2
+
3
+'''
4
+searx is free software: you can redistribute it and/or modify
5
+it under the terms of the GNU Affero General Public License as published by
6
+the Free Software Foundation, either version 3 of the License, or
7
+(at your option) any later version.
8
+
9
+searx is distributed in the hope that it will be useful,
10
+but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
+GNU Affero General Public License for more details.
13
+
14
+You should have received a copy of the GNU Affero General Public License
15
+along with searx. If not, see < http://www.gnu.org/licenses/ >.
16
+
17
+(C) 2014 by Thomas Pointhuber, <thomas.pointhuber@gmx.at>
18
+'''
19
+
20
+from searx.languages import language_codes
21
+from searx.engines import (
22
+    categories, engines, engine_shortcuts
23
+)
24
+import string
25
+import re
26
+
27
+
28
class Query(object):
    """Split a raw search query into prefix parts and the search term.

    A query may start with whitespace-separated prefixes:

    * ``:<language>`` selects a search language,
    * ``!<engine shortcut>``, ``!<engine name>`` or ``!<category>`` selects
      engines (underscores in the prefix stand for spaces).

    After :meth:`parse_query`, ``query_parts`` holds every recognised prefix
    and the whitespace that follows it as separate items; everything from the
    first unrecognised part onwards is kept as one final item, the search
    term. Joining ``query_parts`` reproduces the original query.

    :param query: raw query string as typed by the user
    :param blocked_engines: engine names that must not be selected; a falsy
        value (``None``, empty list) means nothing is blocked
    """

    def __init__(self, query, blocked_engines):
        self.query = query
        self.blocked_engines = blocked_engines if blocked_engines else []

        self.query_parts = []
        self.engines = []
        self.languages = []

    def parse_query(self):
        """Parse ``self.query`` and fill ``query_parts``, ``engines`` and
        ``languages``.

        Results of a previous call are discarded, so the method may be called
        repeatedly without accumulating duplicate engines or languages.
        """
        self.query_parts = []
        self.engines = []
        self.languages = []

        # split query, keeping the whitespace runs as separate items
        raw_query_parts = re.split(r'(\s+)', self.query)

        # True while the upcoming part may still be a prefix
        parse_next = True

        for query_part in raw_query_parts:
            # re.split yields '' for an empty query and next to leading or
            # trailing whitespace; there is nothing to parse or append
            if not query_part:
                continue

            # prefix section is over: everything belongs to the search term
            if not parse_next:
                self.query_parts[-1] += query_part
                continue

            parse_next = False

            # whitespace between prefixes is kept as its own part
            if query_part.isspace():
                parse_next = True
                self.query_parts.append(query_part)
                continue

            # this forces a language
            if query_part.startswith(':'):
                lang = query_part[1:].lower()

                # check if the given value matches a declared language-code
                for lc in language_codes:
                    lang_id, lang_name, country = (item.lower() for item in lc)

                    # a match on id (or id prefix), name or country selects
                    # the language
                    if lang == lang_id\
                       or lang_id.startswith(lang)\
                       or lang == lang_name\
                       or lang == country:
                        parse_next = True
                        self.languages.append(lang)
                        break

            # this forces an engine or a category
            if query_part.startswith('!'):
                prefix = query_part[1:].replace('_', ' ')

                # check if prefix is equal to an engine shortcut
                if prefix in engine_shortcuts\
                   and engine_shortcuts[prefix] not in self.blocked_engines:
                    parse_next = True
                    self.engines.append({'category': 'none',
                                         'name': engine_shortcuts[prefix]})

                # check if prefix is equal to an engine name
                elif prefix in engines\
                        and prefix not in self.blocked_engines:
                    parse_next = True
                    self.engines.append({'category': 'none',
                                         'name': prefix})

                # check if prefix is equal to a category name
                elif prefix in categories:
                    # use all engines declared under that category; blocked
                    # engines are matched by name, like in the branches above
                    parse_next = True
                    self.engines.extend(
                        {'category': prefix, 'name': engine.name}
                        for engine in categories[prefix]
                        if engine.name not in self.blocked_engines)

            # append query part to query_part list
            self.query_parts.append(query_part)

    def changeSearchQuery(self, search_query):
        """Replace the last query part (the search term) with *search_query*.

        If nothing has been parsed yet, *search_query* becomes the only part.
        """
        if self.query_parts:
            self.query_parts[-1] = search_query
        else:
            self.query_parts.append(search_query)

    def getSearchQuery(self):
        """Return the last query part, or ``''`` when there are no parts."""
        if self.query_parts:
            return self.query_parts[-1]
        return ''

    def getFullQuery(self):
        """Return the full query, including prefixes and whitespace."""
        return ''.join(self.query_parts)
125
+

+ 27
- 3
searx/webapp.py Ver arquivo

@@ -47,6 +47,7 @@ from searx.utils import (
47 47
 from searx.https_rewrite import https_rules
48 48
 from searx.languages import language_codes
49 49
 from searx.search import Search
50
+from searx.query import Query
50 51
 from searx.autocomplete import backends as autocomplete_backends
51 52
 
52 53
 
@@ -308,23 +309,46 @@ def autocompleter():
308 309
     """Return autocompleter results"""
309 310
     request_data = {}
310 311
 
312
+    # select request method
311 313
     if request.method == 'POST':
312 314
         request_data = request.form
313 315
     else:
314 316
         request_data = request.args
315 317
 
316
-    query = request_data.get('q', '').encode('utf-8')
318
+    # set blocked engines
319
+    if request.cookies.get('blocked_engines'):
320
+        blocked_engines = request.cookies['blocked_engines'].split(',')  # noqa
321
+    else:
322
+        blocked_engines = []
323
+
324
+    # parse query
325
+    query = Query(request_data.get('q', '').encode('utf-8'), blocked_engines)
326
+    query.parse_query()
317 327
 
318
-    if not query:
328
+    # check if search query is set
329
+    if not query.getSearchQuery():
319 330
         return
320 331
 
332
+    # run autocompleter
321 333
     completer = autocomplete_backends.get(request.cookies.get('autocomplete'))
322 334
 
335
+    # check if valid autocompleter is selected
323 336
     if not completer:
324 337
         return
325 338
 
326
-    results = completer(query)
339
+    # run autocompletion
340
+    raw_results = completer(query.getSearchQuery())
341
+
342
+    # parse results (write :language and !engine back to result string)
343
+    results = []
344
+    for result in raw_results:
345
+        result_query = query
346
+        result_query.changeSearchQuery(result)
347
+
348
+        # add parsed result
349
+        results.append(result_query.getFullQuery())
327 350
 
351
+    # return autocompleter results
328 352
     if request_data.get('format') == 'x-suggestions':
329 353
         return Response(json.dumps([query, results]),
330 354
                         mimetype='application/json')