Просмотр исходного кода

[enh][mod] code refactor ++ search engine filtering

asciimoo 11 лет назад
Родитель
Коммит
0540ea9ee2
3 изменённых файла: 133 добавления и 100 удалений
  1. 94
    0
      searx/search.py
  2. 4
    4
      searx/tests/test_webapp.py
  3. 35
    96
      searx/webapp.py

+ 94
- 0
searx/search.py Просмотреть файл

@@ -0,0 +1,94 @@
1
+from searx.engines import (
2
+    categories, engines, engine_shortcuts
3
+)
4
+from searx.languages import language_codes
5
+
6
+
7
class Search(object):

    """Search information container.

    Collects everything needed to run one search from an incoming request:
    the query text, the page number, the language, the selected categories
    and the list of engines to query (minus the engines the user blocked).
    ``results`` and ``suggestions`` start empty and are filled by the caller.
    """

    def __init__(self, request):
        """Parse ``request`` into search parameters.

        Args:
            request: object exposing ``cookies``, ``method``, ``form`` and
                ``args`` (presumably a Flask request - confirm with caller).

        Raises:
            Exception: with message ``'noquery'`` when the ``q`` parameter is
                missing, empty or only whitespace, and with message
                ``'wrong pagenumber'`` when ``pageno`` is not a positive
                integer.
        """
        super(Search, self).__init__()
        self.query = None
        self.engines = []
        self.categories = []
        self.paging = False
        self.pageno = 1
        self.lang = 'all'
        self.results = []
        self.suggestions = []
        self.request_data = {}

        # The cookie holds a comma separated list of blocked engine names.
        blocked_cookie = request.cookies.get('blocked_engines')
        if blocked_cookie:
            self.blocked_engines = blocked_cookie.split(',')
        else:
            self.blocked_engines = []

        # Only accept a language cookie whose value matches the first
        # element of one of the language_codes entries.
        language = request.cookies.get('language')
        if language and language in (x[0] for x in language_codes):
            self.lang = language

        if request.method == 'POST':
            self.request_data = request.form
        else:
            self.request_data = request.args

        # TODO better exceptions
        # A whitespace-only query is rejected here as well: it would produce
        # an empty ``query_parts`` list below and fail with an IndexError.
        query = self.request_data.get('q')
        if not query or not query.strip():
            raise Exception('noquery')

        self.query = query

        pageno_param = self.request_data.get('pageno', '1')
        if not pageno_param.isdigit() or int(pageno_param) < 1:
            raise Exception('wrong pagenumber')

        self.pageno = int(pageno_param)

        # A leading "!name" token selects an engine (by shortcut or by name)
        # or a whole category; underscores stand in for spaces in the name.
        query_parts = self.query.split()
        if query_parts[0].startswith('!'):
            prefix = query_parts[0][1:].replace('_', ' ')
            if (prefix in engine_shortcuts
                    and engine_shortcuts[prefix] not in self.blocked_engines):
                self.engines.append({'category': 'none',
                                     'name': engine_shortcuts[prefix]})
            elif prefix in engines and prefix not in self.blocked_engines:
                self.engines.append({'category': 'none',
                                     'name': prefix})
            elif prefix in categories:
                # blocked_engines holds names, so compare by engine.name.
                self.engines.extend({'category': prefix,
                                     'name': engine.name}
                                    for engine in categories[prefix]
                                    if engine.name not in self.blocked_engines)

        if self.engines:
            # The prefix selected at least one engine: drop it from the
            # query text and derive the categories from those engines.
            self.query = self.query.replace(query_parts[0], '', 1).strip()
            self.categories = list(set(engine['category']
                                       for engine in self.engines))
        else:
            # Categories come from "category_<name>" request parameters ...
            for pd_name, _ in self.request_data.items():
                if pd_name.startswith('category_'):
                    category = pd_name[9:]
                    if category not in categories:
                        continue
                    self.categories.append(category)
            # ... then from the comma separated "categories" cookie ...
            if not self.categories:
                cookie_categories = request.cookies.get('categories', '')
                for ccateg in cookie_categories.split(','):
                    if ccateg in categories:
                        self.categories.append(ccateg)
            # ... and finally fall back to the default category.
            if not self.categories:
                self.categories = ['general']

            for categ in self.categories:
                self.engines.extend({'category': categ,
                                     'name': x.name}
                                    for x in categories[categ]
                                    if x.name not in self.blocked_engines)

+ 4
- 4
searx/tests/test_webapp.py Просмотреть файл

@@ -39,7 +39,7 @@ class ViewsTestCase(SearxTestCase):
39 39
         self.assertEqual(result.status_code, 200)
40 40
         self.assertIn('<div class="title"><h1>searx</h1></div>', result.data)
41 41
 
42
-    @patch('searx.webapp.search')
42
+    @patch('searx.webapp.do_search')
43 43
     def test_index_html(self, search):
44 44
         search.return_value = (
45 45
             self.test_results,
@@ -55,7 +55,7 @@ class ViewsTestCase(SearxTestCase):
55 55
             result.data
56 56
         )
57 57
 
58
-    @patch('searx.webapp.search')
58
+    @patch('searx.webapp.do_search')
59 59
     def test_index_json(self, search):
60 60
         search.return_value = (
61 61
             self.test_results,
@@ -71,7 +71,7 @@ class ViewsTestCase(SearxTestCase):
71 71
         self.assertEqual(
72 72
             result_dict['results'][0]['url'], 'http://first.test.xyz')
73 73
 
74
-    @patch('searx.webapp.search')
74
+    @patch('searx.webapp.do_search')
75 75
     def test_index_csv(self, search):
76 76
         search.return_value = (
77 77
             self.test_results,
@@ -86,7 +86,7 @@ class ViewsTestCase(SearxTestCase):
86 86
             result.data
87 87
         )
88 88
 
89
-    @patch('searx.webapp.search')
89
+    @patch('searx.webapp.do_search')
90 90
     def test_index_rss(self, search):
91 91
         search.return_value = (
92 92
             self.test_results,

+ 35
- 96
searx/webapp.py Просмотреть файл

@@ -28,10 +28,12 @@ from flask import (
28 28
 from flask.ext.babel import Babel
29 29
 from searx import settings, searx_dir
30 30
 from searx.engines import (
31
-    search, categories, engines, get_engines_stats, engine_shortcuts
31
+    search as do_search, categories, engines, get_engines_stats,
32
+    engine_shortcuts
32 33
 )
33 34
 from searx.utils import UnicodeWriter, highlight_content, html_to_text
34 35
 from searx.languages import language_codes
36
+from searx.search import Search
35 37
 
36 38
 
37 39
 app = Flask(
@@ -94,95 +96,32 @@ def render(template_name, **kwargs):
94 96
     return render_template(template_name, **kwargs)
95 97
 
96 98
 
97
-def parse_query(query):
98
-    query_engines = []
99
-    query_parts = query.split()
100
-
101
-    if query_parts[0].startswith('!'):
102
-        prefix = query_parts[0][1:].replace('_', ' ')
103
-        if prefix in engine_shortcuts:
104
-            query_engines.append({'category': 'none',
105
-                                  'name': engine_shortcuts[prefix]})
106
-        elif prefix in engines:
107
-            query_engines.append({'category': 'none',
108
-                                  'name': prefix})
109
-        elif prefix in categories:
110
-            query_engines.extend({'category': prefix,
111
-                                  'name': engine.name}
112
-                                 for engine in categories[prefix])
113
-
114
-    if len(query_engines):
115
-        query = query.replace(query_parts[0], '', 1).strip()
116
-    return query, query_engines
117
-
118
-
119 99
 @app.route('/', methods=['GET', 'POST'])
120 100
 def index():
121 101
     """Render index page.
122 102
 
123 103
     Supported outputs: html, json, csv, rss.
124 104
     """
125
-    paging = False
126
-    lang = 'all'
127
-
128
-    if request.cookies.get('language')\
129
-       and request.cookies['language'] in (x[0] for x in language_codes):
130
-        lang = request.cookies['language']
131
-
132
-    if request.method == 'POST':
133
-        request_data = request.form
134
-    else:
135
-        request_data = request.args
136
-    if not request_data.get('q'):
137
-        return render('index.html')
138 105
 
139
-    pageno_param = request_data.get('pageno', '1')
140
-    if not pageno_param.isdigit() or int(pageno_param) < 1:
106
+    try:
107
+        search = Search(request)
108
+    except:
141 109
         return render('index.html')
142 110
 
143
-    pageno = int(pageno_param)
144
-
145
-    selected_categories = []
146
-
147
-    query, selected_engines = parse_query(request_data['q'].encode('utf-8'))
148
-
149
-    if len(selected_engines):
150
-        selected_categories = list(set(engine['category']
151
-                                       for engine in selected_engines))
152
-    else:
153
-        for pd_name, pd in request_data.items():
154
-            if pd_name.startswith('category_'):
155
-                category = pd_name[9:]
156
-                if not category in categories:
157
-                    continue
158
-                selected_categories.append(category)
159
-        if not len(selected_categories):
160
-            cookie_categories = request.cookies.get('categories', '')
161
-            cookie_categories = cookie_categories.split(',')
162
-            for ccateg in cookie_categories:
163
-                if ccateg in categories:
164
-                    selected_categories.append(ccateg)
165
-        if not len(selected_categories):
166
-            selected_categories = ['general']
167
-
168
-        for categ in selected_categories:
169
-            selected_engines.extend({'category': categ,
170
-                                     'name': x.name}
171
-                                    for x in categories[categ])
172
-
173
-    results, suggestions = search(query,
174
-                                  request,
175
-                                  selected_engines,
176
-                                  pageno,
177
-                                  lang)
178
-
179
-    for result in results:
180
-        if not paging and engines[result['engine']].paging:
181
-            paging = True
182
-        if request_data.get('format', 'html') == 'html':
111
+    # TODO moar refactor - do_search integration into Search class
112
+    search.results, search.suggestions = do_search(search.query,
113
+                                                   request,
114
+                                                   search.engines,
115
+                                                   search.pageno,
116
+                                                   search.lang)
117
+
118
+    for result in search.results:
119
+        if not search.paging and engines[result['engine']].paging:
120
+            search.paging = True
121
+        if search.request_data.get('format', 'html') == 'html':
183 122
             if 'content' in result:
184
-                result['content'] = highlight_content(result['content'], query)
185
-            result['title'] = highlight_content(result['title'], query)
123
+                result['content'] = highlight_content(result['content'], search.query)
124
+            result['title'] = highlight_content(result['title'], search.query)
186 125
         else:
187 126
             if 'content' in result:
188 127
                 result['content'] = html_to_text(result['content']).strip()
@@ -199,40 +138,40 @@ def index():
199 138
             if engine in favicons:
200 139
                 result['favicon'] = engine
201 140
 
202
-    if request_data.get('format') == 'json':
203
-        return Response(json.dumps({'query': query, 'results': results}),
141
+    if search.request_data.get('format') == 'json':
142
+        return Response(json.dumps({'query': search.query, 'results': search.results}),
204 143
                         mimetype='application/json')
205
-    elif request_data.get('format') == 'csv':
144
+    elif search.request_data.get('format') == 'csv':
206 145
         csv = UnicodeWriter(cStringIO.StringIO())
207 146
         keys = ('title', 'url', 'content', 'host', 'engine', 'score')
208
-        if len(results):
147
+        if len(search.results):
209 148
             csv.writerow(keys)
210
-            for row in results:
149
+            for row in search.results:
211 150
                 row['host'] = row['parsed_url'].netloc
212 151
                 csv.writerow([row.get(key, '') for key in keys])
213 152
         csv.stream.seek(0)
214 153
         response = Response(csv.stream.read(), mimetype='application/csv')
215
-        content_disp = 'attachment;Filename=searx_-_{0}.csv'.format(query)
154
+        content_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search.query)
216 155
         response.headers.add('Content-Disposition', content_disp)
217 156
         return response
218
-    elif request_data.get('format') == 'rss':
157
+    elif search.request_data.get('format') == 'rss':
219 158
         response_rss = render(
220 159
             'opensearch_response_rss.xml',
221
-            results=results,
222
-            q=request_data['q'],
223
-            number_of_results=len(results),
160
+            results=search.results,
161
+            q=search.request_data['q'],
162
+            number_of_results=len(search.results),
224 163
             base_url=get_base_url()
225 164
         )
226 165
         return Response(response_rss, mimetype='text/xml')
227 166
 
228 167
     return render(
229 168
         'results.html',
230
-        results=results,
231
-        q=request_data['q'],
232
-        selected_categories=selected_categories,
233
-        paging=paging,
234
-        pageno=pageno,
235
-        suggestions=suggestions
169
+        results=search.results,
170
+        q=search.request_data['q'],
171
+        selected_categories=search.categories,
172
+        paging=search.paging,
173
+        pageno=search.pageno,
174
+        suggestions=search.suggestions
236 175
     )
237 176
 
238 177