Procházet zdrojové kódy

[fix] pep8 part II.

Adam Tauber před 10 roky
rodič
revize
5740cfbf1c
6 změnil soubory, kde provedl 121 přidání a 80 odebrání
  1. 2
    1
      searx/__init__.py
  2. 6
    5
      searx/engines/__init__.py
  3. 23
    20
      searx/query.py
  4. 67
    37
      searx/search.py
  5. 5
    4
      searx/utils.py
  6. 18
    13
      searx/webapp.py

+ 2
- 1
searx/__init__.py Zobrazit soubor

@@ -28,7 +28,8 @@ except:
28 28
 searx_dir = abspath(dirname(__file__))
29 29
 engine_dir = dirname(realpath(__file__))
30 30
 
31
-# if possible set path to settings using the enviroment variable SEARX_SETTINGS_PATH
31
+# if possible set path to settings using the
32
+# environment variable SEARX_SETTINGS_PATH
32 33
 if 'SEARX_SETTINGS_PATH' in environ:
33 34
     settings_path = environ['SEARX_SETTINGS_PATH']
34 35
 # otherwise using default path

+ 6
- 5
searx/engines/__init__.py Zobrazit soubor

@@ -41,7 +41,7 @@ def load_module(filename):
41 41
     module.name = modname
42 42
     return module
43 43
 
44
-if not 'engines' in settings or not settings['engines']:
44
+if 'engines' not in settings or not settings['engines']:
45 45
     print '[E] Error no engines found. Edit your settings.yml'
46 46
     exit(2)
47 47
 
@@ -68,15 +68,15 @@ for engine_data in settings['engines']:
68 68
         engine.categories = ['general']
69 69
 
70 70
     if not hasattr(engine, 'language_support'):
71
-        #engine.language_support = False
71
+        # engine.language_support = False
72 72
         engine.language_support = True
73 73
 
74 74
     if not hasattr(engine, 'timeout'):
75
-        #engine.language_support = False
75
+        # engine.language_support = False
76 76
         engine.timeout = settings['server']['request_timeout']
77 77
 
78 78
     if not hasattr(engine, 'shortcut'):
79
-        #engine.shortcut = '''
79
+        # engine.shortcut = '''
80 80
         engine.shortcut = ''
81 81
 
82 82
     # checking required variables
@@ -161,7 +161,8 @@ def get_engines_stats():
161 161
 
162 162
     for engine in scores_per_result:
163 163
         if max_score_per_result:
164
-            engine['percentage'] = int(engine['avg'] / max_score_per_result * 100)
164
+            engine['percentage'] = int(engine['avg']
165
+                                       / max_score_per_result * 100)
165 166
         else:
166 167
             engine['percentage'] = 0
167 168
 

+ 23
- 20
searx/query.py Zobrazit soubor

@@ -31,30 +31,31 @@ class Query(object):
31 31
     def __init__(self, query, blocked_engines):
32 32
         self.query = query
33 33
         self.blocked_engines = []
34
-        
34
+
35 35
         if blocked_engines:
36 36
             self.blocked_engines = blocked_engines
37
-            
37
+
38 38
         self.query_parts = []
39 39
         self.engines = []
40 40
         self.languages = []
41
-    
42
-    # parse query, if tags are set, which change the serch engine or search-language
41
+
42
+    # parse query, if tags are set, which
43
+    # change the search engine or search-language
43 44
     def parse_query(self):
44 45
         self.query_parts = []
45
-        
46
+
46 47
         # split query, including whitespaces
47 48
         raw_query_parts = re.split(r'(\s+)', self.query)
48
-        
49
+
49 50
         parse_next = True
50
-        
51
+
51 52
         for query_part in raw_query_parts:
52 53
             if not parse_next:
53 54
                 self.query_parts[-1] += query_part
54 55
                 continue
55
-           
56
+
56 57
             parse_next = False
57
-           
58
+
58 59
             # part does only contain spaces, skip
59 60
             if query_part.isspace()\
60 61
                or query_part == '':
@@ -62,15 +63,17 @@ class Query(object):
62 63
                 self.query_parts.append(query_part)
63 64
                 continue
64 65
 
65
-            # this force a language            
66
+            # this forces a language
66 67
             if query_part[0] == ':':
67 68
                 lang = query_part[1:].lower()
68 69
 
69
-                # check if any language-code is equal with declared language-codes
70
+                # check if any language-code is equal with
71
+                # declared language-codes
70 72
                 for lc in language_codes:
71 73
                     lang_id, lang_name, country = map(str.lower, lc)
72 74
 
73
-                    # if correct language-code is found, set it as new search-language
75
+                    # if correct language-code is found
76
+                    # set it as new search-language
74 77
                     if lang == lang_id\
75 78
                        or lang_id.startswith(lang)\
76 79
                        or lang == lang_name\
@@ -89,23 +92,24 @@ class Query(object):
89 92
                     parse_next = True
90 93
                     self.engines.append({'category': 'none',
91 94
                                          'name': engine_shortcuts[prefix]})
92
-                
95
+
93 96
                 # check if prefix is equal with engine name
94 97
                 elif prefix in engines\
95
-                        and not prefix in self.blocked_engines:
98
+                        and prefix not in self.blocked_engines:
96 99
                     parse_next = True
97 100
                     self.engines.append({'category': 'none',
98 101
                                         'name': prefix})
99 102
 
100 103
                 # check if prefix is equal with category name
101 104
                 elif prefix in categories:
102
-                    # using all engines for that search, which are declared under that categorie name
105
+                    # using all engines for that search, which
106
+                    # are declared under that category name
103 107
                     parse_next = True
104 108
                     self.engines.extend({'category': prefix,
105 109
                                         'name': engine.name}
106 110
                                         for engine in categories[prefix]
107
-                                        if not engine in self.blocked_engines)
108
-          
111
+                                        if engine not in self.blocked_engines)
112
+
109 113
             # append query part to query_part list
110 114
             self.query_parts.append(query_part)
111 115
 
@@ -114,14 +118,13 @@ class Query(object):
114 118
             self.query_parts[-1] = search_query
115 119
         else:
116 120
             self.query_parts.append(search_query)
117
-            
121
+
118 122
     def getSearchQuery(self):
119 123
         if len(self.query_parts):
120 124
             return self.query_parts[-1]
121 125
         else:
122 126
             return ''
123
-    
127
+
124 128
     def getFullQuery(self):
125 129
         # get full query including whitespaces
126 130
         return string.join(self.query_parts, '')
127
-

+ 67
- 37
searx/search.py Zobrazit soubor

@@ -22,7 +22,7 @@ from datetime import datetime
22 22
 from operator import itemgetter
23 23
 from urlparse import urlparse, unquote
24 24
 from searx.engines import (
25
-    categories, engines, engine_shortcuts
25
+    categories, engines
26 26
 )
27 27
 from searx.languages import language_codes
28 28
 from searx.utils import gen_useragent
@@ -39,7 +39,13 @@ def default_request_params():
39 39
 
40 40
 
41 41
 # create a callback wrapper for the search engine results
42
-def make_callback(engine_name, results, suggestions, answers, infoboxes, callback, params):
42
+def make_callback(engine_name,
43
+                  results,
44
+                  suggestions,
45
+                  answers,
46
+                  infoboxes,
47
+                  callback,
48
+                  params):
43 49
 
44 50
     # creating a callback wrapper for the search engine results
45 51
     def process_callback(response, **kwargs):
@@ -95,7 +101,7 @@ def make_callback(engine_name, results, suggestions, answers, infoboxes, callbac
95 101
 def content_result_len(content):
96 102
     if isinstance(content, basestring):
97 103
         content = re.sub('[,;:!?\./\\\\ ()-_]', '', content)
98
-        return len(content) 
104
+        return len(content)
99 105
     else:
100 106
         return 0
101 107
 
@@ -126,7 +132,8 @@ def score_results(results):
126 132
 
127 133
         # strip multiple spaces and carriage returns from content
128 134
         if 'content' in res:
129
-            res['content'] = re.sub(' +', ' ', res['content'].strip().replace('\n', ''))
135
+            res['content'] = re.sub(' +', ' ',
136
+                                    res['content'].strip().replace('\n', ''))
130 137
 
131 138
         # get weight of this engine if possible
132 139
         if hasattr(engines[res['engine']], 'weight'):
@@ -139,8 +146,12 @@ def score_results(results):
139 146
         duplicated = False
140 147
         for new_res in results:
141 148
             # remove / from the end of the url if required
142
-            p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path  # noqa
143
-            p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path  # noqa
149
+            p1 = res['parsed_url'].path[:-1]\
150
+                if res['parsed_url'].path.endswith('/')\
151
+                else res['parsed_url'].path
152
+            p2 = new_res['parsed_url'].path[:-1]\
153
+                if new_res['parsed_url'].path.endswith('/')\
154
+                else new_res['parsed_url'].path
144 155
 
145 156
             # check if that result is a duplicate
146 157
             if res['host'] == new_res['host'] and\
@@ -153,7 +164,8 @@ def score_results(results):
153 164
         # merge duplicates together
154 165
         if duplicated:
155 166
             # using content with more text
156
-            if content_result_len(res.get('content', '')) > content_result_len(duplicated.get('content', '')):
167
+            if content_result_len(res.get('content', '')) >\
168
+                    content_result_len(duplicated.get('content', '')):
157 169
                 duplicated['content'] = res['content']
158 170
 
159 171
             # increase result-score
@@ -182,17 +194,25 @@ def score_results(results):
182 194
 
183 195
     for i, res in enumerate(results):
184 196
         # FIXME : handle more than one category per engine
185
-        category = engines[res['engine']].categories[0] + ':' + '' if 'template' not in res else res['template']
186
-
187
-        current = None if category not in categoryPositions else categoryPositions[category]
188
-
189
-        # group with previous results using the same category if the group can accept more result and is not too far from the current position
190
-        if current != None and (current['count'] > 0) and (len(gresults) - current['index'] < 20):
191
-            # group with the previous results using the same category with this one
197
+        category = engines[res['engine']].categories[0] + ':' + ''\
198
+            if 'template' not in res\
199
+            else res['template']
200
+
201
+        current = None if category not in categoryPositions\
202
+            else categoryPositions[category]
203
+
204
+        # group with previous results using the same category
205
+        # if the group can accept more results and is not too far
206
+        # from the current position
207
+        if current is not None and (current['count'] > 0)\
208
+                and (len(gresults) - current['index'] < 20):
209
+            # group with the previous results using
210
+            # the same category with this one
192 211
             index = current['index']
193 212
             gresults.insert(index, res)
194 213
 
195
-            # update every index after the current one (including the current one)
214
+            # update every index after the current one
215
+            # (including the current one)
196 216
             for k in categoryPositions:
197 217
                 v = categoryPositions[k]['index']
198 218
                 if v >= index:
@@ -206,7 +226,7 @@ def score_results(results):
206 226
             gresults.append(res)
207 227
 
208 228
             # update categoryIndex
209
-            categoryPositions[category] = { 'index' : len(gresults), 'count' : 8 }
229
+            categoryPositions[category] = {'index': len(gresults), 'count': 8}
210 230
 
211 231
     # return gresults
212 232
     return gresults
@@ -215,21 +235,21 @@ def score_results(results):
215 235
 def merge_two_infoboxes(infobox1, infobox2):
216 236
     if 'urls' in infobox2:
217 237
         urls1 = infobox1.get('urls', None)
218
-        if urls1 == None:
238
+        if urls1 is None:
219 239
             urls1 = []
220 240
             infobox1.set('urls', urls1)
221 241
 
222 242
         urlSet = set()
223 243
         for url in infobox1.get('urls', []):
224 244
             urlSet.add(url.get('url', None))
225
-        
245
+
226 246
         for url in infobox2.get('urls', []):
227 247
             if url.get('url', None) not in urlSet:
228 248
                 urls1.append(url)
229 249
 
230 250
     if 'attributes' in infobox2:
231 251
         attributes1 = infobox1.get('attributes', None)
232
-        if attributes1 == None:
252
+        if attributes1 is None:
233 253
             attributes1 = []
234 254
             infobox1.set('attributes', attributes1)
235 255
 
@@ -237,14 +257,14 @@ def merge_two_infoboxes(infobox1, infobox2):
237 257
         for attribute in infobox1.get('attributes', []):
238 258
             if attribute.get('label', None) not in attributeSet:
239 259
                 attributeSet.add(attribute.get('label', None))
240
-        
260
+
241 261
         for attribute in infobox2.get('attributes', []):
242 262
             attributes1.append(attribute)
243 263
 
244 264
     if 'content' in infobox2:
245 265
         content1 = infobox1.get('content', None)
246 266
         content2 = infobox2.get('content', '')
247
-        if content1 != None:
267
+        if content1 is not None:
248 268
             if content_result_len(content2) > content_result_len(content1):
249 269
                 infobox1['content'] = content2
250 270
         else:
@@ -257,12 +277,12 @@ def merge_infoboxes(infoboxes):
257 277
     for infobox in infoboxes:
258 278
         add_infobox = True
259 279
         infobox_id = infobox.get('id', None)
260
-        if infobox_id != None:
280
+        if infobox_id is not None:
261 281
             existingIndex = infoboxes_id.get(infobox_id, None)
262
-            if existingIndex != None:
282
+            if existingIndex is not None:
263 283
                 merge_two_infoboxes(results[existingIndex], infobox)
264
-                add_infobox=False
265
-            
284
+                add_infobox = False
285
+
266 286
         if add_infobox:
267 287
             results.append(infobox)
268 288
             infoboxes_id[infobox_id] = len(results)-1
@@ -318,7 +338,8 @@ class Search(object):
318 338
 
319 339
         self.pageno = int(pageno_param)
320 340
 
321
-        # parse query, if tags are set, which change the serch engine or search-language
341
+        # parse query, if tags are set, which change
342
+        # the search engine or search-language
322 343
         query_obj = Query(self.request_data['q'], self.blocked_engines)
323 344
         query_obj.parse_query()
324 345
 
@@ -334,25 +355,29 @@ class Search(object):
334 355
 
335 356
         self.categories = []
336 357
 
337
-        # if engines are calculated from query, set categories by using that informations
358
+        # if engines are calculated from query,
359
+        # set categories by using that information
338 360
         if self.engines:
339 361
             self.categories = list(set(engine['category']
340 362
                                        for engine in self.engines))
341 363
 
342
-        # otherwise, using defined categories to calculate which engines should be used
364
+        # otherwise, using defined categories to
365
+        # calculate which engines should be used
343 366
         else:
344 367
             # set used categories
345 368
             for pd_name, pd in self.request_data.items():
346 369
                 if pd_name.startswith('category_'):
347 370
                     category = pd_name[9:]
348 371
                     # if category is not found in list, skip
349
-                    if not category in categories:
372
+                    if category not in categories:
350 373
                         continue
351 374
 
352 375
                     # add category to list
353 376
                     self.categories.append(category)
354 377
 
355
-            # if no category is specified for this search, using user-defined default-configuration which (is stored in cookie)
378
+            # if no category is specified for this search,
379
+            # using user-defined default-configuration which
380
+            # (is stored in cookie)
356 381
             if not self.categories:
357 382
                 cookie_categories = request.cookies.get('categories', '')
358 383
                 cookie_categories = cookie_categories.split(',')
@@ -360,16 +385,18 @@ class Search(object):
360 385
                     if ccateg in categories:
361 386
                         self.categories.append(ccateg)
362 387
 
363
-            # if still no category is specified, using general as default-category
388
+            # if still no category is specified, using general
389
+            # as default-category
364 390
             if not self.categories:
365 391
                 self.categories = ['general']
366 392
 
367
-            # using all engines for that search, which are declared under the specific categories
393
+            # using all engines for that search, which are
394
+            # declared under the specific categories
368 395
             for categ in self.categories:
369 396
                 self.engines.extend({'category': categ,
370 397
                                      'name': x.name}
371 398
                                     for x in categories[categ]
372
-                                    if not x.name in self.blocked_engines)
399
+                                    if x.name not in self.blocked_engines)
373 400
 
374 401
     # do search-request
375 402
     def search(self, request):
@@ -386,7 +413,7 @@ class Search(object):
386 413
         number_of_searches += 1
387 414
 
388 415
         # set default useragent
389
-        #user_agent = request.headers.get('User-Agent', '')
416
+        # user_agent = request.headers.get('User-Agent', '')
390 417
         user_agent = gen_useragent()
391 418
 
392 419
         # start search-request for all selected engines
@@ -400,7 +427,8 @@ class Search(object):
400 427
             if self.pageno > 1 and not engine.paging:
401 428
                 continue
402 429
 
403
-            # if search-language is set and engine does not provide language-support, skip
430
+            # if search-language is set and engine does not
431
+            # provide language-support, skip
404 432
             if self.lang != 'all' and not engine.language_support:
405 433
                 continue
406 434
 
@@ -412,7 +440,8 @@ class Search(object):
412 440
             request_params['pageno'] = self.pageno
413 441
             request_params['language'] = self.lang
414 442
 
415
-            # update request parameters dependent on search-engine (contained in engines folder)
443
+            # update request parameters dependent on
444
+            # search-engine (contained in engines folder)
416 445
             request_params = engine.request(self.query.encode('utf-8'),
417 446
                                             request_params)
418 447
 
@@ -431,7 +460,8 @@ class Search(object):
431 460
                 request_params
432 461
             )
433 462
 
434
-            # create dictionary which contain all informations about the request
463
+            # create dictionary which contains all
464
+            # information about the request
435 465
             request_args = dict(
436 466
                 headers=request_params['headers'],
437 467
                 hooks=dict(response=callback),

+ 5
- 4
searx/utils.py Zobrazit soubor

@@ -1,4 +1,4 @@
1
-#import htmlentitydefs
1
+# import htmlentitydefs
2 2
 from codecs import getincrementalencoder
3 3
 from HTMLParser import HTMLParser
4 4
 from random import choice
@@ -22,7 +22,8 @@ def gen_useragent():
22 22
 
23 23
 def searx_useragent():
24 24
     return 'searx'
25
-    
25
+
26
+
26 27
 def highlight_content(content, query):
27 28
 
28 29
     if not content:
@@ -67,8 +68,8 @@ class HTMLTextExtractor(HTMLParser):
67 68
         self.result.append(unichr(codepoint))
68 69
 
69 70
     def handle_entityref(self, name):
70
-        #codepoint = htmlentitydefs.name2codepoint[name]
71
-        #self.result.append(unichr(codepoint))
71
+        # codepoint = htmlentitydefs.name2codepoint[name]
72
+        # self.result.append(unichr(codepoint))
72 73
         self.result.append(name)
73 74
 
74 75
     def get_text(self):

+ 18
- 13
searx/webapp.py Zobrazit soubor

@@ -71,7 +71,7 @@ app.secret_key = settings['server']['secret_key']
71 71
 
72 72
 babel = Babel(app)
73 73
 
74
-#TODO configurable via settings.yml
74
+# TODO configurable via settings.yml
75 75
 favicons = ['wikipedia', 'youtube', 'vimeo', 'soundcloud',
76 76
             'twitter', 'stackoverflow', 'github']
77 77
 
@@ -146,14 +146,14 @@ def render(template_name, override_theme=None, **kwargs):
146 146
 
147 147
     nonblocked_categories = set(chain.from_iterable(nonblocked_categories))
148 148
 
149
-    if not 'categories' in kwargs:
149
+    if 'categories' not in kwargs:
150 150
         kwargs['categories'] = ['general']
151 151
         kwargs['categories'].extend(x for x in
152 152
                                     sorted(categories.keys())
153 153
                                     if x != 'general'
154 154
                                     and x in nonblocked_categories)
155 155
 
156
-    if not 'selected_categories' in kwargs:
156
+    if 'selected_categories' not in kwargs:
157 157
         kwargs['selected_categories'] = []
158 158
         for arg in request.args:
159 159
             if arg.startswith('category_'):
@@ -168,7 +168,7 @@ def render(template_name, override_theme=None, **kwargs):
168 168
     if not kwargs['selected_categories']:
169 169
         kwargs['selected_categories'] = ['general']
170 170
 
171
-    if not 'autocomplete' in kwargs:
171
+    if 'autocomplete' not in kwargs:
172 172
         kwargs['autocomplete'] = autocomplete
173 173
 
174 174
     kwargs['method'] = request.cookies.get('method', 'POST')
@@ -202,14 +202,15 @@ def index():
202 202
             'index.html',
203 203
         )
204 204
 
205
-    search.results, search.suggestions, search.answers, search.infoboxes = search.search(request)
205
+    search.results, search.suggestions,\
206
+        search.answers, search.infoboxes = search.search(request)
206 207
 
207 208
     for result in search.results:
208 209
 
209 210
         if not search.paging and engines[result['engine']].paging:
210 211
             search.paging = True
211 212
 
212
-        # check if HTTPS rewrite is required 
213
+        # check if HTTPS rewrite is required
213 214
         if settings['server']['https_rewrite']\
214 215
            and result['parsed_url'].scheme == 'http':
215 216
 
@@ -236,7 +237,7 @@ def index():
236 237
                         try:
237 238
                             # TODO, precompile rule
238 239
                             p = re.compile(rule[0])
239
-                            
240
+
240 241
                             # rewrite url if possible
241 242
                             new_result_url = p.sub(rule[1], result['url'])
242 243
                         except:
@@ -250,17 +251,21 @@ def index():
250 251
                             continue
251 252
 
252 253
                         # get domainname from result
253
-                        # TODO, does only work correct with TLD's like asdf.com, not for asdf.com.de
254
+                        # TODO, only works correctly with TLDs like
255
+                        #  asdf.com, not for asdf.com.de
254 256
                         # TODO, using publicsuffix instead of this rewrite rule
255
-                        old_result_domainname = '.'.join(result['parsed_url'].hostname.split('.')[-2:])
256
-                        new_result_domainname = '.'.join(new_parsed_url.hostname.split('.')[-2:])
257
+                        old_result_domainname = '.'.join(
258
+                            result['parsed_url'].hostname.split('.')[-2:])
259
+                        new_result_domainname = '.'.join(
260
+                            new_parsed_url.hostname.split('.')[-2:])
257 261
 
258
-                        # check if rewritten hostname is the same, to protect against wrong or malicious rewrite rules
262
+                        # check if rewritten hostname is the same,
263
+                        # to protect against wrong or malicious rewrite rules
259 264
                         if old_result_domainname == new_result_domainname:
260 265
                             # set new url
261 266
                             result['url'] = new_result_url
262 267
 
263
-                    # target has matched, do not search over the other rules 
268
+                    # target has matched, do not search over the other rules
264 269
                     break
265 270
 
266 271
         if search.request_data.get('format', 'html') == 'html':
@@ -429,7 +434,7 @@ def preferences():
429 434
         for pd_name, pd in request.form.items():
430 435
             if pd_name.startswith('category_'):
431 436
                 category = pd_name[9:]
432
-                if not category in categories:
437
+                if category not in categories:
433 438
                     continue
434 439
                 selected_categories.append(category)
435 440
             elif pd_name == 'locale' and pd in settings['locales']: