
Merge pull request #30 from matejc/smallissues

fix: robot fw, entry points, some flake8, package searx egg
Adam Tauber committed 11 years ago
parent commit: ffc93ba256
17 changed files with 293 additions and 130 deletions
  1. .gitignore (+11, -7)
  2. Makefile (+4, -8)
  3. buildout.cfg (+0, -2)
  4. minimal.cfg (+0, -2)
  5. production.cfg (+0, -2)
  6. searx/__init__.py (+2, -4)
  7. searx/engines/__init__.py (+72, -38)
  8. searx/engines/bing.py (+3, -2)
  9. searx/engines/currency_convert.py (+15, -12)
  10. searx/engines/dailymotion.py (+4, -1)
  11. searx/settings.yml (+0, -0, renamed from settings.yml)
  12. searx/settings_robot.py (+0, -16)
  13. searx/settings_robot.yml (+107, -0)
  14. searx/testing.py (+16, -3)
  15. searx/utils.py (+5, -1)
  16. searx/webapp.py (+39, -32)
  17. setup.py (+15, -0)

.gitignore (+11, -7)

-env
-engines.cfg
-.installed.cfg
 .coverage
-coverage/
+.installed.cfg
+engines.cfg
+env
+robot_log.html
+robot_output.xml
+robot_report.html
 setup.cfg
 
 *.pyc
 */*.pyc
 
 bin/
-include/
-lib/
 build/
+covearge/
 develop-eggs/
+dist/
 eggs/
+include/
+lib/
 local/
-searx.egg-info/
 parts/
+searx.egg-info/
 var/

Makefile (+4, -8)

 tests: .installed.cfg
 	@bin/test
 
-enginescfg:
-	@test -f ./engines.cfg || echo "Copying engines.cfg ..."
-	@cp --no-clobber engines.cfg_sample engines.cfg
-
-robot: .installed.cfg enginescfg
+robot: .installed.cfg
 	@bin/robot
 
 flake8: .installed.cfg
...
 	@bin/coverage report --show-missing
 	@bin/coverage html --directory ./coverage
 
-production: bin/buildout production.cfg setup.py enginescfg
+production: bin/buildout production.cfg setup.py
 	bin/buildout -c production.cfg $(options)
 	@echo "* Please modify `readlink --canonicalize-missing ./searx/settings.py`"
 	@echo "* Hint 1: on production, disable debug mode and change secret_key"
 	@echo "* Hint 2: searx will be executed at server startup by crontab"
 	@echo "* Hint 3: to run immediatley, execute 'bin/supervisord'"
 
-minimal: bin/buildout minimal.cfg setup.py enginescfg
+minimal: bin/buildout minimal.cfg setup.py
 	bin/buildout -c minimal.cfg $(options)
 
 clean:
 	@rm -rf .installed.cfg .mr.developer.cfg bin parts develop-eggs \
 		searx.egg-info lib include .coverage coverage
 
-.PHONY: all tests enginescfg robot flake8 coverage production minimal clean
+.PHONY: all tests robot flake8 coverage production minimal clean

buildout.cfg (+0, -2)

 eggs = ${buildout:eggs}
 interpreter = py
 dependent-scripts = true
-entry-points =
-    searx-run=searx.webapp:run
 
 
 [robot]

minimal.cfg (+0, -2)

 recipe = zc.recipe.egg:script
 eggs = ${buildout:eggs}
 interpreter = py
-entry-points =
-    searx-run=searx.webapp:run

production.cfg (+0, -2)

 recipe = zc.recipe.egg:script
 eggs = ${buildout:eggs}
 interpreter = py
-entry-points =
-    searx-run=searx.webapp:run
 
 
 [supervisor]

searx/__init__.py (+2, -4)

 from os import environ
-from os.path import realpath, dirname, join
+from os.path import realpath, dirname, join, abspath
 try:
     from yaml import load
 except:
...
     stderr.write('[E] install pyyaml\n')
     exit(2)
 
-
-searx_dir  = realpath(dirname(realpath(__file__))+'/../')
+searx_dir = abspath(dirname(__file__))
 engine_dir = dirname(realpath(__file__))
 
 if 'SEARX_SETTINGS_PATH' in environ:
...
 
 with open(settings_path) as settings_yaml:
     settings = load(settings_yaml)
-
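
Note: searx_dir now points at the package directory itself (abspath(dirname(__file__))) rather than the repository root, which is what lets an installed egg find its bundled settings.yml. The diff collapses the lines that choose settings_path; the sketch below reconstructs that selection from the visible context, so treat the else branch as an assumption rather than quoted code:

    from os import environ
    from os.path import abspath, dirname, join

    searx_dir = abspath(dirname(__file__))

    if 'SEARX_SETTINGS_PATH' in environ:
        # explicit override, used by the robot test layer (see searx/testing.py)
        settings_path = environ['SEARX_SETTINGS_PATH']
    else:
        # assumed fallback: the settings.yml shipped inside the package
        settings_path = join(searx_dir, 'settings.yml')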

searx/engines/__init__.py (+72, -38)

 
 categories = {'general': []}
 
+
 def load_module(filename):
     modname = splitext(filename)[0]
     if modname in sys.modules:
...
 
 for engine_data in settings['engines']:
     engine_name = engine_data['engine']
-    engine = load_module(engine_name+'.py')
+    engine = load_module(engine_name + '.py')
     for param_name in engine_data:
         if param_name == 'engine':
             continue
...
             if engine_data['categories'] == 'none':
                 engine.categories = []
             else:
-                engine.categories = map(str.strip, engine_data['categories'].split(','))
+                engine.categories = map(
+                    str.strip, engine_data['categories'].split(','))
             continue
         setattr(engine, param_name, engine_data[param_name])
     for engine_attr in dir(engine):
         if engine_attr.startswith('_'):
             continue
         if getattr(engine, engine_attr) == None:
-            print '[E] Engine config error: Missing attribute "{0}.{1}"'.format(engine.name, engine_attr)
+            print '[E] Engine config error: Missing attribute "{0}.{1}"'.format(engine.name, engine_attr)  # noqa
             sys.exit(1)
     engines[engine.name] = engine
-    engine.stats = {'result_count': 0, 'search_count': 0, 'page_load_time': 0, 'score_count': 0, 'errors': 0}
+    engine.stats = {
+        'result_count': 0,
+        'search_count': 0,
+        'page_load_time': 0,
+        'score_count': 0,
+        'errors': 0
+    }
     if hasattr(engine, 'categories'):
         for category_name in engine.categories:
             categories.setdefault(category_name, []).append(engine)
     else:
         categories['general'].append(engine)
 
+
 def default_request_params():
-    return {'method': 'GET', 'headers': {}, 'data': {}, 'url': '', 'cookies': {}}
+    return {
+        'method': 'GET', 'headers': {}, 'data': {}, 'url': '', 'cookies': {}}
+
 
 def make_callback(engine_name, results, suggestions, callback, params):
     # creating a callback wrapper for the search engine results
     def process_callback(response, **kwargs):
         cb_res = []
         response.search_params = params
-        engines[engine_name].stats['page_load_time'] += (datetime.now() - params['started']).total_seconds()
+        engines[engine_name].stats['page_load_time'] += \
+            (datetime.now() - params['started']).total_seconds()
         try:
             search_results = callback(response)
         except Exception, e:
             engines[engine_name].stats['errors'] += 1
             results[engine_name] = cb_res
-            print '[E] Error with engine "{0}":\n\t{1}'.format(engine_name, str(e))
+            print '[E] Error with engine "{0}":\n\t{1}'.format(
+                engine_name, str(e))
             return
         for result in search_results:
             result['engine'] = engine_name
...
         results[engine_name] = cb_res
     return process_callback
 
+
 def score_results(results):
-    flat_res = filter(None, chain.from_iterable(izip_longest(*results.values())))
+    flat_res = filter(
+        None, chain.from_iterable(izip_longest(*results.values())))
     flat_len = len(flat_res)
     engines_len = len(results)
     results = []
     # deduplication + scoring
-    for i,res in enumerate(flat_res):
+    for i, res in enumerate(flat_res):
         res['parsed_url'] = urlparse(res['url'])
         res['engines'] = [res['engine']]
         weight = 1.0
         if hasattr(engines[res['engine']], 'weight'):
             weight = float(engines[res['engine']].weight)
-        score = int((flat_len - i)/engines_len)*weight+1
+        score = int((flat_len - i) / engines_len) * weight + 1
         duplicated = False
         for new_res in results:
-            p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path
-            p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path
+            p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path  # noqa
+            p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path  # noqa
             if res['parsed_url'].netloc == new_res['parsed_url'].netloc and\
                p1 == p2 and\
                res['parsed_url'].query == new_res['parsed_url'].query and\
...
                 duplicated = new_res
                 break
         if duplicated:
-            if len(res.get('content', '')) > len(duplicated.get('content', '')):
+            if len(res.get('content', '')) > len(duplicated.get('content', '')):  # noqa
                 duplicated['content'] = res['content']
             duplicated['score'] += score
             duplicated['engines'].append(res['engine'])
...
             results.append(res)
     return sorted(results, key=itemgetter('score'), reverse=True)
 
+
 def search(query, request, selected_engines):
     global engines, categories, number_of_searches
     requests = []
...
         request_params['started'] = datetime.now()
         request_params = engine.request(query, request_params)
 
-        callback = make_callback(selected_engine['name'], results, suggestions, engine.response, request_params)
-
-        request_args = dict(headers = request_params['headers']
-                           ,hooks   = dict(response=callback)
-                           ,cookies = request_params['cookies']
-                           ,timeout = settings['server']['request_timeout']
-                           )
+        callback = make_callback(
+            selected_engine['name'],
+            results,
+            suggestions,
+            engine.response,
+            request_params
+        )
+
+        request_args = dict(
+            headers=request_params['headers'],
+            hooks=dict(response=callback),
+            cookies=request_params['cookies'],
+            timeout=settings['server']['request_timeout']
+        )
 
         if request_params['method'] == 'GET':
             req = grequests.get
...
 
         requests.append(req(request_params['url'], **request_args))
     grequests.map(requests)
-    for engine_name,engine_results in results.items():
+    for engine_name, engine_results in results.items():
         engines[engine_name].stats['search_count'] += 1
         engines[engine_name].stats['result_count'] += len(engine_results)
 
...
 
     return results, suggestions
 
+
 def get_engines_stats():
     # TODO refactor
     pageloads = []
...
     errors = []
     scores_per_result = []
 
-    max_pageload = max_results = max_score = max_errors = max_score_per_result = 0
+    max_pageload = max_results = max_score = max_errors = max_score_per_result = 0  # noqa
     for engine in engines.values():
         if engine.stats['search_count'] == 0:
             continue
-        results_num = engine.stats['result_count']/float(engine.stats['search_count'])
-        load_times  = engine.stats['page_load_time']/float(engine.stats['search_count'])
+        results_num = \
+            engine.stats['result_count'] / float(engine.stats['search_count'])
+        load_times = engine.stats['page_load_time'] / float(engine.stats['search_count'])  # noqa
         if results_num:
-            score = engine.stats['score_count'] / float(engine.stats['search_count'])
+            score = engine.stats['score_count'] / float(engine.stats['search_count'])  # noqa
             score_per_result = score / results_num
         else:
             score = score_per_result = 0.0
...
         results.append({'avg': results_num, 'name': engine.name})
         scores.append({'avg': score, 'name': engine.name})
         errors.append({'avg': engine.stats['errors'], 'name': engine.name})
-        scores_per_result.append({'avg': score_per_result, 'name': engine.name})
+        scores_per_result.append({
+            'avg': score_per_result,
+            'name': engine.name
+        })
 
     for engine in pageloads:
-        engine['percentage'] = int(engine['avg']/max_pageload*100)
+        engine['percentage'] = int(engine['avg'] / max_pageload * 100)
 
     for engine in results:
-        engine['percentage'] = int(engine['avg']/max_results*100)
+        engine['percentage'] = int(engine['avg'] / max_results * 100)
 
     for engine in scores:
-        engine['percentage'] = int(engine['avg']/max_score*100)
+        engine['percentage'] = int(engine['avg'] / max_score * 100)
 
     for engine in scores_per_result:
-        engine['percentage'] = int(engine['avg']/max_score_per_result*100)
+        engine['percentage'] = int(engine['avg'] / max_score_per_result * 100)
 
     for engine in errors:
         if max_errors:
-            engine['percentage'] = int(float(engine['avg'])/max_errors*100)
+            engine['percentage'] = int(float(engine['avg']) / max_errors * 100)
         else:
             engine['percentage'] = 0
 
-
-    return [('Page loads (sec)', sorted(pageloads, key=itemgetter('avg')))
-           ,('Number of results', sorted(results, key=itemgetter('avg'), reverse=True))
-           ,('Scores', sorted(scores, key=itemgetter('avg'), reverse=True))
-           ,('Scores per result', sorted(scores_per_result, key=itemgetter('avg'), reverse=True))
-           ,('Errors', sorted(errors, key=itemgetter('avg'), reverse=True))
-           ]
+    return [
+        ('Page loads (sec)', sorted(pageloads, key=itemgetter('avg'))),
+        (
+            'Number of results',
+            sorted(results, key=itemgetter('avg'), reverse=True)
+        ),
+        ('Scores', sorted(scores, key=itemgetter('avg'), reverse=True)),
+        (
+            'Scores per result',
+            sorted(scores_per_result, key=itemgetter('avg'), reverse=True)
+        ),
+        ('Errors', sorted(errors, key=itemgetter('avg'), reverse=True)),
+    ]
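
Note: the score line above gives results from earlier round-robin positions (izip_longest interleaves the engines' result lists) a higher base score, scaled by the engine's weight. A purely illustrative worked example, using Python 2 integer division as the module does:

    flat_len = 10    # ten interleaved results in total...
    engines_len = 2  # ...from two engines
    weight = 1.0     # default engine weight

    for i in (0, 1, 2):
        print int((flat_len - i) / engines_len) * weight + 1
    # prints 6.0, 5.0, 5.0 (10/2=5, 9/2=4, 8/2=4 under integer division)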

searx/engines/bing.py (+3, -2)

 
 base_url = 'http://www.bing.com/'
 search_string = 'search?{query}'
-locale = 'en-US' # see http://msdn.microsoft.com/en-us/library/dd251064.aspx
+locale = 'en-US'  # see http://msdn.microsoft.com/en-us/library/dd251064.aspx
 
 
 def request(query, params):
-    search_path = search_string.format(query=urlencode({'q': query, 'setmkt': locale}))
+    search_path = search_string.format(
+        query=urlencode({'q': query, 'setmkt': locale}))
     #if params['category'] == 'images':
     #    params['url'] = base_url + 'images/' + search_path
     params['url'] = base_url + search_path

searx/engines/currency_convert.py (+15, -12)

 
 parser_re = re.compile(r'^\W*(\d+(?:\.\d+)?)\W*([a-z]{3})\W*(?:in)?\W*([a-z]{3})\W*$', re.I)
 
+
 def request(query, params):
     m = parser_re.match(query)
     if not m:
...
         # wrong params
         return params
 
-    q = (from_currency+to_currency).upper()
+    q = (from_currency + to_currency).upper()
 
     params['url'] = url.format(query=q)
     params['ammount'] = ammount
...
     global base_url
     results = []
     try:
-        _,conversion_rate,_ = resp.text.split(',', 2)
+        _, conversion_rate, _ = resp.text.split(',', 2)
         conversion_rate = float(conversion_rate)
     except:
         return results
 
-    title = '{0} {1} in {2} is {3}'.format(resp.search_params['ammount']
-                                          ,resp.search_params['from']
-                                          ,resp.search_params['to']
-                                          ,resp.search_params['ammount']*conversion_rate
-                                          )
+    title = '{0} {1} in {2} is {3}'.format(
+        resp.search_params['ammount'],
+        resp.search_params['from'],
+        resp.search_params['to'],
+        resp.search_params['ammount'] * conversion_rate
+    )
 
     content = '1 {0} is {1} {2}'.format(resp.search_params['from'], conversion_rate, resp.search_params['to'])
     now_date = datetime.now().strftime('%Y%m%d')
     url = 'http://finance.yahoo.com/currency/converter-results/{0}/{1}-{2}-to-{3}.html'
-    url = url.format(now_date
-                    ,resp.search_params['ammount']
-                    ,resp.search_params['from'].lower()
-                    ,resp.search_params['to'].lower()
-                    )
+    url = url.format(
+        now_date,
+        resp.search_params['ammount'],
+        resp.search_params['from'].lower(),
+        resp.search_params['to'].lower()
+    )
     results.append({'title': title, 'content': content, 'url': url})
 
     return results
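
Note: parser_re accepts amount/currency queries with an optional "in"; a small self-contained illustration (not part of the diff):

    import re

    parser_re = re.compile(
        r'^\W*(\d+(?:\.\d+)?)\W*([a-z]{3})\W*(?:in)?\W*([a-z]{3})\W*$', re.I)

    print parser_re.match('10 usd in eur').groups()  # ('10', 'usd', 'eur')
    print parser_re.match('3.5 GBP JPY').groups()    # ('3.5', 'GBP', 'JPY')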

searx/engines/dailymotion.py (+4, -1)

 # see http://www.dailymotion.com/doc/api/obj-video.html
 search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=25&page=1&{query}'
 
+
 def request(query, params):
     global search_url
-    params['url'] = search_url.format(query=urlencode({'search': query, 'localization': locale }))
+    params['url'] = search_url.format(
+        query=urlencode({'search': query, 'localization': locale}))
     return params
 
 
...
         results.append({'url': url, 'title': title, 'content': content})
     return results
 
+
 def text_content_from_html(html_string):
     desc_html = html.fragment_fromstring(html_string, create_parent=True)
     return desc_html.text_content()
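
Note: text_content_from_html strips the markup the Dailymotion API embeds in descriptions via lxml; a quick illustration of the helper defined above, assuming lxml is installed:

    from lxml import html

    def text_content_from_html(html_string):
        desc_html = html.fragment_fromstring(html_string, create_parent=True)
        return desc_html.text_content()

    print text_content_from_html('<b>hello</b> <i>world</i>')  # hello world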

settings.yml → searx/settings.yml (+0, -0, file renamed; contents unchanged)


searx/settings_robot.py (+0, -16)

-
-port = 11111
-
-secret_key = "ultrasecretkey" # change this!
-
-debug = False
-
-request_timeout = 5.0 # seconds
-
-weights = {} # 'search_engine_name': float(weight) | default is 1.0
-
-blacklist = [] # search engine blacklist
-
-categories = {} # custom search engine categories
-
-base_url = None # "https://your.domain.tld/" or None (to use request parameters)

searx/settings_robot.yml (+107, -0)

+server:
+    port : 11111
+    secret_key : "ultrasecretkey" # change this!
+    debug : False
+    request_timeout : 3.0 # seconds
+    base_url: False
+
+engines:
+  - name : wikipedia
+    engine : mediawiki
+    url    : https://en.wikipedia.org/
+    number_of_results : 1
+
+  - name : bing
+    engine : bing
+    locale : en-US
+
+  - name : currency
+    engine : currency_convert
+    categories : general
+
+  - name : deviantart
+    engine : deviantart
+    categories : images
+
+  - name : ddg definitions
+    engine : duckduckgo_definitions
+
+  - name : duckduckgo
+    engine : duckduckgo
+    locale : en-us
+
+  - name : filecrop
+    engine : filecrop
+    categories : files
+
+  - name : flickr
+    engine : flickr
+    categories : images
+
+  - name : github
+    engine : github
+    categories : it
+
+  - name : google
+    engine        : json_engine
+    search_url    : https://ajax.googleapis.com/ajax/services/search/web?v=2.0&start=0&rsz=large&safe=off&filter=off&q={query}
+    categories    : general
+    url_query     : /responseData/results/unescapedUrl
+    content_query : /responseData/results/content
+    title_query   : /responseData/results/titleNoFormatting
+
+  - name : google images
+    engine : google_images
+    categories : images
+
+  - name : piratebay
+    engine : piratebay
+    categories : videos, music, files
+
+  - name : soundcloud
+    engine : soundcloud
+    categories : music
+
+  - name : stackoverflow
+    engine : stackoverflow
+    categories : it
+
+  - name : startpage
+    engine : startpage
+
+  - name : twitter
+    engine : twitter
+    categories : social media
+
+  - name : urbandictionary
+    engine        : xpath
+    search_url    : http://www.urbandictionary.com/define.php?term={query}
+    url_xpath     : //div[@class="word"]//a/@href
+    title_xpath   : //div[@class="word"]//a
+    content_xpath : //div[@class="definition"]
+
+  - name : yahoo
+    engine           : xpath
+    search_url       : http://search.yahoo.com/search?p={query}
+    results_xpath    : //div[@class="res"]
+    url_xpath        : .//h3/a/@href
+    title_xpath      : .//h3/a
+    content_xpath    : .//div[@class="abstr"]
+    suggestion_xpath : //div[@id="satat"]//a
+
+  - name : youtube
+    engine : youtube
+    categories : videos
+
+  - name : dailymotion
+    engine : dailymotion
+    locale : en_US
+    categories : videos
+
+  - name : vimeo
+    engine : vimeo
+    categories : videos
+    results_xpath : //div[@id="browse_content"]/ol/li
+    url_xpath : ./a/@href
+    title_xpath : ./a/div[@class="data"]/p[@class="title"]/text()
+    content_xpath : ./a/img/@src
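
Note: the robot settings thereby switch from a Python module to the same YAML schema as settings.yml, engines included. A sketch of how they are read back (PyYAML load, as in searx/__init__.py; the relative path here is illustrative):

    from yaml import load

    with open('searx/settings_robot.yml') as settings_yaml:
        settings = load(settings_yaml)

    print settings['server']['port']  # 11111
    print [e['name'] for e in settings['engines'][:3]]
    # ['wikipedia', 'bing', 'currency']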

searx/testing.py (+16, -3)

 
 import os
 import subprocess
-import sys
 
 
 class SearxTestLayer:
+    """Base layer for non-robot tests."""
 
     __name__ = u'SearxTestLayer'
 
...
 
     def setUp(self):
         os.setpgrp()  # create new process group, become its leader
+
+        # get program paths
         webapp = os.path.join(
             os.path.abspath(os.path.dirname(os.path.realpath(__file__))),
             'webapp.py'
         )
         exe = os.path.abspath(os.path.dirname(__file__) + '/../bin/py')
+
+        # set robot settings path
+        os.environ['SEARX_SETTINGS_PATH'] = os.path.abspath(
+            os.path.dirname(__file__) + '/settings_robot.yml')
+
+        # run the server
         self.server = subprocess.Popen(
-            [exe, webapp, 'settings_robot'],
+            [exe, webapp],
             stdout=subprocess.PIPE,
             stderr=subprocess.STDOUT
         )
 
     def tearDown(self):
-        # TERM all processes in my group
+        # send TERM signal to all processes in my group, to stop subprocesses
         os.killpg(os.getpgid(self.server.pid), 15)
 
+        # remove previously set environment variable
+        del os.environ['SEARX_SETTINGS_PATH']
+
 
 SEARXROBOTLAYER = SearxRobotLayer()
 
 
 class SearxTestCase(TestCase):
+    """Base test case for non-robot tests."""
+
     layer = SearxTestLayer
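
Note: instead of passing a settings module name on the command line, the layer now exports SEARX_SETTINGS_PATH before spawning webapp.py; the child inherits the variable and searx/__init__.py resolves it at import time. A condensed sketch of that handshake, with an illustrative path:

    import os
    import subprocess

    os.setpgrp()  # as in setUp: the layer leads its own process group

    # point the child at the robot settings, then spawn it
    os.environ['SEARX_SETTINGS_PATH'] = '/abs/path/to/searx/settings_robot.yml'
    server = subprocess.Popen(['bin/py', 'searx/webapp.py'])

    # ... the robot suite runs against the server (port 11111 per settings_robot.yml) ...

    # teardown, as in the layer: one SIGTERM (15) to the group, then clean up
    os.killpg(os.getpgid(server.pid), 15)
    del os.environ['SEARX_SETTINGS_PATH']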

searx/utils.py (+5, -1)

 import cStringIO
 import re
 
+
 def gen_useragent():
     # TODO
     return "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0"
 
+
 def highlight_content(content, query):
 
     if not content:
...
 
     return content
 
+
 class HTMLTextExtractor(HTMLParser):
     def __init__(self):
         HTMLParser.__init__(self)
-        self.result = [ ]
+        self.result = []
 
     def handle_data(self, d):
         self.result.append(d)
...
     def get_text(self):
         return u''.join(self.result)
 
+
 def html_to_text(html):
     s = HTMLTextExtractor()
     s.feed(html)

searx/webapp.py (+39, -32)

 (C) 2013- by Adam Tauber, <asciimoo@gmail.com>
 '''
 
-import os
-import sys
-if __name__ == "__main__":
-    sys.path.append(os.path.realpath(os.path.dirname(os.path.realpath(__file__))+'/../'))
-
 from searx import settings
-
 from flask import Flask, request, render_template, url_for, Response, make_response, redirect
 from searx.engines import search, categories, engines, get_engines_stats
 import json
...
 from searx.utils import highlight_content, html_to_text
 
 
+import os
 
-app = Flask(__name__)
-app.secret_key = settings['server']['secret_key']
 
+app = Flask(
+    __name__,
+    static_folder=os.path.join(os.path.dirname(__file__), 'static'),
+    template_folder=os.path.join(os.path.dirname(__file__), 'templates')
+)
+
+app.secret_key = settings['server']['secret_key']
 
 #TODO configurable via settings.yml
 favicons = ['wikipedia', 'youtube', 'vimeo', 'soundcloud',
...
             kwargs['selected_categories'] = ['general']
     return render_template(template_name, **kwargs)
 
+
 def parse_query(query):
     query_engines = []
     query_parts = query.split()
...
 def index():
     global categories
 
-    if request.method=='POST':
+    if request.method == 'POST':
         request_data = request.form
     else:
         request_data = request.args
...
     query, selected_engines = parse_query(request_data['q'].encode('utf-8'))
 
     if not len(selected_engines):
-        for pd_name,pd in request_data.items():
+        for pd_name, pd in request_data.items():
             if pd_name.startswith('category_'):
                 category = pd_name[9:]
                 if not category in categories:
...
         response.headers.add('Content-Disposition', 'attachment;Filename=searx_-_{0}.csv'.format('_'.join(query.split())))
         return response
     elif request_data.get('format') == 'rss':
-        response_rss = render('opensearch_response_rss.xml'
-                              ,results=results
-                              ,q=request_data['q']
-                              ,number_of_results=len(results)
-                              ,base_url=get_base_url()
-                              )
+        response_rss = render(
+            'opensearch_response_rss.xml',
+            results=results,
+            q=request_data['q'],
+            number_of_results=len(results),
+            base_url=get_base_url()
+        )
         return Response(response_rss, mimetype='text/xml')
 
-
-    return render('results.html'
-                 ,results=results
-                 ,q=request_data['q']
-                 ,selected_categories=selected_categories
-                 ,number_of_results=len(results)+len(featured_results)
-                 ,featured_results=featured_results
-                 ,suggestions=suggestions
-                 )
+    return render(
+        'results.html',
+        results=results,
+        q=request_data['q'],
+        selected_categories=selected_categories,
+        number_of_results=len(results) + len(featured_results),
+        featured_results=featured_results,
+        suggestions=suggestions
+    )
 
 
 @app.route('/about', methods=['GET'])
...
 @app.route('/preferences', methods=['GET', 'POST'])
 def preferences():
 
-    if request.method=='POST':
+    if request.method == 'POST':
         selected_categories = []
-        for pd_name,pd in request.form.items():
+        for pd_name, pd in request.form.items():
             if pd_name.startswith('category_'):
                 category = pd_name[9:]
                 if not category in categories:
...
         if selected_categories:
             resp = make_response(redirect('/'))
             # cookie max age: 4 weeks
-            resp.set_cookie('categories', ','.join(selected_categories), max_age=60*60*24*7*4)
+            resp.set_cookie(
+                'categories', ','.join(selected_categories),
+                max_age=60 * 60 * 24 * 7 * 4
+            )
             return resp
     return render('preferences.html')
 
...
                 mimetype="application/xml")
     return resp
 
+
 @app.route('/favicon.ico')
 def favicon():
     return send_from_directory(os.path.join(app.root_path, 'static/img'),
...
     from gevent import monkey
     monkey.patch_all()
 
-    app.run(debug        = settings['server']['debug']
-           ,use_debugger = settings['server']['debug']
-           ,port         = settings['server']['port']
-           )
+    app.run(
+        debug=settings['server']['debug'],
+        use_debugger=settings['server']['debug'],
+        port=settings['server']['port']
+    )
 
 
 if __name__ == "__main__":

setup.py (+15, -0)

             'zope.testrunner',
         ]
     },
+    entry_points={
+        'console_scripts': [
+            'searx-run = searx.webapp:run'
+        ]
+    },
+    package_data={
+        'searx': [
+            'settings.yml',
+            '../README.md',
+            'static/*/*',
+            'templates/*.html',
+            'templates/result_templates/*.html',
+        ],
+    },
+
 )
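
Note: the console_scripts entry point replaces the buildout entry-points deleted from buildout.cfg, minimal.cfg and production.cfg above, and package_data ships settings.yml, the templates and the static files inside the egg, matching the package-relative searx_dir lookup in searx/__init__.py. Roughly, the wrapper setuptools installs as searx-run looks like the sketch below (the exact generated script varies by setuptools version):

    # sketch of the 'searx-run' script produced by the entry point
    from searx.webapp import run

    if __name__ == '__main__':
        run()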