浏览代码

Merge branch 'master' of https://github.com/asciimoo/searx

pw3t 11 年前
父节点
当前提交
9e72ebe064
共有 13 个文件被更改,包括 154 次插入和 150 次删除
  1. 2
    0
      .gitignore
  2. 1
    2
      README.md
  3. 0
    99
      engines.cfg_sample
  4. 1
    0
      requirements.txt
  5. 22
    0
      searx/__init__.py
  6. 11
    18
      searx/engines/__init__.py
  7. 0
    16
      searx/settings.py
  8. 0
    1
      searx/templates/about.html
  9. 1
    1
      searx/utils.py
  10. 7
    13
      searx/webapp.py
  11. 107
    0
      settings.yml
  12. 1
    0
      setup.py
  13. 1
    0
      versions.cfg

+ 2
- 0
.gitignore 查看文件

@@ -1,6 +1,8 @@
1 1
 env
2 2
 engines.cfg
3 3
 .installed.cfg
4
+.coverage
5
+coverage/
4 6
 setup.cfg
5 7
 
6 8
 *.pyc

+ 1
- 2
README.md 查看文件

@@ -25,8 +25,7 @@ List of [running instances](https://github.com/asciimoo/searx/wiki/Searx-instanc
25 25
 
26 26
 * clone source: `git clone git@github.com:asciimoo/searx.git && cd searx`
27 27
 * install dependencies: `pip install -r requirements.txt`
28
-* edit your [searx/settings.py](https://github.com/asciimoo/searx/blob/master/searx/settings.py) (set your `secret_key`!)
29
-* rename `engines.cfg_sample` to `engines.cfg`
28
+* edit your [settings.yml](https://github.com/asciimoo/searx/blob/master/settings.yml) (set your `secret_key`!)
30 29
 * run `python searx/webapp.py` to start the application
31 30
 
32 31
 For all the details, follow this [step by step installation](https://github.com/asciimoo/searx/wiki/Installation)

+ 0
- 99
engines.cfg_sample 查看文件

@@ -1,99 +0,0 @@
1
-[wikipedia]
2
-engine = mediawiki
3
-url    = https://en.wikipedia.org/
4
-number_of_results = 1
5
-
6
-[bing]
7
-engine = bing
8
-locale = en-US
9
-
10
-[currency]
11
-engine=currency_convert
12
-categories = general
13
-
14
-[deviantart]
15
-engine = deviantart
16
-categories = images
17
-
18
-[ddg definitions]
19
-engine = duckduckgo_definitions
20
-
21
-[duckduckgo]
22
-engine = duckduckgo
23
-locale = en-us
24
-
25
-[filecrop]
26
-engine = filecrop
27
-categories = files
28
-
29
-[flickr]
30
-engine = flickr
31
-categories = images
32
-
33
-[github]
34
-engine = github
35
-categories = it
36
-
37
-[google]
38
-engine        = json_engine
39
-search_url    = https://ajax.googleapis.com/ajax/services/search/web?v=2.0&start=0&rsz=large&safe=off&filter=off&q={query}
40
-categories    = general
41
-url_query     = /responseData/results/unescapedUrl
42
-content_query = /responseData/results/content
43
-title_query   = /responseData/results/titleNoFormatting
44
-
45
-[google images]
46
-engine = google_images
47
-categories = images
48
-
49
-[piratebay]
50
-engine = piratebay
51
-categories = videos, music, files
52
-
53
-[soundcloud]
54
-engine = soundcloud
55
-categories = music
56
-
57
-[stackoverflow]
58
-engine = stackoverflow
59
-categories = it
60
-
61
-[startpage]
62
-engine = startpage
63
-
64
-[twitter]
65
-engine = twitter
66
-categories = social media
67
-
68
-[urbandictionary]
69
-engine        = xpath
70
-search_url    = http://www.urbandictionary.com/define.php?term={query}
71
-url_xpath     = //div[@class="word"]//a/@href
72
-title_xpath   = //div[@class="word"]//a
73
-content_xpath = //div[@class="definition"]
74
-
75
-[yahoo]
76
-engine           = xpath
77
-search_url       = http://search.yahoo.com/search?p={query}
78
-results_xpath    = //div[@class="res"]
79
-url_xpath        = .//h3/a/@href
80
-title_xpath      = .//h3/a
81
-content_xpath    = .//div[@class="abstr"]
82
-suggestion_xpath = //div[@id="satat"]//a
83
-
84
-[youtube]
85
-engine = youtube
86
-categories = videos
87
-
88
-[dailymotion]
89
-engine = dailymotion
90
-locale = en_US
91
-categories = videos
92
-
93
-[vimeo]
94
-engine = vimeo
95
-categories = videos
96
-results_xpath = //div[@id="browse_content"]/ol/li
97
-url_xpath=./a/@href
98
-title_xpath=./a/div[@class="data"]/p[@class="title"]/text()
99
-content_xpath=./a/img/@src

+ 1
- 0
requirements.txt 查看文件

@@ -1,3 +1,4 @@
1 1
 flask
2 2
 grequests
3 3
 lxml
4
+pyyaml

+ 22
- 0
searx/__init__.py 查看文件

@@ -0,0 +1,22 @@
1
+from os import environ
2
+from os.path import realpath, dirname, join
3
+try:
4
+    from yaml import load
5
+except:
6
+    from sys import exit, stderr
7
+    stderr.write('[E] install pyyaml\n')
8
+    exit(2)
9
+
10
+
11
+searx_dir  = realpath(dirname(realpath(__file__))+'/../')
12
+engine_dir = dirname(realpath(__file__))
13
+
14
+if 'SEARX_SETTINGS_PATH' in environ:
15
+    settings_path = environ['SEARX_SETTINGS_PATH']
16
+else:
17
+    settings_path = join(searx_dir, 'settings.yml')
18
+
19
+
20
+with open(settings_path) as settings_yaml:
21
+    settings = load(settings_yaml)
22
+

+ 11
- 18
searx/engines/__init__.py 查看文件

@@ -23,16 +23,12 @@ from itertools import izip_longest, chain
23 23
 from operator import itemgetter
24 24
 from urlparse import urlparse
25 25
 from searx import settings
26
-from searx.utils import get_useragent
27
-import ConfigParser
26
+from searx.utils import gen_useragent
28 27
 import sys
29 28
 from datetime import datetime
30 29
 
31 30
 engine_dir = dirname(realpath(__file__))
32
-searx_dir  = join(engine_dir, '../../')
33 31
 
34
-engines_config = ConfigParser.SafeConfigParser()
35
-engines_config.read(join(searx_dir, 'engines.cfg'))
36 32
 number_of_searches = 0
37 33
 
38 34
 engines = {}
@@ -48,24 +44,23 @@ def load_module(filename):
48 44
     module.name = modname
49 45
     return module
50 46
 
51
-if not engines_config.sections():
52
-    print '[E] Error no engines found. Edit your engines.cfg'
47
+if not 'engines' in settings or not settings['engines']:
48
+    print '[E] Error no engines found. Edit your settings.yml'
53 49
     exit(2)
54 50
 
55
-for engine_config_name in engines_config.sections():
56
-    engine_data = engines_config.options(engine_config_name)
57
-    engine = load_module(engines_config.get(engine_config_name, 'engine')+'.py')
58
-    engine.name = engine_config_name
51
+for engine_data in settings['engines']:
52
+    engine_name = engine_data['engine']
53
+    engine = load_module(engine_name+'.py')
59 54
     for param_name in engine_data:
60 55
         if param_name == 'engine':
61 56
             continue
62 57
         if param_name == 'categories':
63
-            if engines_config.get(engine_config_name, param_name) == 'none':
58
+            if engine_data['categories'] == 'none':
64 59
                 engine.categories = []
65 60
             else:
66
-                engine.categories = map(str.strip, engines_config.get(engine_config_name, param_name).split(','))
61
+                engine.categories = map(str.strip, engine_data['categories'].split(','))
67 62
             continue
68
-        setattr(engine, param_name, engines_config.get(engine_config_name, param_name))
63
+        setattr(engine, param_name, engine_data[param_name])
69 64
     for engine_attr in dir(engine):
70 65
         if engine_attr.startswith('_'):
71 66
             continue
@@ -118,8 +113,6 @@ def score_results(results):
118 113
         weight = 1.0
119 114
         if hasattr(engines[res['engine']], 'weight'):
120 115
             weight = float(engines[res['engine']].weight)
121
-        elif res['engine'] in settings.weights:
122
-            weight = float(settings.weights[res['engine']])
123 116
         score = int((flat_len - i)/engines_len)*weight+1
124 117
         duplicated = False
125 118
         for new_res in results:
@@ -153,7 +146,7 @@ def search(query, request, selected_engines):
153 146
     suggestions = set()
154 147
     number_of_searches += 1
155 148
     #user_agent = request.headers.get('User-Agent', '')
156
-    user_agent = get_useragent()
149
+    user_agent = gen_useragent()
157 150
 
158 151
     for selected_engine in selected_engines:
159 152
         if selected_engine['name'] not in engines:
@@ -172,7 +165,7 @@ def search(query, request, selected_engines):
172 165
         request_args = dict(headers = request_params['headers']
173 166
                            ,hooks   = dict(response=callback)
174 167
                            ,cookies = request_params['cookies']
175
-                           ,timeout = settings.request_timeout
168
+                           ,timeout = settings['server']['request_timeout']
176 169
                            )
177 170
 
178 171
         if request_params['method'] == 'GET':

+ 0
- 16
searx/settings.py 查看文件

@@ -1,16 +0,0 @@
1
-
2
-port = 8888
3
-
4
-secret_key = "ultrasecretkey" # change this!
5
-
6
-debug = True
7
-
8
-request_timeout = 5.0 # seconds
9
-
10
-weights = {} # 'search_engine_name': float(weight) | default is 1.0
11
-
12
-blacklist = [] # search engine blacklist
13
-
14
-categories = {} # custom search engine categories
15
-
16
-base_url = None # "https://your.domain.tld/" or None (to use request parameters)

+ 0
- 1
searx/templates/about.html 查看文件

@@ -10,7 +10,6 @@
10 10
     <ul>
11 11
         <li>Maybe Searx won’t offer you as personalised results as Google, but it doesn't make a profile about you</li>
12 12
         <li>Searx doesn't care about what you search, never shares anything with a third party, and it can't be used to compromise you</li>
13
-        <li>Searx doesn't make money on ads and it isn't customised based on your interests. You get the pure search results</li>
14 13
         <li>Searx is free software, the code is 100% open and you can help to make it better. See more on <a href="https://github.com/asciimoo/searx">github</a></li>
15 14
     </ul>
16 15
     <p>If you do care about privacy, want to be a conscious user, moreover believe

+ 1
- 1
searx/utils.py 查看文件

@@ -5,7 +5,7 @@ import codecs
5 5
 import cStringIO
6 6
 import re
7 7
 
8
-def get_useragent():
8
+def gen_useragent():
9 9
     # TODO
10 10
     return "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0"
11 11
 

+ 7
- 13
searx/webapp.py 查看文件

@@ -22,13 +22,7 @@ import sys
22 22
 if __name__ == "__main__":
23 23
     sys.path.append(os.path.realpath(os.path.dirname(os.path.realpath(__file__))+'/../'))
24 24
 
25
-# first argument is for specifying settings module, used mostly by robot tests
26
-from sys import argv
27
-if len(argv) == 2:
28
-    from importlib import import_module
29
-    settings = import_module('searx.' + argv[1])
30
-else:
31
-    from searx import settings
25
+from searx import settings
32 26
 
33 27
 from flask import Flask, request, render_template, url_for, Response, make_response, redirect
34 28
 from searx.engines import search, categories, engines, get_engines_stats
@@ -41,7 +35,7 @@ from searx.utils import highlight_content, html_to_text
41 35
 
42 36
 
43 37
 app = Flask(__name__)
44
-app.secret_key = settings.secret_key
38
+app.secret_key = settings['server']['secret_key']
45 39
 
46 40
 
47 41
 opensearch_xml = '''<?xml version="1.0" encoding="utf-8"?>
@@ -58,8 +52,8 @@ opensearch_xml = '''<?xml version="1.0" encoding="utf-8"?>
58 52
 
59 53
 
60 54
 def get_base_url():
61
-    if settings.base_url:
62
-        hostname = settings.base_url
55
+    if settings['server']['base_url']:
56
+        hostname = settings['server']['base_url']
63 57
     else:
64 58
         scheme = 'http'
65 59
         if request.is_secure:
@@ -252,9 +246,9 @@ def run():
252 246
     from gevent import monkey
253 247
     monkey.patch_all()
254 248
 
255
-    app.run(debug        = settings.debug
256
-           ,use_debugger = settings.debug
257
-           ,port         = settings.port
249
+    app.run(debug        = settings['server']['debug']
250
+           ,use_debugger = settings['server']['debug']
251
+           ,port         = settings['server']['port']
258 252
            )
259 253
 
260 254
 

+ 107
- 0
settings.yml 查看文件

@@ -0,0 +1,107 @@
1
+server:
2
+    port : 8888
3
+    secret_key : "ultrasecretkey" # change this!
4
+    debug : True
5
+    request_timeout : 3.0 # seconds
6
+    base_url: False
7
+
8
+engines:
9
+  - name : wikipedia
10
+    engine : mediawiki
11
+    url    : https://en.wikipedia.org/
12
+    number_of_results : 1
13
+
14
+  - name : bing
15
+    engine : bing
16
+    locale : en-US
17
+
18
+  - name : currency
19
+    engine : currency_convert
20
+    categories : general
21
+
22
+  - name : deviantart
23
+    engine : deviantart
24
+    categories : images
25
+
26
+  - name : ddg definitions
27
+    engine : duckduckgo_definitions
28
+
29
+  - name : duckduckgo
30
+    engine : duckduckgo
31
+    locale : en-us
32
+
33
+  - name : filecrop
34
+    engine : filecrop
35
+    categories : files
36
+
37
+  - name : flickr
38
+    engine : flickr
39
+    categories : images
40
+
41
+  - name : github
42
+    engine : github
43
+    categories : it
44
+
45
+  - name : google
46
+    engine        : json_engine
47
+    search_url    : https://ajax.googleapis.com/ajax/services/search/web?v=2.0&start=0&rsz=large&safe=off&filter=off&q={query}
48
+    categories    : general
49
+    url_query     : /responseData/results/unescapedUrl
50
+    content_query : /responseData/results/content
51
+    title_query   : /responseData/results/titleNoFormatting
52
+
53
+  - name : google images
54
+    engine : google_images
55
+    categories : images
56
+
57
+  - name : piratebay
58
+    engine : piratebay
59
+    categories : videos, music, files
60
+
61
+  - name : soundcloud
62
+    engine : soundcloud
63
+    categories : music
64
+
65
+  - name : stackoverflow
66
+    engine : stackoverflow
67
+    categories : it
68
+
69
+  - name : startpage
70
+    engine : startpage
71
+
72
+  - name : twitter
73
+    engine : twitter
74
+    categories : social media
75
+
76
+  - name : urbandictionary
77
+    engine        : xpath
78
+    search_url    : http://www.urbandictionary.com/define.php?term={query}
79
+    url_xpath     : //div[@class="word"]//a/@href
80
+    title_xpath   : //div[@class="word"]//a
81
+    content_xpath : //div[@class="definition"]
82
+
83
+  - name : yahoo
84
+    engine           : xpath
85
+    search_url       : http://search.yahoo.com/search?p={query}
86
+    results_xpath    : //div[@class="res"]
87
+    url_xpath        : .//h3/a/@href
88
+    title_xpath      : .//h3/a
89
+    content_xpath    : .//div[@class="abstr"]
90
+    suggestion_xpath : //div[@id="satat"]//a
91
+
92
+  - name : youtube
93
+    engine : youtube
94
+    categories : videos
95
+
96
+  - name : dailymotion
97
+    engine : dailymotion
98
+    locale : en_US
99
+    categories : videos
100
+
101
+  - name : vimeo
102
+    engine : vimeo
103
+    categories : videos
104
+    results_xpath : //div[@id="browse_content"]/ol/li
105
+    url_xpath : ./a/@href
106
+    title_xpath : ./a/div[@class="data"]/p[@class="title"]/text()
107
+    content_xpath : ./a/img/@src

+ 1
- 0
setup.py 查看文件

@@ -32,6 +32,7 @@ setup(
32 32
         'flask',
33 33
         'grequests',
34 34
         'lxml',
35
+        'pyyaml',
35 36
         'setuptools',
36 37
     ],
37 38
     extras_require={

+ 1
- 0
versions.cfg 查看文件

@@ -16,6 +16,7 @@ mccabe = 0.2.1
16 16
 pep8 = 1.4.6
17 17
 plone.testing = 4.0.8
18 18
 pyflakes = 0.7.3
19
+pyyaml = 3.10
19 20
 requests = 2.2.0
20 21
 robotframework-debuglibrary = 0.3
21 22
 robotframework-httplibrary = 0.4.2