Browse Source

Merge branch 'master' of https://github.com/asciimoo/searx

pw3t 11 years ago
parent
commit
9e72ebe064
13 changed files with 154 additions and 150 deletions
  1. 2
    0
      .gitignore
  2. 1
    2
      README.md
  3. 0
    99
      engines.cfg_sample
  4. 1
    0
      requirements.txt
  5. 22
    0
      searx/__init__.py
  6. 11
    18
      searx/engines/__init__.py
  7. 0
    16
      searx/settings.py
  8. 0
    1
      searx/templates/about.html
  9. 1
    1
      searx/utils.py
  10. 7
    13
      searx/webapp.py
  11. 107
    0
      settings.yml
  12. 1
    0
      setup.py
  13. 1
    0
      versions.cfg

+ 2
- 0
.gitignore View File

1
 env
1
 env
2
 engines.cfg
2
 engines.cfg
3
 .installed.cfg
3
 .installed.cfg
4
+.coverage
5
+coverage/
4
 setup.cfg
6
 setup.cfg
5
 
7
 
6
 *.pyc
8
 *.pyc

+ 1
- 2
README.md View File

25
 
25
 
26
 * clone source: `git clone git@github.com:asciimoo/searx.git && cd searx`
26
 * clone source: `git clone git@github.com:asciimoo/searx.git && cd searx`
27
 * install dependencies: `pip install -r requirements.txt`
27
 * install dependencies: `pip install -r requirements.txt`
28
-* edit your [searx/settings.py](https://github.com/asciimoo/searx/blob/master/searx/settings.py) (set your `secret_key`!)
29
-* rename `engines.cfg_sample` to `engines.cfg`
28
+* edit your [settings.yml](https://github.com/asciimoo/searx/blob/master/settings.yml) (set your `secret_key`!)
30
 * run `python searx/webapp.py` to start the application
29
 * run `python searx/webapp.py` to start the application
31
 
30
 
32
 For all the details, follow this [step by step installation](https://github.com/asciimoo/searx/wiki/Installation)
31
 For all the details, follow this [step by step installation](https://github.com/asciimoo/searx/wiki/Installation)

+ 0
- 99
engines.cfg_sample View File

1
-[wikipedia]
2
-engine = mediawiki
3
-url    = https://en.wikipedia.org/
4
-number_of_results = 1
5
-
6
-[bing]
7
-engine = bing
8
-locale = en-US
9
-
10
-[currency]
11
-engine=currency_convert
12
-categories = general
13
-
14
-[deviantart]
15
-engine = deviantart
16
-categories = images
17
-
18
-[ddg definitions]
19
-engine = duckduckgo_definitions
20
-
21
-[duckduckgo]
22
-engine = duckduckgo
23
-locale = en-us
24
-
25
-[filecrop]
26
-engine = filecrop
27
-categories = files
28
-
29
-[flickr]
30
-engine = flickr
31
-categories = images
32
-
33
-[github]
34
-engine = github
35
-categories = it
36
-
37
-[google]
38
-engine        = json_engine
39
-search_url    = https://ajax.googleapis.com/ajax/services/search/web?v=2.0&start=0&rsz=large&safe=off&filter=off&q={query}
40
-categories    = general
41
-url_query     = /responseData/results/unescapedUrl
42
-content_query = /responseData/results/content
43
-title_query   = /responseData/results/titleNoFormatting
44
-
45
-[google images]
46
-engine = google_images
47
-categories = images
48
-
49
-[piratebay]
50
-engine = piratebay
51
-categories = videos, music, files
52
-
53
-[soundcloud]
54
-engine = soundcloud
55
-categories = music
56
-
57
-[stackoverflow]
58
-engine = stackoverflow
59
-categories = it
60
-
61
-[startpage]
62
-engine = startpage
63
-
64
-[twitter]
65
-engine = twitter
66
-categories = social media
67
-
68
-[urbandictionary]
69
-engine        = xpath
70
-search_url    = http://www.urbandictionary.com/define.php?term={query}
71
-url_xpath     = //div[@class="word"]//a/@href
72
-title_xpath   = //div[@class="word"]//a
73
-content_xpath = //div[@class="definition"]
74
-
75
-[yahoo]
76
-engine           = xpath
77
-search_url       = http://search.yahoo.com/search?p={query}
78
-results_xpath    = //div[@class="res"]
79
-url_xpath        = .//h3/a/@href
80
-title_xpath      = .//h3/a
81
-content_xpath    = .//div[@class="abstr"]
82
-suggestion_xpath = //div[@id="satat"]//a
83
-
84
-[youtube]
85
-engine = youtube
86
-categories = videos
87
-
88
-[dailymotion]
89
-engine = dailymotion
90
-locale = en_US
91
-categories = videos
92
-
93
-[vimeo]
94
-engine = vimeo
95
-categories = videos
96
-results_xpath = //div[@id="browse_content"]/ol/li
97
-url_xpath=./a/@href
98
-title_xpath=./a/div[@class="data"]/p[@class="title"]/text()
99
-content_xpath=./a/img/@src

+ 1
- 0
requirements.txt View File

1
 flask
1
 flask
2
 grequests
2
 grequests
3
 lxml
3
 lxml
4
+pyyaml

+ 22
- 0
searx/__init__.py View File

1
+from os import environ
2
+from os.path import realpath, dirname, join
3
+try:
4
+    from yaml import load
5
+except:
6
+    from sys import exit, stderr
7
+    stderr.write('[E] install pyyaml\n')
8
+    exit(2)
9
+
10
+
11
+searx_dir  = realpath(dirname(realpath(__file__))+'/../')
12
+engine_dir = dirname(realpath(__file__))
13
+
14
+if 'SEARX_SETTINGS_PATH' in environ:
15
+    settings_path = environ['SEARX_SETTINGS_PATH']
16
+else:
17
+    settings_path = join(searx_dir, 'settings.yml')
18
+
19
+
20
+with open(settings_path) as settings_yaml:
21
+    settings = load(settings_yaml)
22
+

+ 11
- 18
searx/engines/__init__.py View File

23
 from operator import itemgetter
23
 from operator import itemgetter
24
 from urlparse import urlparse
24
 from urlparse import urlparse
25
 from searx import settings
25
 from searx import settings
26
-from searx.utils import get_useragent
27
-import ConfigParser
26
+from searx.utils import gen_useragent
28
 import sys
27
 import sys
29
 from datetime import datetime
28
 from datetime import datetime
30
 
29
 
31
 engine_dir = dirname(realpath(__file__))
30
 engine_dir = dirname(realpath(__file__))
32
-searx_dir  = join(engine_dir, '../../')
33
 
31
 
34
-engines_config = ConfigParser.SafeConfigParser()
35
-engines_config.read(join(searx_dir, 'engines.cfg'))
36
 number_of_searches = 0
32
 number_of_searches = 0
37
 
33
 
38
 engines = {}
34
 engines = {}
48
     module.name = modname
44
     module.name = modname
49
     return module
45
     return module
50
 
46
 
51
-if not engines_config.sections():
52
-    print '[E] Error no engines found. Edit your engines.cfg'
47
+if not 'engines' in settings or not settings['engines']:
48
+    print '[E] Error no engines found. Edit your settings.yml'
53
     exit(2)
49
     exit(2)
54
 
50
 
55
-for engine_config_name in engines_config.sections():
56
-    engine_data = engines_config.options(engine_config_name)
57
-    engine = load_module(engines_config.get(engine_config_name, 'engine')+'.py')
58
-    engine.name = engine_config_name
51
+for engine_data in settings['engines']:
52
+    engine_name = engine_data['engine']
53
+    engine = load_module(engine_name+'.py')
59
     for param_name in engine_data:
54
     for param_name in engine_data:
60
         if param_name == 'engine':
55
         if param_name == 'engine':
61
             continue
56
             continue
62
         if param_name == 'categories':
57
         if param_name == 'categories':
63
-            if engines_config.get(engine_config_name, param_name) == 'none':
58
+            if engine_data['categories'] == 'none':
64
                 engine.categories = []
59
                 engine.categories = []
65
             else:
60
             else:
66
-                engine.categories = map(str.strip, engines_config.get(engine_config_name, param_name).split(','))
61
+                engine.categories = map(str.strip, engine_data['categories'].split(','))
67
             continue
62
             continue
68
-        setattr(engine, param_name, engines_config.get(engine_config_name, param_name))
63
+        setattr(engine, param_name, engine_data[param_name])
69
     for engine_attr in dir(engine):
64
     for engine_attr in dir(engine):
70
         if engine_attr.startswith('_'):
65
         if engine_attr.startswith('_'):
71
             continue
66
             continue
118
         weight = 1.0
113
         weight = 1.0
119
         if hasattr(engines[res['engine']], 'weight'):
114
         if hasattr(engines[res['engine']], 'weight'):
120
             weight = float(engines[res['engine']].weight)
115
             weight = float(engines[res['engine']].weight)
121
-        elif res['engine'] in settings.weights:
122
-            weight = float(settings.weights[res['engine']])
123
         score = int((flat_len - i)/engines_len)*weight+1
116
         score = int((flat_len - i)/engines_len)*weight+1
124
         duplicated = False
117
         duplicated = False
125
         for new_res in results:
118
         for new_res in results:
153
     suggestions = set()
146
     suggestions = set()
154
     number_of_searches += 1
147
     number_of_searches += 1
155
     #user_agent = request.headers.get('User-Agent', '')
148
     #user_agent = request.headers.get('User-Agent', '')
156
-    user_agent = get_useragent()
149
+    user_agent = gen_useragent()
157
 
150
 
158
     for selected_engine in selected_engines:
151
     for selected_engine in selected_engines:
159
         if selected_engine['name'] not in engines:
152
         if selected_engine['name'] not in engines:
172
         request_args = dict(headers = request_params['headers']
165
         request_args = dict(headers = request_params['headers']
173
                            ,hooks   = dict(response=callback)
166
                            ,hooks   = dict(response=callback)
174
                            ,cookies = request_params['cookies']
167
                            ,cookies = request_params['cookies']
175
-                           ,timeout = settings.request_timeout
168
+                           ,timeout = settings['server']['request_timeout']
176
                            )
169
                            )
177
 
170
 
178
         if request_params['method'] == 'GET':
171
         if request_params['method'] == 'GET':

+ 0
- 16
searx/settings.py View File

1
-
2
-port = 8888
3
-
4
-secret_key = "ultrasecretkey" # change this!
5
-
6
-debug = True
7
-
8
-request_timeout = 5.0 # seconds
9
-
10
-weights = {} # 'search_engine_name': float(weight) | default is 1.0
11
-
12
-blacklist = [] # search engine blacklist
13
-
14
-categories = {} # custom search engine categories
15
-
16
-base_url = None # "https://your.domain.tld/" or None (to use request parameters)

+ 0
- 1
searx/templates/about.html View File

10
     <ul>
10
     <ul>
11
         <li>Maybe Searx won’t offer you as personalised results as Google, but it doesn't make a profile about you</li>
11
         <li>Maybe Searx won’t offer you as personalised results as Google, but it doesn't make a profile about you</li>
12
         <li>Searx doesn't care about what you search, never shares anything with a third party, and it can't be used to compromise you</li>
12
         <li>Searx doesn't care about what you search, never shares anything with a third party, and it can't be used to compromise you</li>
13
-        <li>Searx doesn't make money on ads and it isn't customised based on your interests. You get the pure search results</li>
14
         <li>Searx is a free software, the code is 100% open and you can help to make it better. See more on <a href="https://github.com/asciimoo/searx">github</a></li>
13
         <li>Searx is a free software, the code is 100% open and you can help to make it better. See more on <a href="https://github.com/asciimoo/searx">github</a></li>
15
     </ul>
14
     </ul>
16
     <p>If you do care about privacy, want to be a conscious user, moreover believe
15
     <p>If you do care about privacy, want to be a conscious user, moreover believe

+ 1
- 1
searx/utils.py View File

5
 import cStringIO
5
 import cStringIO
6
 import re
6
 import re
7
 
7
 
8
-def get_useragent():
8
+def gen_useragent():
9
     # TODO
9
     # TODO
10
     return "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0"
10
     return "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0"
11
 
11
 

+ 7
- 13
searx/webapp.py View File

22
 if __name__ == "__main__":
22
 if __name__ == "__main__":
23
     sys.path.append(os.path.realpath(os.path.dirname(os.path.realpath(__file__))+'/../'))
23
     sys.path.append(os.path.realpath(os.path.dirname(os.path.realpath(__file__))+'/../'))
24
 
24
 
25
-# first argument is for specifying settings module, used mostly by robot tests
26
-from sys import argv
27
-if len(argv) == 2:
28
-    from importlib import import_module
29
-    settings = import_module('searx.' + argv[1])
30
-else:
31
-    from searx import settings
25
+from searx import settings
32
 
26
 
33
 from flask import Flask, request, render_template, url_for, Response, make_response, redirect
27
 from flask import Flask, request, render_template, url_for, Response, make_response, redirect
34
 from searx.engines import search, categories, engines, get_engines_stats
28
 from searx.engines import search, categories, engines, get_engines_stats
41
 
35
 
42
 
36
 
43
 app = Flask(__name__)
37
 app = Flask(__name__)
44
-app.secret_key = settings.secret_key
38
+app.secret_key = settings['server']['secret_key']
45
 
39
 
46
 
40
 
47
 opensearch_xml = '''<?xml version="1.0" encoding="utf-8"?>
41
 opensearch_xml = '''<?xml version="1.0" encoding="utf-8"?>
58
 
52
 
59
 
53
 
60
 def get_base_url():
54
 def get_base_url():
61
-    if settings.base_url:
62
-        hostname = settings.base_url
55
+    if settings['server']['base_url']:
56
+        hostname = settings['server']['base_url']
63
     else:
57
     else:
64
         scheme = 'http'
58
         scheme = 'http'
65
         if request.is_secure:
59
         if request.is_secure:
252
     from gevent import monkey
246
     from gevent import monkey
253
     monkey.patch_all()
247
     monkey.patch_all()
254
 
248
 
255
-    app.run(debug        = settings.debug
256
-           ,use_debugger = settings.debug
257
-           ,port         = settings.port
249
+    app.run(debug        = settings['server']['debug']
250
+           ,use_debugger = settings['server']['debug']
251
+           ,port         = settings['server']['port']
258
            )
252
            )
259
 
253
 
260
 
254
 

+ 107
- 0
settings.yml View File

1
+server:
2
+    port : 8888
3
+    secret_key : "ultrasecretkey" # change this!
4
+    debug : True
5
+    request_timeout : 3.0 # seconds
6
+    base_url: False
7
+
8
+engines:
9
+  - name : wikipedia
10
+    engine : mediawiki
11
+    url    : https://en.wikipedia.org/
12
+    number_of_results : 1
13
+
14
+  - name : bing
15
+    engine : bing
16
+    locale : en-US
17
+
18
+  - name : currency
19
+    engine : currency_convert
20
+    categories : general
21
+
22
+  - name : deviantart
23
+    engine : deviantart
24
+    categories : images
25
+
26
+  - name : ddg definitions
27
+    engine : duckduckgo_definitions
28
+
29
+  - name : duckduckgo
30
+    engine : duckduckgo
31
+    locale : en-us
32
+
33
+  - name : filecrop
34
+    engine : filecrop
35
+    categories : files
36
+
37
+  - name : flickr
38
+    engine : flickr
39
+    categories : images
40
+
41
+  - name : github
42
+    engine : github
43
+    categories : it
44
+
45
+  - name : google
46
+    engine        : json_engine
47
+    search_url    : https://ajax.googleapis.com/ajax/services/search/web?v=2.0&start=0&rsz=large&safe=off&filter=off&q={query}
48
+    categories    : general
49
+    url_query     : /responseData/results/unescapedUrl
50
+    content_query : /responseData/results/content
51
+    title_query   : /responseData/results/titleNoFormatting
52
+
53
+  - name : google images
54
+    engine : google_images
55
+    categories : images
56
+
57
+  - name : piratebay
58
+    engine : piratebay
59
+    categories : videos, music, files
60
+
61
+  - name : soundcloud
62
+    engine : soundcloud
63
+    categories : music
64
+
65
+  - name : stackoverflow
66
+    engine : stackoverflow
67
+    categories : it
68
+
69
+  - name : startpage
70
+    engine : startpage
71
+
72
+  - name : twitter
73
+    engine : twitter
74
+    categories : social media
75
+
76
+  - name : urbandictionary
77
+    engine        : xpath
78
+    search_url    : http://www.urbandictionary.com/define.php?term={query}
79
+    url_xpath     : //div[@class="word"]//a/@href
80
+    title_xpath   : //div[@class="word"]//a
81
+    content_xpath : //div[@class="definition"]
82
+
83
+  - name : yahoo
84
+    engine           : xpath
85
+    search_url       : http://search.yahoo.com/search?p={query}
86
+    results_xpath    : //div[@class="res"]
87
+    url_xpath        : .//h3/a/@href
88
+    title_xpath      : .//h3/a
89
+    content_xpath    : .//div[@class="abstr"]
90
+    suggestion_xpath : //div[@id="satat"]//a
91
+
92
+  - name : youtube
93
+    engine : youtube
94
+    categories : videos
95
+
96
+  - name : dailymotion
97
+    engine : dailymotion
98
+    locale : en_US
99
+    categories : videos
100
+
101
+  - name : vimeo
102
+    engine : vimeo
103
+    categories : videos
104
+    results_xpath : //div[@id="browse_content"]/ol/li
105
+    url_xpath : ./a/@href
106
+    title_xpath : ./a/div[@class="data"]/p[@class="title"]/text()
107
+    content_xpath : ./a/img/@src

+ 1
- 0
setup.py View File

32
         'flask',
32
         'flask',
33
         'grequests',
33
         'grequests',
34
         'lxml',
34
         'lxml',
35
+        'pyyaml',
35
         'setuptools',
36
         'setuptools',
36
     ],
37
     ],
37
     extras_require={
38
     extras_require={

+ 1
- 0
versions.cfg View File

16
 pep8 = 1.4.6
16
 pep8 = 1.4.6
17
 plone.testing = 4.0.8
17
 plone.testing = 4.0.8
18
 pyflakes = 0.7.3
18
 pyflakes = 0.7.3
19
+pyyaml = 3.10
19
 requests = 2.2.0
20
 requests = 2.2.0
20
 robotframework-debuglibrary = 0.3
21
 robotframework-debuglibrary = 0.3
21
 robotframework-httplibrary = 0.4.2
22
 robotframework-httplibrary = 0.4.2