
Merge branch 'master' of https://github.com/asciimoo/searx

pw3t · 11 years ago · commit 132681b3aa

52 changed files with 888 additions and 411 deletions
 1. .gitignore (+11 -7)
 2. Makefile (+7 -8)
 3. README.md (+0 -122)
 4. README.rst (+159 -0)
 5. babel.cfg (+3 -0)
 6. buildout.cfg (+0 -2)
 7. minimal.cfg (+0 -2)
 8. production.cfg (+0 -2)
 9. requirements.txt (+1 -0)
10. searx/__init__.py (+2 -4)
11. searx/engines/__init__.py (+83 -39)
12. searx/engines/bing.py (+3 -2)
13. searx/engines/currency_convert.py (+20 -15)
14. searx/engines/dailymotion.py (+9 -4)
15. searx/engines/deviantart.py (+6 -2)
16. searx/engines/duckduckgo.py (+7 -5)
17. searx/engines/duckduckgo_definitions.py (+6 -6)
18. searx/engines/filecrop.py (+17 -8)
19. searx/engines/flickr.py (+8 -2)
20. searx/engines/github.py (+5 -2)
21. searx/engines/google_images.py (+8 -2)
22. searx/engines/json_engine.py (+14 -6)
23. searx/engines/mediawiki.py (+4 -4)
24. searx/engines/piratebay.py (+18 -9)
25. searx/engines/soundcloud.py (+5 -2)
26. searx/engines/stackoverflow.py (+3 -1)
27. searx/engines/startpage.py (+2 -4)
28. searx/engines/twitter.py (+8 -3)
29. searx/engines/vimeo.py (+15 -12)
30. searx/engines/xpath.py (+17 -12)
31. searx/engines/yacy.py (+4 -2)
32. searx/engines/youtube.py (+7 -7)
33. searx/settings.yml (+111 -0)
34. searx/settings_robot.py (+0 -16)
35. searx/settings_robot.yml (+2 -2)
36. searx/static/css/style.css (+4 -1)
37. searx/templates/about.html (+11 -11)
38. searx/templates/categories.html (+1 -1)
39. searx/templates/engines.html (+4 -5)
40. searx/templates/index.html (+2 -2)
41. searx/templates/preferences.html (+16 -6)
42. searx/templates/result_templates/default.html (+2 -4)
43. searx/templates/result_templates/videos.html (+2 -4)
44. searx/templates/results.html (+4 -4)
45. searx/templates/stats.html (+1 -1)
46. searx/testing.py (+16 -3)
47. searx/translations/hu/LC_MESSAGES/messages.mo (BIN)
48. searx/translations/hu/LC_MESSAGES/messages.po (+115 -0)
49. searx/utils.py (+20 -6)
50. searx/webapp.py (+104 -46)
51. setup.py (+20 -3)
52. versions.cfg (+1 -0)

.gitignore (+11 -7)

@@ -1 +1 @@
-env
-engines.cfg
-.installed.cfg
 .coverage
-covearge/
+.installed.cfg
+engines.cfg
+env
+robot_log.html
+robot_output.xml
+robot_report.html
 setup.cfg
 
 *.pyc
 */*.pyc
 
 bin/
-include/
-lib/
 build/
+covearge/
 develop-eggs/
+dist/
 eggs/
+include/
+lib/
 local/
-searx.egg-info/
 parts/
+searx.egg-info/
 var/

Makefile (+7 -8)

@@ -21 +21 @@
 tests: .installed.cfg
 	@bin/test
 
-enginescfg:
-	@test -f ./engines.cfg || echo "Copying engines.cfg ..."
-	@cp --no-clobber engines.cfg_sample engines.cfg
-
-robot: .installed.cfg enginescfg
+robot: .installed.cfg
 	@bin/robot
 
 flake8: .installed.cfg
@@ -37 +33 @@
 	@bin/coverage report --show-missing
 	@bin/coverage html --directory ./coverage
 
-production: bin/buildout production.cfg setup.py enginescfg
+production: bin/buildout production.cfg setup.py
 	bin/buildout -c production.cfg $(options)
 	@echo "* Please modify `readlink --canonicalize-missing ./searx/settings.py`"
 	@echo "* Hint 1: on production, disable debug mode and change secret_key"
 	@echo "* Hint 2: searx will be executed at server startup by crontab"
 	@echo "* Hint 3: to run immediatley, execute 'bin/supervisord'"
 
-minimal: bin/buildout minimal.cfg setup.py enginescfg
+minimal: bin/buildout minimal.cfg setup.py
 	bin/buildout -c minimal.cfg $(options)
 
+locales:
+	@pybabel compile -d searx/translations
+
 clean:
 	@rm -rf .installed.cfg .mr.developer.cfg bin parts develop-eggs \
 		searx.egg-info lib include .coverage coverage
 
-.PHONY: all tests enginescfg robot flake8 coverage production minimal clean
+.PHONY: all tests robot flake8 coverage production minimal locales clean

README.md (+0 -122)

@@ -1 +0 @@
-searx
-=====
-
-A privacy-respecting, hackable [metasearch engine](https://en.wikipedia.org/wiki/Metasearch_engine).
-
-List of [running instances](https://github.com/asciimoo/searx/wiki/Searx-instances).
-
-[![Flattr searx](http://api.flattr.com/button/flattr-badge-large.png)](https://flattr.com/submit/auto?user_id=asciimoo&url=https://github.com/asciimoo/searx&title=searx&language=&tags=github&category=software)
-
-
-### Features
-
-* Tracking free
-* Modular (see [examples](https://github.com/asciimoo/searx/blob/master/examples))
-* Parallel queries
-* Supports multiple output formats
- * json `curl https://searx.0x2a.tk/?format=json&q=[query]`
- * csv `curl https://searx.0x2a.tk/?format=csv&q=[query]`
- * opensearch/rss `curl https://searx.0x2a.tk/?format=rss&q=[query]`
-* Opensearch support (you can set as default search engine)
-* Configurable search engines/categories
-
-
-### Installation
-
-* clone source: `git clone git@github.com:asciimoo/searx.git && cd searx`
-* install dependencies: `pip install -r requirements.txt`
-* edit your [settings.yml](https://github.com/asciimoo/searx/blob/master/settings.yml) (set your `secret_key`!)
-* run `python searx/webapp.py` to start the application
-
-For all the details, follow this [step by step installation](https://github.com/asciimoo/searx/wiki/Installation)
-
-
-### Alternative (Recommended) Installation
-
-* clone source: `git clone git@github.com:asciimoo/searx.git && cd searx`
-* build in current folder: `make minimal`
-* run `bin/searx-run` to start the application
-
-
-### Development
-
-Just run `make`. Versions of dependencies are pinned down inside `versions.cfg` to produce most stable build. Also remember, NO make command should be run as root, not even `make production`
-
-
-### Deployment
-
-* clone source: `git clone git@github.com:asciimoo/searx.git && cd searx`
-* build in current folder: `make production`
-* run `bin/supervisord` to start the application
-
-
-### Upgrading
-
-* inside previously cloned searx directory run: `git stash` to temporarily save any changes you have made
-* pull source: `git pull origin master`
-* re-build in current folder: `make production`
-* run `bin/supervisorctl stop searx` to stop searx, if it does not, then run `fuser -k 8888/tcp`
-* run `bin/supervisorctl reload` to re-read supervisor config and start searx
-
-
-### Command make
-
-##### `make`
-
-Builds development environment with testing support.
-
-##### `make tests`
-
-Runs tests. You can write tests [here](https://github.com/asciimoo/searx/tree/master/searx/tests) and remember 'untested code is broken code'.
-
-##### `make robot`
-
-Runs robot (Selenium) tests, you must have `firefox` installed because this functional tests actually run the browser and perform operations on it. Also searx is executed with [settings_robot](https://github.com/asciimoo/searx/blob/master/searx/settings_robot.py).
-
-##### `make flake8`
-
-'pep8 is a tool to check your Python code against some of the style conventions in [PEP 8](http://www.python.org/dev/peps/pep-0008/).'
-
-##### `make coverage`
-
-Checks coverage of tests, after running this, execute this: `firefox ./coverage/index.html`
-
-##### `make production`
-
-Used to make co-called production environment - without tests (you should ran tests before deploying searx on the server). This installs supervisord, so if searx crashes, it will try to pick itself up again. And crontab entry is added to start supervisord at server boot.
-
-##### `make minimal`
-
-Minimal build - without test frameworks, the quickest build option.
-
-##### `make clean`
-
-Deletes several folders and files (see `Makefile` for more), so that next time you run any other `make` command it will rebuild everithing.
-
-
-### TODO
-
-* Moar engines
-* Better ui
-* Language support
-* Documentation
-* Pagination
-* Fix `flake8` errors, `make flake8` will be merged into `make tests` when it does not fail anymore
-* Tests
-* When we have more tests, we can integrate Travis-CI
-
-
-### Bugs
-
-Bugs or suggestions? Visit the [issue tracker](https://github.com/asciimoo/searx/issues).
-
-
-### [License](https://github.com/asciimoo/searx/blob/master/LICENSE)
-
-
-### More about searx
-
-* [ohloh](https://www.ohloh.net/p/searx/)
-* [twitter](https://twitter.com/Searx_engine)
-* IRC: #searx @ freenode
-

README.rst (+159 -0)

@@ -0 +1 @@
+searx
+=====
+
+A privacy-respecting, hackable `metasearch
+engine <https://en.wikipedia.org/wiki/Metasearch_engine>`__.
+
+List of `running
+instances <https://github.com/asciimoo/searx/wiki/Searx-instances>`__.
+
+|Flattr searx|
+
+Features
+~~~~~~~~
+
+-  Tracking free
+-  Modular (see
+   `examples <https://github.com/asciimoo/searx/blob/master/examples>`__)
+-  Parallel queries
+-  Supports multiple output formats
+    -  json ``curl https://searx.0x2a.tk/?format=json&q=[query]``
+    -  csv ``curl https://searx.0x2a.tk/?format=csv&q=[query]``
+    -  opensearch/rss ``curl https://searx.0x2a.tk/?format=rss&q=[query]``
+-  Opensearch support (you can set as default search engine)
+-  Configurable search engines/categories
+
+Installation
+~~~~~~~~~~~~
+
+-  clone source:
+   ``git clone git@github.com:asciimoo/searx.git && cd searx``
+-  install dependencies: ``pip install -r requirements.txt``
+-  edit your
+   `settings.yml <https://github.com/asciimoo/searx/blob/master/settings.yml>`__
+   (set your ``secret_key``!)
+-  run ``python searx/webapp.py`` to start the application
+
+For all the details, follow this `step by step
+installation <https://github.com/asciimoo/searx/wiki/Installation>`__
+
+Alternative (Recommended) Installation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+-  clone source:
+   ``git clone git@github.com:asciimoo/searx.git && cd searx``
+-  build in current folder: ``make minimal``
+-  run ``bin/searx-run`` to start the application
+
+Development
+~~~~~~~~~~~
+
+Just run ``make``. Versions of dependencies are pinned down inside
+``versions.cfg`` to produce most stable build. Also remember, NO make
+command should be run as root, not even ``make production``
+
+Deployment
+~~~~~~~~~~
+
+-  clone source:
+   ``git clone git@github.com:asciimoo/searx.git && cd searx``
+-  build in current folder: ``make production``
+-  run ``bin/supervisord`` to start the application
+
+Upgrading
+~~~~~~~~~
+
+-  inside previously cloned searx directory run: ``git stash`` to
+   temporarily save any changes you have made
+-  pull source: ``git pull origin master``
+-  re-build in current folder: ``make production``
+-  run ``bin/supervisorctl stop searx`` to stop searx, if it does not,
+   then run ``fuser -k 8888/tcp``
+-  run ``bin/supervisorctl reload`` to re-read supervisor config and
+   start searx
+
+Command make
+~~~~~~~~~~~~
+
+``make``
+''''''''
+
+Builds development environment with testing support.
+
+``make tests``
+''''''''''''''
+
+Runs tests. You can write tests
+`here <https://github.com/asciimoo/searx/tree/master/searx/tests>`__ and
+remember 'untested code is broken code'.
+
+``make robot``
+''''''''''''''
+
+Runs robot (Selenium) tests, you must have ``firefox`` installed because
+this functional tests actually run the browser and perform operations on
+it. Also searx is executed with
+`settings\_robot <https://github.com/asciimoo/searx/blob/master/searx/settings_robot.py>`__.
+
+``make flake8``
+'''''''''''''''
+
+'pep8 is a tool to check your Python code against some of the style
+conventions in `PEP 8 <http://www.python.org/dev/peps/pep-0008/>`__.'
+
+``make coverage``
+'''''''''''''''''
+
+Checks coverage of tests, after running this, execute this:
+``firefox ./coverage/index.html``
+
+``make production``
+'''''''''''''''''''
+
+Used to make co-called production environment - without tests (you
+should ran tests before deploying searx on the server). This installs
+supervisord, so if searx crashes, it will try to pick itself up again.
+And crontab entry is added to start supervisord at server boot.
+
+``make minimal``
+''''''''''''''''
+
+Minimal build - without test frameworks, the quickest build option.
+
+``make clean``
+''''''''''''''
+
+Deletes several folders and files (see ``Makefile`` for more), so that
+next time you run any other ``make`` command it will rebuild everithing.
+
+TODO
+~~~~
+
+-  Moar engines
+-  Better ui
+-  Language support
+-  Documentation
+-  Pagination
+-  Fix ``flake8`` errors, ``make flake8`` will be merged into
+   ``make tests`` when it does not fail anymore
+-  Tests
+-  When we have more tests, we can integrate Travis-CI
+
+Bugs
+~~~~
+
+Bugs or suggestions? Visit the `issue
+tracker <https://github.com/asciimoo/searx/issues>`__.
+
+`License <https://github.com/asciimoo/searx/blob/master/LICENSE>`__
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+More about searx
+~~~~~~~~~~~~~~~~
+
+-  `ohloh <https://www.ohloh.net/p/searx/>`__
+-  `twitter <https://twitter.com/Searx_engine>`__
+-  IRC: #searx @ freenode
+
+.. |Flattr searx| image:: http://api.flattr.com/button/flattr-badge-large.png
+   :target: https://flattr.com/submit/auto?user_id=asciimoo&url=https://github.com/asciimoo/searx&title=searx&language=&tags=github&category=software

babel.cfg (+3 -0)

@@ -0 +1 @@
+[python: **.py]
+[jinja2: **/templates/**.html]
+extensions=jinja2.ext.autoescape,jinja2.ext.with_
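
babel.cfg is the pybabel extraction mapping for this commit's new i18n support:
translatable strings are collected from the Python modules and the Jinja2
templates, and the compiled catalogs are built by the new ``make locales``
target (``pybabel compile -d searx/translations``). Strings are marked in the
usual gettext style; an illustrative line taken from this commit's own diffs:

    # Marked for extraction per babel.cfg (see searx/engines/__init__.py below)
    from flask.ext.babel import gettext
    label = gettext('Page loads (sec)')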

buildout.cfg (+0 -2)

@@ -16 +16 @@
 eggs = ${buildout:eggs}
 interpreter = py
 dependent-scripts = true
-entry-points =
-    searx-run=searx.webapp:run
 
 
 [robot]

minimal.cfg (+0 -2)

@@ -13 +13 @@
 recipe = zc.recipe.egg:script
 eggs = ${buildout:eggs}
 interpreter = py
-entry-points =
-    searx-run=searx.webapp:run

production.cfg (+0 -2)

@@ -15 +15 @@
 recipe = zc.recipe.egg:script
 eggs = ${buildout:eggs}
 interpreter = py
-entry-points =
-    searx-run=searx.webapp:run
 
 
 [supervisor]

requirements.txt (+1 -0)

@@ -1 +1 @@
 flask
+flask-babel
 grequests
 lxml
 pyyaml

searx/__init__.py (+2 -4)

@@ -1 +1 @@
 from os import environ
-from os.path import realpath, dirname, join
+from os.path import realpath, dirname, join, abspath
 try:
     from yaml import load
 except:
@@ -7 +7 @@
     stderr.write('[E] install pyyaml\n')
     exit(2)
 
-
-searx_dir  = realpath(dirname(realpath(__file__))+'/../')
+searx_dir = abspath(dirname(__file__))
 engine_dir = dirname(realpath(__file__))
 
 if 'SEARX_SETTINGS_PATH' in environ:
@@ -19 +18 @@
 
 with open(settings_path) as settings_yaml:
     settings = load(settings_yaml)
-
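
The hunk above loads settings.yml from SEARX_SETTINGS_PATH when that variable
is set, otherwise from the package directory (the fallback branch is elided
here). A minimal usage sketch under that assumption, with a hypothetical path:

    from os import environ
    # Point searx at an external settings.yml before the package is imported.
    environ['SEARX_SETTINGS_PATH'] = '/etc/searx/settings.yml'  # hypothetical

    import searx
    # searx.settings is the dict parsed from that YAML file, e.g.:
    print searx.settings['server']['request_timeout']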

searx/engines/__init__.py (+83 -39)

@@ -26 +26 @@
 from searx.utils import gen_useragent
 import sys
 from datetime import datetime
+from flask.ext.babel import gettext
 
 engine_dir = dirname(realpath(__file__))
 
@@ -35 +36 @@
 
 categories = {'general': []}
 
+
 def load_module(filename):
     modname = splitext(filename)[0]
     if modname in sys.modules:
@@ -50 +52 @@
 
 for engine_data in settings['engines']:
     engine_name = engine_data['engine']
-    engine = load_module(engine_name+'.py')
+    engine = load_module(engine_name + '.py')
     for param_name in engine_data:
         if param_name == 'engine':
             continue
@@ -58 +60 @@
             if engine_data['categories'] == 'none':
                 engine.categories = []
             else:
-                engine.categories = map(str.strip, engine_data['categories'].split(','))
+                engine.categories = map(
+                    str.strip, engine_data['categories'].split(','))
             continue
         setattr(engine, param_name, engine_data[param_name])
     for engine_attr in dir(engine):
         if engine_attr.startswith('_'):
             continue
-        if getattr(engine, engine_attr) == None:
-            print '[E] Engine config error: Missing attribute "{0}.{1}"'.format(engine.name, engine_attr)
+        if getattr(engine, engine_attr) is None:
+            print '[E] Engine config error: Missing attribute "{0}.{1}"'.format(engine.name, engine_attr)  # noqa
             sys.exit(1)
     engines[engine.name] = engine
-    engine.stats = {'result_count': 0, 'search_count': 0, 'page_load_time': 0, 'score_count': 0, 'errors': 0}
+    engine.stats = {
+        'result_count': 0,
+        'search_count': 0,
+        'page_load_time': 0,
+        'score_count': 0,
+        'errors': 0
+    }
     if hasattr(engine, 'categories'):
         for category_name in engine.categories:
             categories.setdefault(category_name, []).append(engine)
     else:
         categories['general'].append(engine)
 
+
 def default_request_params():
-    return {'method': 'GET', 'headers': {}, 'data': {}, 'url': '', 'cookies': {}}
+    return {
+        'method': 'GET', 'headers': {}, 'data': {}, 'url': '', 'cookies': {}}
+
 
 def make_callback(engine_name, results, suggestions, callback, params):
     # creating a callback wrapper for the search engine results
     def process_callback(response, **kwargs):
         cb_res = []
         response.search_params = params
-        engines[engine_name].stats['page_load_time'] += (datetime.now() - params['started']).total_seconds()
+        engines[engine_name].stats['page_load_time'] += \
+            (datetime.now() - params['started']).total_seconds()
         try:
             search_results = callback(response)
         except Exception, e:
             engines[engine_name].stats['errors'] += 1
             results[engine_name] = cb_res
-            print '[E] Error with engine "{0}":\n\t{1}'.format(engine_name, str(e))
+            print '[E] Error with engine "{0}":\n\t{1}'.format(
+                engine_name, str(e))
             return
         for result in search_results:
             result['engine'] = engine_name
@@ -101 +115 @@
         results[engine_name] = cb_res
     return process_callback
 
+
 def score_results(results):
-    flat_res = filter(None, chain.from_iterable(izip_longest(*results.values())))
+    flat_res = filter(
+        None, chain.from_iterable(izip_longest(*results.values())))
     flat_len = len(flat_res)
     engines_len = len(results)
     results = []
     # deduplication + scoring
-    for i,res in enumerate(flat_res):
+    for i, res in enumerate(flat_res):
         res['parsed_url'] = urlparse(res['url'])
         res['engines'] = [res['engine']]
         weight = 1.0
         if hasattr(engines[res['engine']], 'weight'):
             weight = float(engines[res['engine']].weight)
-        score = int((flat_len - i)/engines_len)*weight+1
+        score = int((flat_len - i) / engines_len) * weight + 1
         duplicated = False
         for new_res in results:
-            p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path
-            p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path
+            p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path  # noqa
+            p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path  # noqa
            if res['parsed_url'].netloc == new_res['parsed_url'].netloc and\
               p1 == p2 and\
              res['parsed_url'].query == new_res['parsed_url'].query and\
@@ -125 +141 @@
                 duplicated = new_res
                 break
         if duplicated:
-            if len(res.get('content', '')) > len(duplicated.get('content', '')):
+            if len(res.get('content', '')) > len(duplicated.get('content', '')):  # noqa
                 duplicated['content'] = res['content']
             duplicated['score'] += score
             duplicated['engines'].append(res['engine'])
@@ -139 +155 @@
             results.append(res)
     return sorted(results, key=itemgetter('score'), reverse=True)
 
+
 def search(query, request, selected_engines):
     global engines, categories, number_of_searches
     requests = []
@@ -160 +177 @@
         request_params['started'] = datetime.now()
         request_params = engine.request(query, request_params)
 
-        callback = make_callback(selected_engine['name'], results, suggestions, engine.response, request_params)
-
-        request_args = dict(headers = request_params['headers']
-                           ,hooks   = dict(response=callback)
-                           ,cookies = request_params['cookies']
-                           ,timeout = settings['server']['request_timeout']
-                           )
+        callback = make_callback(
+            selected_engine['name'],
+            results,
+            suggestions,
+            engine.response,
+            request_params
+        )
+
+        request_args = dict(
+            headers=request_params['headers'],
+            hooks=dict(response=callback),
+            cookies=request_params['cookies'],
+            timeout=settings['server']['request_timeout']
+        )
 
         if request_params['method'] == 'GET':
             req = grequests.get
@@ -180 +204 @@
 
         requests.append(req(request_params['url'], **request_args))
     grequests.map(requests)
-    for engine_name,engine_results in results.items():
+    for engine_name, engine_results in results.items():
         engines[engine_name].stats['search_count'] += 1
         engines[engine_name].stats['result_count'] += len(engine_results)
 
@@ -192 +216 @@
 
     return results, suggestions
 
+
 def get_engines_stats():
     # TODO refactor
     pageloads = []
@@ -200 +225 @@
     errors = []
     scores_per_result = []
 
-    max_pageload = max_results = max_score = max_errors = max_score_per_result = 0
+    max_pageload = max_results = max_score = max_errors = max_score_per_result = 0  # noqa
     for engine in engines.values():
         if engine.stats['search_count'] == 0:
             continue
-        results_num = engine.stats['result_count']/float(engine.stats['search_count'])
-        load_times  = engine.stats['page_load_time']/float(engine.stats['search_count'])
+        results_num = \
+            engine.stats['result_count'] / float(engine.stats['search_count'])
+        load_times = engine.stats['page_load_time'] / float(engine.stats['search_count'])  # noqa
         if results_num:
-            score = engine.stats['score_count'] / float(engine.stats['search_count'])
+            score = engine.stats['score_count'] / float(engine.stats['search_count'])  # noqa
             score_per_result = score / results_num
         else:
             score = score_per_result = 0.0
@@ -220 +246 @@
         results.append({'avg': results_num, 'name': engine.name})
         scores.append({'avg': score, 'name': engine.name})
         errors.append({'avg': engine.stats['errors'], 'name': engine.name})
-        scores_per_result.append({'avg': score_per_result, 'name': engine.name})
+        scores_per_result.append({
+            'avg': score_per_result,
+            'name': engine.name
+        })
 
     for engine in pageloads:
-        engine['percentage'] = int(engine['avg']/max_pageload*100)
+        engine['percentage'] = int(engine['avg'] / max_pageload * 100)
 
     for engine in results:
-        engine['percentage'] = int(engine['avg']/max_results*100)
+        engine['percentage'] = int(engine['avg'] / max_results * 100)
 
     for engine in scores:
-        engine['percentage'] = int(engine['avg']/max_score*100)
+        engine['percentage'] = int(engine['avg'] / max_score * 100)
 
     for engine in scores_per_result:
-        engine['percentage'] = int(engine['avg']/max_score_per_result*100)
+        engine['percentage'] = int(engine['avg'] / max_score_per_result * 100)
 
     for engine in errors:
         if max_errors:
-            engine['percentage'] = int(float(engine['avg'])/max_errors*100)
+            engine['percentage'] = int(float(engine['avg']) / max_errors * 100)
         else:
             engine['percentage'] = 0
 
-
-    return [('Page loads (sec)', sorted(pageloads, key=itemgetter('avg')))
-           ,('Number of results', sorted(results, key=itemgetter('avg'), reverse=True))
-           ,('Scores', sorted(scores, key=itemgetter('avg'), reverse=True))
-           ,('Scores per result', sorted(scores_per_result, key=itemgetter('avg'), reverse=True))
-           ,('Errors', sorted(errors, key=itemgetter('avg'), reverse=True))
-           ]
+    return [
+        (
+            gettext('Page loads (sec)'),
+            sorted(pageloads, key=itemgetter('avg'))
+        ),
+        (
+            gettext('Number of results'),
+            sorted(results, key=itemgetter('avg'), reverse=True)
+        ),
+        (
+            gettext('Scores'),
+            sorted(scores, key=itemgetter('avg'), reverse=True)
+        ),
+        (
+            gettext('Scores per result'),
+            sorted(scores_per_result, key=itemgetter('avg'), reverse=True)
+        ),
+        (
+            gettext('Errors'),
+            sorted(errors, key=itemgetter('avg'), reverse=True)
+        ),
+    ]
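
For context: score_results() above interleaves the per-engine result lists
round-robin, scores earlier positions higher (scaled by the engine's weight),
and merges results whose netloc, path and query match, summing their scores.
A standalone sketch of just the position/weight term (hypothetical rank()
helper, Python 2 like the codebase):

    from itertools import chain, izip_longest

    def rank(results_by_engine, weights={}):
        # Round-robin interleave: every engine's best result comes first.
        flat_res = filter(None, chain.from_iterable(
            izip_longest(*results_by_engine.values())))
        flat_len = len(flat_res)
        engines_len = len(results_by_engine)
        for i, res in enumerate(flat_res):
            weight = float(weights.get(res['engine'], 1.0))
            # Same formula as in the diff: earlier slots score higher.
            res['score'] = int((flat_len - i) / engines_len) * weight + 1
        return sorted(flat_res, key=lambda r: r['score'], reverse=True)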

searx/engines/bing.py (+3 -2)

@@ -4 +4 @@
 
 base_url = 'http://www.bing.com/'
 search_string = 'search?{query}'
-locale = 'en-US' # see http://msdn.microsoft.com/en-us/library/dd251064.aspx
+locale = 'en-US'  # see http://msdn.microsoft.com/en-us/library/dd251064.aspx
 
 
 def request(query, params):
-    search_path = search_string.format(query=urlencode({'q': query, 'setmkt': locale}))
+    search_path = search_string.format(
+        query=urlencode({'q': query, 'setmkt': locale}))
     #if params['category'] == 'images':
     #    params['url'] = base_url + 'images/' + search_path
     params['url'] = base_url + search_path

searx/engines/currency_convert.py (+20 -15)

@@ -5 +5 @@
 url = 'http://finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X'
 weight = 100
 
-parser_re = re.compile(r'^\W*(\d+(?:\.\d+)?)\W*([a-z]{3})\W*(?:in)?\W*([a-z]{3})\W*$', re.I)
+parser_re = re.compile(r'^\W*(\d+(?:\.\d+)?)\W*([a-z]{3})\W*(?:in)?\W*([a-z]{3})\W*$', re.I)  # noqa
+
 
 def request(query, params):
     m = parser_re.match(query)
@@ -19 +20 @@
         # wrong params
         return params
 
-    q = (from_currency+to_currency).upper()
+    q = (from_currency + to_currency).upper()
 
     params['url'] = url.format(query=q)
     params['ammount'] = ammount
@@ -33 +34 @@
     global base_url
     results = []
     try:
-        _,conversion_rate,_ = resp.text.split(',', 2)
+        _, conversion_rate, _ = resp.text.split(',', 2)
         conversion_rate = float(conversion_rate)
     except:
         return results
 
-    title = '{0} {1} in {2} is {3}'.format(resp.search_params['ammount']
-                                          ,resp.search_params['from']
-                                          ,resp.search_params['to']
-                                          ,resp.search_params['ammount']*conversion_rate
-                                          )
+    title = '{0} {1} in {2} is {3}'.format(
+        resp.search_params['ammount'],
+        resp.search_params['from'],
+        resp.search_params['to'],
+        resp.search_params['ammount'] * conversion_rate
+    )
 
-    content = '1 {0} is {1} {2}'.format(resp.search_params['from'], conversion_rate, resp.search_params['to'])
+    content = '1 {0} is {1} {2}'.format(resp.search_params['from'],
+                                        conversion_rate,
+                                        resp.search_params['to'])
     now_date = datetime.now().strftime('%Y%m%d')
-    url = 'http://finance.yahoo.com/currency/converter-results/{0}/{1}-{2}-to-{3}.html'
-    url = url.format(now_date
-                    ,resp.search_params['ammount']
-                    ,resp.search_params['from'].lower()
-                    ,resp.search_params['to'].lower()
-                    )
+    url = 'http://finance.yahoo.com/currency/converter-results/{0}/{1}-{2}-to-{3}.html'  # noqa
+    url = url.format(
+        now_date,
+        resp.search_params['ammount'],
+        resp.search_params['from'].lower(),
+        resp.search_params['to'].lower()
+    )
     results.append({'title': title, 'content': content, 'url': url})
 
     return results
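
parser_re above is what decides whether a query is treated as a currency
conversion: an amount, a source currency code, an optional "in", and a target
code. A quick illustration (the example query is hypothetical):

    import re
    parser_re = re.compile(
        r'^\W*(\d+(?:\.\d+)?)\W*([a-z]{3})\W*(?:in)?\W*([a-z]{3})\W*$', re.I)

    m = parser_re.match('100 USD in EUR')  # '100 usd eur' also matches
    print m.groups()  # ('100', 'USD', 'EUR'): ammount, from, to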

searx/engines/dailymotion.py (+9 -4)

@@ -1 +1 @@
 from urllib import urlencode
 from lxml import html
 from json import loads
-from cgi import escape
 
 categories = ['videos']
 locale = 'en_US'
 
 # see http://www.dailymotion.com/doc/api/obj-video.html
-search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=25&page=1&{query}'
+search_url = 'https://api.dailymotion.com/videos?fields=title,description,duration,url,thumbnail_360_url&sort=relevance&limit=25&page=1&{query}'  # noqa
+
+# TODO use video result template
+content_tpl = '<a href="{0}" title="{0}" ><img src="{1}" /></a><br />'
+
 
 def request(query, params):
     global search_url
-    params['url'] = search_url.format(query=urlencode({'search': query, 'localization': locale }))
+    params['url'] = search_url.format(
+        query=urlencode({'search': query, 'localization': locale}))
     return params
 
 
@@ -24 +28 @@
         title = res['title']
         url = res['url']
         if res['thumbnail_360_url']:
-            content = '<a href="{0}" title="{0}" ><img src="{1}" /></a><br />'.format(url, res['thumbnail_360_url'])
+            content = content_tpl.format(url, res['thumbnail_360_url'])
         else:
             content = ''
         if res['description']:
@@ -33 +37 @@
         results.append({'url': url, 'title': title, 'content': content})
     return results
 
+
 def text_content_from_html(html_string):
     desc_html = html.fragment_fromstring(html_string, create_parent=True)
     return desc_html.text_content()

searx/engines/deviantart.py (+6 -2)

@@ -7 +7 @@
 base_url = 'https://www.deviantart.com/'
 search_url = base_url+'search?'
 
+
 def request(query, params):
     global search_url
     params['url'] = search_url + urlencode({'q': query})
@@ -22 +23 @@
     for result in dom.xpath('//div[contains(@class, "tt-a tt-fh")]'):
         link = result.xpath('.//a[contains(@class, "thumb")]')[0]
         url = urljoin(base_url, link.attrib.get('href'))
-        title_links = result.xpath('.//span[@class="details"]//a[contains(@class, "t")]')
+        title_links = result.xpath('.//span[@class="details"]//a[contains(@class, "t")]')  # noqa
         title = ''.join(title_links[0].xpath('.//text()'))
         img_src = link.xpath('.//img')[0].attrib['src']
-        results.append({'url': url, 'title': title, 'img_src': img_src, 'template': 'images.html'})
+        results.append({'url': url,
+                        'title': title,
+                        'img_src': img_src,
+                        'template': 'images.html'})
     return results

searx/engines/duckduckgo.py (+7 -5)

@@ -6 +6 @@
 search_url = url + 'd.js?{query}&p=1&s=0'
 locale = 'us-en'
 
+
 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'q': query, 'l': locale}))
+    q = urlencode({'q': query,
+                   'l': locale})
+    params['url'] = search_url.format(query=q)
     return params
 
 
@@ -17 +20 @@
     for r in search_res:
         if not r.get('t'):
             continue
-        results.append({'title': r['t']
-                       ,'content': html_to_text(r['a'])
-                       ,'url': r['u']
-                       })
+        results.append({'title': r['t'],
+                       'content': html_to_text(r['a']),
+                       'url': r['u']})
     return results

searx/engines/duckduckgo_definitions.py (+6 -6)

@@ -3 +3 @@
 
 url = 'http://api.duckduckgo.com/?{query}&format=json&pretty=0&no_redirect=1'
 
+
 def request(query, params):
-    params['url'] =  url.format(query=urlencode({'q': query}))
+    params['url'] = url.format(query=urlencode({'q': query}))
     return params
 
 
@@ -13 +14 @@
     results = []
     if 'Definition' in search_res:
         if search_res.get('AbstractURL'):
-            res = {'title'    : search_res.get('Heading', '')
-                  ,'content'  : search_res.get('Definition', '')
-                  ,'url'      : search_res.get('AbstractURL', '')
-                  ,'class'   : 'definition_result'
-                  }
+            res = {'title': search_res.get('Heading', ''),
+                   'content': search_res.get('Definition', ''),
+                   'url': search_res.get('AbstractURL', ''),
+                   'class': 'definition_result'}
             results.append(res)
 
     return results

searx/engines/filecrop.py (+17 -8)

@@ -2 +2 @@
 from HTMLParser import HTMLParser
 
 url = 'http://www.filecrop.com/'
-search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1'
+search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1'  # noqa
+
 
 class FilecropResultParser(HTMLParser):
     def __init__(self):
@@ -18 +19 @@
     def handle_starttag(self, tag, attrs):
 
         if tag == 'tr':
-            if ('bgcolor', '#edeff5') in attrs or ('bgcolor', '#ffffff') in attrs:
+            if ('bgcolor', '#edeff5') in attrs or\
+               ('bgcolor', '#ffffff') in attrs:
                 self.__start_processing = True
 
         if not self.__start_processing:
             return
 
         if tag == 'label':
-            self.result['title'] = [attr[1] for attr in attrs if attr[0] == 'title'][0]
-        elif tag == 'a' and ('rel', 'nofollow') in attrs and ('class', 'sourcelink') in attrs:
+            self.result['title'] = [attr[1] for attr in attrs
+                                    if attr[0] == 'title'][0]
+        elif tag == 'a' and ('rel', 'nofollow') in attrs\
+                and ('class', 'sourcelink') in attrs:
             if 'content' in self.result:
-                self.result['content'] += [attr[1] for attr in attrs if attr[0] == 'title'][0]
+                self.result['content'] += [attr[1] for attr in attrs
+                                           if attr[0] == 'title'][0]
             else:
-                self.result['content'] = [attr[1] for attr in attrs if attr[0] == 'title'][0]
+                self.result['content'] = [attr[1] for attr in attrs
+                                          if attr[0] == 'title'][0]
             self.result['content'] += ' '
         elif tag == 'a':
-            self.result['url'] = url + [attr[1] for attr in attrs if attr[0] == 'href'][0]
+            self.result['url'] = url + [attr[1] for attr in attrs
+                                        if attr[0] == 'href'][0]
 
     def handle_endtag(self, tag):
         if self.__start_processing is False:
@@ -60 +67 @@
 
         self.data_counter += 1
 
+
 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'w' :query}))
+    params['url'] = search_url.format(query=urlencode({'w': query}))
     return params
 
+
 def response(resp):
     parser = FilecropResultParser()
     parser.feed(resp.text)

searx/engines/flickr.py (+8 -2)

@@ -8 +8 @@
 
 url = 'https://secure.flickr.com/'
 search_url = url+'search/?{query}'
+results_xpath = '//div[@id="thumbnails"]//a[@class="rapidnofollow photo-click" and @data-track="photo-click"]'  # noqa
+
 
 def request(query, params):
     params['url'] = search_url.format(query=urlencode({'q': query}))
     return params
 
+
 def response(resp):
     global base_url
     results = []
     dom = html.fromstring(resp.text)
-    for result in dom.xpath('//div[@id="thumbnails"]//a[@class="rapidnofollow photo-click" and @data-track="photo-click"]'):
+    for result in dom.xpath(results_xpath):
         href = urljoin(url, result.attrib.get('href'))
         img = result.xpath('.//img')[0]
         title = img.attrib.get('alt', '')
         img_src = img.attrib.get('data-defer-src')
         if not img_src:
             continue
-        results.append({'url': href, 'title': title, 'img_src': img_src, 'template': 'images.html'})
+        results.append({'url': href,
+                        'title': title,
+                        'img_src': img_src,
+                        'template': 'images.html'})
     return results

searx/engines/github.py (+5 -2)

@@ -4 +4 @@
 
 categories = ['it']
 
-search_url = 'https://api.github.com/search/repositories?sort=stars&order=desc&{query}'
+search_url = 'https://api.github.com/search/repositories?sort=stars&order=desc&{query}'  # noqa
+
+accept_header = 'application/vnd.github.preview.text-match+json'
+
 
 def request(query, params):
     global search_url
     params['url'] = search_url.format(query=urlencode({'q': query}))
-    params['headers']['Accept'] = 'application/vnd.github.preview.text-match+json'
+    params['headers']['Accept'] = accept_header
     return params
 
 

searx/engines/google_images.py (+8 -2)

@@ -6 +6 @@
 categories = ['images']
 
 url = 'https://ajax.googleapis.com/'
-search_url = url + 'ajax/services/search/images?v=1.0&start=0&rsz=large&safe=off&filter=off&{query}'
+search_url = url + 'ajax/services/search/images?v=1.0&start=0&rsz=large&safe=off&filter=off&{query}'  # noqa
+
 
 def request(query, params):
     params['url'] = search_url.format(query=urlencode({'q': query}))
     return params
 
+
 def response(resp):
     results = []
     search_res = loads(resp.text)
@@ -24 +26 @@
         title = result['title']
         if not result['url']:
             continue
-        results.append({'url': href, 'title': title, 'content': '', 'img_src': result['url'], 'template': 'images.html'})
+        results.append({'url': href,
+                        'title': title,
+                        'content': '',
+                        'img_src': result['url'],
+                        'template': 'images.html'})
     return results

searx/engines/json_engine.py (+14 -6)

@@ -2 +2 @@
 from json import loads
 from collections import Iterable
 
-search_url    = None
-url_query     = None
+search_url = None
+url_query = None
 content_query = None
-title_query   = None
+title_query = None
 #suggestion_xpath = ''
 
+
 def iterate(iterable):
     if type(iterable) == dict:
         it = iterable.iteritems()
@@ -17 +18 @@
     for index, value in it:
         yield str(index), value
 
+
 def is_iterable(obj):
-    if type(obj) == str: return False
-    if type(obj) == unicode: return False
+    if type(obj) == str:
+        return False
+    if type(obj) == unicode:
+        return False
     return isinstance(obj, Iterable)
 
+
 def parse(query):
     q = []
     for part in query.split('/'):
@@ -31 +36 @@
             q.append(part)
     return q
 
+
 def do_query(data, q):
     ret = []
     if not len(q):
@@ -38 +44 @@
 
     qkey = q[0]
 
-    for key,value in iterate(data):
+    for key, value in iterate(data):
 
         if len(q) == 1:
             if key == qkey:
@@ -54 +60 @@
                 ret.extend(do_query(value, q))
     return ret
 
+
 def query(data, query_string):
     q = parse(query_string)
 
     return do_query(data, q)
 
+
 def request(query, params):
     query = urlencode({'q': query})[2:]
     params['url'] = search_url.format(query=query)
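
json_engine appears to resolve result fields through slash-separated key paths
(url_query, title_query, content_query) walked over the parsed JSON; iterate()
yields list indices as string keys, so paths can cross arrays. A simplified
standalone sketch of the idea (the engine's own parse()/do_query() contain
details elided above):

    def walk(data, path):
        # Follow 'results/0/title'-style paths; list indices appear as digits.
        for key in path.split('/'):
            if isinstance(data, list):
                data = data[int(key)]
            else:
                data = data[key]
        return data

    print walk({'results': [{'title': 'first'}]}, 'results/0/title')  # first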

searx/engines/mediawiki.py (+4 -4)

@@ -3 +3 @@
 
 url = 'https://en.wikipedia.org/'
 
+search_url = url + 'w/api.php?action=query&list=search&{query}&srprop=timestamp&format=json'  # noqa
+
 number_of_results = 10
 
+
 def request(query, params):
-    search_url = url + 'w/api.php?action=query&list=search&{query}&srprop=timestamp&format=json'
     params['url'] = search_url.format(query=urlencode({'srsearch': query}))
     return params
 
@@ -14 +16 @@
 def response(resp):
     search_results = loads(resp.text)
     res = search_results.get('query', {}).get('search', [])
-
-    return [{'url': url + 'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8')),
+    return [{'url': url + 'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8')),  # noqa
         'title': result['title']} for result in res[:int(number_of_results)]]
-

+ 18 - 9  searx/engines/piratebay.py

 
 url = 'https://thepiratebay.se/'
 search_url = url + 'search/{search_term}/0/99/{search_type}'
-search_types = {'videos': '200'
-               ,'music' : '100'
-               ,'files' : '0'
-               }
+search_types = {'videos': '200',
+                'music': '100',
+                'files': '0'}
+
+magnet_xpath = './/a[@title="Download this torrent using magnet"]'
+content_xpath = './/font[@class="detDesc"]//text()'
+
 
 def request(query, params):
-    params['url'] = search_url.format(search_term=quote(query), search_type=search_types.get(params['category']))
+    search_type = search_types.get(params['category'])
+    params['url'] = search_url.format(search_term=quote(query),
+                                      search_type=search_type)
     return params
 
 
         link = result.xpath('.//div[@class="detName"]//a')[0]
         href = urljoin(url, link.attrib.get('href'))
         title = ' '.join(link.xpath('.//text()'))
-        content = escape(' '.join(result.xpath('.//font[@class="detDesc"]//text()')))
+        content = escape(' '.join(result.xpath(content_xpath)))
         seed, leech = result.xpath('.//td[@align="right"]/text()')[:2]
-        magnetlink = result.xpath('.//a[@title="Download this torrent using magnet"]')[0]
-        results.append({'url': href, 'title': title, 'content': content,
-                        'seed': seed, 'leech': leech, 'magnetlink': magnetlink.attrib['href'],
+        magnetlink = result.xpath(magnet_xpath)[0]
+        results.append({'url': href,
+                        'title': title,
+                        'content': content,
+                        'seed': seed,
+                        'leech': leech,
+                        'magnetlink': magnetlink.attrib['href'],
                         'template': 'torrent.html'})
     return results
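Hoisting the two XPath strings into module-level constants keeps the lines under the flake8 limit and makes them easy to exercise in isolation. A self-contained sketch of how they behave against a fabricated result row (the HTML below is invented for illustration, not real piratebay markup):

    # Exercising the hoisted XPath constants on a made-up result row.
    from lxml import html

    magnet_xpath = './/a[@title="Download this torrent using magnet"]'
    content_xpath = './/font[@class="detDesc"]//text()'

    row = html.fromstring(
        '<div><font class="detDesc">Uploaded 01-01, Size 1 GiB</font>'
        '<a title="Download this torrent using magnet"'
        ' href="magnet:?xt=urn:btih:abc">magnet</a></div>')

    print ' '.join(row.xpath(content_xpath))         # Uploaded 01-01, Size 1 GiB
    print row.xpath(magnet_xpath)[0].attrib['href']  # magnet:?xt=urn:btih:abc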

+ 5 - 2  searx/engines/soundcloud.py

 
 guest_client_id = 'b45b1aa10f1ac2941910a7f0d10f8e28'
 url = 'https://api.soundcloud.com/'
-search_url = url + 'search?{query}&facet=model&limit=20&offset=0&linked_partitioning=1&client_id='+guest_client_id
+search_url = url + 'search?{query}&facet=model&limit=20&offset=0&linked_partitioning=1&client_id='+guest_client_id  # noqa
+
 
 def request(query, params):
     global search_url
         if result['kind'] in ('track', 'playlist'):
             title = result['title']
             content = result['description']
-            results.append({'url': result['permalink_url'], 'title': title, 'content': content})
+            results.append({'url': result['permalink_url'],
+                            'title': title,
+                            'content': content})
     return results

+ 3 - 1  searx/engines/stackoverflow.py

 
 url = 'http://stackoverflow.com/'
 search_url = url+'search?'
+result_xpath = './/div[@class="excerpt"]//text()'
+
 
 def request(query, params):
     params['url'] = search_url + urlencode({'q': query})
         link = result.xpath('.//div[@class="result-link"]//a')[0]
         href = urljoin(url, link.attrib.get('href'))
         title = escape(' '.join(link.xpath('.//text()')))
-        content = escape(' '.join(result.xpath('.//div[@class="excerpt"]//text()')))
+        content = escape(' '.join(result.xpath(result_xpath)))
         results.append({'url': href, 'title': title, 'content': content})
     return results

+ 2 - 4  searx/engines/startpage.py

 from urllib import urlencode
 from lxml import html
-from urlparse import urlparse
-from cgi import escape
 
 base_url = 'https://startpage.com/'
 search_url = base_url+'do/search'
 
+
 def request(query, params):
     global search_url
     query = urlencode({'q': query})[2:]
     results = []
     dom = html.fromstring(resp.content)
     # ads xpath //div[@id="results"]/div[@id="sponsored"]//div[@class="result"]
-    # not ads : div[@class="result"] are the direct childs of div[@id="results"]
+    # not ads: div[@class="result"] are the direct childs of div[@id="results"]
     for result in dom.xpath('//div[@id="results"]/div[@class="result"]'):
         link = result.xpath('.//h3/a')[0]
         url = link.attrib.get('href')
-        parsed_url = urlparse(url)
         title = link.text_content()
         content = result.xpath('./p[@class="desc"]')[0].text_content()
         results.append({'url': url, 'title': title, 'content': content})

+ 8 - 3  searx/engines/twitter.py

 
 base_url = 'https://twitter.com/'
 search_url = base_url+'search?'
+title_xpath = './/span[@class="username js-action-profile-name"]//text()'
+content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()'
+
 
 def request(query, params):
     global search_url
     for tweet in dom.xpath('//li[@data-item-type="tweet"]'):
         link = tweet.xpath('.//small[@class="time"]//a')[0]
         url = urljoin(base_url, link.attrib.get('href'))
-        title = ''.join(tweet.xpath('.//span[@class="username js-action-profile-name"]//text()'))
-        content = escape(''.join(tweet.xpath('.//p[@class="js-tweet-text tweet-text"]//text()')))
-        results.append({'url': url, 'title': title, 'content': content})
+        title = ''.join(tweet.xpath(title_xpath))
+        content = escape(''.join(tweet.xpath(content_xpath)))
+        results.append({'url': url,
+                        'title': title,
+                        'content': content})
     return results

+ 15 - 12  searx/engines/vimeo.py

 
 base_url = 'http://vimeo.com'
 search_url = base_url + '/search?{query}'
-url_xpath     = None
+url_xpath = None
 content_xpath = None
-title_xpath   = None
+title_xpath = None
 results_xpath = ''
+content_tpl = '<a href="{0}">  <img src="{2}"/> </a>'
 
-# the cookie set by vimeo contains all the following values, but only __utma seems to be requiered
+# the cookie set by vimeo contains all the following values,
+# but only __utma seems to be requiered
 cookie = {
     #'vuid':'918282893.1027205400'
     # 'ab_bs':'%7B%223%22%3A279%7D'
-     '__utma':'00000000.000#0000000.0000000000.0000000000.0000000000.0'
+     '__utma': '00000000.000#0000000.0000000000.0000000000.0000000000.0'
     # '__utmb':'18302654.1.10.1388942090'
     #, '__utmc':'18302654'
-    #, '__utmz':'18#302654.1388942090.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'
+    #, '__utmz':'18#302654.1388942090.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'  # noqa
     #, '__utml':'search'
 }
 
+
 def request(query, params):
-    params['url'] = search_url.format(query=urlencode({'q' :query}))
+    params['url'] = search_url.format(query=urlencode({'q': query}))
     params['cookies'] = cookie
     return params
 
+
 def response(resp):
     results = []
     dom = html.fromstring(resp.text)
         url = base_url + result.xpath(url_xpath)[0]
         title = p.unescape(extract_text(result.xpath(title_xpath)))
         thumbnail = extract_text(result.xpath(content_xpath)[0])
-        content = '<a href="{0}">  <img src="{2}"/> </a>'.format(url, title, thumbnail)
-        results.append({'url': url
-                        , 'title': title
-                        , 'content': content
-                        , 'template':'videos.html'
-                        , 'thumbnail': thumbnail})
+        results.append({'url': url,
+                        'title': title,
+                        'content': content_tpl.format(url, title, thumbnail),
+                        'template': 'videos.html',
+                        'thumbnail': thumbnail})
     return results
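Note that content_tpl uses the positional fields {0} and {2}, so content_tpl.format(url, title, thumbnail) interpolates the URL and the thumbnail while the title argument (field {1}) goes deliberately unused:

    # str.format picks arguments by position; {1} (the title) is skipped.
    content_tpl = '<a href="{0}">  <img src="{2}"/> </a>'
    print content_tpl.format('http://vimeo.com/1', 'some title', 'thumb.jpg')
    # -> <a href="http://vimeo.com/1">  <img src="thumb.jpg"/> </a>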

+ 17 - 12  searx/engines/xpath.py

 from lxml import html
 from urllib import urlencode, unquote
 from urlparse import urlparse, urljoin
-from cgi import escape
 from lxml.etree import _ElementStringResult
+from searx.utils import html_to_text
 
-search_url    = None
-url_xpath     = None
+search_url = None
+url_xpath = None
 content_xpath = None
-title_xpath   = None
+title_xpath = None
 suggestion_xpath = ''
 results_xpath = ''
 
+
 '''
 if xpath_results is list, extract the text from each result and concat the list
-if xpath_results is a xml element, extract all the text node from it ( text_content() method from lxml )
+if xpath_results is a xml element, extract all the text node from it
+   ( text_content() method from lxml )
 if xpath_results is a string element, then it's already done
 '''
+
+
 def extract_text(xpath_results):
     if type(xpath_results) == list:
         # it's list of result : concat everything using recursive call
         return ''.join(xpath_results)
     else:
         # it's a element
-        return xpath_results.text_content()
+        return html_to_text(xpath_results.text_content())
 
 
 def extract_url(xpath_results):
         url += '/'
 
     # FIXME : hack for yahoo
-    if parsed_url.hostname == 'search.yahoo.com' and parsed_url.path.startswith('/r'):
+    if parsed_url.hostname == 'search.yahoo.com'\
+       and parsed_url.path.startswith('/r'):
         p = parsed_url.path
         mark = p.find('/**')
         if mark != -1:
     if results_xpath:
         for result in dom.xpath(results_xpath):
             url = extract_url(result.xpath(url_xpath))
-            title = extract_text(result.xpath(title_xpath)[0 ])
+            title = extract_text(result.xpath(title_xpath)[0])
             content = extract_text(result.xpath(content_xpath)[0])
             results.append({'url': url, 'title': title, 'content': content})
     else:
         for url, title, content in zip(
-            map(extract_url, dom.xpath(url_xpath)), \
-            map(extract_text, dom.xpath(title_xpath)), \
-            map(extract_text, dom.xpath(content_xpath)), \
-                ):
+            map(extract_url, dom.xpath(url_xpath)),
+            map(extract_text, dom.xpath(title_xpath)),
+            map(extract_text, dom.xpath(content_xpath))
+        ):
             results.append({'url': url, 'title': title, 'content': content})
 
     if not suggestion_xpath:
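Per the docstring, extract_text handles three shapes of XPath result: a list (concatenated via recursion), an element (text_content(), now additionally run through html_to_text), and a plain string result (already text). A minimal sketch of the first two branches, runnable inside this codebase:

    from lxml import html
    from searx.engines.xpath import extract_text

    dom = html.fromstring('<div><b>hello</b> <i>world</i></div>')

    # a list of text nodes is concatenated recursively
    print extract_text(dom.xpath('//div//text()'))  # hello world
    # a single element goes through text_content() and html_to_text()
    print extract_text(dom.xpath('//div')[0])       # hello world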

+ 4 - 2  searx/engines/yacy.py

 url = 'http://localhost:8090'
 search_url = '/yacysearch.json?{query}&maximumRecords=10'
 
+
 def request(query, params):
-    params['url'] = url + search_url.format(query=urlencode({'query':query}))
+    params['url'] = url + search_url.format(query=urlencode({'query': query}))
     return params
 
+
 def response(resp):
     raw_search_results = loads(resp.text)
 
         tmp_result['content'] = ''
 
         if len(result['description']):
-            tmp_result['content'] += result['description'] +"<br/>"
+            tmp_result['content'] += result['description'] + "<br/>"
 
         if len(result['pubDate']):
             tmp_result['content'] += result['pubDate'] + "<br/>"

+ 7 - 7  searx/engines/youtube.py

 
 search_url = 'https://gdata.youtube.com/feeds/api/videos?alt=json&{query}'
 
+
 def request(query, params):
     params['url'] = search_url.format(query=urlencode({'q': query}))
     return params
         thumbnail = ''
         if len(result['media$group']['media$thumbnail']):
             thumbnail = result['media$group']['media$thumbnail'][0]['url']
-            content += '<a href="{0}" title="{0}" ><img src="{1}" /></a>'.format(url, thumbnail)
+            content += '<a href="{0}" title="{0}" ><img src="{1}" /></a>'.format(url, thumbnail)  # noqa
         if len(content):
             content += '<br />' + result['content']['$t']
         else:
             content = result['content']['$t']
 
-        results.append({'url': url
-                        , 'title': title
-                        , 'content': content
-                        , 'template':'videos.html'
-                        , 'thumbnail':thumbnail})
+        results.append({'url': url,
+                        'title': title,
+                        'content': content,
+                        'template': 'videos.html',
+                        'thumbnail': thumbnail})
 
     return results
-

+ 111 - 0  searx/settings.yml

+server:
+    port : 8888
+    secret_key : "ultrasecretkey" # change this!
+    debug : True
+    request_timeout : 3.0 # seconds
+    base_url: False
+
+engines:
+  - name : wikipedia
+    engine : mediawiki
+    url    : https://en.wikipedia.org/
+    number_of_results : 1
+
+  - name : bing
+    engine : bing
+    locale : en-US
+
+  - name : currency
+    engine : currency_convert
+    categories : general
+
+  - name : deviantart
+    engine : deviantart
+    categories : images
+
+  - name : ddg definitions
+    engine : duckduckgo_definitions
+
+  - name : duckduckgo
+    engine : duckduckgo
+    locale : en-us
+
+  - name : filecrop
+    engine : filecrop
+    categories : files
+
+  - name : flickr
+    engine : flickr
+    categories : images
+
+  - name : github
+    engine : github
+    categories : it
+
+  - name : google
+    engine        : json_engine
+    search_url    : https://ajax.googleapis.com/ajax/services/search/web?v=2.0&start=0&rsz=large&safe=off&filter=off&q={query}
+    categories    : general
+    url_query     : /responseData/results/unescapedUrl
+    content_query : /responseData/results/content
+    title_query   : /responseData/results/titleNoFormatting
+
+  - name : google images
+    engine : google_images
+    categories : images
+
+  - name : piratebay
+    engine : piratebay
+    categories : videos, music, files
+
+  - name : soundcloud
+    engine : soundcloud
+    categories : music
+
+  - name : stackoverflow
+    engine : stackoverflow
+    categories : it
+
+  - name : startpage
+    engine : startpage
+
+  - name : twitter
+    engine : twitter
+    categories : social media
+
+  - name : urbandictionary
+    engine        : xpath
+    search_url    : http://www.urbandictionary.com/define.php?term={query}
+    url_xpath     : //div[@class="word"]//a/@href
+    title_xpath   : //div[@class="word"]//a
+    content_xpath : //div[@class="definition"]
+
+  - name : yahoo
+    engine           : xpath
+    search_url       : http://search.yahoo.com/search?p={query}
+    results_xpath    : //div[@class="res"]
+    url_xpath        : .//h3/a/@href
+    title_xpath      : .//h3/a
+    content_xpath    : .//div[@class="abstr"]
+    suggestion_xpath : //div[@id="satat"]//a
+
+  - name : youtube
+    engine : youtube
+    categories : videos
+
+  - name : dailymotion
+    engine : dailymotion
+    locale : en_US
+    categories : videos
+
+  - name : vimeo
+    engine : vimeo
+    categories : videos
+    results_xpath : //div[@id="browse_content"]/ol/li
+    url_xpath : ./a/@href
+    title_xpath : ./a/div[@class="data"]/p[@class="title"]/text()
+    content_xpath : ./a/img/@src
+
+locales:
+    en : English
+    hu : Magyar
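Each entry under engines: names the engine module to load plus optional per-engine options; the xpath and json_engine entries carry their selectors inline, so a new engine can be added without writing any Python. The loader itself lives in searx/engines/__init__.py, which this excerpt does not show, so the following sketch of the wiring is an assumption rather than the actual implementation:

    # Hypothetical sketch: copy extra settings keys onto the engine module,
    # so e.g. number_of_results = 1 overrides the module-level default.
    from yaml import load

    with open('searx/settings.yml') as settings_yml:
        settings = load(settings_yml)

    for engine_data in settings['engines']:
        module_name = engine_data['engine']
        engine = __import__('searx.engines.' + module_name,
                            fromlist=[module_name])
        for key, value in engine_data.items():
            if key not in ('name', 'engine'):
                setattr(engine, key, value)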

+ 0 - 16  searx/settings_robot.py

-
-port = 11111
-
-secret_key = "ultrasecretkey" # change this!
-
-debug = False
-
-request_timeout = 5.0 # seconds
-
-weights = {} # 'search_engine_name': float(weight) | default is 1.0
-
-blacklist = [] # search engine blacklist
-
-categories = {} # custom search engine categories
-
-base_url = None # "https://your.domain.tld/" or None (to use request parameters)

settings.yml → searx/settings_robot.yml

 server:
-    port : 8888
+    port : 11111
     secret_key : "ultrasecretkey" # change this!
-    debug : True
+    debug : False
     request_timeout : 3.0 # seconds
     base_url: False
 

+ 4 - 1  searx/static/css/style.css

 
 input[type="checkbox"] { visibility: hidden; }
 
+fieldset { margin: 8px; }
+
 #categories { margin: 0 10px; }
 
 .checkbox_container { display: inline-block; position: relative; margin: 0 3px; padding: 0px; }
 a:visited { color: #7b11be; }
 
 .result { margin: 19px 0 18px 0; padding: 0; max-width: 55em;  clear: both; }
-.result:hover { background: #e8e7e6; }
 .result_title { margin-bottom: 0; }
 .result h3 { font-size: 1em; word-wrap:break-word; margin: 5px 0 1px 0; padding: 0 }
 .result .content { font-size: 0.8em; margin: 0; padding: 0; max-width: 54em; word-wrap:break-word; line-height: 1.24; }
 
   .result img { max-width: 90%; width: auto; height: auto }
 }
+
+.favicon { float: left; margin-right: 4px; }

+ 11 - 11  searx/templates/about.html

     </p>
     <h2>Why use Searx?</h2>
     <ul>
-        <li>Maybe Searx won’t offer you as personalised results as Google, but it doesn't make a profile about you</li>
-        <li>Searx doesn't care about what you search, never shares anything with a third party, and it can't be used to compromise you</li>
-        <li>Searx is a free software, the code is 100% open and you can help to make it better. See more on <a href="https://gmail.com/asciimoo/searx">github</a></li>
+        <li>Searx may not offer you as personalised results as Google, but it doesn't generate a profile about you</li>
+        <li>Searx doesn't care about what you search for, never shares anything with a third party, and it can't be used to compromise you</li>
+        <li>Searx is free software, the code is 100% open and you can help to make it better. See more on <a href="https://github.com/asciimoo/searx">github</a></li>
     </ul>
-    <p>If you do care about privacy, want to be a conscious user, moreover believe
+    <p>If you do care about privacy, want to be a conscious user, or otherwise believe
     in digital freedom, make Searx your default search engine or run it on your own server</p>
 
 <h2>Technical details - How does it work?</h2>
 
 <p>Searx is a <a href="https://en.wikipedia.org/wiki/Metasearch_engine">metasearch engine</a>,
 inspired by the <a href="http://seeks-project.info/">seeks project</a>.<br />
-It provides basic privacy by mixing your queries with searches on other platforms without storing search data. Queries are made using a POST request on every browser (except chrome*). Therefore they don't show up in our logs, neither in your url history. In case of Chrome* users there is an exception, Searx uses the search bar to perform GET requests.<br />
-Searx can be added to your browser's search bar, moreover it can be set as the default search engine.
+It provides basic privacy by mixing your queries with searches on other platforms without storing search data. Queries are made using a POST request on every browser (except chrome*). Therefore they show up in neither our logs, nor your url history. In case of Chrome* users there is an exception, Searx uses the search bar to perform GET requests.<br />
+Searx can be added to your browser's search bar; moreover, it can be set as the default search engine.
 </p>
 
-<h2>How can I have my own?</h2>
+<h2>How can I make it my own?</h2>
 
-<p>Searx appreciates your suspicion regarding logs, so take the <a href="https://github.com/asciimoo/searx">code</a> and run it yourself! <br />Add your Searx to this <a href="https://github.com/asciimoo/searx/wiki/Searx-instances">list</a> to help other people to have privacy and make the Internet freer!
-<br />The more decentralized the Internet is the more freedom we have!</p>
+<p>Searx appreciates your concern regarding logs, so take the <a href="https://github.com/asciimoo/searx">code</a> and run it yourself! <br />Add your Searx to this <a href="https://github.com/asciimoo/searx/wiki/Searx-instances">list</a> to help other people reclaim their privacy and make the Internet freer!
+<br />The more decentralized the Internet is, the more freedom we have!</p>
 
 <hr />
 
 
 <h3>New engines?</h3>
 <ul>
-    <li>Edit your engines.cfg, see <a href="https://raw.github.com/asciimoo/searx/master/engines.cfg_sample">sample config</a></li>
+    <li>Edit your <a href="https://raw.github.com/asciimoo/searx/master/searx/settings.yml">settings.yml</a></li>
     <li>Create your custom engine module, check the <a href="https://github.com/asciimoo/searx/blob/master/examples/basic_engine.py">example engine</a></li>
 </ul>
 <p>Don't forget to restart searx after config edit!</p>
 <p>See the <a href="https://github.com/asciimoo/searx/wiki/Installation">installation and setup</a> wiki page</p>
 
 <h3>How to debug engines?</h3>
-<p><a href="/stats">Stats page</a> contains some useful data about the used engines.</p>
+<p><a href="/stats">Stats page</a> contains some useful data about the engines used.</p>
 
 </div>
 {% endblock %}

+ 1 - 1  searx/templates/categories.html

 <div id="categories">
 {% for category in categories %}
     <div class="checkbox_container">
-        <input type="checkbox" id="checkbox_{{ category|replace(' ', '_') }}" name="category_{{ category }}" {% if category in selected_categories %}checked="checked"{% endif %} /><label for="checkbox_{{ category|replace(' ', '_') }}">{{ category }}</label>
+        <input type="checkbox" id="checkbox_{{ category|replace(' ', '_') }}" name="category_{{ category }}" {% if category in selected_categories %}checked="checked"{% endif %} /><label for="checkbox_{{ category|replace(' ', '_') }}">{{ _(category) }}</label>
     </div>
 {% endfor %}
 </div>

+ 4 - 5  searx/templates/engines.html

 {% extends 'base.html' %}
 {% block content %}
 <div class="row">
-<h2>Currently used search engines</h2>
+    <h2>{{ _('Currently used search engines') }}</h2>
 
     <table style="width: 80%;">
         <tr>
-            <th>Engine name</th>
-            <th>Category</th>
+            <th>{{ _('Engine name') }}</th>
+            <th>{{ _('Category') }}</th>
         </tr>
     {% for (categ,search_engines) in categs %}
         {% for search_engine in search_engines %}
         {% endfor %}
     {% endfor %}
     </table>
-<p>Please add more engines to this list, pull requests are welcome!</p>
-<p class="right"><a href="/">back</a></p>
+<p class="right"><a href="/">{{ _('back') }}</a></p>
 </div>
 {% endblock %}

+ 2 - 2  searx/templates/index.html

     <div class="title"><h1>searx</h1></div>
     {% include 'search.html' %}
     <p class="top_margin">
-        <a href="/about" class="hmarg">about</a>
-        <a href="/preferences" class="hmarg">preferences</a>
+        <a href="/about" class="hmarg">{{ _('about') }}</a>
+        <a href="/preferences" class="hmarg">{{ _('preferences') }}</a>
     </p>
 </div>
 {% endblock %}

+ 16 - 6  searx/templates/preferences.html

 {% block head %} {% endblock %}
 {% block content %}
 <div class="row">
-    <h2>Preferences</h2>
+    <h2>{{ _('Preferences') }}</h2>
 
 
+    <form method="post" action="/preferences" id="search_form">
     <fieldset>
-        <legend>Default categories</legend>
-        <form method="post" action="/preferences" id="search_form">
+        <legend>{{ _('Default categories') }}</legend>
         <p>
         {% include 'categories.html' %}
         </p>
-        <input type="submit" value="save" />
-        </form>
     </fieldset>
-    <div class="right"><a href="/">back</a></div>
+    <fieldset>
+        <legend>{{ _('Interface language') }}</legend>
+        <p>
+        <select name='locale'>
+            {% for locale_id,locale_name in locales.items() %}
+            <option value={{ locale_id }} {% if locale_id == current_locale %}selected="selected"{% endif %}>{{ locale_name}}</option>
+            {% endfor %}
+        </select>
+        </p>
+    </fieldset>
+    <input type="submit" value="{{ _('save') }}" />
+    </form>
+    <div class="right"><a href="/">{{ _('back') }}</a></div>
 </div>
 {% endblock %}

+ 2 - 4  searx/templates/result_templates/default.html

 <div class="result {{ result.class }}">
 
   {% if result['favicon'] %}
-  <div style="float:left; margin:2px;">
-    <img width="18" height="18" src="static/img/icon_{{result['favicon']}}.ico" alt="{{result['favicon']}}.ico" title="{{result['favicon']}}.ico" />
-  </div>
+    <img width="14" height="14" class="favicon" src="static/img/icon_{{result['favicon']}}.ico" />
   {% endif %}
 
   <div>
-    <h3 class="result_title"><a href="{{ result.url }}">{{ result.title|safe }}</a></h3></br>
+    <h3 class="result_title"><a href="{{ result.url }}">{{ result.title|safe }}</a></h3>
     <p class="content">{% if result.content %}{{ result.content|safe }}<br />{% endif %}</p>
     <p class="url">{{ result.pretty_url }}</p>
   </div>

+ 2 - 4  searx/templates/result_templates/videos.html

 <div class="result">
   {% if result['favicon'] %}
-  <div style="float:left; margin:2px;">
-    <img width="18" height="18" src="static/img/icon_{{result['favicon']}}.ico" alt="{{result['favicon']}}.ico" title="{{result['favicon']}}.ico" />
-  </div>
+    <img width="14" height="14" class="favicon" src="static/img/icon_{{result['favicon']}}.ico" />
   {% endif %}
 
     <p>
       <h3 class="result_title"><a href="{{ result.url }}">{{ result.title|safe }}</a></h3>
-      <a href="{{ result.url }}"><img width="300" height="170"  src="{{ result.thumbnail }}" title={{ result.title }} alt=" {{ result.title }}"/></a>
+      <a href="{{ result.url }}"><img width="400px" src="{{ result.thumbnail }}" title={{ result.title }} alt=" {{ result.title }}"/></a>
       <p class="url">{{ result.url }}</p>
     </p>
 </div>

+ 4 - 4  searx/templates/results.html

 </div>
 <div id="results">
     {% if suggestions %}
-    <div id="suggestions"><span>Suggestions: </span>{% for suggestion in suggestions %}<form method="post" action="/"><input type="hidden" name="q" value="{{suggestion}}"><input type="submit" value="{{ suggestion }}" /></form>{% endfor %}</div>
+    <div id="suggestions"><span>{{ _('Suggestions') }}:</span>{% for suggestion in suggestions %}<form method="post" action="/"><input type="hidden" name="q" value="{{suggestion}}"><input type="submit" value="{{ suggestion }}" /></form>{% endfor %}</div>
     {% endif %}
-    
+
 
     <div id ="result_count">
-        Number of results: {{ number_of_results }}
+        {{ _('Number of results') }}: {{ number_of_results }}
     </div>
 
     {% for result in results %}
         {% endif %}
     {% endfor %}
     <div id="apis">
-      Download results
+      {{ _('Download results') }}
       <form method="post" action="/">
         <div class="left">
           <input type="hidden" name="q" value="{{ q }}" />

+ 1 - 1  searx/templates/stats.html

 {% extends "base.html" %}
 {% block head %} {% endblock %}
 {% block content %}
-<h2>Engine stats</h2>
+<h2>{{ _('Engine stats') }}</h2>
 
 {% for stat_name,stat_category in stats %}
 <div class="left">

+ 16 - 3  searx/testing.py

 
 import os
 import subprocess
-import sys
 
 
 class SearxTestLayer:
+    """Base layer for non-robot tests."""
 
     __name__ = u'SearxTestLayer'
 
 
     def setUp(self):
         os.setpgrp()  # create new process group, become its leader
+
+        # get program paths
         webapp = os.path.join(
             os.path.abspath(os.path.dirname(os.path.realpath(__file__))),
             'webapp.py'
         )
         exe = os.path.abspath(os.path.dirname(__file__) + '/../bin/py')
+
+        # set robot settings path
+        os.environ['SEARX_SETTINGS_PATH'] = os.path.abspath(
+            os.path.dirname(__file__) + '/settings_robot.yml')
+
+        # run the server
         self.server = subprocess.Popen(
-            [exe, webapp, 'settings_robot'],
+            [exe, webapp],
             stdout=subprocess.PIPE,
             stderr=subprocess.STDOUT
         )
 
     def tearDown(self):
-        # TERM all processes in my group
+        # send TERM signal to all processes in my group, to stop subprocesses
         os.killpg(os.getpgid(self.server.pid), 15)
 
+
+        # remove previously set environment variable
+        del os.environ['SEARX_SETTINGS_PATH']
 
 SEARXROBOTLAYER = SearxRobotLayer()
 
 
 class SearxTestCase(TestCase):
+    """Base test case for non-robot tests."""
+
     layer = SearxTestLayer
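The robot layer now picks its configuration through the SEARX_SETTINGS_PATH environment variable instead of passing a settings module name to webapp.py. The reading side lives in searx/__init__.py, which is not shown in this excerpt, so treat this sketch of it as an assumption:

    # Hypothetical settings loader: honour SEARX_SETTINGS_PATH if set,
    # otherwise fall back to the settings.yml bundled with the package.
    import os
    from yaml import load

    if 'SEARX_SETTINGS_PATH' in os.environ:
        settings_path = os.environ['SEARX_SETTINGS_PATH']
    else:
        settings_path = os.path.join(os.path.dirname(__file__),
                                     'settings.yml')

    with open(settings_path) as settings_yml:
        settings = load(settings_yml)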

BIN  searx/translations/hu/LC_MESSAGES/messages.mo


+ 115 - 0  searx/translations/hu/LC_MESSAGES/messages.po

+# Hungarian translations for PROJECT.
+# Copyright (C) 2014 ORGANIZATION
+# This file is distributed under the same license as the PROJECT project.
+# FIRST AUTHOR <EMAIL@ADDRESS>, 2014.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: PROJECT VERSION\n"
+"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n"
+"POT-Creation-Date: 2014-01-22 00:55+0100\n"
+"PO-Revision-Date: 2014-01-21 23:33+0100\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: hu <LL@li.org>\n"
+"Plural-Forms: nplurals=1; plural=0\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=utf-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Generated-By: Babel 1.3\n"
+
+#: searx/engines/__init__.py:274
+msgid "Page loads (sec)"
+msgstr "Válaszidők (sec)"
+
+#: searx/engines/__init__.py:278 searx/templates/results.html:15
+msgid "Number of results"
+msgstr "Találatok száma"
+
+#: searx/engines/__init__.py:282
+msgid "Scores"
+msgstr "Pontszámok"
+
+#: searx/engines/__init__.py:286
+msgid "Scores per result"
+msgstr "Pontszámok találatonként"
+
+#: searx/engines/__init__.py:290
+msgid "Errors"
+msgstr "Hibák"
+
+#: searx/templates/engines.html:4
+msgid "Currently used search engines"
+msgstr "Jelenleg használt keresők"
+
+#: searx/templates/engines.html:8
+msgid "Engine name"
+msgstr "Kereső neve"
+
+#: searx/templates/engines.html:9
+msgid "Category"
+msgstr "Kategória"
+
+#: searx/templates/engines.html:23 searx/templates/preferences.html:27
+msgid "back"
+msgstr "vissza"
+
+#: searx/templates/index.html:7
+msgid "about"
+msgstr "rólunk"
+
+#: searx/templates/index.html:8
+msgid "preferences"
+msgstr "beállítások"
+
+#: searx/templates/preferences.html:5
+msgid "Preferences"
+msgstr "Beállítások"
+
+#: searx/templates/preferences.html:10
+msgid "Default categories"
+msgstr "Alapértelmezett kategóriák"
+
+#: searx/templates/preferences.html:16
+msgid "Interface language"
+msgstr "Nyelv"
+
+#: searx/templates/preferences.html:25
+msgid "save"
+msgstr "mentés"
+
+#: searx/templates/results.html:10
+msgid "Suggestions"
+msgstr "Javaslatok"
+
+#: searx/templates/results.html:26
+msgid "Download results"
+msgstr "Találatok letöltése"
+
+#: searx/templates/stats.html:4
+msgid "Engine stats"
+msgstr "Kereső statisztikák"
+
+# categories - manually added
+# TODO - automatically add
+
+msgid "files"
+msgstr "fájlok"
+
+msgid "general"
+msgstr "általános"
+
+msgid "music"
+msgstr "zene"
+
+msgid "social media"
+msgstr "közösségi média"
+
+msgid "images"
+msgstr "képek"
+
+msgid "videos"
+msgstr "videók"
+
+msgid "it"
+msgstr "it"
+

+ 20 - 6  searx/utils.py

 from HTMLParser import HTMLParser
 #import htmlentitydefs
 import csv
-import codecs
+from codecs import getincrementalencoder
 import cStringIO
 import re
 
+
 def gen_useragent():
     # TODO
-    return "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0"
+    ua = "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0"
+    return ua
+
 
 def highlight_content(content, query):
 
 
     return content
 
+
 class HTMLTextExtractor(HTMLParser):
     def __init__(self):
         HTMLParser.__init__(self)
-        self.result = [ ]
+        self.result = []
 
     def handle_data(self, d):
         self.result.append(d)
 
     def handle_charref(self, number):
-        codepoint = int(number[1:], 16) if number[0] in (u'x', u'X') else int(number)
+        if number[0] in (u'x', u'X'):
+            codepoint = int(number[1:], 16)
+        else:
+            codepoint = int(number)
         self.result.append(unichr(codepoint))
 
     def handle_entityref(self, name):
     def get_text(self):
         return u''.join(self.result)
 
+
 def html_to_text(html):
     s = HTMLTextExtractor()
     s.feed(html)
         self.queue = cStringIO.StringIO()
         self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
         self.stream = f
-        self.encoder = codecs.getincrementalencoder(encoding)()
+        self.encoder = getincrementalencoder(encoding)()
 
     def writerow(self, row):
-        self.writer.writerow([(s.encode("utf-8").strip() if type(s) == str or type(s) == unicode else str(s)) for s in row])
+        unicode_row = []
+        for col in row:
+            if type(col) == str or type(col) == unicode:
+                unicode_row.append(col.encode('utf-8').strip())
+            else:
+                unicode_row.append(col)
+        self.writer.writerow(unicode_row)
         # Fetch UTF-8 output from the queue ...
         data = self.queue.getvalue()
         data = data.decode("utf-8")
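html_to_text, which the xpath engine above now applies to every extracted element, strips tags and resolves both numeric character references and named entities via HTMLParser. A quick check of the expected behaviour:

    from searx.utils import html_to_text

    # decimal (&#38;) and hex (&#x26;) references both resolve to '&'
    print html_to_text(u'<p>R&amp;D &#38; &#x26; more</p>')  # R&D & & more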

+ 104
- 46
searx/webapp.py View File

17
 (C) 2013- by Adam Tauber, <asciimoo@gmail.com>
17
 (C) 2013- by Adam Tauber, <asciimoo@gmail.com>
18
 '''
18
 '''
19
 
19
 
20
+import json
21
+import cStringIO
20
 import os
22
 import os
21
-import sys
22
-if __name__ == "__main__":
23
-    sys.path.append(os.path.realpath(os.path.dirname(os.path.realpath(__file__))+'/../'))
24
 
23
 
25
-from searx import settings
24
+from flask import Flask, request, render_template
25
+from flask import url_for, Response, make_response, redirect
26
+from flask import send_from_directory
26
 
27
 
27
-from flask import Flask, request, render_template, url_for, Response, make_response, redirect
28
+from searx import settings
28
 from searx.engines import search, categories, engines, get_engines_stats
29
 from searx.engines import search, categories, engines, get_engines_stats
29
-import json
30
-import cStringIO
31
 from searx.utils import UnicodeWriter
30
 from searx.utils import UnicodeWriter
32
-from flask import send_from_directory
33
 from searx.utils import highlight_content, html_to_text
31
 from searx.utils import highlight_content, html_to_text
34
 
32
 
33
+from flask.ext.babel import Babel
34
+
35
 
35
 
36
+app = Flask(
37
+    __name__,
38
+    static_folder=os.path.join(os.path.dirname(__file__), 'static'),
39
+    template_folder=os.path.join(os.path.dirname(__file__), 'templates')
40
+)
36
 
41
 
37
-app = Flask(__name__)
38
 app.secret_key = settings['server']['secret_key']
42
 app.secret_key = settings['server']['secret_key']
39
 
43
 
44
+babel = Babel(app)
45
+
46
+#TODO configurable via settings.yml
47
+favicons = ['wikipedia', 'youtube', 'vimeo', 'soundcloud',
48
+            'twitter', 'stackoverflow', 'github']
49
+
40
 
50
 
41
 opensearch_xml = '''<?xml version="1.0" encoding="utf-8"?>
51
 opensearch_xml = '''<?xml version="1.0" encoding="utf-8"?>
42
 <OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/">
52
 <OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/">
51
 '''
61
 '''
52
 
62
 
53
 
63
 
64
+@babel.localeselector
65
+def get_locale():
66
+    locale = request.accept_languages.best_match(settings['locales'].keys())
67
+
68
+    if request.cookies.get('locale', '') in settings['locales']:
69
+        locale = request.cookies.get('locale', '')
70
+
71
+    if 'locale' in request.args\
72
+       and request.args['locale'] in settings['locales']:
73
+        locale = request.args['locale']
74
+
75
+    if 'locale' in request.form\
76
+       and request.form['locale'] in settings['locales']:
77
+        locale = request.form['locale']
78
+
79
+    return locale
80
+
81
+
54
 def get_base_url():
82
 def get_base_url():
55
     if settings['server']['base_url']:
83
     if settings['server']['base_url']:
56
         hostname = settings['server']['base_url']
84
         hostname = settings['server']['base_url']
65
 def render(template_name, **kwargs):
93
 def render(template_name, **kwargs):
66
     global categories
94
     global categories
67
     kwargs['categories'] = ['general']
95
     kwargs['categories'] = ['general']
68
-    kwargs['categories'].extend(x for x in sorted(categories.keys()) if x != 'general')
96
+    kwargs['categories'].extend(x for x in
97
+                                sorted(categories.keys()) if x != 'general')
69
     if not 'selected_categories' in kwargs:
98
     if not 'selected_categories' in kwargs:
70
         kwargs['selected_categories'] = []
99
         kwargs['selected_categories'] = []
71
         cookie_categories = request.cookies.get('categories', '').split(',')
100
         cookie_categories = request.cookies.get('categories', '').split(',')
76
             kwargs['selected_categories'] = ['general']
105
             kwargs['selected_categories'] = ['general']
77
     return render_template(template_name, **kwargs)
106
     return render_template(template_name, **kwargs)
78
 
107
 
108
+
79
 def parse_query(query):
109
 def parse_query(query):
80
     query_engines = []
110
     query_engines = []
81
     query_parts = query.split()
111
     query_parts = query.split()
89
 def index():
119
 def index():
90
     global categories
120
     global categories
91
 
121
 
92
-    if request.method=='POST':
122
+    if request.method == 'POST':
93
         request_data = request.form
123
         request_data = request.form
94
     else:
124
     else:
95
         request_data = request.args
125
         request_data = request.args
101
     query, selected_engines = parse_query(request_data['q'].encode('utf-8'))
131
     query, selected_engines = parse_query(request_data['q'].encode('utf-8'))
102
 
132
 
103
     if not len(selected_engines):
133
     if not len(selected_engines):
104
-        for pd_name,pd in request_data.items():
134
+        for pd_name, pd in request_data.items():
105
             if pd_name.startswith('category_'):
135
             if pd_name.startswith('category_'):
106
                 category = pd_name[9:]
136
                 category = pd_name[9:]
107
                 if not category in categories:
137
                 if not category in categories:
108
                     continue
138
                     continue
109
                 selected_categories.append(category)
139
                 selected_categories.append(category)
110
         if not len(selected_categories):
140
         if not len(selected_categories):
111
-            cookie_categories = request.cookies.get('categories', '').split(',')
141
+            cookie_categories = request.cookies.get('categories', '')
142
+            cookie_categories = cookie_categories.split(',')
112
             for ccateg in cookie_categories:
143
             for ccateg in cookie_categories:
113
                 if ccateg in categories:
144
                 if ccateg in categories:
114
                     selected_categories.append(ccateg)
145
                     selected_categories.append(ccateg)
116
             selected_categories = ['general']
147
             selected_categories = ['general']
117
 
148
 
118
         for categ in selected_categories:
149
         for categ in selected_categories:
119
-            selected_engines.extend({'category': categ, 'name': x.name} for x in categories[categ])
150
+            selected_engines.extend({'category': categ,
151
+                                     'name': x.name}
152
+                                    for x in categories[categ])
120
 
153
 
121
     results, suggestions = search(query, request, selected_engines)
154
     results, suggestions = search(query, request, selected_engines)
122
 
155
 
131
                 result['content'] = html_to_text(result['content']).strip()
164
                 result['content'] = html_to_text(result['content']).strip()
132
             result['title'] = html_to_text(result['title']).strip()
165
             result['title'] = html_to_text(result['title']).strip()
133
         if len(result['url']) > 74:
166
         if len(result['url']) > 74:
134
-            result['pretty_url'] = result['url'][:35] + '[..]' + result['url'][-35:]
167
+            url_parts = result['url'][:35], result['url'][-35:]
168
+            result['pretty_url'] = '{0}[...]{1}'.format(*url_parts)
135
         else:
169
         else:
136
             result['pretty_url'] = result['url']
170
             result['pretty_url'] = result['url']
137
 
171
 
138
         for engine in result['engines']:
172
         for engine in result['engines']:
139
-            if engine in ['wikipedia', 'youtube', 'vimeo', 'soundcloud', 'twitter', 'stackoverflow', 'github']:
173
+            if engine in favicons:
140
                 result['favicon'] = engine
174
                 result['favicon'] = engine
141
 
175
 
142
     if request_data.get('format') == 'json':
176
     if request_data.get('format') == 'json':
143
-        return Response(json.dumps({'query': query, 'results': results}), mimetype='application/json')
177
+        return Response(json.dumps({'query': query, 'results': results}),
178
+                        mimetype='application/json')
144
     elif request_data.get('format') == 'csv':
179
     elif request_data.get('format') == 'csv':
145
         csv = UnicodeWriter(cStringIO.StringIO())
180
         csv = UnicodeWriter(cStringIO.StringIO())
146
         keys = ('title', 'url', 'content', 'host', 'engine', 'score')
181
         keys = ('title', 'url', 'content', 'host', 'engine', 'score')
151
                 csv.writerow([row.get(key, '') for key in keys])
186
                 csv.writerow([row.get(key, '') for key in keys])
152
         csv.stream.seek(0)
187
         csv.stream.seek(0)
153
         response = Response(csv.stream.read(), mimetype='application/csv')
188
         response = Response(csv.stream.read(), mimetype='application/csv')
154
-        response.headers.add('Content-Disposition', 'attachment;Filename=searx_-_{0}.csv'.format('_'.join(query.split())))
189
+        content_disp = 'attachment;Filename=searx_-_{0}.csv'.format(query)
190
+        response.headers.add('Content-Disposition', content_disp)
155
         return response
191
         return response
156
     elif request_data.get('format') == 'rss':
192
     elif request_data.get('format') == 'rss':
157
-        response_rss = render('opensearch_response_rss.xml'
158
-                              ,results=results
159
-                              ,q=request_data['q']
160
-                              ,number_of_results=len(results)
161
-                              ,base_url=get_base_url()
162
-                              )
193
+        response_rss = render(
194
+            'opensearch_response_rss.xml',
195
+            results=results,
196
+            q=request_data['q'],
197
+            number_of_results=len(results),
198
+            base_url=get_base_url()
199
+        )
163
         return Response(response_rss, mimetype='text/xml')
200
         return Response(response_rss, mimetype='text/xml')
164
 
201
 
165
-
166
-    return render('results.html'
167
-                 ,results=results
168
-                 ,q=request_data['q']
169
-                 ,selected_categories=selected_categories
170
-                 ,number_of_results=len(results)+len(featured_results)
171
-                 ,featured_results=featured_results 
172
-                 ,suggestions=suggestions
173
-                 )
202
+    return render(
203
+        'results.html',
204
+        results=results,
205
+        q=request_data['q'],
206
+        selected_categories=selected_categories,
207
+        number_of_results=len(results) + len(featured_results),
208
+        featured_results=featured_results,
209
+        suggestions=suggestions
210
+    )
174
 
211
 
175
 
212
 
176
 @app.route('/about', methods=['GET'])
213
 @app.route('/about', methods=['GET'])
@@ -187,20 +224,37 @@
 @app.route('/preferences', methods=['GET', 'POST'])
 def preferences():
 
-    if request.method=='POST':
+    if request.method == 'POST':
         selected_categories = []
-        for pd_name,pd in request.form.items():
+        locale = None
+        for pd_name, pd in request.form.items():
             if pd_name.startswith('category_'):
                 category = pd_name[9:]
                 if not category in categories:
                     continue
                 selected_categories.append(category)
+            elif pd_name == 'locale' and pd in settings['locales']:
+                locale = pd
+
+        resp = make_response(redirect('/'))
+
+        if locale:
+            # cookie max age: 4 weeks
+            resp.set_cookie(
+                'locale', locale,
+                max_age=60 * 60 * 24 * 7 * 4
+            )
+
         if selected_categories:
-            resp = make_response(redirect('/'))
             # cookie max age: 4 weeks
-            resp.set_cookie('categories', ','.join(selected_categories), max_age=60*60*24*7*4)
-            return resp
-    return render('preferences.html')
+            resp.set_cookie(
+                'categories', ','.join(selected_categories),
+                max_age=60 * 60 * 24 * 7 * 4
+            )
+        return resp
+    return render('preferences.html',
+                  locales=settings['locales'],
+                  current_locale=get_locale())
 
 
 @app.route('/stats', methods=['GET'])
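
Setting the locale cookie is only half of the feature: on later requests something must read it back. A minimal sketch of how the get_locale referenced in the render('preferences.html', ...) call could be wired into Flask-Babel's locale negotiation (an illustration under assumptions, not necessarily the code this commit adds elsewhere):

from flask import request
from flask_babel import Babel

babel = Babel(app)

@babel.localeselector
def get_locale():
    # prefer the locale stored by the POST branch of /preferences
    locale = request.cookies.get('locale', '')
    if locale in settings['locales']:
        return locale
    # otherwise negotiate against the Accept-Language header
    return request.accept_languages.best_match(settings['locales'].keys())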
@@ -216,6 +270,7 @@
 Allow: /
 Allow: /about
 Disallow: /stats
+Disallow: /engines
 """, mimetype='text/plain')
 
 
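The hunk above sits inside the triple-quoted body returned by the robots.txt handler. Reconstructed around the visible lines, the full route plausibly reads as follows (the decorator and function name are assumptions):

@app.route('/robots.txt', methods=['GET'])
def robots():
    return Response("""User-agent: *
Allow: /
Allow: /about
Disallow: /stats
Disallow: /engines
""", mimetype='text/plain')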
@@ -229,24 +284,27 @@
     base_url = get_base_url()
     ret = opensearch_xml.format(method=method, host=base_url)
     resp = Response(response=ret,
-                status=200,
-                mimetype="application/xml")
+                    status=200,
+                    mimetype="application/xml")
     return resp
 
+
 @app.route('/favicon.ico')
 def favicon():
     return send_from_directory(os.path.join(app.root_path, 'static/img'),
-                               'favicon.png', mimetype='image/vnd.microsoft.icon')
+                               'favicon.png',
+                               mimetype='image/vnd.microsoft.icon')
 
 
 def run():
     from gevent import monkey
     monkey.patch_all()
 
-    app.run(debug        = settings['server']['debug']
-           ,use_debugger = settings['server']['debug']
-           ,port         = settings['server']['port']
-           )
+    app.run(
+        debug=settings['server']['debug'],
+        use_debugger=settings['server']['debug'],
+        port=settings['server']['port']
+    )
 
 
 if __name__ == "__main__":
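
Two details in this hunk lean on definitions that are off-screen. First, opensearch_xml is an OpenSearch description document template filled in with str.format; note that a literal OpenSearch placeholder such as {searchTerms} would have to be written with doubled braces to survive formatting. A hypothetical minimal template of that shape (the real one in webapp.py may differ):

opensearch_xml = """<?xml version="1.0" encoding="utf-8"?>
<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/">
  <ShortName>searx</ShortName>
  <Description>Search searx</Description>
  <InputEncoding>UTF-8</InputEncoding>
  <Url type="text/html" method="{method}" template="{host}?q={{searchTerms}}"/>
</OpenSearchDescription>
"""

Second, settings is the parsed settings.yml that this changeset adds (see the file list). A sketch of the kind of loader that would produce it, assuming PyYAML and a path relative to the package:

from os.path import dirname, join, realpath

import yaml

# load settings.yml shipped alongside the package (path is an assumption)
with open(join(dirname(realpath(__file__)), 'settings.yml')) as config_file:
    settings = yaml.safe_load(config_file)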

+ 20 - 3 setup.py View File

@@ -11,12 +11,12 @@
     return open(os.path.join(os.path.dirname(__file__), *rnames)).read()
 
 
-long_description = read('README.md')
+long_description = read('README.rst')
 
 setup(
     name='searx',
-    version="0.1",
-    description="",
+    version="0.1.2",
+    description="A privacy-respecting, hackable metasearch engine",
     long_description=long_description,
     classifiers=[
         "Programming Language :: Python",
@@ -30,6 +30,7 @@
     zip_safe=False,
     install_requires=[
         'flask',
+        'flask-babel',
         'grequests',
         'lxml',
         'pyyaml',
@@ -49,4 +50,20 @@
             'zope.testrunner',
         ]
     },
+    entry_points={
+        'console_scripts': [
+            'searx-run = searx.webapp:run'
+        ]
+    },
+    package_data={
+        'searx': [
+            'settings.yml',
+            '../README.rst',
+            'static/*/*',
+            'translations/*/*',
+            'templates/*.html',
+            'templates/result_templates/*.html',
+        ],
+    },
+
 )
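
The new entry_points block is what puts a searx-run command on the user's PATH at install time: setuptools generates a wrapper script that behaves roughly like the following (a sketch of the generated behavior, not the literal file):

# rough equivalent of the console script generated for
# 'searx-run = searx.webapp:run'
import sys

from searx.webapp import run

if __name__ == '__main__':
    sys.exit(run())

Together with package_data, which ships settings.yml, the static files, and the templates inside the installed package, this lets searx start from an installed package instead of a source checkout.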

+ 1 - 0 versions.cfg View File

 [versions]
 Flask = 0.10.1
+Flask-Babel = 0.9
 Jinja2 = 2.7.2
 MarkupSafe = 0.18
 WebOb = 1.3.1