__init__.py 7.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. '''
  2. searx is free software: you can redistribute it and/or modify
  3. it under the terms of the GNU Affero General Public License as published by
  4. the Free Software Foundation, either version 3 of the License, or
  5. (at your option) any later version.
  6. searx is distributed in the hope that it will be useful,
  7. but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  9. GNU Affero General Public License for more details.
  10. You should have received a copy of the GNU Affero General Public License
  11. along with searx. If not, see < http://www.gnu.org/licenses/ >.
  12. (C) 2013- by Adam Tauber, <asciimoo@gmail.com>
  13. '''
  14. from os.path import realpath, dirname
  15. import sys
  16. from flask_babel import gettext
  17. from operator import itemgetter
  18. from json import loads
  19. from requests import get
  20. from searx import settings
  21. from searx import logger
  22. from searx.utils import load_module
  23. logger = logger.getChild('engines')
  24. engine_dir = dirname(realpath(__file__))
  25. engines = {}
  26. categories = {'general': []}
  27. _initialized = False
  28. engine_shortcuts = {}
  29. engine_default_args = {'paging': False,
  30. 'categories': ['general'],
  31. 'language_support': True,
  32. 'supported_languages': [],
  33. 'safesearch': False,
  34. 'timeout': settings['outgoing']['request_timeout'],
  35. 'shortcut': '-',
  36. 'disabled': False,
  37. 'suspend_end_time': 0,
  38. 'continuous_errors': 0,
  39. 'time_range_support': False}
  40. def load_engine(engine_data):
  41. if '_' in engine_data['name']:
  42. logger.error('Engine name conains underscore: "{}"'.format(engine_data['name']))
  43. sys.exit(1)
  44. engine_module = engine_data['engine']
  45. try:
  46. engine = load_module(engine_module + '.py', engine_dir)
  47. except:
  48. logger.exception('Cannot load engine "{}"'.format(engine_module))
  49. return None
  50. for param_name in engine_data:
  51. if param_name == 'engine':
  52. continue
  53. if param_name == 'categories':
  54. if engine_data['categories'] == 'none':
  55. engine.categories = []
  56. else:
  57. engine.categories = map(
  58. str.strip, engine_data['categories'].split(','))
  59. continue
  60. setattr(engine, param_name, engine_data[param_name])
  61. for arg_name, arg_value in engine_default_args.iteritems():
  62. if not hasattr(engine, arg_name):
  63. setattr(engine, arg_name, arg_value)
  64. # checking required variables
  65. for engine_attr in dir(engine):
  66. if engine_attr.startswith('_'):
  67. continue
  68. if getattr(engine, engine_attr) is None:
  69. logger.error('Missing engine config attribute: "{0}.{1}"'
  70. .format(engine.name, engine_attr))
  71. sys.exit(1)
  72. # assign supported languages from json file
  73. if engine_data['name'] in languages:
  74. setattr(engine, 'supported_languages', languages[engine_data['name']])
  75. # assign language fetching method if auxiliary method exists
  76. if hasattr(engine, '_fetch_supported_languages'):
  77. setattr(engine, 'fetch_supported_languages',
  78. lambda: engine._fetch_supported_languages(get(engine.supported_languages_url)))
  79. engine.stats = {
  80. 'result_count': 0,
  81. 'search_count': 0,
  82. 'page_load_time': 0,
  83. 'page_load_count': 0,
  84. 'engine_time': 0,
  85. 'engine_time_count': 0,
  86. 'score_count': 0,
  87. 'errors': 0
  88. }
  89. for category_name in engine.categories:
  90. categories.setdefault(category_name, []).append(engine)
  91. if engine.shortcut in engine_shortcuts:
  92. logger.error('Engine config error: ambigious shortcut: {0}'.format(engine.shortcut))
  93. sys.exit(1)
  94. engine_shortcuts[engine.shortcut] = engine.name
  95. return engine
  96. def to_percentage(stats, maxvalue):
  97. for engine_stat in stats:
  98. if maxvalue:
  99. engine_stat['percentage'] = int(engine_stat['avg'] / maxvalue * 100)
  100. else:
  101. engine_stat['percentage'] = 0
  102. return stats
  103. def get_engines_stats():
  104. # TODO refactor
  105. pageloads = []
  106. engine_times = []
  107. results = []
  108. scores = []
  109. errors = []
  110. scores_per_result = []
  111. max_pageload = max_engine_times = max_results = max_score = max_errors = max_score_per_result = 0 # noqa
  112. for engine in engines.values():
  113. if engine.stats['search_count'] == 0:
  114. continue
  115. results_num = \
  116. engine.stats['result_count'] / float(engine.stats['search_count'])
  117. if engine.stats['page_load_count'] != 0:
  118. load_times = engine.stats['page_load_time'] / float(engine.stats['page_load_count']) # noqa
  119. else:
  120. load_times = 0
  121. if engine.stats['engine_time_count'] != 0:
  122. this_engine_time = engine.stats['engine_time'] / float(engine.stats['engine_time_count']) # noqa
  123. else:
  124. this_engine_time = 0
  125. if results_num:
  126. score = engine.stats['score_count'] / float(engine.stats['search_count']) # noqa
  127. score_per_result = score / results_num
  128. else:
  129. score = score_per_result = 0.0
  130. max_pageload = max(load_times, max_pageload)
  131. max_engine_times = max(this_engine_time, max_engine_times)
  132. max_results = max(results_num, max_results)
  133. max_score = max(score, max_score)
  134. max_score_per_result = max(score_per_result, max_score_per_result)
  135. max_errors = max(max_errors, engine.stats['errors'])
  136. pageloads.append({'avg': load_times, 'name': engine.name})
  137. engine_times.append({'avg': this_engine_time, 'name': engine.name})
  138. results.append({'avg': results_num, 'name': engine.name})
  139. scores.append({'avg': score, 'name': engine.name})
  140. errors.append({'avg': engine.stats['errors'], 'name': engine.name})
  141. scores_per_result.append({
  142. 'avg': score_per_result,
  143. 'name': engine.name
  144. })
  145. pageloads = to_percentage(pageloads, max_pageload)
  146. engine_times = to_percentage(engine_times, max_engine_times)
  147. results = to_percentage(results, max_results)
  148. scores = to_percentage(scores, max_score)
  149. scores_per_result = to_percentage(scores_per_result, max_score_per_result)
  150. erros = to_percentage(errors, max_errors)
  151. return [
  152. (
  153. gettext('Engine time (sec)'),
  154. sorted(engine_times, key=itemgetter('avg'))
  155. ),
  156. (
  157. gettext('Page loads (sec)'),
  158. sorted(pageloads, key=itemgetter('avg'))
  159. ),
  160. (
  161. gettext('Number of results'),
  162. sorted(results, key=itemgetter('avg'), reverse=True)
  163. ),
  164. (
  165. gettext('Scores'),
  166. sorted(scores, key=itemgetter('avg'), reverse=True)
  167. ),
  168. (
  169. gettext('Scores per result'),
  170. sorted(scores_per_result, key=itemgetter('avg'), reverse=True)
  171. ),
  172. (
  173. gettext('Errors'),
  174. sorted(errors, key=itemgetter('avg'), reverse=True)
  175. ),
  176. ]
  177. if 'engines' not in settings or not settings['engines']:
  178. logger.error('No engines found. Edit your settings.yml')
  179. exit(2)
  180. languages = loads(open(engine_dir + '/../data/engines_languages.json').read())
  181. for engine_data in settings['engines']:
  182. engine = load_engine(engine_data)
  183. if engine is not None:
  184. engines[engine.name] = engine