浏览代码

[mod] add a search_one_request_safe function wrapper to call search_one_request. All exceptions are caught in this wrapper.

Alexandre Flament 8 年前
父节点
当前提交
0a2fde19d0
共有 1 个文件被更改,包括 90 次插入81 次删除
  1. 90
    81
      searx/search.py

+ 90
- 81
searx/search.py 查看文件

@@ -20,6 +20,7 @@ import threading
20 20
 from thread import start_new_thread
21 21
 from time import time
22 22
 from uuid import uuid4
23
+import requests.exceptions
23 24
 import searx.poolrequests as requests_lib
24 25
 from searx.engines import (
25 26
     categories, engines
@@ -37,109 +38,117 @@ number_of_searches = 0
37 38
 
38 39
 
39 40
 def send_http_request(engine, request_params, timeout_limit):
40
-    response = None
41
-    try:
42
-        # create dictionary which contain all
43
-        # informations about the request
44
-        request_args = dict(
45
-            headers=request_params['headers'],
46
-            cookies=request_params['cookies'],
47
-            timeout=timeout_limit,
48
-            verify=request_params['verify']
49
-        )
50
-        # specific type of request (GET or POST)
51
-        if request_params['method'] == 'GET':
52
-            req = requests_lib.get
53
-        else:
54
-            req = requests_lib.post
55
-            request_args['data'] = request_params['data']
56
-
57
-        # for page_load_time stats
58
-        time_before_request = time()
59
-
60
-        # send the request
61
-        response = req(request_params['url'], **request_args)
62
-
63
-        with threading.RLock():
64
-            # no error : reset the suspend variables
65
-            engine.continuous_errors = 0
66
-            engine.suspend_end_time = 0
67
-            # update stats with current page-load-time
68
-            # only the HTTP request
69
-            engine.stats['page_load_time'] += time() - time_before_request
70
-            engine.stats['page_load_count'] += 1
71
-
72
-        # is there a timeout (no parsing in this case)
73
-        timeout_overhead = 0.2  # seconds
74
-        search_duration = time() - request_params['started']
75
-        if search_duration > timeout_limit + timeout_overhead:
76
-            logger.exception('engine timeout on HTTP request:'
77
-                             '{0} (search duration : {1} ms, time-out: {2} )'
78
-                             .format(engine.name, search_duration, timeout_limit))
79
-            with threading.RLock():
80
-                engine.stats['errors'] += 1
81
-            return False
41
+    # for page_load_time stats
42
+    time_before_request = time()
43
+
44
+    # create dictionary which contain all
45
+    # informations about the request
46
+    request_args = dict(
47
+        headers=request_params['headers'],
48
+        cookies=request_params['cookies'],
49
+        timeout=timeout_limit,
50
+        verify=request_params['verify']
51
+    )
52
+
53
+    # specific type of request (GET or POST)
54
+    if request_params['method'] == 'GET':
55
+        req = requests_lib.get
56
+    else:
57
+        req = requests_lib.post
58
+        request_args['data'] = request_params['data']
82 59
 
83
-        # everything is ok : return the response
84
-        return response
60
+    # send the request
61
+    response = req(request_params['url'], **request_args)
85 62
 
86
-    except:
87
-        # increase errors stats
88
-        with threading.RLock():
89
-            engine.stats['errors'] += 1
90
-            engine.continuous_errors += 1
91
-            engine.suspend_end_time = time() + min(60, engine.continuous_errors)
63
+    # is there a timeout (no parsing in this case)
64
+    timeout_overhead = 0.2  # seconds
65
+    search_duration = time() - request_params['started']
66
+    if search_duration > timeout_limit + timeout_overhead:
67
+        raise Timeout(response=response)
92 68
 
93
-        # print engine name and specific error message
94
-        logger.exception('engine crash: {0}'.format(engine.name))
95
-        return False
69
+    with threading.RLock():
70
+        # no error : reset the suspend variables
71
+        engine.continuous_errors = 0
72
+        engine.suspend_end_time = 0
73
+        # update stats with current page-load-time
74
+        # only the HTTP request
75
+        engine.stats['page_load_time'] += time() - time_before_request
76
+        engine.stats['page_load_count'] += 1
96 77
 
78
+    # everything is ok : return the response
79
+    return response
97 80
 
98
def search_one_request(engine, query, request_params, timeout_limit):
    """Run one engine request: build it, send it, and parse the answer.

    Returns the list of parsed results, or an empty list when the
    engine produced no URL to fetch.  Any exception (HTTP error,
    timeout, parser crash) propagates to the caller.
    """
    # let the engine fill in its specific request parameters
    # (implemented per engine in the searx/engines folder)
    engine.request(query, request_params)

    # nothing to fetch: covers both None and the empty string
    if not request_params['url']:
        return []

    # perform the HTTP request
    response = send_http_request(engine, request_params, timeout_limit)

    # attach the request parameters, then hand the response
    # to the engine's parser
    response.search_params = request_params
    return engine.response(response)
100
+
101
+
102
def search_one_request_safe(engine_name, query, request_params, result_container, timeout_limit):
    """Wrapper around ``search_one_request`` that catches every exception.

    Intended as a thread target: stores the results in
    *result_container*, updates the engine statistics, and returns
    True on success, False on any error.  Never raises.
    """
    start_time = time()
    engine = engines[engine_name]

    try:
        # send requests and parse the results
        search_results = search_one_request(engine, query, request_params, timeout_limit)

        # add results
        for result in search_results:
            result['engine'] = engine_name
        result_container.extend(engine_name, search_results)

        # update engine time when there is no exception
        with threading.RLock():
            engine.stats['engine_time'] += time() - start_time
            engine.stats['engine_time_count'] += 1

        return True

    except Exception as e:
        # keep the error counter under the same lock discipline
        # as every other stats update in this module
        with threading.RLock():
            engine.stats['errors'] += 1

        search_duration = time() - start_time
        requests_exception = False

        # Timeout is a subclass of RequestException, so these branches
        # MUST be mutually exclusive (if/elif) — otherwise a timeout
        # would be logged twice
        if isinstance(e, requests.exceptions.Timeout):
            # requests timeout (connect or read)
            logger.error("engine {0} : HTTP requests timeout "
                         "(search duration : {1} s, timeout: {2} s) : {3}"
                         .format(engine_name, search_duration, timeout_limit, e.__class__.__name__))
            requests_exception = True
        elif isinstance(e, requests.exceptions.RequestException):
            # other requests exception
            logger.exception("engine {0} : requests exception "
                             "(search duration : {1} s, timeout: {2} s) : {3}"
                             .format(engine_name, search_duration, timeout_limit, e))
            requests_exception = True
        else:
            # other errors (usually a crash in the engine's parser)
            logger.exception('engine {0} : exception : {1}'.format(engine_name, e))

        # suspend the engine for a while, but only after
        # network-level (requests) errors
        if requests_exception:
            with threading.RLock():
                engine.continuous_errors += 1
                engine.suspend_end_time = time() + min(60, engine.continuous_errors)

        return False
143 152
 
144 153
 
145 154
 def search_multiple_requests(requests, result_container, timeout_limit):
@@ -148,7 +157,7 @@ def search_multiple_requests(requests, result_container, timeout_limit):
148 157
 
149 158
     for engine_name, query, request_params in requests:
150 159
         th = threading.Thread(
151
-            target=search_one_request,
160
+            target=search_one_request_safe,
152 161
             args=(engine_name, query, request_params, result_container, timeout_limit),
153 162
             name=search_id,
154 163
         )