Browse Source

fix stackoverflow and add comments

Thomas Pointhuber 10 years ago
parent
commit
a46bbb4042
2 changed files with 35 additions and 8 deletions
  1. 35
    7
      searx/engines/stackoverflow.py
  2. 0
    1
      searx/settings.yml

+ 35
- 7
searx/engines/stackoverflow.py View File

1
+## Stack Overflow (IT)
2
+# 
3
+# @website     https://stackoverflow.com/
4
+# @provide-api not clear (https://api.stackexchange.com/docs/advanced-search)
5
+# 
6
+# @using-api   no
7
+# @results     HTML
8
+# @stable      no (HTML can change)
9
+# @parse       url, title, content
10
+
1
 from urlparse import urljoin
11
 from urlparse import urljoin
2
 from cgi import escape
12
 from cgi import escape
3
 from urllib import urlencode
13
 from urllib import urlencode
4
 from lxml import html
14
 from lxml import html
5
 
15
 
16
+# engine dependent config
6
 categories = ['it']
17
 categories = ['it']
18
+paging = True
7
 
19
 
20
+# search-url
8
 url = 'http://stackoverflow.com/'
21
 url = 'http://stackoverflow.com/'
9
 search_url = url+'search?{query}&page={pageno}'
22
 search_url = url+'search?{query}&page={pageno}'
10
-result_xpath = './/div[@class="excerpt"]//text()'
11
 
23
 
12
-paging = True
24
+# specific xpath variables
25
+results_xpath = '//div[contains(@class,"question-summary")]'
26
+link_xpath = './/div[@class="result-link"]//a|.//div[@class="summary"]//h3//a'
27
+title_xpath = './/text()'
28
+content_xpath = './/div[@class="excerpt"]//text()'
13
 
29
 
14
 
30
 
31
+# do search-request
15
 def request(query, params):
32
 def request(query, params):
16
     params['url'] = search_url.format(query=urlencode({'q': query}),
33
     params['url'] = search_url.format(query=urlencode({'q': query}),
17
                                       pageno=params['pageno'])
34
                                       pageno=params['pageno'])
35
+
18
     return params
36
     return params
19
 
37
 
20
 
38
 
39
+# get response from search-request
21
 def response(resp):
40
 def response(resp):
22
     results = []
41
     results = []
42
+
23
     dom = html.fromstring(resp.text)
43
     dom = html.fromstring(resp.text)
24
-    for result in dom.xpath('//div[@class="question-summary search-result"]'):
25
-        link = result.xpath('.//div[@class="result-link"]//a')[0]
44
+
45
+    # parse results
46
+    for result in dom.xpath(results_xpath):
47
+        link = result.xpath(link_xpath)[0]
26
         href = urljoin(url, link.attrib.get('href'))
48
         href = urljoin(url, link.attrib.get('href'))
27
-        title = escape(' '.join(link.xpath('.//text()')))
28
-        content = escape(' '.join(result.xpath(result_xpath)))
29
-        results.append({'url': href, 'title': title, 'content': content})
49
+        title = escape(' '.join(link.xpath(title_xpath)))
50
+        content = escape(' '.join(result.xpath(content_xpath)))
51
+
52
+        # append result
53
+        results.append({'url': href, 
54
+                        'title': title, 
55
+                        'content': content})
56
+
57
+    # return results
30
     return results
58
     return results

+ 0
- 1
searx/settings.yml View File

90
 
90
 
91
   - name : stackoverflow
91
   - name : stackoverflow
92
     engine : stackoverflow
92
     engine : stackoverflow
93
-    categories : it
94
     shortcut : st
93
     shortcut : st
95
 
94
 
96
   - name : startpage
95
   - name : startpage