Browse Source

update generalfile engine and add comments

Thomas Pointhuber 10 years ago
parent
commit
c5d83059d5
2 changed files with 28 additions and 4 deletions
  1. 28
    3
      searx/engines/generalfile.py
  2. 0
    1
      searx/settings.yml

+ 28
- 3
searx/engines/generalfile.py View File

1
+## General Files (Files)
2
+# 
3
+# @website     http://www.general-files.org
4
+# @provide-api no (nothing found)
5
+# 
6
+# @using-api   no (because nothing found)
7
+# @results     HTML (using search portal)
8
+# @stable      no (HTML can change)
9
+# @parse       url, title, content
10
+#
11
+# @todo        detect torrents?
12
+
1
 from lxml import html
13
 from lxml import html
2
 
14
 
15
+# engine dependent config
16
+categories = ['files']
17
+paging = True
3
 
18
 
19
+# search-url
4
 base_url = 'http://www.general-file.com'
20
 base_url = 'http://www.general-file.com'
5
 search_url = base_url + '/files-{letter}/{query}/{pageno}'
21
 search_url = base_url + '/files-{letter}/{query}/{pageno}'
6
 
22
 
23
+# specific xpath variables
7
 result_xpath = '//table[@class="block-file"]'
24
 result_xpath = '//table[@class="block-file"]'
8
 title_xpath = './/h2/a//text()'
25
 title_xpath = './/h2/a//text()'
9
 url_xpath = './/h2/a/@href'
26
 url_xpath = './/h2/a/@href'
10
 content_xpath = './/p//text()'
27
 content_xpath = './/p//text()'
11
 
28
 
12
-paging = True
13
-
14
 
29
 
30
+# do search-request
15
 def request(query, params):
31
 def request(query, params):
32
+
16
     params['url'] = search_url.format(query=query,
33
     params['url'] = search_url.format(query=query,
17
                                       letter=query[0],
34
                                       letter=query[0],
18
                                       pageno=params['pageno'])
35
                                       pageno=params['pageno'])
36
+
19
     return params
37
     return params
20
 
38
 
21
 
39
 
40
+# get response from search-request
22
 def response(resp):
41
 def response(resp):
23
-
24
     results = []
42
     results = []
43
+
25
     dom = html.fromstring(resp.text)
44
     dom = html.fromstring(resp.text)
45
+
46
+    # parse results
26
     for result in dom.xpath(result_xpath):
47
     for result in dom.xpath(result_xpath):
27
         url = result.xpath(url_xpath)[0]
48
         url = result.xpath(url_xpath)[0]
49
+
28
         # skip fast download links
50
         # skip fast download links
29
         if not url.startswith('/'):
51
         if not url.startswith('/'):
30
             continue
52
             continue
53
+
54
+        # append result
31
         results.append({'url': base_url + url,
55
         results.append({'url': base_url + url,
32
                         'title': ''.join(result.xpath(title_xpath)),
56
                         'title': ''.join(result.xpath(title_xpath)),
33
                         'content': ''.join(result.xpath(content_xpath))})
57
                         'content': ''.join(result.xpath(content_xpath))})
34
 
58
 
59
+    # return results
35
     return results
60
     return results

+ 0
- 1
searx/settings.yml View File

62
 
62
 
63
   - name : general-file
63
   - name : general-file
64
     engine : generalfile
64
     engine : generalfile
65
-    categories : files
66
     shortcut : gf
65
     shortcut : gf
67
 
66
 
68
   - name : github
67
   - name : github