瀏覽代碼

Add support for dokuwiki engine

Guilhem Bonnefille 9 年之前
父節點
當前提交
cf09b500f3
共有 2 個檔案被更改,包括 169 行新增和 0 行刪除
  1. 83
    0
      searx/engines/doku.py
  2. 86
    0
      tests/unit/engines/test_doku.py

+ 83
- 0
searx/engines/doku.py 查看文件

@@ -0,0 +1,83 @@
1
+# Doku Wiki
2
+#
3
+# @website     https://www.dokuwiki.org/
4
+# @provide-api yes
5
+#              (https://www.dokuwiki.org/devel:xmlrpc)
6
+#
7
+# @using-api   no
8
+# @results     HTML
9
+# @stable      yes
10
+# @parse       (general)    url, title, content
11
+
12
+from urllib import urlencode
13
+from lxml.html import fromstring
14
+from searx.engines.xpath import extract_text
15
+
16
# engine dependent config
categories = ['general']  # TODO: 'images', 'music', 'videos', 'files'
paging = False
language_support = False
number_of_results = 5

# search-url
# Doku is OpenSearch compatible
# The search term is sent in the ``id`` parameter of the ``do=search``
# action.  The whole ``id=<term>`` pair is produced by urlencode() in
# request(), so the template only leaves a slot for the encoded pair.
base_url = 'http://localhost:8090'
search_url = '/?do=search'\
             '&{query}'
# TODO             '&startRecord={offset}'\
# TODO             '&maximumRecords={limit}'\


# do search-request
def request(query, params):
    """Fill in the URL of the search request.

    :param query: search terms entered by the user.
    :param params: request parameters; its ``'url'`` entry is set to
                   ``base_url`` followed by the search path, with the
                   URL-encoded query in the ``id`` parameter.
    :returns: the same ``params`` mapping, updated in place.
    """
    # Encode the term under the key the wiki expects.  Encoding it as
    # ``query=<term>`` and pasting that after ``id=`` would make the wiki
    # search for the literal text "query=<term>".
    params['url'] = base_url +\
        search_url.format(query=urlencode({'id': query}))

    return params
37
+
38
+
39
+# get response from search-request
40
def response(resp):
    """Extract search results from a DokuWiki search page.

    Two parts of the page are read:

    * quick hits (``<div class="search_quickresult">``): one ``<li>`` per
      page, reported with an empty ``content``;
    * full-text hits (``<dl class="search_results">``): a ``<dt>`` holding
      the page link, followed by a ``<dd>`` holding the excerpt.

    :param resp: response object; its ``text`` attribute is parsed as HTML.
    :returns: list of dicts with the keys ``title``, ``content`` and
              ``url`` (``base_url`` + the link found in the page).
    """
    results = []

    doc = fromstring(resp.text)

    # parse results
    # Quickhits
    for item in doc.xpath('//div[@class="search_quickresult"]/ul/li'):
        hrefs = item.xpath('.//a[@class="wikilink1"]/@href')

        # entries without a usable link cannot be turned into a result
        if not hrefs or not hrefs[-1]:
            continue

        title = extract_text(item.xpath('.//a[@class="wikilink1"]/@title'))

        # append result
        results.append({'title': title,
                        'content': "",
                        'url': base_url + hrefs[-1]})

    # Search results
    # The link and title come from a <dt>, the excerpt from the <dd> that
    # follows it.  Both are reset for every <dt> so that an excerpt is never
    # attached to a link left over from the quick hits or from an earlier,
    # unrelated entry.
    res_url = None
    title = None
    for node in doc.xpath('//dl[@class="search_results"]/*'):
        try:
            if node.tag == "dt":
                res_url = None
                title = None

                hrefs = node.xpath('.//a[@class="wikilink1"]/@href')
                if hrefs:
                    # title first: if extracting it fails, res_url stays
                    # unset and the following <dd> is skipped as well
                    title = extract_text(
                        node.xpath('.//a[@class="wikilink1"]/@title'))
                    res_url = hrefs[-1]
            elif node.tag == "dd" and res_url:
                content = extract_text(node.xpath('.'))

                # append result
                results.append({'title': title,
                                'content': content,
                                'url': base_url + res_url})
        except Exception:
            # best effort: one entry that cannot be converted to text must
            # not discard the remaining results
            continue

    # return results
    return results

+ 86
- 0
tests/unit/engines/test_doku.py 查看文件

@@ -0,0 +1,86 @@
1
+# -*- coding: utf-8 -*-
2
+from collections import defaultdict
3
+import mock
4
+from searx.engines import doku
5
+from searx.testing import SearxTestCase
6
+
7
+
8
class TestDokuEngine(SearxTestCase):
    """Unit tests for the DokuWiki engine (``searx.engines.doku``)."""

    def test_request(self):
        """request() stores a search URL containing the query in params."""
        query = 'test_query'
        dicto = defaultdict(dict)
        params = doku.request(query, dicto)
        self.assertIn('url', params)
        self.assertIn(doku.base_url, params['url'])
        self.assertIn('do=search', params['url'])
        self.assertIn(query, params['url'])

    def test_response(self):
        """response() rejects non-responses and parses both result lists."""
        # anything without a ``text`` attribute is not a response
        self.assertRaises(AttributeError, doku.response, None)
        self.assertRaises(AttributeError, doku.response, [])
        self.assertRaises(AttributeError, doku.response, '')
        self.assertRaises(AttributeError, doku.response, '[]')

        # a page without any result section
        response = mock.Mock(text='<html></html>')
        self.assertEqual(doku.response(response), [])

        # quick hits: link and title only, no excerpt
        html = u"""
        <div class="search_quickresult">
            <h3>Pages trouvées :</h3>
            <ul class="search_quickhits">
                <li> <a href="/xfconf-query" class="wikilink1" title="xfconf-query">xfconf-query</a></li>
            </ul>
            <div class="clearer"></div>
        </div>
        """
        response = mock.Mock(text=html)
        results = doku.response(response)
        self.assertEqual(results,
                         [{'content': '',
                           'title': 'xfconf-query',
                           'url': 'http://localhost:8090/xfconf-query'}])

        # full-text hits: four <dt>/<dd> pairs
        html = u"""
        <dl class="search_results">
            <dt><a href="/xvnc?s[]=query" class="wikilink1" title="xvnc">xvnc</a>: 40 Occurrences trouvées</dt>
            <dd>er = /usr/bin/Xvnc
         server_args = -inetd -<strong class="search_hit">query</strong> localhost -once -geometry 640x480 -depth 8 -Secur... er = /usr/bin/Xvnc
         server_args = -inetd -<strong class="search_hit">query</strong> localhost -once -geometry 800x600 -depth 8 -Secur... er = /usr/bin/Xvnc
         server_args = -inetd -<strong class="search_hit">query</strong> localhost -once -geometry 1024x768 -depth 8 -Secu... er = /usr/bin/Xvnc
         server_args = -inetd -<strong class="search_hit">query</strong> localhost -once -geometry 1280x1024 -depth 8 -Sec</dd>
            <dt><a href="/postfix_mysql_tls_sasl_1404?s[]=query" class="wikilink1" title="postfix_mysql_tls_sasl_1404">postfix_mysql_tls_sasl_1404</a>: 14 Occurrences trouvées</dt>
            <dd>tdepasse
  hosts = 127.0.0.1
  dbname = postfix
  <strong class="search_hit">query</strong> = SELECT goto FROM alias WHERE address='%s' AND a... tdepasse
  hosts = 127.0.0.1
  dbname = postfix
  <strong class="search_hit">query</strong> = SELECT domain FROM domain WHERE domain='%s'
  #optional <strong class="search_hit">query</strong> to use when relaying for backup MX
  #<strong class="search_hit">query</strong> = SELECT domain FROM domain WHERE domain='%s' and backupmx =</dd><dt><a href="/tutoriel/comment_creer_un_terminal_x_ou_recycler_une_vieille_machine?s[]=query" class="wikilink1" title="tutoriel:comment_creer_un_terminal_x_ou_recycler_une_vieille_machine">tutoriel:comment_creer_un_terminal_x_ou_recycler_une_vieille_machine</a>: 13 Occurrences trouvées</dt><dd>z gdm (ubuntu) tapez sudo /etc/init.d/gdm stop
X -<strong class="search_hit">query</strong> 192.168.1.2
&lt;/code&gt;
:)
Si vous désirez, sur la mê... ans une console (tjs sur le vieil ordi)
&lt;code&gt;
X -<strong class="search_hit">query</strong> 192.168.1.2 :1
&lt;/code&gt;
Un écran de login devrait ... ure.
&lt;note tip&gt;Rajouter "-once" à la commande "X -<strong class="search_hit">query</strong> 192.168.1.2 :1" permet de quitter la session et r... d'une ubuntu/kubuntu\\
Testez d'abord que le //X -<strong class="search_hit">query</strong> ...// fonctionne, dans une console (CTRL-ALT-F1) </dd>
          <dt><a href="/bind9?s[]=query" class="wikilink1" title="bind9">bind9</a>: 12 Occurrences trouvées</dt>
          <dd>  printcmd
;; Got answer:
;; -&gt;&gt;HEADER&lt;&lt;- opcode: <strong class="search_hit">QUERY</strong>, status: NOERROR, id: 13427
;; flags: qr aa rd ra; <strong class="search_hit">QUERY</strong>: 1, ANSWER: 1, AUTHORITY: 1, ADDITIONAL: 1

[...]

;; <strong class="search_hit">Query</strong> time: 1 msec
;; SERVER: 127.0.0.1#53(127.0.0.1)
;... ne énorme diminution du temps mis par la requête (<strong class="search_hit">Query</strong> time) , entre la première et la deuxième requête.</dd>
        </dl>
        """
        response = mock.Mock(text=html)
        results = doku.response(response)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 4)
        self.assertEqual(results[0]['title'], 'xvnc')
        # the link of the <dt> is appended to the engine's base URL
        self.assertEqual(results[0]['url'],
                         'http://localhost:8090/xvnc?s[]=query')
        # the excerpt of the <dd> ends up in the content
        self.assertIn('/usr/bin/Xvnc', results[0]['content'])
        self.assertEqual(results[3]['title'], 'bind9')