Procházet zdrojové kódy

Merge pull request #170 from pointhi/little_fixes

Little fixes, add searx-autocompletion, more information about torrents
Adam Tauber před 10 roky
rodič
revize
8ef709ea1f

+ 79
- 0
searx/autocomplete.py Zobrazit soubor

@@ -20,6 +20,85 @@ from lxml import etree
20 20
 from requests import get
21 21
 from json import loads
22 22
 from urllib import urlencode
23
+from searx.languages import language_codes
24
+from searx.engines import (
25
+    categories, engines, engine_shortcuts
26
+)
27
+
28
+
29
def searx_bang(full_query):
    '''Create searx-specific autocompleter results for a ``!`` or ``:`` query.

    ``full_query`` must provide ``getSearchQuery()`` (the text to complete)
    and ``query_parts`` (parts already present in the query).

    A search query starting with ``!`` is matched by prefix against the
    category names, engine names and engine shortcuts; one starting with
    ``:`` is matched against the language ids, language names and countries
    of ``language_codes``. A lone ``!`` or ``:`` yields a few fixed examples.

    Returns a list of suggestion strings without duplicates and without
    entries already contained in ``full_query.query_parts``. Suggestions are
    returned in the order they were generated, so the output is stable
    between calls. An empty search query, or one that starts with neither
    ``!`` nor ``:``, yields an empty list.
    '''
    # fetch the query text once instead of re-evaluating it for every check
    search_query = full_query.getSearchQuery()

    # check if there is a query which can be parsed
    if not search_query:
        return []

    prefix = search_query[0]
    # everything after the leading '!' or ':'
    partial = search_query[1:]

    results = []

    # check if current query starts with !bang
    if prefix == '!':
        if not partial:
            # show some example queries
            # TODO, check if engine is not available
            results.extend(("!images", "!wikipedia", "!osm"))
        else:
            # engine names may contain spaces, which are written as '_'
            # inside a bang
            spaced_partial = partial.replace('_', ' ')

            # check if query starts with category name
            results.extend('!{0}'.format(category)
                           for category in categories
                           if category.startswith(partial))

            # check if query starts with engine name
            results.extend('!{0}'.format(engine.replace(' ', '_'))
                           for engine in engines
                           if engine.startswith(spaced_partial))

            # check if query starts with engine shortcut
            results.extend('!{0}'.format(engine_shortcut)
                           for engine_shortcut in engine_shortcuts
                           if engine_shortcut.startswith(partial))

    # check if current query starts with :bang
    elif prefix == ':':
        if not partial:
            # show some example queries
            results.extend((":en", ":en_us", ":english", ":united_kingdom"))
        else:
            # country names may contain spaces, which are written as '_'
            # inside a bang
            spaced_partial = partial.replace('_', ' ')

            for lc in language_codes:
                # call .lower() on each field instead of mapping str.lower,
                # so unicode fields do not raise a TypeError on Python 2
                lang_id, lang_name, country = [field.lower() for field in lc]

                # check if query starts with language-id
                if lang_id.startswith(partial):
                    if len(partial) <= 2:
                        # short input: only suggest the bare language part
                        results.append(':{0}'.format(lang_id.split('_')[0]))
                    else:
                        results.append(':{0}'.format(lang_id))

                # check if query starts with language name
                if lang_name.startswith(partial):
                    results.append(':{0}'.format(lang_name))

                # check if query starts with country
                if country.startswith(spaced_partial):
                    results.append(
                        ':{0}'.format(country.replace(' ', '_')))

    # remove duplicates and results which are already contained in the
    # query, keeping the order in which the suggestions were generated
    seen = set(full_query.query_parts)
    unique_results = []
    for suggestion in results:
        if suggestion not in seen:
            seen.add(suggestion)
            unique_results.append(suggestion)

    return unique_results
23 102
 
24 103
 
25 104
 def dbpedia(query):

+ 31
- 0
searx/engines/kickass.py Zobrazit soubor

@@ -24,6 +24,7 @@ search_url = url + 'search/{search_term}/{pageno}/'
24 24
 
25 25
 # specific xpath variables
26 26
 magnet_xpath = './/a[@title="Torrent magnet link"]'
27
+torrent_xpath = './/a[@title="Download torrent file"]'
27 28
 content_xpath = './/span[@class="font11px lightgrey block"]'
28 29
 
29 30
 
@@ -60,6 +61,9 @@ def response(resp):
60 61
                                        method="text"))
61 62
         seed = result.xpath('.//td[contains(@class, "green")]/text()')[0]
62 63
         leech = result.xpath('.//td[contains(@class, "red")]/text()')[0]
64
+        filesize = result.xpath('.//td[contains(@class, "nobr")]/text()')[0]
65
+        filesize_multiplier = result.xpath('.//td[contains(@class, "nobr")]//span/text()')[0]
66
+        files = result.xpath('.//td[contains(@class, "center")][2]/text()')[0]
63 67
 
64 68
         # convert seed to int if possible
65 69
         if seed.isdigit():
@@ -73,15 +77,42 @@ def response(resp):
73 77
         else:
74 78
             leech = 0
75 79
 
80
+        # convert filesize to byte if possible
81
+        try:
82
+            filesize = float(filesize)
83
+
84
+            # convert filesize to byte
85
+            if filesize_multiplier == 'TB':
86
+                filesize = int(filesize * 1024 * 1024 * 1024 * 1024)
87
+            elif filesize_multiplier == 'GB':
88
+                filesize = int(filesize * 1024 * 1024 * 1024)
89
+            elif filesize_multiplier == 'MB':
90
+                filesize = int(filesize * 1024 * 1024)
91
+            elif filesize_multiplier == 'kb':
92
+                filesize = int(filesize * 1024)
93
+        except:
94
+            filesize = None
95
+
96
+        # convert files to int if possible
97
+        if files.isdigit():
98
+            files = int(files)
99
+        else:
100
+            files = None
101
+
76 102
         magnetlink = result.xpath(magnet_xpath)[0].attrib['href']
77 103
 
104
+        torrentfile = result.xpath(torrent_xpath)[0].attrib['href']
105
+
78 106
         # append result
79 107
         results.append({'url': href,
80 108
                         'title': title,
81 109
                         'content': content,
82 110
                         'seed': seed,
83 111
                         'leech': leech,
112
+                        'filesize': filesize,
113
+                        'files': files,
84 114
                         'magnetlink': magnetlink,
115
+                        'torrentfile': torrentfile,
85 116
                         'template': 'torrent.html'})
86 117
 
87 118
     # return results sorted by seeder

+ 1
- 1
searx/query.py Zobrazit soubor

@@ -77,7 +77,7 @@ class Query(object):
77 77
                     if lang == lang_id\
78 78
                        or lang_id.startswith(lang)\
79 79
                        or lang == lang_name\
80
-                       or lang == country:
80
+                       or lang.replace('_', ' ') == country:
81 81
                         parse_next = True
82 82
                         self.languages.append(lang)
83 83
                         break

+ 1
- 0
searx/templates/oscar/base.html Zobrazit soubor

@@ -7,6 +7,7 @@
7 7
     <meta http-equiv="X-UA-Compatible" content="IE=edge">
8 8
     <meta name="generator" content="searx/{{ searx_version }}">
9 9
     <meta name="viewport" content="width=device-width, initial-scale=1 , maximum-scale=1.0, user-scalable=1" />
10
+    {% block meta %}{% endblock %}
10 11
     <title>{% block title %}{% endblock %}searx</title>
11 12
     
12 13
     <link rel="stylesheet" href="{{ url_for('static', filename='css/bootstrap.min.css') }}" type="text/css" />

+ 1
- 3
searx/templates/oscar/result_templates/default.html Zobrazit soubor

@@ -11,9 +11,7 @@
11 11
 
12 12
 {% if result.embedded %}
13 13
 <div id="result-media-{{ index }}" class="collapse">
14
-{% autoescape false %}
15
-   {{ result.embedded }}
16
-{% endautoescape %}
14
+   {{ result.embedded|safe }}
17 15
 </div>
18 16
 {% endif %}
19 17
 

+ 14
- 3
searx/templates/oscar/result_templates/torrent.html Zobrazit soubor

@@ -5,9 +5,20 @@
5 5
 {% if result.publishedDate %}<time class="text-muted" datetime="{{ result.pubdate }}" >{{ result.publishedDate }}</time>{% endif %}
6 6
 <small><a class="text-info" href="https://web.archive.org/web/{{ result.url }}">{{ icon('link') }} {{ _('cached') }}</a></small>
7 7
 
8
-<p class="result-content">{{ icon('transfer') }} {{ _('Seeder') }} <span class="badge">{{ result.seed }}</span>, {{ _('Leecher') }} <span class="badge">{{ result.leech }}</span>
9
-<br/>
10
-<a href="{{ result.magnetlink }}" class="magnetlink">{{ icon('magnet') }} magnet link</a></p>
8
+<p class="result-content">{{ icon('transfer') }} {{ _('Seeder') }} <span class="badge">{{ result.seed }}</span> &bull; {{ _('Leecher') }} <span class="badge">{{ result.leech }}</span>
9
+{% if result.filesize %}</br>{{ icon('floppy-disk') }} {{ _('Filesize') }} 
10
+    <span class="badge">
11
+        {% if result.filesize < 1024 %}{{ result.filesize }} Byte
12
+        {% elif result.filesize < 1024*1024 %}{{ '{0:0.2f}'.format(result.filesize/1024) }} kb
13
+        {% elif result.filesize < 1024*1024*1024 %}{{ '{0:0.2f}'.format(result.filesize/1024/1024) }} MB
14
+        {% elif result.filesize < 1024*1024*1024*1024 %}{{ '{0:0.2f}'.format(result.filesize/1024/1024/1024) }} GB
15
+        {% else %}{{ '{0:0.2f}'.format(result.filesize/1024/1024/1024/1024) }} TB{% endif %}
16
+    </span>{% endif %}
17
+{% if result.files %}</br>{{ icon('file') }} {{ _('Number of Files') }} <span class="badge">{{ result.files }}</span>{% endif %}</p>
18
+<p class="result-content">
19
+    <a href="{{ result.magnetlink }}" class="magnetlink">{{ icon('magnet') }} {{ _('magnet link') }}</a>
20
+    {% if result.torrentfile %}</br><a href="{{ result.torrentfile }}" class="torrentfile">{{ icon('download-alt') }} {{ _('torrent file') }}</a>{% endif %}
21
+</p>
11 22
 
12 23
 {% if result.content %}<p class="result-content">{{ result.content|safe }}</p>{% endif %}
13 24
 

+ 1
- 3
searx/templates/oscar/result_templates/videos.html Zobrazit soubor

@@ -11,9 +11,7 @@
11 11
 
12 12
 {% if result.embedded %}
13 13
 <div id="result-video-{{ index }}" class="collapse">
14
-{% autoescape false %}
15
-   {{ result.embedded }}
16
-{% endautoescape %}
14
+   {{ result.embedded|safe }}
17 15
 </div>
18 16
 {% endif %}
19 17
 

+ 1
- 0
searx/templates/oscar/results.html Zobrazit soubor

@@ -1,5 +1,6 @@
1 1
 {% extends "oscar/base.html" %}
2 2
 {% block title %}{{ q }} - {% endblock %}
3
+{% block meta %}<link rel="alternate" type="application/rss+xml" title="Searx search: {{ q }}" href="{{ url_for('index') }}?q={{ q }}&format=rss&{% for category in selected_categories %}category_{{ category }}=1&{% endfor %}pageno={{ pageno+1 }}">{% endblock %}
3 4
 {% block content %}
4 5
     <div class="row">
5 6
         <div class="col-sm-8" id="main_results">

+ 8
- 3
searx/webapp.py Zobrazit soubor

@@ -46,7 +46,7 @@ from searx.languages import language_codes
46 46
 from searx.https_rewrite import https_url_rewrite
47 47
 from searx.search import Search
48 48
 from searx.query import Query
49
-from searx.autocomplete import backends as autocomplete_backends
49
+from searx.autocomplete import searx_bang, backends as autocomplete_backends
50 50
 from searx import logger
51 51
 
52 52
 
@@ -352,8 +352,13 @@ def autocompleter():
352 352
     if not completer:
353 353
         return '', 400
354 354
 
355
-    # run autocompletion
356
-    raw_results = completer(query.getSearchQuery())
355
+    # parse searx specific autocompleter results like !bang
356
+    raw_results = searx_bang(query)
357
+
358
+    # normal autocompletion results are only added if at most 3 searx-specific results were returned
359
+    if len(raw_results) <= 3:
360
+        # run autocompletion
361
+        raw_results.extend(completer(query.getSearchQuery()))
357 362
 
358 363
     # parse results (write :language and !engine back to result string)
359 364
     results = []