Merge branch 'master' into nyaa

misnyo committed 7 years ago (commit c3232b0e1a)

searx/engines/generalfile.py  (+0, −62, file removed)

@@ -1,62 +0,0 @@
-"""
- General Files (Files)
-
- @website     http://www.general-files.org
- @provide-api no (nothing found)
-
- @using-api   no (because nothing found)
- @results     HTML (using search portal)
- @stable      no (HTML can change)
- @parse       url, title, content
-
- @todo        detect torrents?
-"""
-
-from lxml import html
-
-# engine dependent config
-categories = ['files']
-paging = True
-
-# search-url
-base_url = 'http://www.general-file.com'
-search_url = base_url + '/files-{letter}/{query}/{pageno}'
-
-# specific xpath variables
-result_xpath = '//table[@class="block-file"]'
-title_xpath = './/h2/a//text()'
-url_xpath = './/h2/a/@href'
-content_xpath = './/p//text()'
-
-
-# do search-request
-def request(query, params):
-
-    params['url'] = search_url.format(query=query,
-                                      letter=query[0],
-                                      pageno=params['pageno'])
-
-    return params
-
-
-# get response from search-request
-def response(resp):
-    results = []
-
-    dom = html.fromstring(resp.text)
-
-    # parse results
-    for result in dom.xpath(result_xpath):
-        url = result.xpath(url_xpath)[0]
-
-        # skip fast download links
-        if not url.startswith('/'):
-            continue
-
-        # append result
-        results.append({'url': base_url + url,
-                        'title': ''.join(result.xpath(title_xpath)),
-                        'content': ''.join(result.xpath(content_xpath))})
-
-    # return results
-    return results

searx/engines/gigablast.py  (+6, −2)

@@ -10,6 +10,7 @@
  @parse       url, title, content
 """
 
+import random
 from json import loads
 from time import time
 from lxml.html import fromstring
@@ -32,7 +33,8 @@
     '&qh=0'\
     '&qlang={lang}'\
     '&ff={safesearch}'\
-    '&rxikd={rxikd}'  # random number - 9 digits
+    '&rxieu={rxieu}'\
+    '&rand={rxikd}'  # current unix timestamp
 
 # specific xpath variables
 results_xpath = '//response//result'
@@ -59,9 +61,11 @@
     else:
         safesearch = 0
 
+    # rxieu is some kind of hash from the search query, but accepts random atm
     search_path = search_string.format(query=urlencode({'q': query}),
                                        offset=offset,
                                        number_of_results=number_of_results,
-                                       rxikd=str(time())[:9],
+                                       rxikd=int(time() * 1000),
+                                       rxieu=random.randint(1000000000, 9999999999),
                                        lang=language,
                                        safesearch=safesearch)

searx/engines/google_news.py  (+2, −2)

@@ -67,8 +67,8 @@
     for result in dom.xpath('//div[@class="g"]|//div[@class="g _cy"]'):
         try:
             r = {
-                'url': result.xpath('.//div[@class="_cnc"]//a/@href')[0],
-                'title': ''.join(result.xpath('.//div[@class="_cnc"]//h3//text()')),
+                'url': result.xpath('.//a[@class="l _PMs"]')[0].attrib.get("href"),
+                'title': ''.join(result.xpath('.//a[@class="l _PMs"]//text()')),
                 'content': ''.join(result.xpath('.//div[@class="st"]//text()')),
             }
         except:
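
Both selector idioms in this hunk are standard lxml; a self-contained sketch with made-up markup (not actual Google News HTML) showing that the href can be selected inside the XPath or read off the matched element:

    from lxml.html import fromstring

    dom = fromstring('<div><a class="l _PMs" href="https://example.com">Title</a></div>')
    href_via_xpath = dom.xpath('.//a[@class="l _PMs"]/@href')[0]
    href_via_attrib = dom.xpath('.//a[@class="l _PMs"]')[0].attrib.get('href')
    assert href_via_xpath == href_via_attrib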

searx/settings.yml  (+13, −12)

@@ -242,15 +242,16 @@
     disabled: True
 
   - name : gitlab
-    engine : xpath
+    engine : json_engine
     paging : True
-    search_url : https://gitlab.com/search?page={pageno}&search={query}
-    url_xpath : //li[@class="project-row"]//a[@class="project"]/@href
-    title_xpath : //li[@class="project-row"]//span[contains(@class, "project-full-name")]
-    content_xpath : //li[@class="project-row"]//div[@class="description"]/p
+    search_url : https://gitlab.com/api/v4/projects?search={query}&page={pageno}
+    url_query : web_url
+    title_query : name_with_namespace
+    content_query : description
+    page_size : 20
     categories : it
     shortcut : gl
-    timeout : 5.0
+    timeout : 10.0
     disabled : True
 
   - name : github
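
The gitlab engine now queries GitLab's JSON API instead of scraping search-result HTML. Roughly what the json_engine mapping above amounts to, sketched with requests against the documented /api/v4/projects response shape:

    import requests

    resp = requests.get('https://gitlab.com/api/v4/projects',
                        params={'search': 'searx', 'page': 1})
    results = [{'url': p['web_url'],                    # url_query : web_url
                'title': p['name_with_namespace'],      # title_query : name_with_namespace
                'content': p.get('description') or ''}  # content_query : description
               for p in resp.json()]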
@@ -321,9 +322,9 @@
     engine : xpath
     paging : True
     search_url : https://geektimes.ru/search/page{pageno}/?q={query}
-    url_xpath : //div[@class="search_results"]//a[@class="post__title_link"]/@href
-    title_xpath : //div[@class="search_results"]//a[@class="post__title_link"]
-    content_xpath : //div[@class="search_results"]//div[contains(@class, "content")]
+    url_xpath : //article[contains(@class, "post")]//a[@class="post__title_link"]/@href
+    title_xpath : //article[contains(@class, "post")]//a[@class="post__title_link"]
+    content_xpath : //article[contains(@class, "post")]//div[contains(@class, "post__text")]
     categories : it
     timeout : 4.0
     disabled : True
@@ -333,9 +334,9 @@
     engine : xpath
     paging : True
     search_url : https://habrahabr.ru/search/page{pageno}/?q={query}
-    url_xpath : //div[@class="search_results"]//a[contains(@class, "post__title_link")]/@href
-    title_xpath : //div[@class="search_results"]//a[contains(@class, "post__title_link")]
-    content_xpath : //div[@class="search_results"]//div[contains(@class, "post__text")]
+    url_xpath : //article[contains(@class, "post")]//a[@class="post__title_link"]/@href
+    title_xpath : //article[contains(@class, "post")]//a[@class="post__title_link"]
+    content_xpath : //article[contains(@class, "post")]//div[contains(@class, "post__text")]
     categories : it
     timeout : 4.0
     disabled : True
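
Both engines now anchor on article[contains(@class, "post")] rather than the old search_results wrapper; contains() does substring matching on the class attribute, so the selector tolerates extra classes on the element. A small lxml illustration with made-up markup:

    from lxml.html import fromstring

    dom = fromstring('<div><article class="post post_teaser">x</article></div>')
    assert dom.xpath('//article[@class="post"]') == []               # exact match fails
    assert len(dom.xpath('//article[contains(@class, "post")]')) == 1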

tests/unit/engines/test_google_news.py  (+56, −6)
(File diff suppressed because it is too large.)