浏览代码

Merge pull request #830 from davidar/se

Semantic Scholar
Adam Tauber 8 年前
父节点
当前提交
57149661e4
共有 2 个文件被更改,包括 15 次插入和 4 次删除
  1. 4
    4
      searx/engines/xpath.py
  2. 11
    0
      searx/settings.yml

+ 4
- 4
searx/engines/xpath.py 查看文件

31
 def extract_text(xpath_results):
31
 def extract_text(xpath_results):
32
     if type(xpath_results) == list:
32
     if type(xpath_results) == list:
33
         # it's list of result : concat everything using recursive call
33
         # it's list of result : concat everything using recursive call
34
-        if not xpath_results:
35
-            raise Exception('Empty url resultset')
36
         result = ''
34
         result = ''
37
         for e in xpath_results:
35
         for e in xpath_results:
38
             result = result + extract_text(e)
36
             result = result + extract_text(e)
48
 
46
 
49
 
47
 
50
 def extract_url(xpath_results, search_url):
48
 def extract_url(xpath_results, search_url):
49
+    if xpath_results == []:
50
+        raise Exception('Empty url resultset')
51
     url = extract_text(xpath_results)
51
     url = extract_text(xpath_results)
52
 
52
 
53
     if url.startswith('//'):
53
     if url.startswith('//'):
103
     if results_xpath:
103
     if results_xpath:
104
         for result in dom.xpath(results_xpath):
104
         for result in dom.xpath(results_xpath):
105
             url = extract_url(result.xpath(url_xpath), search_url)
105
             url = extract_url(result.xpath(url_xpath), search_url)
106
-            title = extract_text(result.xpath(title_xpath)[0])
107
-            content = extract_text(result.xpath(content_xpath)[0])
106
+            title = extract_text(result.xpath(title_xpath))
107
+            content = extract_text(result.xpath(content_xpath))
108
             results.append({'url': url, 'title': title, 'content': content})
108
             results.append({'url': url, 'title': title, 'content': content})
109
     else:
109
     else:
110
         for url, title, content in zip(
110
         for url, title, content in zip(

+ 11
- 0
searx/settings.yml 查看文件

462
 #        - ...
462
 #        - ...
463
 #    disabled : True
463
 #    disabled : True
464
 
464
 
465
+  - name : semantic scholar
466
+    engine : xpath
467
+    paging : True
468
+    search_url : https://www.semanticscholar.org/search?q={query}&sort=relevance&page={pageno}&ae=false
469
+    results_xpath : //article
470
+    url_xpath : .//div[@class="search-result-title"]/a/@href
471
+    title_xpath : .//div[@class="search-result-title"]/a
472
+    content_xpath : .//div[@class="search-result-abstract"]
473
+    shortcut : se
474
+    categories : science
475
+
465
   - name : spotify
476
   - name : spotify
466
     engine : spotify
477
     engine : spotify
467
     shortcut : stf
478
     shortcut : stf