Quellcode durchsuchen

[fix] yahoo url mods

Adam Tauber vor 11 Jahren
Ursprung
Commit
4627788834
1 geänderte Datei mit 5 neuen Zeilen und 1 gelöschten Zeile
  1. 5
    1
      searx/engines/yahoo.py

+ 5
- 1
searx/engines/yahoo.py Datei anzeigen

1
 #!/usr/bin/env python
1
 #!/usr/bin/env python
2
 
2
 
3
 from urllib import urlencode
3
 from urllib import urlencode
4
+from urlparse import unquote
4
 from lxml import html
5
 from lxml import html
5
 from searx.engines.xpath import extract_text, extract_url
6
 from searx.engines.xpath import extract_text, extract_url
6
 
7
 
33
     dom = html.fromstring(resp.text)
34
     dom = html.fromstring(resp.text)
34
 
35
 
35
     for result in dom.xpath(results_xpath):
36
     for result in dom.xpath(results_xpath):
36
-        url = extract_url(result.xpath(url_xpath), search_url)
37
+        url_string = extract_url(result.xpath(url_xpath), search_url)
38
+        start = url_string.find('/RU=')+4
39
+        end = url_string.rfind('/RS')
40
+        url = unquote(url_string[start:end])
37
         title = extract_text(result.xpath(title_xpath)[0])
41
         title = extract_text(result.xpath(title_xpath)[0])
38
         content = extract_text(result.xpath(content_xpath)[0])
42
         content = extract_text(result.xpath(content_xpath)[0])
39
         results.append({'url': url, 'title': title, 'content': content})
43
         results.append({'url': url, 'title': title, 'content': content})