Bläddra i källkod

Digg + Twitter corrections

Digg engines, with thumbnails
Add pubdate for twitter
Cqoicebordel 10 år sedan
förälder
incheckning
e7e2981536
3 ändrade filer med 86 tillägg och 6 borttagningar
  1. 66
    0
      searx/engines/digg.py
  2. 16
    6
      searx/engines/twitter.py
  3. 4
    0
      searx/settings.yml

+ 66
- 0
searx/engines/digg.py Visa fil

1
+## Digg (News, Social media)
2
+#
3
+# @website     https://digg.com/
4
+# @provide-api no
5
+#
6
+# @using-api   no
7
+# @results     HTML (using search portal)
8
+# @stable      no (HTML can change)
9
+# @parse       url, title, content, publishedDate, thumbnail
10
+
11
+from urllib import quote_plus
12
+from json import loads
13
+from lxml import html
14
+from cgi import escape
15
+from dateutil import parser
16
+
17
# engine dependent config
# digg serves both news stories and social-media content
categories = ['news', 'social media']
# the API pages through results via an absolute `position` offset
paging = True

# search-url
base_url = 'https://digg.com/'
search_url = base_url + 'api/search/{query}.json?position={position}&format=html'

# specific xpath variables
# each result is an <article> element inside the returned HTML fragment
results_xpath = '//article'
link_xpath = './/small[@class="time"]//a'
title_xpath = './/h2//a//text()'
content_xpath = './/p//text()'
pubdate_xpath = './/time'
31
+
32
+
33
# do search-request
def request(query, params):
    # digg paginates with an absolute result offset, 10 results per page
    position = (params['pageno'] - 1) * 10
    params['url'] = search_url.format(query=quote_plus(query),
                                      position=position)
    return params
39
+
40
+
41
# get response from search-request
def response(resp):
    # The API answers with JSON whose 'html' member holds the rendered
    # result list; parse that fragment with lxml and extract each article.
    results = []

    search_result = loads(resp.text)

    dom = html.fromstring(search_result['html'])

    # parse results
    for result in dom.xpath(results_xpath):
        url = result.attrib.get('data-contenturl')
        title = ''.join(result.xpath(title_xpath))
        content = escape(''.join(result.xpath(content_xpath)))

        # an article may lack a thumbnail image; avoid IndexError
        imgs = result.xpath('.//img')
        thumbnail = imgs[0].attrib.get('src') if imgs else ''

        res = {'url': url,
               'title': title,
               'content': content,
               'template': 'videos.html',
               'thumbnail': thumbnail}

        # only attach a date when a <time datetime="..."> element exists
        # (mirrors the guarded pubdate handling used by the twitter engine)
        pubdate = result.xpath(pubdate_xpath)
        if pubdate:
            res['publishedDate'] = parser.parse(
                pubdate[0].attrib.get('datetime'))

        # append result
        results.append(res)

    # return results
    return results

+ 16
- 6
searx/engines/twitter.py Visa fil

1
 ## Twitter (Social media)
1
 ## Twitter (Social media)
2
 #
2
 #
3
-# @website     https://www.bing.com/news
3
+# @website     https://twitter.com/
4
 # @provide-api yes (https://dev.twitter.com/docs/using-search)
4
 # @provide-api yes (https://dev.twitter.com/docs/using-search)
5
 #
5
 #
6
 # @using-api   no
6
 # @using-api   no
14
 from urllib import urlencode
14
 from urllib import urlencode
15
 from lxml import html
15
 from lxml import html
16
 from cgi import escape
16
 from cgi import escape
17
+from datetime import datetime
17
 
18
 
18
 # engine dependent config
19
 # engine dependent config
19
 categories = ['social media']
20
 categories = ['social media']
28
 link_xpath = './/small[@class="time"]//a'
29
 link_xpath = './/small[@class="time"]//a'
29
 title_xpath = './/span[@class="username js-action-profile-name"]//text()'
30
 title_xpath = './/span[@class="username js-action-profile-name"]//text()'
30
 content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()'
31
 content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()'
32
+timestamp_xpath = './/span[contains(@class,"_timestamp")]'
31
 
33
 
32
 
34
 
33
 # do search-request
35
 # do search-request
53
         url = urljoin(base_url, link.attrib.get('href'))
55
         url = urljoin(base_url, link.attrib.get('href'))
54
         title = ''.join(tweet.xpath(title_xpath))
56
         title = ''.join(tweet.xpath(title_xpath))
55
         content = escape(''.join(tweet.xpath(content_xpath)))
57
         content = escape(''.join(tweet.xpath(content_xpath)))
56
-
57
-        # append result
58
-        results.append({'url': url,
59
-                        'title': title,
60
-                        'content': content})
58
+        pubdate = tweet.xpath(timestamp_xpath)
59
+        if len(pubdate) > 0:
60
+            publishedDate = datetime.fromtimestamp(float(pubdate[0].attrib.get('data-time')), None)
61
+            # append result
62
+            results.append({'url': url,
63
+                            'title': title,
64
+                            'content': content,
65
+                            'publishedDate': publishedDate})
66
+        else:
67
+            # append result
68
+            results.append({'url': url,
69
+                            'title': title,
70
+                            'content': content})
61
 
71
 
62
     # return results
72
     # return results
63
     return results
73
     return results

+ 4
- 0
searx/settings.yml Visa fil

44
   - name : ddg definitions
44
   - name : ddg definitions
45
     engine : duckduckgo_definitions
45
     engine : duckduckgo_definitions
46
     shortcut : ddd
46
     shortcut : ddd
47
+    
48
+  - name : digg
49
+    engine : digg
50
+    shortcut : dg
47
 
51
 
48
   - name : wikidata
52
   - name : wikidata
49
     engine : wikidata
53
     engine : wikidata