Bläddra i källkod

fix twitter engine and add comments

* add language support
* add comments
* minor refactoring
Thomas Pointhuber 10 år sedan
förälder
incheckning
9460750fea
1 ändrade filer med 35 tillägg och 2 borttagningar
  1. 35
    2
      searx/engines/twitter.py

+ 35
- 2
searx/engines/twitter.py Visa fil

@@ -1,30 +1,63 @@
1
+## Twitter (Social media)
2
+# 
3
+# @website     https://twitter.com
4
+# @provide-api yes (https://dev.twitter.com/docs/using-search)
5
+# 
6
+# @using-api   no
7
+# @results     HTML (using search portal)
8
+# @stable      no (HTML can change)
9
+# @parse       url, title, content
10
+#
11
+# @todo        publishedDate
12
+
1 13
 from urlparse import urljoin
2 14
 from urllib import urlencode
3 15
 from lxml import html
4 16
 from cgi import escape
5 17
 
18
+# engine dependent config
6 19
 categories = ['social media']
20
+language_support = True
7 21
 
22
+# search-url
8 23
 base_url = 'https://twitter.com/'
9 24
 search_url = base_url+'search?'
25
+
26
+# specific xpath variables
27
+results_xpath = '//li[@data-item-type="tweet"]'
28
+link_xpath = './/small[@class="time"]//a'
10 29
 title_xpath = './/span[@class="username js-action-profile-name"]//text()'
11 30
 content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()'
12 31
 
13 32
 
33
+# do search-request
14 34
 def request(query, params):
15 35
     params['url'] = search_url + urlencode({'q': query})
36
+
37
+    # set language if specified
38
+    if params['language'] != 'all':
39
+        params['cookies']['lang'] = params['language'].split('_')[0]
40
+
16 41
     return params
17 42
 
18 43
 
44
+# get response from search-request
19 45
 def response(resp):
20 46
     results = []
47
+
21 48
     dom = html.fromstring(resp.text)
22
-    for tweet in dom.xpath('//li[@data-item-type="tweet"]'):
23
-        link = tweet.xpath('.//small[@class="time"]//a')[0]
49
+
50
+    # parse results
51
+    for tweet in dom.xpath(results_xpath):
52
+        link = tweet.xpath(link_xpath)[0]
24 53
         url = urljoin(base_url, link.attrib.get('href'))
25 54
         title = ''.join(tweet.xpath(title_xpath))
26 55
         content = escape(''.join(tweet.xpath(content_xpath)))
56
+
57
+        # append result
27 58
         results.append({'url': url,
28 59
                         'title': title,
29 60
                         'content': content})
61
+
62
+    # return results
30 63
     return results