Преглед изворни кода

fix twitter engine and add comments

* add language-support
* add comments
* little refactoring
Thomas Pointhuber пре 10 година
родитељ
комит
9460750fea
1 измењених фајлова са 35 додато и 2 уклоњено
  1. 35
    2
      searx/engines/twitter.py

+ 35
- 2
searx/engines/twitter.py Прегледај датотеку

1
+## Twitter (Social media)
2
+# 
3
+# @website     https://twitter.com/
4
+# @provide-api yes (https://dev.twitter.com/docs/using-search)
5
+# 
6
+# @using-api   no
7
+# @results     HTML (using search portal)
8
+# @stable      no (HTML can change)
9
+# @parse       url, title, content
10
+#
11
+# @todo        publishedDate
12
+
1
 from urlparse import urljoin
13
 from urlparse import urljoin
2
 from urllib import urlencode
14
 from urllib import urlencode
3
 from lxml import html
15
 from lxml import html
4
 from cgi import escape
16
 from cgi import escape
5
 
17
 
18
+# engine dependent config
6
 categories = ['social media']
19
 categories = ['social media']
20
+language_support = True
7
 
21
 
22
+# search-url
8
 base_url = 'https://twitter.com/'
23
 base_url = 'https://twitter.com/'
9
 search_url = base_url+'search?'
24
 search_url = base_url+'search?'
25
+
26
+# specific xpath variables
27
+results_xpath = '//li[@data-item-type="tweet"]'
28
+link_xpath = './/small[@class="time"]//a'
10
 title_xpath = './/span[@class="username js-action-profile-name"]//text()'
29
 title_xpath = './/span[@class="username js-action-profile-name"]//text()'
11
 content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()'
30
 content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()'
12
 
31
 
13
 
32
 
33
+# build search-request (set url and language cookie)
14
 def request(query, params):
34
 def request(query, params):
15
     params['url'] = search_url + urlencode({'q': query})
35
     params['url'] = search_url + urlencode({'q': query})
36
+
37
+    # set language if specified
38
+    if params['language'] != 'all':
39
+        params['cookies']['lang'] = params['language'].split('_')[0]
40
+
16
     return params
41
     return params
17
 
42
 
18
 
43
 
44
+# parse response of search-request into results
19
 def response(resp):
45
 def response(resp):
20
     results = []
46
     results = []
47
+
21
     dom = html.fromstring(resp.text)
48
     dom = html.fromstring(resp.text)
22
-    for tweet in dom.xpath('//li[@data-item-type="tweet"]'):
23
-        link = tweet.xpath('.//small[@class="time"]//a')[0]
49
+
50
+    # parse results
51
+    for tweet in dom.xpath(results_xpath):
52
+        link = tweet.xpath(link_xpath)[0]
24
         url = urljoin(base_url, link.attrib.get('href'))
53
         url = urljoin(base_url, link.attrib.get('href'))
25
         title = ''.join(tweet.xpath(title_xpath))
54
         title = ''.join(tweet.xpath(title_xpath))
26
         content = escape(''.join(tweet.xpath(content_xpath)))
55
         content = escape(''.join(tweet.xpath(content_xpath)))
56
+
57
+        # append result
27
         results.append({'url': url,
58
         results.append({'url': url,
28
                         'title': title,
59
                         'title': title,
29
                         'content': content})
60
                         'content': content})
61
+
62
+    # return results
30
     return results
63
     return results