11 years ago · b0fd71b7b3
--- a/searx/engines/yahoo_news.py
+++ b/searx/engines/yahoo_news.py
@@ -1,8 +1,9 @@
 
				-## Yahoo (News)
			
 
				-# 
			
 
				+# Yahoo (News)
			
 
				+#
			
 
				 # @website     https://news.yahoo.com
			
 
				-# @provide-api yes (https://developer.yahoo.com/boss/search/), $0.80/1000 queries
			
 
				-# 
			
 
				+# @provide-api yes (https://developer.yahoo.com/boss/search/)
			
 
				+#              $0.80/1000 queries
			
 
				+#
			
 
				 # @using-api   no (because pricing)
			
 
				 # @results     HTML (using search portal)
			
 
				 # @stable      no (HTML can change)
			
@@ -22,7 +23,7 @@ paging = True
 
				 language_support = True
			
 
				 
			
 
				 # search-url
			
 
				-search_url = 'https://news.search.yahoo.com/search?{query}&b={offset}&fl=1&vl=lang_{lang}'
			
 
				+search_url = 'https://news.search.yahoo.com/search?{query}&b={offset}&fl=1&vl=lang_{lang}'  # noqa
			
 
				 
			
 
				 # specific xpath variables
			
 
				 results_xpath = '//div[@class="res"]'
			
@@ -41,7 +42,7 @@ def request(query, params):
 
				         language = 'en'
			
 
				     else:
			
 
				         language = params['language'].split('_')[0]
			
 
				-    
			
 
				+
			
 
				     params['url'] = search_url.format(offset=offset,
			
 
				                                       query=urlencode({'p': query}),
			
 
				                                       lang=language)
			
--- a/searx/https_rewrite.py
+++ b/searx/https_rewrite.py
@@ -45,11 +45,9 @@ def load_single_https_ruleset(filepath):
 
				     # get root node
			
 
				     root = tree.getroot()
			
 
				 
			
 
				-    #print(etree.tostring(tree))
			
 
				-
			
 
				     # check if root is a node with the name ruleset
			
 
				     # TODO improve parsing
			
 
				-    if root.tag != 'ruleset':        
			
 
				+    if root.tag != 'ruleset':
			
 
				         return ()
			
 
				 
			
 
				     # check if rule is deactivated by default
			
@@ -68,36 +66,39 @@ def load_single_https_ruleset(filepath):
 
				     for ruleset in root:
			
 
				         # this child define a target
			
 
				         if ruleset.tag == 'target':
			
 
				-            # check if required tags available 
			
 
				+            # check if required tags available
			
 
				             if not ruleset.attrib.get('host'):
			
 
				                 continue
			
 
				 
			
 
				             # convert host-rule to valid regex
			
 
				-            host = ruleset.attrib.get('host').replace('.', '\.').replace('*', '.*')
			
 
				+            host = ruleset.attrib.get('host')\
			
 
				+                .replace('.', '\.').replace('*', '.*')
			
 
				 
			
 
				             # append to host list
			
 
				             hosts.append(host)
			
 
				 
			
 
				         # this child define a rule
			
 
				         elif ruleset.tag == 'rule':
			
 
				-            # check if required tags available 
			
 
				+            # check if required tags available
			
 
				             if not ruleset.attrib.get('from')\
			
 
				                or not ruleset.attrib.get('to'):
			
 
				                 continue
			
 
				 
			
 
				-            # TODO hack, which convert a javascript regex group into a valid python regex group
			
 
				+            # TODO hack, which converts a javascript regex group
			
 
				+            # into a valid python regex group
			
 
				             rule_from = ruleset.attrib.get('from').replace('$', '\\')
			
 
				             rule_to = ruleset.attrib.get('to').replace('$', '\\')
			
 
				 
			
 
				-            # TODO, not working yet because of the hack above, currently doing that in webapp.py
			
 
				-            #rule_from_rgx = re.compile(rule_from, re.I)
			
 
				+            # TODO, not working yet because of the hack above,
			
 
				+            # currently doing that in webapp.py
			
 
				+            # rule_from_rgx = re.compile(rule_from, re.I)
			
 
				 
			
 
				             # append rule
			
 
				             rules.append((rule_from, rule_to))
			
 
				 
			
 
				         # this child define an exclusion
			
 
				         elif ruleset.tag == 'exclusion':
			
 
				-            # check if required tags available 
			
 
				+            # check if required tags available
			
 
				             if not ruleset.attrib.get('pattern'):
			
 
				                 continue
			
 
				 
			
@@ -124,7 +125,9 @@ def load_https_rules(rules_path):
 
				         rules_path += '/'
			
 
				 
			
 
				     # search all xml files which are stored in the https rule directory
			
 
				-    xml_files = [ join(rules_path,f) for f in listdir(rules_path) if isfile(join(rules_path,f)) and f[-4:] == '.xml' ]
			
 
				+    xml_files = [join(rules_path, f)
			
 
				+                 for f in listdir(rules_path)
			
 
				+                 if isfile(join(rules_path, f)) and f[-4:] == '.xml']
			
 
				 
			
 
				     # load xml-files
			
 
				     for ruleset_file in xml_files:
			
@@ -137,5 +140,5 @@ def load_https_rules(rules_path):
 
				 
			
 
				         # append ruleset
			
 
				         https_rules.append(ruleset)
			
 
				-        
			
 
				+
			
 
				     print(' * {n} https-rules loaded'.format(n=len(https_rules)))