Browse Source

[enh] currency_convert engine: user can write "1 dollars in euros" instead of "1 USD in EUR".

The currency names are fetched from Wikidata and stored in a static file: searx/data/currencies.json
This file is loaded when the currency_convert engine is loaded.
A database would perhaps be more appropriate.
Alexandre Flament 9 years ago
parent
commit
5525625dae

+ 7655
- 0
searx/data/currencies.json
File diff suppressed because it is too large
View File


+ 47
- 3
searx/engines/currency_convert.py View File

@@ -1,11 +1,37 @@
1 1
 from datetime import datetime
2 2
 import re
3
+import os
4
+import json
5
+import unicodedata
6
+
3 7
 
4 8
 categories = []
5 9
 url = 'https://download.finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s={query}=X'
6 10
 weight = 100
7 11
 
8
-parser_re = re.compile(r'^\W*(\d+(?:\.\d+)?)\W*([a-z]{3})\W*(?:in)?\W*([a-z]{3})\W*$', re.I)  # noqa
12
+parser_re = re.compile(r'^\W*(\d+(?:\.\d+)?)\W*([^.0-9].+)\W*in?\W*([^\.]+)\W*$', re.I)  # noqa
13
+
14
+db = 1
15
+
16
+
17
+def normalize_name(name):
18
+    name = name.lower().replace('-', ' ')
19
+    name = re.sub(' +', ' ', name)
20
+    return unicodedata.normalize('NFKD', u"" + name).lower()
21
+
22
+
23
+def name_to_iso4217(name):
24
+    global db
25
+
26
+    name = normalize_name(name)
27
+    currencies = db['names'].get(name, [name])
28
+    return currencies[0]
29
+
30
+
31
+def iso4217_to_name(iso4217, language):
32
+    global db
33
+
34
+    return db['iso4217'].get(iso4217, {}).get(language, iso4217)
9 35
 
10 36
 
11 37
 def request(query, params):
@@ -16,6 +42,8 @@ def request(query, params):
16 42
 
17 43
     ammount, from_currency, to_currency = m.groups()
18 44
     ammount = float(ammount)
45
+    from_currency = name_to_iso4217(from_currency.strip())
46
+    to_currency = name_to_iso4217(to_currency.strip())
19 47
 
20 48
     q = (from_currency + to_currency).upper()
21 49
 
@@ -23,6 +51,8 @@ def request(query, params):
23 51
     params['ammount'] = ammount
24 52
     params['from'] = from_currency
25 53
     params['to'] = to_currency
54
+    params['from_name'] = iso4217_to_name(from_currency, 'en')
55
+    params['to_name'] = iso4217_to_name(to_currency, 'en')
26 56
 
27 57
     return params
28 58
 
@@ -35,12 +65,14 @@ def response(resp):
35 65
     except:
36 66
         return results
37 67
 
38
-    answer = '{0} {1} = {2} {3} (1 {1} = {4} {3})'.format(
68
+    answer = '{0} {1} = {2} {3}, 1 {1} ({5}) = {4} {3} ({6})'.format(
39 69
         resp.search_params['ammount'],
40 70
         resp.search_params['from'],
41 71
         resp.search_params['ammount'] * conversion_rate,
42 72
         resp.search_params['to'],
43
-        conversion_rate
73
+        conversion_rate,
74
+        resp.search_params['from_name'],
75
+        resp.search_params['to_name'],
44 76
     )
45 77
 
46 78
     now_date = datetime.now().strftime('%Y%m%d')
@@ -55,3 +87,15 @@ def response(resp):
55 87
     results.append({'answer': answer, 'url': url})
56 88
 
57 89
     return results
90
+
91
+
92
def load():
    """Load the currency database into the module-level ``db``.

    Reads ``../data/currencies.json`` relative to this module's real path and
    replaces ``db`` with the parsed JSON. ``name_to_iso4217`` looks up
    ``db['names']`` and ``iso4217_to_name`` looks up ``db['iso4217']``, so the
    file is expected to provide both keys.

    Raises whatever ``open`` / ``json.load`` raise when the file is missing or
    is not valid JSON.
    """
    global db

    current_dir = os.path.dirname(os.path.realpath(__file__))
    json_path = os.path.join(current_dir, '..', 'data', 'currencies.json')

    # The context manager closes the handle even if parsing fails.
    with open(json_path) as json_file:
        db = json.load(json_file)
99
+
100
+
101
+load()

+ 5
- 3
searx/tests/engines/test_currency_convert.py View File

@@ -27,9 +27,11 @@ class TestCurrencyConvertEngine(SearxTestCase):
27 27
 
28 28
     def test_response(self):
29 29
         dicto = defaultdict(dict)
30
-        dicto['ammount'] = 10
30
+        dicto['ammount'] = float(10)
31 31
         dicto['from'] = "EUR"
32 32
         dicto['to'] = "USD"
33
+        dicto['from_name'] = "euro"
34
+        dicto['to_name'] = "United States dollar"
33 35
         response = mock.Mock(text='a,b,c,d', search_params=dicto)
34 36
         self.assertEqual(currency_convert.response(response), [])
35 37
 
@@ -38,7 +40,7 @@ class TestCurrencyConvertEngine(SearxTestCase):
38 40
         results = currency_convert.response(response)
39 41
         self.assertEqual(type(results), list)
40 42
         self.assertEqual(len(results), 1)
41
-        self.assertEqual(results[0]['answer'], '10 EUR = 5.0 USD (1 EUR = 0.5 USD)')
43
+        self.assertEqual(results[0]['answer'], '10.0 EUR = 5.0 USD, 1 EUR (euro) = 0.5 USD (United States dollar)')
42 44
         now_date = datetime.now().strftime('%Y%m%d')
43 45
         self.assertEqual(results[0]['url'], 'https://finance.yahoo.com/currency/converter-results/' +
44
-                                            now_date + '/10-eur-to-usd.html')
46
+                                            now_date + '/10.0-eur-to-usd.html')

+ 161
- 0
utils/fetch_currencies.py View File

@@ -0,0 +1,161 @@
1
+# -*- coding: utf-8 -*-
2
+import json
3
+import re
4
+import unicodedata
5
+import string
6
+from urllib import urlencode
7
+from requests import get
8
+ 
9
+languages = {'de', 'en', 'es', 'fr', 'hu', 'it', 'nl', 'jp'}
10
+ 
11
+url_template = 'https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&{query}&props=labels%7Cdatatype%7Cclaims%7Caliases&languages=' + '|'.join(languages)
12
+url_wmflabs_template = 'http://wdq.wmflabs.org/api?q=' 
13
+url_wikidata_search_template='http://www.wikidata.org/w/api.php?action=query&list=search&format=json&srnamespace=0&srprop=sectiontitle&{query}'
14
+ 
15
+wmflabs_queries = [ 
16
+    'CLAIM[31:8142]', # all devise
17
+]
18
+ 
19
+db = {
20
+    'iso4217' : {
21
+        },
22
+    'names' : {
23
+        }
24
+}
25
+
26
+
27
+def remove_accents(data):
28
+    return unicodedata.normalize('NFKD', data).lower()
29
+        
30
+
31
+def normalize_name(name):
32
+    return re.sub(' +',' ', remove_accents(name.lower()).replace('-', ' '))
33
+
34
+
35
def add_currency_name(name, iso4217):
    """Register ``name`` as a spelling of the currency code ``iso4217``.

    The name is normalized with ``normalize_name`` and used as a key of
    ``db['names']``; the value is the list of codes seen for that name, in
    insertion order and without duplicates.

    Nothing is stored (a diagnostic is printed instead) when ``iso4217`` is not
    a string or when the normalized name is empty.
    """
    if not isinstance(iso4217, basestring):
        print("problem %s %s" % (name, iso4217))
        return

    name = normalize_name(name)

    if name == '':
        print("name empty %s" % iso4217)
        return

    # Create the list on first use and only ever append to it: re-adding a
    # code that is already present must not discard the other codes that
    # share the same name.
    codes = db['names'].setdefault(name, [])
    if iso4217 not in codes:
        codes.append(iso4217)
56
+
57
+
58
+def add_currency_label(label, iso4217, language):
59
+    global db
60
+
61
+    db['iso4217'][iso4217] = db['iso4217'].get(iso4217, {})
62
+    db['iso4217'][iso4217][language] = label
63
+
64
+
65
+def get_property_value(data, name):
66
+    prop = data.get('claims', {}).get(name, {})
67
+    if len(prop) == 0:
68
+        return None
69
+    
70
+    value = prop[0].get('mainsnak', {}).get('datavalue', {}).get('value', '') 
71
+    if value == '':
72
+        return None
73
+
74
+    return value
75
+    
76
+
77
def parse_currency(data):
    """Record the names of one Wikidata entity in the currency database.

    ``data`` is one entity dict from the ``entities`` map of the API response.
    Entities without a ``P498`` claim (used here as the ISO 4217 code) are
    ignored. For the others:

    * the ``P558`` claim value (used here as the unit), if present, is added
      as a name;
    * the label in each configured language is added both as a name and as
      the display label for that language;
    * every alias, in every language returned, is added as a name.
    """
    iso4217 = get_property_value(data, 'P498')
    if iso4217 is None:
        return

    unit = get_property_value(data, 'P558')
    if unit is not None:
        add_currency_name(unit, iso4217)

    labels = data.get('labels', {})
    for language in languages:
        name = labels.get(language, {}).get('value', None)
        if name is not None:
            add_currency_name(name, iso4217)
            add_currency_label(name, iso4217, language)

    aliases = data.get('aliases', {})
    for language in aliases:
        for entry in aliases[language]:
            alias = entry.get('value', None)
            # An alias entry without a value cannot be normalized; skip it
            # instead of passing None down to add_currency_name.
            if alias is not None:
                add_currency_name(alias, iso4217)
97
+
98
+ 
99
+def fetch_data(wikidata_ids):
100
+    url = url_template.format(query=urlencode({'ids' : '|'.join(wikidata_ids)}))
101
+    htmlresponse = get(url)
102
+    jsonresponse = json.loads(htmlresponse.content)
103
+    entities = jsonresponse.get('entities', {})
104
+ 
105
+    for pname in entities:
106
+        pvalue = entities.get(pname)
107
+        parse_currency(pvalue)
108
+ 
109
+ 
110
+def add_q(i):
111
+    return "Q" + str(i)
112
+ 
113
+ 
114
def fetch_data_batch(wikidata_ids):
    """Call ``fetch_data`` on ``wikidata_ids`` in chunks of at most 50 ids.

    ``wikidata_ids`` must be a sequence supporting ``len`` and slicing.
    Every id is sent exactly once; an empty sequence results in no request.
    """
    # 50 ids per request, presumably the wbgetentities per-call limit
    # (NOTE(review): confirm against the API documentation).
    batch_size = 50

    # Step through the sequence by whole batches so no id is skipped between
    # the end of one slice and the start of the next.
    for start in range(0, len(wikidata_ids), batch_size):
        fetch_data(wikidata_ids[start:start + batch_size])
122
+    
123
+ 
124
+def wdq_query(query):
125
+    url = url_wmflabs_template + query
126
+    htmlresponse = get(url)
127
+    jsonresponse = json.loads(htmlresponse.content)
128
+    qlist = map(add_q, jsonresponse.get('items', {}))
129
+    error = jsonresponse.get('status', {}).get('error', None)
130
+    if error != None and error != 'OK':
131
+        print "error for query '" + query + "' :" + error
132
+
133
+    fetch_data_batch(qlist)
134
+ 
135
+ 
136
+def wd_query(query, offset=0):
137
+    qlist = []
138
+ 
139
+    url = url_wikidata_search_template.format(query=urlencode({'srsearch': query, 'srlimit': 50, 'sroffset': offset}))
140
+    htmlresponse = get(url)
141
+    jsonresponse = json.loads(htmlresponse.content)
142
+    for r in jsonresponse.get('query', {}).get('search', {}):
143
+        qlist.append(r.get('title', ''))
144
+    fetch_data_batch(qlist)
145
+ 
146
+## fetch ##
147
+for q in wmflabs_queries:
148
+    wdq_query(q)
149
+
150
+# static 
151
+add_currency_name(u"euro", 'EUR')
152
+add_currency_name(u"euros", 'EUR')
153
+add_currency_name(u"dollar", 'USD')
154
+add_currency_name(u"dollars", 'USD')
155
+add_currency_name(u"peso", 'MXN')
156
+add_currency_name(u"pesos", 'MXN')
157
+
158
+# write
159
+f = open("currencies.json", "wb")
160
+json.dump(db, f, indent=4, encoding="utf-8")
161
+f.close()