|  | @@ -5,17 +5,17 @@ import unicodedata
 | 
	
		
			
			| 5 | 5 |  import string
 | 
	
		
			
			| 6 | 6 |  from urllib import urlencode
 | 
	
		
			
			| 7 | 7 |  from requests import get
 | 
	
		
			
			| 8 |  | - 
 | 
	
		
			
			|  | 8 | +
 | 
	
		
			
			| 9 | 9 |  languages = {'de', 'en', 'es', 'fr', 'hu', 'it', 'nl', 'jp'}
 | 
	
		
			
			| 10 |  | - 
 | 
	
		
			
			|  | 10 | +
 | 
	
		
			
			| 11 | 11 |  url_template = 'https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&{query}&props=labels%7Cdatatype%7Cclaims%7Caliases&languages=' + '|'.join(languages)
 | 
	
		
			
			| 12 |  | -url_wmflabs_template = 'http://wdq.wmflabs.org/api?q=' 
 | 
	
		
			
			|  | 12 | +url_wmflabs_template = 'http://wdq.wmflabs.org/api?q='
 | 
	
		
			
			| 13 | 13 |  url_wikidata_search_template='http://www.wikidata.org/w/api.php?action=query&list=search&format=json&srnamespace=0&srprop=sectiontitle&{query}'
 | 
	
		
			
			| 14 |  | - 
 | 
	
		
			
			| 15 |  | -wmflabs_queries = [ 
 | 
	
		
			
			|  | 14 | +
 | 
	
		
			
			|  | 15 | +wmflabs_queries = [
 | 
	
		
			
			| 16 | 16 |      'CLAIM[31:8142]', # all devise
 | 
	
		
			
			| 17 | 17 |  ]
 | 
	
		
			
			| 18 |  | - 
 | 
	
		
			
			|  | 18 | +
 | 
	
		
			
			| 19 | 19 |  db = {
 | 
	
		
			
			| 20 | 20 |      'iso4217' : {
 | 
	
		
			
			| 21 | 21 |          },
 | 
	
	
		
			
			|  | @@ -26,7 +26,7 @@ db = {
 | 
	
		
			
			| 26 | 26 |  
 | 
	
		
			
			| 27 | 27 |  def remove_accents(data):
 | 
	
		
			
			| 28 | 28 |      return unicodedata.normalize('NFKD', data).lower()
 | 
	
		
			
			| 29 |  | -        
 | 
	
		
			
			|  | 29 | +
 | 
	
		
			
			| 30 | 30 |  
 | 
	
		
			
			| 31 | 31 |  def normalize_name(name):
 | 
	
		
			
			| 32 | 32 |      return re.sub(' +',' ', remove_accents(name.lower()).replace('-', ' '))
 | 
	
	
		
			
			|  | @@ -66,22 +66,22 @@ def get_property_value(data, name):
 | 
	
		
			
			| 66 | 66 |      prop = data.get('claims', {}).get(name, {})
 | 
	
		
			
			| 67 | 67 |      if len(prop) == 0:
 | 
	
		
			
			| 68 | 68 |          return None
 | 
	
		
			
			| 69 |  | -    
 | 
	
		
			
			| 70 |  | -    value = prop[0].get('mainsnak', {}).get('datavalue', {}).get('value', '') 
 | 
	
		
			
			|  | 69 | +
 | 
	
		
			
			|  | 70 | +    value = prop[0].get('mainsnak', {}).get('datavalue', {}).get('value', '')
 | 
	
		
			
			| 71 | 71 |      if value == '':
 | 
	
		
			
			| 72 | 72 |          return None
 | 
	
		
			
			| 73 | 73 |  
 | 
	
		
			
			| 74 | 74 |      return value
 | 
	
		
			
			| 75 |  | -    
 | 
	
		
			
			|  | 75 | +
 | 
	
		
			
			| 76 | 76 |  
 | 
	
		
			
			| 77 | 77 |  def parse_currency(data):
 | 
	
		
			
			| 78 | 78 |      iso4217 = get_property_value(data, 'P498')
 | 
	
		
			
			| 79 |  | -        
 | 
	
		
			
			|  | 79 | +
 | 
	
		
			
			| 80 | 80 |      if iso4217 is not None:
 | 
	
		
			
			| 81 | 81 |          unit = get_property_value(data, 'P558')
 | 
	
		
			
			| 82 | 82 |          if unit is not None:
 | 
	
		
			
			| 83 | 83 |              add_currency_name(unit, iso4217)
 | 
	
		
			
			| 84 |  | -                
 | 
	
		
			
			|  | 84 | +
 | 
	
		
			
			| 85 | 85 |          labels = data.get('labels', {})
 | 
	
		
			
			| 86 | 86 |          for language in languages:
 | 
	
		
			
			| 87 | 87 |              name = labels.get(language, {}).get('value', None)
 | 
	
	
		
			
			|  | @@ -95,22 +95,22 @@ def parse_currency(data):
 | 
	
		
			
			| 95 | 95 |                  alias = aliases[language][i].get('value', None)
 | 
	
		
			
			| 96 | 96 |                  add_currency_name(alias, iso4217)
 | 
	
		
			
			| 97 | 97 |  
 | 
	
		
			
			| 98 |  | - 
 | 
	
		
			
			|  | 98 | +
 | 
	
		
			
			| 99 | 99 |  def fetch_data(wikidata_ids):
 | 
	
		
			
			| 100 | 100 |      url = url_template.format(query=urlencode({'ids' : '|'.join(wikidata_ids)}))
 | 
	
		
			
			| 101 | 101 |      htmlresponse = get(url)
 | 
	
		
			
			| 102 | 102 |      jsonresponse = json.loads(htmlresponse.content)
 | 
	
		
			
			| 103 | 103 |      entities = jsonresponse.get('entities', {})
 | 
	
		
			
			| 104 |  | - 
 | 
	
		
			
			|  | 104 | +
 | 
	
		
			
			| 105 | 105 |      for pname in entities:
 | 
	
		
			
			| 106 | 106 |          pvalue = entities.get(pname)
 | 
	
		
			
			| 107 | 107 |          parse_currency(pvalue)
 | 
	
		
			
			| 108 |  | - 
 | 
	
		
			
			| 109 |  | - 
 | 
	
		
			
			|  | 108 | +
 | 
	
		
			
			|  | 109 | +
 | 
	
		
			
			| 110 | 110 |  def add_q(i):
 | 
	
		
			
			| 111 | 111 |      return "Q" + str(i)
 | 
	
		
			
			| 112 |  | - 
 | 
	
		
			
			| 113 |  | - 
 | 
	
		
			
			|  | 112 | +
 | 
	
		
			
			|  | 113 | +
 | 
	
		
			
			| 114 | 114 |  def fetch_data_batch(wikidata_ids):
 | 
	
		
			
			| 115 | 115 |      while len(wikidata_ids) > 0:
 | 
	
		
			
			| 116 | 116 |          if len(wikidata_ids) > 50:
 | 
	
	
		
			
			|  | @@ -119,8 +119,8 @@ def fetch_data_batch(wikidata_ids):
 | 
	
		
			
			| 119 | 119 |          else:
 | 
	
		
			
			| 120 | 120 |              fetch_data(wikidata_ids)
 | 
	
		
			
			| 121 | 121 |              wikidata_ids = []
 | 
	
		
			
			| 122 |  | -    
 | 
	
		
			
			| 123 |  | - 
 | 
	
		
			
			|  | 122 | +
 | 
	
		
			
			|  | 123 | +
 | 
	
		
			
			| 124 | 124 |  def wdq_query(query):
 | 
	
		
			
			| 125 | 125 |      url = url_wmflabs_template + query
 | 
	
		
			
			| 126 | 126 |      htmlresponse = get(url)
 | 
	
	
		
			
			|  | @@ -131,23 +131,23 @@ def wdq_query(query):
 | 
	
		
			
			| 131 | 131 |          print "error for query '" + query + "' :" + error
 | 
	
		
			
			| 132 | 132 |  
 | 
	
		
			
			| 133 | 133 |      fetch_data_batch(qlist)
 | 
	
		
			
			| 134 |  | - 
 | 
	
		
			
			| 135 |  | - 
 | 
	
		
			
			|  | 134 | +
 | 
	
		
			
			|  | 135 | +
 | 
	
		
			
			| 136 | 136 |  def wd_query(query, offset=0):
 | 
	
		
			
			| 137 | 137 |      qlist = []
 | 
	
		
			
			| 138 |  | - 
 | 
	
		
			
			|  | 138 | +
 | 
	
		
			
			| 139 | 139 |      url = url_wikidata_search_template.format(query=urlencode({'srsearch': query, 'srlimit': 50, 'sroffset': offset}))
 | 
	
		
			
			| 140 | 140 |      htmlresponse = get(url)
 | 
	
		
			
			| 141 | 141 |      jsonresponse = json.loads(htmlresponse.content)
 | 
	
		
			
			| 142 | 142 |      for r in jsonresponse.get('query', {}).get('search', {}):
 | 
	
		
			
			| 143 | 143 |          qlist.append(r.get('title', ''))
 | 
	
		
			
			| 144 | 144 |      fetch_data_batch(qlist)
 | 
	
		
			
			| 145 |  | - 
 | 
	
		
			
			|  | 145 | +
 | 
	
		
			
			| 146 | 146 |  ## fetch ##
 | 
	
		
			
			| 147 | 147 |  for q in wmflabs_queries:
 | 
	
		
			
			| 148 | 148 |      wdq_query(q)
 | 
	
		
			
			| 149 | 149 |  
 | 
	
		
			
			| 150 |  | -# static 
 | 
	
		
			
			|  | 150 | +# static
 | 
	
		
			
			| 151 | 151 |  add_currency_name(u"euro", 'EUR')
 | 
	
		
			
			| 152 | 152 |  add_currency_name(u"euros", 'EUR')
 | 
	
		
			
			| 153 | 153 |  add_currency_name(u"dollar", 'USD')
 |