|  | @@ -1,13 +1,12 @@
 | 
	
		
			
			| 1 | 1 |  import json
 | 
	
		
			
			| 2 |  | -from datetime import datetime
 | 
	
		
			
			| 3 | 2 |  from requests import get
 | 
	
		
			
			| 4 | 3 |  from urllib import urlencode
 | 
	
		
			
			|  | 4 | +from datetime import datetime
 | 
	
		
			
			| 5 | 5 |  
 | 
	
		
			
			| 6 | 6 |  resultCount=2
 | 
	
		
			
			| 7 |  | -urlSearch = 'https://www.wikidata.org/w/api.php?action=query&list=search&format=json&srnamespace=0&srprop=sectionsnippet&{query}'
 | 
	
		
			
			|  | 7 | +urlSearch = 'https://www.wikidata.org/w/api.php?action=query&list=search&format=json&srnamespace=0&srprop=sectiontitle&{query}'
 | 
	
		
			
			| 8 | 8 |  urlDetail = 'https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&props=labels%7Cinfo%7Csitelinks%7Csitelinks%2Furls%7Cdescriptions%7Cclaims&{query}'
 | 
	
		
			
			| 9 |  | -# find the right URL for urlMap
 | 
	
		
			
			| 10 |  | -urlMap = 'http://www.openstreetmap.org/?lat={latitude}&lon={longitude}&zoom={zoom}&layers=M'
 | 
	
		
			
			|  | 9 | +urlMap = 'https://www.openstreetmap.org/?lat={latitude}&lon={longitude}&zoom={zoom}&layers=M'
 | 
	
		
			
			| 11 | 10 |  
 | 
	
		
			
			| 12 | 11 |  def request(query, params):
 | 
	
		
			
			| 13 | 12 |      params['url'] = urlSearch.format(query=urlencode({'srsearch': query, 'srlimit': resultCount}))
 | 
	
	
		
			
			|  | @@ -18,24 +17,27 @@ def request(query, params):
 | 
	
		
			
			| 18 | 17 |  def response(resp):
 | 
	
		
			
			| 19 | 18 |      results = []
 | 
	
		
			
			| 20 | 19 |      search_res = json.loads(resp.text)
 | 
	
		
			
			| 21 |  | -    # TODO parallel http queries
 | 
	
		
			
			| 22 |  | -    before = datetime.now()
 | 
	
		
			
			|  | 20 | +
 | 
	
		
			
			|  | 21 | +    wikidata_ids = set()
 | 
	
		
			
			| 23 | 22 |      for r in search_res.get('query', {}).get('search', {}):
 | 
	
		
			
			| 24 |  | -        wikidata_id = r.get('title', '')
 | 
	
		
			
			| 25 |  | -        results = results + getDetail(wikidata_id)
 | 
	
		
			
			| 26 |  | -    after = datetime.now()
 | 
	
		
			
			| 27 |  | -    print str(after - before) + " second(s)"
 | 
	
		
			
			|  | 23 | +        wikidata_ids.add(r.get('title', ''))
 | 
	
		
			
			| 28 | 24 |  
 | 
	
		
			
			| 29 |  | -    return results
 | 
	
		
			
			|  | 25 | +    language = resp.search_params['language'].split('_')[0]
 | 
	
		
			
			|  | 26 | +    if language == 'all':
 | 
	
		
			
			|  | 27 | +        language = 'en'
 | 
	
		
			
			|  | 28 | +    url = urlDetail.format(query=urlencode({'ids': '|'.join(wikidata_ids), 'languages': language + '|en'}))
 | 
	
		
			
			|  | 29 | +
 | 
	
		
			
			|  | 30 | +    before = datetime.now()
 | 
	
		
			
			|  | 31 | +    htmlresponse = get(url)
 | 
	
		
			
			|  | 32 | +    print datetime.now() - before
 | 
	
		
			
			|  | 33 | +    jsonresponse = json.loads(htmlresponse.content)
 | 
	
		
			
			|  | 34 | +    for wikidata_id in wikidata_ids:
 | 
	
		
			
			|  | 35 | +        results = results + getDetail(jsonresponse, wikidata_id, language)
 | 
	
		
			
			| 30 | 36 |  
 | 
	
		
			
			| 31 |  | -def getDetail(wikidata_id):
 | 
	
		
			
			| 32 |  | -    language = 'fr'
 | 
	
		
			
			|  | 37 | +    return results
 | 
	
		
			
			| 33 | 38 |  
 | 
	
		
			
			| 34 |  | -    url = urlDetail.format(query=urlencode({'ids': wikidata_id, 'languages': language + '|en'}))
 | 
	
		
			
			| 35 |  | -    print url
 | 
	
		
			
			| 36 |  | -    response = get(url)
 | 
	
		
			
			| 37 |  | -    result = json.loads(response.content)
 | 
	
		
			
			| 38 |  | -    result = result.get('entities', {}).get(wikidata_id, {})
 | 
	
		
			
			|  | 39 | +def getDetail(jsonresponse, wikidata_id, language):
 | 
	
		
			
			|  | 40 | +    result = jsonresponse.get('entities', {}).get(wikidata_id, {})
 | 
	
		
			
			| 39 | 41 |      
 | 
	
		
			
			| 40 | 42 |      title = result.get('labels', {}).get(language, {}).get('value', None)
 | 
	
		
			
			| 41 | 43 |      if title == None:
 | 
	
	
		
			
			|  | @@ -50,7 +52,6 @@ def getDetail(wikidata_id):
 | 
	
		
			
			| 50 | 52 |  
 | 
	
		
			
			| 51 | 53 |      claims = result.get('claims', {})
 | 
	
		
			
			| 52 | 54 |      official_website = get_string(claims, 'P856', None)
 | 
	
		
			
			| 53 |  | -    print official_website
 | 
	
		
			
			| 54 | 55 |      if official_website != None:
 | 
	
		
			
			| 55 | 56 |          urls.append({ 'title' : 'Official site', 'url': official_website })
 | 
	
		
			
			| 56 | 57 |          results.append({ 'title': title, 'url' : official_website })
 | 
	
	
		
			
			|  | @@ -98,10 +99,12 @@ def getDetail(wikidata_id):
 | 
	
		
			
			| 98 | 99 |  
 | 
	
		
			
			| 99 | 100 |      return results
 | 
	
		
			
			| 100 | 101 |  
 | 
	
		
			
			|  | 102 | +
 | 
	
		
			
			| 101 | 103 |  def add_url(urls, title, url):
 | 
	
		
			
			| 102 | 104 |      if url != None:
 | 
	
		
			
			| 103 | 105 |          urls.append({'title' : title, 'url' : url})
 | 
	
		
			
			| 104 | 106 |  
 | 
	
		
			
			|  | 107 | +
 | 
	
		
			
			| 105 | 108 |  def get_mainsnak(claims, propertyName):
 | 
	
		
			
			| 106 | 109 |      propValue = claims.get(propertyName, {})
 | 
	
		
			
			| 107 | 110 |      if len(propValue) == 0:
 | 
	
	
		
			
			|  | @@ -110,6 +113,7 @@ def get_mainsnak(claims, propertyName):
 | 
	
		
			
			| 110 | 113 |      propValue = propValue[0].get('mainsnak', None)
 | 
	
		
			
			| 111 | 114 |      return propValue
 | 
	
		
			
			| 112 | 115 |  
 | 
	
		
			
			|  | 116 | +
 | 
	
		
			
			| 113 | 117 |  def get_string(claims, propertyName, defaultValue=None):
 | 
	
		
			
			| 114 | 118 |      propValue = claims.get(propertyName, {})
 | 
	
		
			
			| 115 | 119 |      if len(propValue) == 0:
 | 
	
	
		
			
			|  | @@ -129,6 +133,7 @@ def get_string(claims, propertyName, defaultValue=None):
 | 
	
		
			
			| 129 | 133 |      else:
 | 
	
		
			
			| 130 | 134 |          return ', '.join(result)
 | 
	
		
			
			| 131 | 135 |  
 | 
	
		
			
			|  | 136 | +
 | 
	
		
			
			| 132 | 137 |  def get_time(claims, propertyName, defaultValue=None):
 | 
	
		
			
			| 133 | 138 |      propValue = claims.get(propertyName, {})
 | 
	
		
			
			| 134 | 139 |      if len(propValue) == 0:
 | 
	
	
		
			
			|  | @@ -149,6 +154,7 @@ def get_time(claims, propertyName, defaultValue=None):
 | 
	
		
			
			| 149 | 154 |      else:
 | 
	
		
			
			| 150 | 155 |          return ', '.join(result)
 | 
	
		
			
			| 151 | 156 |  
 | 
	
		
			
			|  | 157 | +
 | 
	
		
			
			| 152 | 158 |  def get_geolink(claims, propertyName, defaultValue=''):
 | 
	
		
			
			| 153 | 159 |      mainsnak = get_mainsnak(claims, propertyName)
 | 
	
		
			
			| 154 | 160 |  
 | 
	
	
		
			
			|  | @@ -182,6 +188,7 @@ def get_geolink(claims, propertyName, defaultValue=''):
 | 
	
		
			
			| 182 | 188 |  
 | 
	
		
			
			| 183 | 189 |      return url
 | 
	
		
			
			| 184 | 190 |  
 | 
	
		
			
			|  | 191 | +
 | 
	
		
			
			| 185 | 192 |  def get_wikilink(result, wikiid):
 | 
	
		
			
			| 186 | 193 |      url = result.get('sitelinks', {}).get(wikiid, {}).get('url', None)
 | 
	
		
			
			| 187 | 194 |      if url == None:
 |