|  | @@ -0,0 +1,141 @@
 | 
	
		
			
			|  | 1 | +# -*- coding: utf-8 -*-
 | 
	
		
			
			|  | 2 | +
 | 
	
		
			
			|  | 3 | +"""
 | 
	
		
			
			|  | 4 | + Arch Linux Wiki
 | 
	
		
			
			|  | 5 | +
 | 
	
		
			
			|  | 6 | + @website      https://wiki.archlinux.org
 | 
	
		
			
			|  | 7 | + @provide-api  no (Mediawiki provides API, but Arch Wiki blocks access to it)
 | 
	
		
			
			|  | 8 | + @using-api    no
 | 
	
		
			
			|  | 9 | + @results      HTML
 | 
	
		
			
			|  | 10 | + @stable       no (HTML can change)
 | 
	
		
			
			|  | 11 | + @parse        url, title
 | 
	
		
			
			|  | 12 | +"""
 | 
	
		
			
			|  | 13 | +
 | 
	
		
			
			|  | 14 | +from urlparse import urljoin
 | 
	
		
			
			|  | 15 | +from cgi import escape
 | 
	
		
			
			|  | 16 | +from urllib import urlencode
 | 
	
		
			
			|  | 17 | +from lxml import html
 | 
	
		
			
			|  | 18 | +from searx.engines.xpath import extract_text
 | 
	
		
			
			|  | 19 | +
 | 
	
		
			
			|  | 20 | +# engine dependent config
 | 
	
		
			
			|  | 21 | +categories = ['it']
 | 
	
		
			
			|  | 22 | +language_support = True
 | 
	
		
			
			|  | 23 | +paging = True
 | 
	
		
			
			|  | 24 | +base_url = 'https://wiki.archlinux.org'
 | 
	
		
			
			|  | 25 | +
 | 
	
		
			
			|  | 26 | +# xpath queries
 | 
	
		
			
			|  | 27 | +xpath_results = '//ul[@class="mw-search-results"]/li'
 | 
	
		
			
			|  | 28 | +xpath_link = './/div[@class="mw-search-result-heading"]/a'
 | 
	
		
			
			|  | 29 | +
 | 
	
		
			
			|  | 30 | +
 | 
	
		
			
			|  | 31 | +# cut 'en' from 'en_US', 'de' from 'de_CH', and so on
 | 
	
		
			
			|  | 32 | +def locale_to_lang_code(locale):
 | 
	
		
			
			|  | 33 | +    if locale.find('_') >= 0:
 | 
	
		
			
			|  | 34 | +        locale = locale.split('_')[0]
 | 
	
		
			
			|  | 35 | +    return locale
 | 
	
		
			
			|  | 36 | +
 | 
	
		
			
			|  | 37 | +# wikis for some languages were moved off from the main site, we need to make
 | 
	
		
			
			|  | 38 | +# requests to correct URLs to be able to get results in those languages
 | 
	
		
			
			|  | 39 | +lang_urls = {
 | 
	
		
			
			|  | 40 | +    'all': {
 | 
	
		
			
			|  | 41 | +        'base': 'https://wiki.archlinux.org',
 | 
	
		
			
			|  | 42 | +        'search': '/index.php?title=Special:Search&offset={offset}&{query}'
 | 
	
		
			
			|  | 43 | +    },
 | 
	
		
			
			|  | 44 | +    'de': {
 | 
	
		
			
			|  | 45 | +        'base': 'https://wiki.archlinux.de',
 | 
	
		
			
			|  | 46 | +        'search': '/index.php?title=Spezial:Suche&offset={offset}&{query}'
 | 
	
		
			
			|  | 47 | +    },
 | 
	
		
			
			|  | 48 | +    'fr': {
 | 
	
		
			
			|  | 49 | +        'base': 'https://wiki.archlinux.fr',
 | 
	
		
			
			|  | 50 | +        'search': '/index.php?title=Spécial:Recherche&offset={offset}&{query}'
 | 
	
		
			
			|  | 51 | +    },
 | 
	
		
			
			|  | 52 | +    'ja': {
 | 
	
		
			
			|  | 53 | +        'base': 'https://wiki.archlinuxjp.org',
 | 
	
		
			
			|  | 54 | +        'search': '/index.php?title=特別:検索&offset={offset}&{query}'
 | 
	
		
			
			|  | 55 | +    },
 | 
	
		
			
			|  | 56 | +    'ro': {
 | 
	
		
			
			|  | 57 | +        'base': 'http://wiki.archlinux.ro',
 | 
	
		
			
			|  | 58 | +        'search': '/index.php?title=Special:Căutare&offset={offset}&{query}'
 | 
	
		
			
			|  | 59 | +    },
 | 
	
		
			
			|  | 60 | +    'tr': {
 | 
	
		
			
			|  | 61 | +        'base': 'http://archtr.org/wiki',
 | 
	
		
			
			|  | 62 | +        'search': '/index.php?title=Özel:Ara&offset={offset}&{query}'
 | 
	
		
			
			|  | 63 | +    }
 | 
	
		
			
			|  | 64 | +}
 | 
	
		
			
			|  | 65 | +
 | 
	
		
			
			|  | 66 | +
 | 
	
		
			
			|  | 67 | +# get base & search URLs for selected language
 | 
	
		
			
			|  | 68 | +def get_lang_urls(language):
 | 
	
		
			
			|  | 69 | +    if language in lang_urls:
 | 
	
		
			
			|  | 70 | +        return lang_urls[language]
 | 
	
		
			
			|  | 71 | +    return lang_urls['all']
 | 
	
		
			
			|  | 72 | +
 | 
	
		
			
			|  | 73 | +# Language names to build search requests for
 | 
	
		
			
			|  | 74 | +# those languages which are hosted on the main site.
 | 
	
		
			
			|  | 75 | +main_langs = {
 | 
	
		
			
			|  | 76 | +    'ar': 'العربية',
 | 
	
		
			
			|  | 77 | +    'bg': 'Български',
 | 
	
		
			
			|  | 78 | +    'cs': 'Česky',
 | 
	
		
			
			|  | 79 | +    'da': 'Dansk',
 | 
	
		
			
			|  | 80 | +    'el': 'Ελληνικά',
 | 
	
		
			
			|  | 81 | +    'es': 'Español',
 | 
	
		
			
			|  | 82 | +    'he': 'עברית',
 | 
	
		
			
			|  | 83 | +    'hr': 'Hrvatski',
 | 
	
		
			
			|  | 84 | +    'hu': 'Magyar',
 | 
	
		
			
			|  | 85 | +    'it': 'Italiano',
 | 
	
		
			
			|  | 86 | +    'ko': '한국어',
 | 
	
		
			
			|  | 87 | +    'lt': 'Lietuviškai',
 | 
	
		
			
			|  | 88 | +    'nl': 'Nederlands',
 | 
	
		
			
			|  | 89 | +    'pl': 'Polski',
 | 
	
		
			
			|  | 90 | +    'pt': 'Português',
 | 
	
		
			
			|  | 91 | +    'ru': 'Русский',
 | 
	
		
			
			|  | 92 | +    'sl': 'Slovenský',
 | 
	
		
			
			|  | 93 | +    'th': 'ไทย',
 | 
	
		
			
			|  | 94 | +    'uk': 'Українська',
 | 
	
		
			
			|  | 95 | +    'zh': '简体中文'
 | 
	
		
			
			|  | 96 | +}
 | 
	
		
			
			|  | 97 | +
 | 
	
		
			
			|  | 98 | +
 | 
	
		
			
			|  | 99 | +# do search-request
 | 
	
		
			
			|  | 100 | +def request(query, params):
 | 
	
		
			
			|  | 101 | +    # translate the locale (e.g. 'en_US') to language code ('en')
 | 
	
		
			
			|  | 102 | +    language = locale_to_lang_code(params['language'])
 | 
	
		
			
			|  | 103 | +
 | 
	
		
			
			|  | 104 | +    # if our language is hosted on the main site, we need to add its name
 | 
	
		
			
			|  | 105 | +    # to the query in order to narrow the results to that language
 | 
	
		
			
			|  | 106 | +    if language in main_langs:
 | 
	
		
			
			|  | 107 | +        query += '(' + main_langs[language] + ')'
 | 
	
		
			
			|  | 108 | +
 | 
	
		
			
			|  | 109 | +    # prepare the request parameters
 | 
	
		
			
			|  | 110 | +    query = urlencode({'search': query})
 | 
	
		
			
			|  | 111 | +    offset = (params['pageno'] - 1) * 20
 | 
	
		
			
			|  | 112 | +
 | 
	
		
			
			|  | 113 | +    # get request URLs for our language of choice
 | 
	
		
			
			|  | 114 | +    urls = get_lang_urls(language)
 | 
	
		
			
			|  | 115 | +    search_url = urls['base'] + urls['search']
 | 
	
		
			
			|  | 116 | +
 | 
	
		
			
			|  | 117 | +    params['url'] = search_url.format(query=query, offset=offset)
 | 
	
		
			
			|  | 118 | +
 | 
	
		
			
			|  | 119 | +    return params
 | 
	
		
			
			|  | 120 | +
 | 
	
		
			
			|  | 121 | +
 | 
	
		
			
			|  | 122 | +# get response from search-request
 | 
	
		
			
			|  | 123 | +def response(resp):
 | 
	
		
			
			|  | 124 | +    # get the base URL for the language in which request was made
 | 
	
		
			
			|  | 125 | +    language = locale_to_lang_code(resp.search_params['language'])
 | 
	
		
			
			|  | 126 | +    base_url = get_lang_urls(language)['base']
 | 
	
		
			
			|  | 127 | +
 | 
	
		
			
			|  | 128 | +    results = []
 | 
	
		
			
			|  | 129 | +
 | 
	
		
			
			|  | 130 | +    dom = html.fromstring(resp.text)
 | 
	
		
			
			|  | 131 | +
 | 
	
		
			
			|  | 132 | +    # parse results
 | 
	
		
			
			|  | 133 | +    for result in dom.xpath(xpath_results):
 | 
	
		
			
			|  | 134 | +        link = result.xpath(xpath_link)[0]
 | 
	
		
			
			|  | 135 | +        href = urljoin(base_url, link.attrib.get('href'))
 | 
	
		
			
			|  | 136 | +        title = escape(extract_text(link))
 | 
	
		
			
			|  | 137 | +
 | 
	
		
			
			|  | 138 | +        results.append({'url': href,
 | 
	
		
			
			|  | 139 | +                        'title': title})
 | 
	
		
			
			|  | 140 | +
 | 
	
		
			
			|  | 141 | +    return results
 |