Explorar el Código

Merge 0f4ac029e57c5091add3051cd00e61dd613cf615 into f82ead3e303d75ba63a370dc038311e172e1330d

Alexandre Flament hace 6 años
padre
commit
157106c9da
Ninguna cuenta está vinculada al correo electrónico del colaborador
Se han modificado 3 ficheros con 92 adiciones y 19 borrados
  1. 15
    0
      searx/data/useragents.json
  2. 8
    19
      searx/utils.py
  3. 69
    0
      utils/fetch_firefox_version.py

+ 15
- 0
searx/data/useragents.json Ver fichero

@@ -0,0 +1,15 @@
1
+{
2
+    "os": [
3
+        "Windows NT 10; WOW64", 
4
+        "X11; Linux x86_64"
5
+    ], 
6
+    "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}", 
7
+    "versions": [
8
+        "59.0.2", 
9
+        "59.0.1", 
10
+        "59.0", 
11
+        "58.0.2", 
12
+        "58.0.1", 
13
+        "58.0"
14
+    ]
15
+}

+ 8
- 19
searx/utils.py Ver fichero

@@ -10,8 +10,10 @@ from codecs import getincrementalencoder
10 10
 from imp import load_source
11 11
 from numbers import Number
12 12
 from os.path import splitext, join
13
+from io import open
13 14
 from random import choice
14 15
 import sys
16
+import json
15 17
 
16 18
 from searx import settings
17 19
 from searx.version import VERSION_STRING
@@ -39,28 +41,11 @@ else:
39 41
 
40 42
 logger = logger.getChild('utils')
41 43
 
42
-ua_versions = ('40.0',
43
-               '41.0',
44
-               '42.0',
45
-               '43.0',
46
-               '44.0',
47
-               '45.0',
48
-               '46.0',
49
-               '47.0')
50
-
51
-ua_os = ('Windows NT 6.3; WOW64',
52
-         'X11; Linux x86_64',
53
-         'X11; Linux x86')
54
-
55
-ua = "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}"
56
-
57 44
 blocked_tags = ('script',
58 45
                 'style')
59 46
 
60
-
61
-def gen_useragent():
62
-    # TODO
63
-    return ua.format(os=choice(ua_os), version=choice(ua_versions))
47
# Load the user-agent description ("ua" template, "os" list, "versions" list)
# from data/useragents.json, located next to this module.  The context
# manager closes the file handle once the JSON has been parsed.
with open(os.path.dirname(os.path.realpath(__file__))
          + "/data/useragents.json", 'r', encoding='utf-8') as useragents_file:
    useragents = json.load(useragents_file)
64 49
 
65 50
 
66 51
 def searx_useragent():
@@ -69,6 +54,10 @@ def searx_useragent():
69 54
            suffix=settings['outgoing'].get('useragent_suffix', ''))
70 55
 
71 56
 
57
def gen_useragent():
    """Return a randomly assembled Firefox user-agent string.

    One entry of ``useragents['os']`` and one entry of
    ``useragents['versions']`` are picked at random and substituted into
    the ``useragents['ua']`` template; the result is converted with ``str``.
    """
    picked_os = choice(useragents['os'])
    picked_version = choice(useragents['versions'])
    template = useragents['ua']
    return str(template.format(os=picked_os, version=picked_version))
59
+
60
+
72 61
 def highlight_content(content, query):
73 62
 
74 63
     if not content:

+ 69
- 0
utils/fetch_firefox_version.py Ver fichero

@@ -0,0 +1,69 @@
1
+#!/usr/bin/env python
2
+
3
+# set path
4
+from sys import path
5
+from os.path import realpath, dirname
6
+path.append(realpath(dirname(realpath(__file__)) + '/../'))
7
+
8
+#
9
+import json
10
+import requests
11
+import re
12
+from distutils.version import LooseVersion, StrictVersion
13
+from lxml import html
14
+from searx.url_utils import urlparse, urljoin
15
+
16
# Listing that is fetched and scanned for release links.
URL = 'https://ftp.mozilla.org/pub/firefox/releases/'
# Path prefix used to recognise release links and to cut the version name out of them.
RELEASE_PATH = '/pub/firefox/releases/'

# Accepts names such as "59.0", "59.0.2" or "52.0esr" (single-digit minor and
# patch numbers only).  Raw string: "\." must reach the regex engine unchanged
# and is not a valid Python string escape.
NORMAL_REGEX = re.compile(r'^[0-9]+\.[0-9](\.[0-9])?(esr)?$')
# BETA_REGEX = re.compile('.*[0-9]b([0-9\-a-z]+)$')
# ESR_REGEX = re.compile('^[0-9]+\.[0-9](\.[0-9])?esr$')

# Content of the generated JSON file; "versions" is filled in later with the
# result of fetch_firefox_last_versions().
useragent = {
    "versions": (),
    "os": ('Windows NT 10; WOW64',
           'X11; Linux x86_64'),
    "ua": "Mozilla/5.0 ({os}; rv:{version}) Gecko/20100101 Firefox/{version}"
}
30
+
31
+
32
def fetch_firefox_versions():
    """Return the Firefox versions found in the listing at ``URL``, newest first.

    Every ``href`` of the fetched page is resolved against ``URL``; links
    whose path starts with ``RELEASE_PATH`` and whose remaining name matches
    ``NORMAL_REGEX`` are kept.

    Returns:
        A list of ``LooseVersion`` objects sorted in descending order.

    Raises:
        Exception: if the listing is not answered with HTTP status 200.
    """
    resp = requests.get(URL, timeout=2.0)
    if resp.status_code != 200:
        # status_code is an integer: convert it explicitly, otherwise building
        # the message fails with a TypeError and hides the real problem.
        raise Exception("Error fetching firefox versions, HTTP code " + str(resp.status_code))

    dom = html.fromstring(resp.text)
    versions = []

    for link in dom.xpath('//a/@href'):
        link_path = urlparse(urljoin(URL, link)).path
        if not link_path.startswith(RELEASE_PATH):
            continue
        # Drop the prefix and the last character (presumably the trailing "/"
        # of a directory link).
        version = link_path[len(RELEASE_PATH):-1]
        if NORMAL_REGEX.match(version):
            versions.append(LooseVersion(version))

    versions.sort(reverse=True)
    return versions
50
+
51
+
52
def fetch_firefox_last_versions():
    """Return the version strings of the two most recent Firefox major releases.

    The first entry returned by ``fetch_firefox_versions()`` supplies the
    latest major number.  Every version whose major number is that one or the
    one just below it, and whose parsed components do not contain ``'esr'``,
    is kept in the order received.
    """
    all_versions = fetch_firefox_versions()

    newest_major = all_versions[0].version[0]
    wanted_majors = (newest_major, newest_major - 1)

    return [
        candidate.vstring
        for candidate in all_versions
        if candidate.version[0] in wanted_majors and 'esr' not in candidate.version
    ]
64
+
65
+
66
useragent["versions"] = fetch_firefox_last_versions()
# Resolve the output file relative to this script so the result does not
# depend on the directory the script is started from.
useragents_filename = realpath(dirname(realpath(__file__)) + '/../searx/data/useragents.json')
# Text mode and no ``encoding`` keyword: json.dump() writes text, and Python 3
# rejects the ``encoding`` argument.  The context manager closes the file even
# if serialisation fails.
with open(useragents_filename, "w") as f:
    json.dump(useragent, f, sort_keys=True, indent=4, ensure_ascii=False)