Browse Source

initial commit of pdbe engine

Adds support for queries to the Protein Data Bank in Europe (PDBe).
Alexander Minges 8 years ago
parent
commit
3c5883408c
4 changed files with 226 additions and 0 deletions
  1. 1
    0
      AUTHORS.rst
  2. 109
    0
      searx/engines/pdbe.py
  3. 7
    0
      searx/settings.yml
  4. 109
    0
      tests/unit/engines/test_pdbe.py

+ 1
- 0
AUTHORS.rst View File

@@ -59,3 +59,4 @@ generally made searx better:
59 59
 - Harry Wood @harry-wood
60 60
 - Thomas Renard @threnard
61 61
 - Pydo `<https://github.com/pydo>`_
62
+- Athemis `<https://github.com/Athemis>`_

+ 109
- 0
searx/engines/pdbe.py View File

@@ -0,0 +1,109 @@
1
+"""
2
+ PDBe (Protein Data Bank in Europe)
3
+
4
+ @website       https://www.ebi.ac.uk/pdbe
5
+ @provide-api   yes (https://www.ebi.ac.uk/pdbe/api/doc/search.html),
6
+                unlimited
7
+ @using-api     yes
8
+ @results       python dictionary (from json)
9
+ @stable        yes
10
+ @parse         url, title, content, img_src
11
+"""
12
+
13
+from json import loads
14
+from flask_babel import gettext
15
+
16
# category this engine is registered under
categories = ['science']

# engine option (see settings.yml): intended to hide obsolete PDB entries
hide_obsolete = False

# status codes that mark an entry as unpublished
pdb_unpublished_codes = [
    'HPUB', 'HOLD', 'PROC', 'WAIT', 'AUTH', 'AUCO',
    'REPL', 'POLC', 'REFI', 'TRSF', 'WDRN',
]

# endpoint the search query is sent to
pdbe_solr_url = 'https://www.ebi.ac.uk/pdbe/search/pdb/select?'

# template for the page of a single entry; expects ``pdb_id``
pdbe_entry_url = 'https://www.ebi.ac.uk/pdbe/entry/pdb/{pdb_id}'

# template for the preview image of a structure; expects ``pdb_id``
pdbe_preview_url = 'https://www.ebi.ac.uk/pdbe/static/entry/{pdb_id}_deposited_chain_front_image-200x200.png'
28
+
29
+
30
def request(query, params):
    """Fill ``params`` with the PDBe search request for ``query``.

    Sets the ``url``, ``method`` (POST) and ``data`` entries of ``params``
    and returns the same mapping.

    :param query: search string, sent as the ``q`` form field
    :param params: mutable mapping describing the outgoing request
    :returns: ``params`` with the request fields set
    """
    params.update(
        url=pdbe_solr_url,
        method='POST',
        # 'wt' asks for the response in a parsable (JSON) format
        data={'q': query, 'wt': "json"},
    )
    return params
39
+
40
+
41
def construct_body(result):
    """Build the title, content and preview image URL for one PDBe entry.

    :param result: a single document (dict) from the search response
    :returns: list ``[title, content, img_src]``. ``content`` is ``None``
        when a citation field needed for it is missing; ``img_src`` is
        ``None`` when ``result`` has no ``'pdb_id'``.
    :raises KeyError: if ``result`` has no ``'title'`` key
    """
    title = result['title']

    # template for the content body
    content_template = """{title}<br />{authors} {journal} <strong>{volume}</strong>&nbsp;{page} ({year})"""

    try:
        # .get() so that an entry without a 'journal' key reaches the
        # journal-less fallback instead of losing its content entirely
        if result.get('journal'):
            content = content_template.format(
                title=result['citation_title'],
                authors=result['entry_author_list'][0],
                journal=result['journal'],
                volume=result['journal_volume'],
                page=result['journal_page'],
                year=result['citation_year'])
        else:
            content = content_template.format(
                title=result['citation_title'],
                authors=result['entry_author_list'][0],
                journal='', volume='', page='',
                year=result['release_year'])
    except (KeyError, IndexError):
        # a required field is missing or the author list is empty
        content = None

    # The preview image only depends on the entry id, so it is resolved
    # independently of whether the content could be built.
    try:
        img_src = pdbe_preview_url.format(pdb_id=result['pdb_id'])
    except KeyError:
        img_src = None

    return [title, content, img_src]
71
+
72
+
73
def response(resp):
    """Convert a PDBe search response into a list of result dicts.

    Entries whose status is one of ``pdb_unpublished_codes`` are skipped.
    Entries with status ``'OBS'`` are skipped when ``hide_obsolete`` is set;
    otherwise they are listed with an "(OBSOLETE)" title suffix and a link
    to the superseding entry. All other entries are rendered through
    ``construct_body``.

    :param resp: response object whose ``text`` attribute holds the JSON body
    :returns: list of dicts with keys ``url``, ``title``, ``content`` and
        ``img_src``
    """
    results = []
    docs = loads(resp.text)['response']['docs']

    for result in docs:
        status = result['status']

        # skip entries that have not been published
        if status in pdb_unpublished_codes:
            continue

        if status == 'OBS':
            # only obsolete entries are affected by hide_obsolete
            if hide_obsolete:
                continue

            # expand title to add some sort of warning message
            title = gettext('{title}&nbsp;(OBSOLETE)').format(title=result['title'])
            superseded_url = pdbe_entry_url.format(pdb_id=result['superseded_by'])

            # since we can't construct a proper body from the response, we'll make up our own
            msg_superseded = gettext("This entry has been superseded by")
            content = '<em>{msg_superseded} <a href="{url}">{pdb_id}</a></em>'.format(
                msg_superseded=msg_superseded,
                url=superseded_url,
                pdb_id=result['superseded_by'])

            # obsoleted entries don't have preview images
            img_src = None
        else:
            title, content, img_src = construct_body(result)

        results.append({
            'url': pdbe_entry_url.format(pdb_id=result['pdb_id']),
            'title': title,
            'content': content,
            'img_src': img_src
        })

    return results

+ 7
- 0
searx/settings.yml View File

@@ -339,6 +339,13 @@ engines:
339 339
     disabled : True
340 340
     shortcut : or
341 341
 
342
+  - name : pdbe
343
+    engine : pdbe
344
+    shortcut : pdb
345
+# Hide obsolete PDB entries.
346
+# Default is not to hide obsolete structures
347
+#    hide_obsolete : False
348
+
342 349
   - name : photon
343 350
     engine : photon
344 351
     shortcut : ph

+ 109
- 0
tests/unit/engines/test_pdbe.py View File

@@ -0,0 +1,109 @@
1
+import mock
2
+from collections import defaultdict
3
+from searx.engines import pdbe
4
+from searx.testing import SearxTestCase
5
+
6
+
7
class TestPdbeEngine(SearxTestCase):
    """Unit tests for the ``pdbe`` engine's ``request`` and ``response``."""

    def test_request(self):
        """``request`` sets url, POST method and the q/wt form fields."""
        query = 'test_query'
        dicto = defaultdict(dict)
        params = pdbe.request(query, dicto)
        self.assertIn('url', params)
        self.assertIn('ebi.ac.uk', params['url'])
        self.assertIn('data', params)
        self.assertIn('q', params['data'])
        self.assertIn(query, params['data']['q'])
        self.assertIn('wt', params['data'])
        self.assertIn('json', params['data']['wt'])
        self.assertIn('method', params)
        self.assertEqual(params['method'], 'POST')

    def test_response(self):
        """``response`` rejects non-response input and parses entries."""
        # objects without a ``text`` attribute must raise AttributeError
        self.assertRaises(AttributeError, pdbe.response, None)
        self.assertRaises(AttributeError, pdbe.response, [])
        self.assertRaises(AttributeError, pdbe.response, '')
        self.assertRaises(AttributeError, pdbe.response, '[]')

        json = """
{
  "response": {
    "docs": [
      {
        "citation_title": "X-ray crystal structure of ferric Aplysia limacina myoglobin in different liganded states.",
        "citation_year": 1993,
        "entry_author_list": [
          "Conti E, Moser C, Rizzi M, Mattevi A, Lionetti C, Coda A, Ascenzi P, Brunori M, Bolognesi M"
        ],
        "journal": "J. Mol. Biol.",
        "journal_page": "498-508",
        "journal_volume": "233",
        "pdb_id": "2fal",
        "status": "REL",
        "title": "X-RAY CRYSTAL STRUCTURE OF FERRIC APLYSIA LIMACINA MYOGLOBIN IN DIFFERENT LIGANDED STATES"
      }
    ],
    "numFound": 1,
    "start": 0
  },
  "responseHeader": {
    "QTime": 0,
    "params": {
      "q": "2fal",
      "wt": "json"
    },
    "status": 0
  }
}
"""

        response = mock.Mock(text=json)
        results = pdbe.response(response)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['title'],
                         'X-RAY CRYSTAL STRUCTURE OF FERRIC APLYSIA LIMACINA MYOGLOBIN IN DIFFERENT LIGANDED STATES')
        self.assertEqual(results[0]['url'], pdbe.pdbe_entry_url.format(pdb_id='2fal'))
        self.assertEqual(results[0]['img_src'], pdbe.pdbe_preview_url.format(pdb_id='2fal'))
        self.assertIn('Conti E', results[0]['content'])
        self.assertIn('X-ray crystal structure of ferric Aplysia limacina myoglobin in different liganded states.',
                      results[0]['content'])
        self.assertIn('1993', results[0]['content'])

        # Testing proper handling of PDB entries marked as obsolete
        json = """
{
  "response": {
    "docs": [
      {
        "citation_title": "Obsolete entry test",
        "citation_year": 2016,
        "entry_author_list": ["Doe J"],
        "journal": "J. Obs.",
        "journal_page": "1-2",
        "journal_volume": "1",
        "pdb_id": "xxxx",
        "status": "OBS",
        "title": "OBSOLETE ENTRY TEST",
        "superseded_by": "yyyy"
      }
    ],
    "numFound": 1,
    "start": 0
  },
  "responseHeader": {
    "QTime": 0,
    "params": {
      "q": "xxxx",
      "wt": "json"
    },
    "status": 0
  }
}
"""
        response = mock.Mock(text=json)
        results = pdbe.response(response)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['title'], 'OBSOLETE ENTRY TEST&nbsp;(OBSOLETE)')
        self.assertTrue(results[0]['content'].startswith('<em>This entry has been superseded by'))