#!/usr/bin/env python
# coding: utf-8

# In[1]:


from __future__ import print_function

import csv
import sys
import xml.etree.ElementTree as ET
    
import pandas as pd
import requests

if sys.version_info.major == 2:
    from StringIO import StringIO
else:
    from io import StringIO


# In[2]:


def get_psiquic(service, query, full_url=False, **kwargs):
    """Perform a PSICQUIC REST request and return the raw response body.

    When ``full_url`` is false, the request URL is the EBI PSICQUIC server
    followed by ``service`` and ``query`` as path segments. When it is
    true, ``service`` is taken as the URL prefix and ``query`` is appended
    to it directly, with no separator added.

    Any extra keyword arguments are sent as URL query parameters; ``format``
    falls back to ``'tab27'`` when the caller does not supply it.

    Returns the ``content`` attribute of the response. A response that is
    not OK is reported through ``raise_for_status()``.
    """
    kwargs.setdefault('format', 'tab27')

    if full_url:
        url = '%s%s' % (service, query)
    else:
        server = 'http://www.ebi.ac.uk/Tools/webservices/psicquic'
        url = '%s/%s/%s' % (server, service, query)

    response = requests.get(url, params=kwargs)
    # raise_for_status() is a no-op for successful responses, so no guard
    # on ``response.ok`` is needed.
    response.raise_for_status()
    return response.content


# In[3]:


def get_databases(db_xml):
    """Yield one summary dict per service element of a parsed registry.

    ``db_xml`` is an iterable of service elements (for example the root
    element returned by ``ET.fromstring``); each service is in turn
    iterated for its child elements. Child tags are compared after
    stripping any ``{namespace}`` prefix.

    Every yielded dict has the keys ``name``, ``active``, ``org_url``,
    ``example`` and ``rest_url``. ``active`` is ``False`` only when the
    element text is exactly ``'false'`` and ``True`` otherwise. A field
    whose element is absent from a service is reported as ``None``.
    """
    # Namespace-stripped tag -> key used in the yielded dict. Other child
    # elements of a service are ignored.
    tag_to_key = {
        'name': 'name',
        'active': 'active',
        'restUrl': 'rest_url',
        'restExample': 'example',
        'organizationUrl': 'org_url',
    }
    output_keys = ('name', 'active', 'org_url', 'example', 'rest_url')

    for service in db_xml:
        # Start from a fresh record for every service so that a missing
        # element cannot pick up the value seen in a previous service.
        record = dict.fromkeys(output_keys)
        for elem in service:
            # '{uri}tag' -> 'tag'; a tag without a namespace is unchanged.
            ns_clean_tag = elem.tag.rpartition('}')[2]
            key = tag_to_key.get(ns_clean_tag)
            if key is None:
                continue
            if key == 'active':
                record[key] = elem.text != 'false'
            else:
                record[key] = elem.text
        yield record

# Download the registry STATUS document as XML and tabulate its services,
# one row per service.
dbs_xml = get_psiquic('registry', 'registry', action='STATUS', format='xml')
dbs_xml_parsed = ET.fromstring(dbs_xml)
dbs = pd.DataFrame.from_records(get_databases(dbs_xml_parsed))

pd.options.display.max_colwidth = 100
active_dbs = dbs[dbs.active == True]
# Notebook display of the active services without the bulkier columns; the
# result is not assigned. `axis` is passed by keyword because recent pandas
# releases no longer accept it positionally in DataFrame.drop.
active_dbs.drop(['active', 'example', 'rest_url'], axis=1)


# In[4]:


# Number of "tp53" hits in IntAct, addressed through the default server.
req = get_psiquic('intact/webservices/current/search/query', 'tp53', format='count')
print(req)

# Ask every active service from the registry for its own "tp53" hit count,
# using the REST URL each service advertises.
count_kwargs = {'full_url': True, 'format': 'count'}
count_line = 'DB: %s, count: %d'
for index, db in active_dbs.iterrows():
    req = get_psiquic(db['rest_url'], 'query/tp53', **count_kwargs)
    count = int(req)
    print(count_line % (db['name'], count))


# In[5]:


# Fetch up to the first 1000 "tp53" records from IntAct in the default
# tab-separated format and collect the prefix before ':' found in the first
# two columns of every record.
req = get_psiquic('intact/webservices/current/search/query', 'tp53',
                  firstResult=0, maxResults=1000)
# get_psiquic returns the undecoded response body. On Python 3 that is
# bytes, which io.StringIO rejects, so decode it first; on Python 2 `str`
# is already the byte string and is passed through untouched.
if not isinstance(req, str):
    req = req.decode('utf-8')  # assumes the service answers in UTF-8 -- TODO confirm
answer = csv.reader(StringIO(req), delimiter='\t')
db_types = set()
for record in answer:
    # Blank lines come back from csv.reader as short/empty rows; skip them
    # instead of failing on the column lookups below.
    if len(record) < 2:
        continue
    db_types.add(record[0].split(':')[0])
    db_types.add(record[1].split(':')[0])
print(db_types)


# In[ ]: