by Kozo Nishida (Riken, Japan)
This example demonstrates how to integrate transcriptome data (preprocessed with Bioconductor packages) with a KEGG pathway and visualize the result in Cytoscape.
import requests
import json
# Connection settings for the local REST server used throughout this script
PORT_NUMBER = 1234
BASE_URL = "".join(["http://localhost:", str(PORT_NUMBER), "/v1/"])

# Sent with every request whose body is JSON
HEADERS = dict([('Content-Type', 'application/json')])
# Start from a clean state: issue DELETE on the session endpoint, which
# removes all networks in the current session.
session_endpoint = BASE_URL + 'session'
requests.delete(session_endpoint)
<Response [200]>
# KEGG pathway eco00260, requested from the KEGG REST service in KGML format
pathway_location = "http://rest.kegg.jp/get/eco00260/kgml"

# Ask the server to create a network by fetching the URL itself
# (source=url); the request body is a JSON list of locations.
import_endpoint = BASE_URL + "networks?source=url"
import_body = json.dumps([pathway_location])
res1 = requests.post(import_endpoint, data=import_body, headers=HEADERS)

# The reply is a JSON list with one entry per submitted location; take the
# first network SUID reported for the first (and only) entry.
result = json.loads(res1.content)
first_import = result[0]
pathway_suid = first_import["networkSUID"][0]
print("Pathway SUID = " + str(pathway_suid))
Pathway SUID = 70708
# Install the Bioconductor packages used below via the biocLite installer script.
source("http://bioconductor.org/biocLite.R")
biocLite(c("genefilter", "ecoliLeucine"))

library("ecoliLeucine")
library("genefilter")

# Load the example dataset and turn it into an expression set with rma().
# NOTE(review): rma() is not provided by either package attached above;
# presumably it comes from a dependency of ecoliLeucine -- confirm.
data("ecoliLeucine")
eset <- rma(ecoliLeucine)

# Row-wise t-tests, grouping the samples by the `strain` annotation.
r <- rowttests(eset, eset$strain)

# Keep only the rows with p-value below 0.05 and export them as CSV for
# the Python steps that follow.
filtered <- r[r$p.value < 0.05, ]
write.csv(filtered, file = "ttest.csv")
import pandas as pd

# Load the filtered t-test table exported by the R step. The row names end
# up in a column that pandas labels 'Unnamed: 0'.
ttest_csv_path = 'ttest.csv'
ttest_df = pd.read_csv(ttest_csv_path)
ttest_df.head()
Unnamed: 0 | statistic | dm | p.value | |
---|---|---|---|---|
0 | IG_1070_1689385_1697378_fwd_f_st | 2.459792 | 0.082383 | 0.049133 |
1 | IG_10_10495_10642_rev_st | -3.009316 | -0.046399 | 0.023721 |
2 | IG_1110_1744617_1744723_fwd_st | -2.515037 | -0.169626 | 0.045592 |
3 | IG_1145_1805715_1805819_fwd_st | 3.556263 | 0.368773 | 0.011981 |
4 | IG_1189_1874879_1874911_fwd_st | -2.875842 | -0.276748 | 0.028211 |
# Download the default node table of the imported pathway as TSV.
# BASE_URL is reused (instead of a hard-coded host/port) so this request
# follows PORT_NUMBER like every other call in this script.
deftable = requests.get(BASE_URL + 'networks/' + str(pathway_suid) + '/tables/defaultnode.tsv')

# Response.content is raw bytes, so the file must be opened in binary mode
# (text mode raises TypeError on Python 3). The `with` block also guarantees
# the handle is closed even if the write fails.
with open('defaultnode.tsv', 'wb') as handle:
    handle.write(deftable.content)

# Parse the saved table; the KEGG_ID and KEGG_NODE_LABEL columns are used
# later to match probes to pathway nodes.
deftable_df = pd.read_table('defaultnode.tsv')
deftable_df.head()
SUID | shared name | name | selected | KEGG_NODE_X | KEGG_NODE_Y | KEGG_NODE_WIDTH | KEGG_NODE_HEIGHT | KEGG_NODE_LABEL | KEGG_NODE_LABEL_LIST_FIRST | KEGG_NODE_LABEL_LIST | KEGG_ID | KEGG_NODE_LABEL_COLOR | KEGG_NODE_FILL_COLOR | KEGG_NODE_REACTIONID | KEGG_NODE_TYPE | KEGG_NODE_SHAPE | KEGG_LINK | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 70718 | path:eco00260:46 | path:eco00260:46 | False | 162 | 547 | 46 | 17 | K17755 | K17755 | K17755 | ko:K17755 | #000000 | #FFFFFF | rn:R08211 | ortholog | rectangle | http://www.kegg.jp/dbget-bin/www_bget?K17755 |
1 | 70719 | path:eco00260:47 | path:eco00260:47 | False | 688 | 222 | 46 | 17 | K12235 | K12235 | K12235 | ko:K12235 | #000000 | #FFFFFF | rn:R00589 | ortholog | rectangle | http://www.kegg.jp/dbget-bin/www_bget?K12235 |
2 | 70720 | path:eco00260:48 | path:eco00260:48 | False | 1079 | 930 | 8 | 8 | C16432 | 5-Hydroxyectoine | C16432 | cpd:C16432 | #000000 | #FFFFFF | NaN | compound | circle | http://www.kegg.jp/dbget-bin/www_bget?C16432 |
3 | 70721 | path:eco00260:49 | path:eco00260:49 | False | 1023 | 930 | 46 | 17 | K10674 | K10674 | K10674 | ko:K10674 | #000000 | #FFFFFF | rn:R08050 | ortholog | rectangle | http://www.kegg.jp/dbget-bin/www_bget?K10674 |
4 | 70722 | path:eco00260:50 | path:eco00260:50 | False | 99 | 464 | 46 | 17 | K00499 | K00499 | K00499 | ko:K00499 | #000000 | #FFFFFF | rn:R07409 | ortholog | rectangle | http://www.kegg.jp/dbget-bin/www_bget?K00499 |
import re

# Probe identifiers that refer to a gene embed a "b" followed by four
# digits; that token is what gets looked up in the pathway's KEGG_ID column.
bnum_re = re.compile('b[0-9]{4}')

# Pair every pathway node's KEGG_ID with its label once, outside the probe
# loop, instead of re-walking the DataFrame columns for each probe.
pathway_nodes = list(zip(deftable_df['KEGG_ID'], deftable_df['KEGG_NODE_LABEL']))

# One entry per row of ttest_df, in row order; None means "probe has no
# b-number" or "b-number does not occur in this pathway".
keggids = []
keggnode_labels = []
for probe in ttest_df['Unnamed: 0']:
    matched_id = None
    matched_label = None
    m = bnum_re.search(probe)
    if m:
        bnum = m.group(0)
        for keggid, label in pathway_nodes:
            # str() makes a missing KEGG_ID (NaN, a float) a harmless
            # non-match instead of a TypeError on the `in` test.
            if bnum in str(keggid):
                # No break on purpose: when several nodes contain the same
                # b-number, the last one in table order is kept.
                matched_id = keggid
                matched_label = label
    keggids.append(matched_id)
    keggnode_labels.append(matched_label)

# Attach the matches as two new columns next to the t-test statistics.
s1 = pd.Series(keggids, name='KEGG_ID_INPATHWAY')
s2 = pd.Series(keggnode_labels, name='KEGG_NODE_LABEL_INPATHWAY')
merged_df = pd.concat([ttest_df, s1, s2], axis=1)
merged_df.head()
Unnamed: 0 | statistic | dm | p.value | KEGG_ID_INPATHWAY | KEGG_NODE_LABEL_INPATHWAY | |
---|---|---|---|---|---|---|
0 | IG_1070_1689385_1697378_fwd_f_st | 2.459792 | 0.082383 | 0.049133 | None | None |
1 | IG_10_10495_10642_rev_st | -3.009316 | -0.046399 | 0.023721 | None | None |
2 | IG_1110_1744617_1744723_fwd_st | -2.515037 | -0.169626 | 0.045592 | None | None |
3 | IG_1145_1805715_1805819_fwd_st | 3.556263 | 0.368773 | 0.011981 | None | None |
4 | IG_1189_1874879_1874911_fwd_st | -2.875842 | -0.276748 | 0.028211 | None | None |
# Serialise through pandas and parse back, so the rows become a list of
# plain dicts that json.dumps can handle.
ttestjson = json.loads(merged_df.to_json(orient="records"))

# Body of the table-update request.
# NOTE(review): presumably "key" names the node-table column to join on and
# "dataKey" the matching field in each uploaded row -- confirm against the
# cyREST API documentation.
new_table_data = dict(
    key="KEGG_NODE_LABEL",
    dataKey="KEGG_NODE_LABEL_INPATHWAY",
    data=ttestjson,
)

update_table_url = "".join([BASE_URL, "networks/", str(pathway_suid), "/tables/defaultnode"])
print(update_table_url)

# PUT the rows into the pathway's default node table.
table_payload = json.dumps(new_table_data)
requests.put(update_table_url, data=table_payload, headers=HEADERS)
http://localhost:1234/v1/networks/70708/tables/defaultnode
<Response [200]>
You can now see the t-test results in the Cytoscape default node table!
This workflow integrates the data, but the visualization part is not yet fully automated; that remains a TODO item.