import pandas as pd
import numpy as np
import scipy
from IPython.display import HTML
from scipy.cluster.hierarchy import linkage, dendrogram
from scipy.spatial.distance import pdist, squareform
from IPython.display import Image
from pandas import *
# Load the space-delimited matrix; the file has no header row, so the
# columns get integer names and column 0 carries the sample file names.
GOS = pd.read_csv('../data/matrix_species.out', delimiter=' ', header=None)
#label=pd.read_csv('../data/config-GOS.txt',delimiter=' ',header=None)

# Echo every raw sample file name.
pre_label = GOS.iloc[:, 0].tolist()
for stuff in pre_label:
    print(stuff)

# Short sample id = everything before the first ".trimmed." in the name.
label = [str(name).split(".trimmed.")[0] for name in pre_label]
len(label)
print(label)
GOS
O2.UC-11.trimmed.fa.clippered.corr.comb O2.UC-12.trimmed.fa.clippered.corr.comb O2.UC-13.trimmed.fa.clippered.corr.comb O2.UC-14.trimmed.fa.clippered.corr.comb O2.UC-16.trimmed.fa.clippered.corr.comb O2.UC-17.trimmed.fa.clippered.corr.comb O2.UC-18.trimmed.fa.clippered.corr.comb O2.UC-19.trimmed.fa.clippered.corr.comb O2.UC-1.trimmed.fa.clippered.corr.comb O2.UC-20.trimmed.fa.clippered.corr.comb O2.UC-21.trimmed.fa.clippered.corr.comb O2.UC-22.trimmed.fa.clippered.corr.comb O2.UC-23.trimmed.fa.clippered.corr.comb O2.UC-24.trimmed.fa.clippered.corr.comb O2.UC-4.trimmed.fa.clippered.corr.comb V1.CD-11.trimmed.fa.clippered.corr.comb V1.CD-12.trimmed.fa.clippered.corr.comb V1.CD-13.trimmed.fa.clippered.corr.comb V1.CD-14.trimmed.fa.clippered.corr.comb V1.CD-15.trimmed.fa.clippered.corr.comb V1.CD-1.trimmed.fa.clippered.corr.comb V1.CD-2.trimmed.fa.clippered.corr.comb V1.CD-3.trimmed.fa.clippered.corr.comb V1.CD-4.trimmed.fa.clippered.corr.comb V1.CD-6.trimmed.fa.clippered.corr.comb V1.CD-8.trimmed.fa.clippered.corr.comb V1.CD-9.trimmed.fa.clippered.corr.comb V1.UC-10.trimmed.fa.clippered.corr.comb V1.UC-13.trimmed.fa.clippered.corr.comb V1.UC-14.trimmed.fa.clippered.corr.comb V1.UC-15.trimmed.fa.clippered.corr.comb V1.UC-17.trimmed.fa.clippered.corr.comb V1.UC-18.trimmed.fa.clippered.corr.comb V1.UC-19.trimmed.fa.clippered.corr.comb V1.UC-21.trimmed.fa.clippered.corr.comb V1.UC-6.trimmed.fa.clippered.corr.comb V1.UC-7.trimmed.fa.clippered.corr.comb V1.UC-8.trimmed.fa.clippered.corr.comb V1.UC-9.trimmed.fa.clippered.corr.comb ['O2.UC-11', 'O2.UC-12', 'O2.UC-13', 'O2.UC-14', 'O2.UC-16', 'O2.UC-17', 'O2.UC-18', 'O2.UC-19', 'O2.UC-1', 'O2.UC-20', 'O2.UC-21', 'O2.UC-22', 'O2.UC-23', 'O2.UC-24', 'O2.UC-4', 'V1.CD-11', 'V1.CD-12', 'V1.CD-13', 'V1.CD-14', 'V1.CD-15', 'V1.CD-1', 'V1.CD-2', 'V1.CD-3', 'V1.CD-4', 'V1.CD-6', 'V1.CD-8', 'V1.CD-9', 'V1.UC-10', 'V1.UC-13', 'V1.UC-14', 'V1.UC-15', 'V1.UC-17', 'V1.UC-18', 'V1.UC-19', 'V1.UC-21', 'V1.UC-6', 'V1.UC-7', 'V1.UC-8', 
'V1.UC-9']
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | O2.UC-11.trimmed.fa.clippered.corr.comb | 0.000000 | 0.984968 | 0.975543 | 0.991098 | 0.990754 | 0.990321 | 0.991366 | 0.971941 | 0.983234 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
1 | O2.UC-12.trimmed.fa.clippered.corr.comb | 0.984968 | 0.000000 | 0.981335 | 0.985827 | 0.977879 | 0.979988 | 0.988925 | 0.964062 | 0.972836 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
2 | O2.UC-13.trimmed.fa.clippered.corr.comb | 0.975543 | 0.981335 | 0.000000 | 0.986988 | 0.990266 | 0.985479 | 0.994523 | 0.979273 | 0.977122 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
3 | O2.UC-14.trimmed.fa.clippered.corr.comb | 0.991098 | 0.985827 | 0.986988 | 0.000000 | 0.977179 | 0.957241 | 0.992617 | 0.986381 | 0.966148 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
4 | O2.UC-16.trimmed.fa.clippered.corr.comb | 0.990754 | 0.977879 | 0.990266 | 0.977179 | 0.000000 | 0.969994 | 0.994952 | 0.986598 | 0.957323 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
5 | O2.UC-17.trimmed.fa.clippered.corr.comb | 0.990321 | 0.979988 | 0.985479 | 0.957241 | 0.969994 | 0.000000 | 0.986233 | 0.985815 | 0.940492 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
6 | O2.UC-18.trimmed.fa.clippered.corr.comb | 0.991366 | 0.988925 | 0.994523 | 0.992617 | 0.994952 | 0.986233 | 0.000000 | 0.989288 | 0.988595 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
7 | O2.UC-19.trimmed.fa.clippered.corr.comb | 0.971941 | 0.964062 | 0.979273 | 0.986381 | 0.986598 | 0.985815 | 0.989288 | 0.000000 | 0.965407 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
8 | O2.UC-1.trimmed.fa.clippered.corr.comb | 0.983234 | 0.972836 | 0.977122 | 0.966148 | 0.957323 | 0.940492 | 0.988595 | 0.965407 | 0.000000 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
9 | O2.UC-20.trimmed.fa.clippered.corr.comb | 0.969764 | 0.976001 | 0.979541 | 0.985309 | 0.992730 | 0.981898 | 0.987390 | 0.964807 | 0.981508 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
10 | O2.UC-21.trimmed.fa.clippered.corr.comb | 0.997622 | 0.998699 | 0.996403 | 0.997704 | 0.998753 | 0.997758 | 0.998396 | 0.998250 | 0.998785 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
11 | O2.UC-22.trimmed.fa.clippered.corr.comb | 0.981130 | 0.980406 | 0.981299 | 0.978463 | 0.992812 | 0.977620 | 0.978923 | 0.974594 | 0.986353 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
12 | O2.UC-23.trimmed.fa.clippered.corr.comb | 0.984447 | 0.972965 | 0.980964 | 0.988268 | 0.994213 | 0.988335 | 0.986989 | 0.968038 | 0.980521 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
13 | O2.UC-24.trimmed.fa.clippered.corr.comb | 0.982351 | 0.972010 | 0.981508 | 0.957615 | 0.962040 | 0.919765 | 0.992487 | 0.976860 | 0.940634 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
14 | O2.UC-4.trimmed.fa.clippered.corr.comb | 0.988579 | 0.994717 | 0.985271 | 0.990194 | 0.995350 | 0.993924 | 0.995078 | 0.987368 | 0.992522 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
15 | V1.CD-11.trimmed.fa.clippered.corr.comb | 0.989466 | 0.994130 | 0.987616 | 0.989969 | 0.997060 | 0.986563 | 0.986087 | 0.991634 | 0.995343 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
16 | V1.CD-12.trimmed.fa.clippered.corr.comb | 0.995049 | 0.991574 | 0.994829 | 0.983964 | 0.997119 | 0.989323 | 0.956587 | 0.983880 | 0.992616 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
17 | V1.CD-13.trimmed.fa.clippered.corr.comb | 0.946449 | 0.947449 | 0.971230 | 0.981234 | 0.985573 | 0.985884 | 0.982174 | 0.960176 | 0.978126 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
18 | V1.CD-14.trimmed.fa.clippered.corr.comb | 0.980329 | 0.966711 | 0.974045 | 0.976920 | 0.992546 | 0.981639 | 0.988111 | 0.958290 | 0.969359 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
19 | V1.CD-15.trimmed.fa.clippered.corr.comb | 0.940929 | 0.979535 | 0.981849 | 0.983622 | 0.995719 | 0.977142 | 0.981263 | 0.969507 | 0.986944 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
20 | V1.CD-1.trimmed.fa.clippered.corr.comb | 0.993891 | 0.988433 | 0.988552 | 0.984780 | 0.996230 | 0.977126 | 0.986633 | 0.986580 | 0.994292 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
21 | V1.CD-2.trimmed.fa.clippered.corr.comb | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.966567 | 0.958694 | 0.972513 | 0.984443 | 0.990661 | 0.984929 | 0.979776 | 0.976809 | 0.968202 | 0.951865 |
22 | V1.CD-3.trimmed.fa.clippered.corr.comb | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.960166 | 0.969740 | 0.951027 | 0.968895 | 0.991520 | 0.992930 | 0.930383 | 0.982502 | 0.955651 | 0.960562 |
23 | V1.CD-4.trimmed.fa.clippered.corr.comb | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.977162 | 0.992468 | 0.977674 | 0.989636 | 0.992787 | 0.980246 | 0.970508 | 0.982373 | 0.987452 | 0.981684 |
24 | V1.CD-6.trimmed.fa.clippered.corr.comb | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.990094 | 0.992652 | 0.989768 | 0.989560 | 0.995195 | 0.989052 | 0.985580 | 0.991442 | 0.984396 | 0.991242 |
25 | V1.CD-8.trimmed.fa.clippered.corr.comb | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.976741 | 0.983809 | 0.983220 | 0.987564 | 0.988551 | 0.987281 | 0.979601 | 0.962464 | 0.972664 | 0.980977 |
26 | V1.CD-9.trimmed.fa.clippered.corr.comb | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.974807 | 0.974492 | 0.980131 | 0.988173 | 0.987686 | 0.969512 | 0.981129 | 0.980680 | 0.969399 | 0.971969 |
27 | V1.UC-10.trimmed.fa.clippered.corr.comb | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.963936 | 0.973027 | 0.953053 | 0.982558 | 0.989918 | 0.987053 | 0.964775 | 0.977011 | 0.971556 | 0.962640 |
28 | V1.UC-13.trimmed.fa.clippered.corr.comb | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.968598 | 0.971909 | 0.984319 | 0.976985 | 0.992932 | 0.987816 | 0.971534 | 0.986723 | 0.959647 | 0.973065 |
29 | V1.UC-14.trimmed.fa.clippered.corr.comb | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.000000 | 0.975791 | 0.967284 | 0.985336 | 0.990910 | 0.986948 | 0.977532 | 0.971271 | 0.955809 | 0.971796 |
30 | V1.UC-15.trimmed.fa.clippered.corr.comb | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.975791 | 0.000000 | 0.980196 | 0.976490 | 0.986267 | 0.987606 | 0.985863 | 0.984048 | 0.970740 | 0.956262 |
31 | V1.UC-17.trimmed.fa.clippered.corr.comb | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.967284 | 0.980196 | 0.000000 | 0.980102 | 0.986288 | 0.984708 | 0.967099 | 0.983253 | 0.981986 | 0.961695 |
32 | V1.UC-18.trimmed.fa.clippered.corr.comb | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.985336 | 0.976490 | 0.980102 | 0.000000 | 0.983417 | 0.987396 | 0.963790 | 0.984856 | 0.973331 | 0.985179 |
33 | V1.UC-19.trimmed.fa.clippered.corr.comb | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.990910 | 0.986267 | 0.986288 | 0.983417 | 0.000000 | 0.991847 | 0.987864 | 0.991273 | 0.986927 | 0.982452 |
34 | V1.UC-21.trimmed.fa.clippered.corr.comb | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.986948 | 0.987606 | 0.984708 | 0.987396 | 0.991847 | 0.000000 | 0.989299 | 0.985895 | 0.984929 | 0.985725 |
35 | V1.UC-6.trimmed.fa.clippered.corr.comb | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.977532 | 0.985863 | 0.967099 | 0.963790 | 0.987864 | 0.989299 | 0.000000 | 0.974708 | 0.964166 | 0.973366 |
36 | V1.UC-7.trimmed.fa.clippered.corr.comb | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.971271 | 0.984048 | 0.983253 | 0.984856 | 0.991273 | 0.985895 | 0.974708 | 0.000000 | 0.973391 | 0.974878 |
37 | V1.UC-8.trimmed.fa.clippered.corr.comb | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.955809 | 0.970740 | 0.981986 | 0.973331 | 0.986927 | 0.984929 | 0.964166 | 0.973391 | 0.000000 | 0.966591 |
38 | V1.UC-9.trimmed.fa.clippered.corr.comb | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.971796 | 0.956262 | 0.961695 | 0.985179 | 0.982452 | 0.985725 | 0.973366 | 0.974878 | 0.966591 | 0.000000 |
39 rows × 40 columns
# Keep only the numeric part of the matrix by dropping column 0 (the
# sample-name column). The columns still carry the integer names assigned
# by read_csv(header=None), so a label slice starting at 1 selects every
# remaining column without hard-coding the matrix size.
# `.ix` was deprecated and later removed from pandas; `.loc` is the
# label-based replacement.
GOS = GOS.loc[:, 1:]
GOS
1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | ... | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.000000 | 0.984968 | 0.975543 | 0.991098 | 0.990754 | 0.990321 | 0.991366 | 0.971941 | 0.983234 | 0.969764 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
1 | 0.984968 | 0.000000 | 0.981335 | 0.985827 | 0.977879 | 0.979988 | 0.988925 | 0.964062 | 0.972836 | 0.976001 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
2 | 0.975543 | 0.981335 | 0.000000 | 0.986988 | 0.990266 | 0.985479 | 0.994523 | 0.979273 | 0.977122 | 0.979541 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
3 | 0.991098 | 0.985827 | 0.986988 | 0.000000 | 0.977179 | 0.957241 | 0.992617 | 0.986381 | 0.966148 | 0.985309 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
4 | 0.990754 | 0.977879 | 0.990266 | 0.977179 | 0.000000 | 0.969994 | 0.994952 | 0.986598 | 0.957323 | 0.992730 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
5 | 0.990321 | 0.979988 | 0.985479 | 0.957241 | 0.969994 | 0.000000 | 0.986233 | 0.985815 | 0.940492 | 0.981898 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
6 | 0.991366 | 0.988925 | 0.994523 | 0.992617 | 0.994952 | 0.986233 | 0.000000 | 0.989288 | 0.988595 | 0.987390 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
7 | 0.971941 | 0.964062 | 0.979273 | 0.986381 | 0.986598 | 0.985815 | 0.989288 | 0.000000 | 0.965407 | 0.964807 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
8 | 0.983234 | 0.972836 | 0.977122 | 0.966148 | 0.957323 | 0.940492 | 0.988595 | 0.965407 | 0.000000 | 0.981508 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
9 | 0.969764 | 0.976001 | 0.979541 | 0.985309 | 0.992730 | 0.981898 | 0.987390 | 0.964807 | 0.981508 | 0.000000 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
10 | 0.997622 | 0.998699 | 0.996403 | 0.997704 | 0.998753 | 0.997758 | 0.998396 | 0.998250 | 0.998785 | 0.997055 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
11 | 0.981130 | 0.980406 | 0.981299 | 0.978463 | 0.992812 | 0.977620 | 0.978923 | 0.974594 | 0.986353 | 0.949049 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
12 | 0.984447 | 0.972965 | 0.980964 | 0.988268 | 0.994213 | 0.988335 | 0.986989 | 0.968038 | 0.980521 | 0.954648 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
13 | 0.982351 | 0.972010 | 0.981508 | 0.957615 | 0.962040 | 0.919765 | 0.992487 | 0.976860 | 0.940634 | 0.981117 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
14 | 0.988579 | 0.994717 | 0.985271 | 0.990194 | 0.995350 | 0.993924 | 0.995078 | 0.987368 | 0.992522 | 0.980808 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
15 | 0.989466 | 0.994130 | 0.987616 | 0.989969 | 0.997060 | 0.986563 | 0.986087 | 0.991634 | 0.995343 | 0.986433 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
16 | 0.995049 | 0.991574 | 0.994829 | 0.983964 | 0.997119 | 0.989323 | 0.956587 | 0.983880 | 0.992616 | 0.976638 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
17 | 0.946449 | 0.947449 | 0.971230 | 0.981234 | 0.985573 | 0.985884 | 0.982174 | 0.960176 | 0.978126 | 0.959651 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
18 | 0.980329 | 0.966711 | 0.974045 | 0.976920 | 0.992546 | 0.981639 | 0.988111 | 0.958290 | 0.969359 | 0.961448 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
19 | 0.940929 | 0.979535 | 0.981849 | 0.983622 | 0.995719 | 0.977142 | 0.981263 | 0.969507 | 0.986944 | 0.944355 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
20 | 0.993891 | 0.988433 | 0.988552 | 0.984780 | 0.996230 | 0.977126 | 0.986633 | 0.986580 | 0.994292 | 0.975247 | ... | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 |
21 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.966567 | 0.958694 | 0.972513 | 0.984443 | 0.990661 | 0.984929 | 0.979776 | 0.976809 | 0.968202 | 0.951865 |
22 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.960166 | 0.969740 | 0.951027 | 0.968895 | 0.991520 | 0.992930 | 0.930383 | 0.982502 | 0.955651 | 0.960562 |
23 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.977162 | 0.992468 | 0.977674 | 0.989636 | 0.992787 | 0.980246 | 0.970508 | 0.982373 | 0.987452 | 0.981684 |
24 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.990094 | 0.992652 | 0.989768 | 0.989560 | 0.995195 | 0.989052 | 0.985580 | 0.991442 | 0.984396 | 0.991242 |
25 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.976741 | 0.983809 | 0.983220 | 0.987564 | 0.988551 | 0.987281 | 0.979601 | 0.962464 | 0.972664 | 0.980977 |
26 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.974807 | 0.974492 | 0.980131 | 0.988173 | 0.987686 | 0.969512 | 0.981129 | 0.980680 | 0.969399 | 0.971969 |
27 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.963936 | 0.973027 | 0.953053 | 0.982558 | 0.989918 | 0.987053 | 0.964775 | 0.977011 | 0.971556 | 0.962640 |
28 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.968598 | 0.971909 | 0.984319 | 0.976985 | 0.992932 | 0.987816 | 0.971534 | 0.986723 | 0.959647 | 0.973065 |
29 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.000000 | 0.975791 | 0.967284 | 0.985336 | 0.990910 | 0.986948 | 0.977532 | 0.971271 | 0.955809 | 0.971796 |
30 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.975791 | 0.000000 | 0.980196 | 0.976490 | 0.986267 | 0.987606 | 0.985863 | 0.984048 | 0.970740 | 0.956262 |
31 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.967284 | 0.980196 | 0.000000 | 0.980102 | 0.986288 | 0.984708 | 0.967099 | 0.983253 | 0.981986 | 0.961695 |
32 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.985336 | 0.976490 | 0.980102 | 0.000000 | 0.983417 | 0.987396 | 0.963790 | 0.984856 | 0.973331 | 0.985179 |
33 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.990910 | 0.986267 | 0.986288 | 0.983417 | 0.000000 | 0.991847 | 0.987864 | 0.991273 | 0.986927 | 0.982452 |
34 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.986948 | 0.987606 | 0.984708 | 0.987396 | 0.991847 | 0.000000 | 0.989299 | 0.985895 | 0.984929 | 0.985725 |
35 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.977532 | 0.985863 | 0.967099 | 0.963790 | 0.987864 | 0.989299 | 0.000000 | 0.974708 | 0.964166 | 0.973366 |
36 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.971271 | 0.984048 | 0.983253 | 0.984856 | 0.991273 | 0.985895 | 0.974708 | 0.000000 | 0.973391 | 0.974878 |
37 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.955809 | 0.970740 | 0.981986 | 0.973331 | 0.986927 | 0.984929 | 0.964166 | 0.973391 | 0.000000 | 0.966591 |
38 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | ... | 0.971796 | 0.956262 | 0.961695 | 0.985179 | 0.982452 | 0.985725 | 0.973366 | 0.974878 | 0.966591 | 0.000000 |
39 rows × 39 columns
# Use the short sample ids as both row and column labels, show the result,
# and write the labelled matrix out as a tab-separated file.
GOS.columns = label
GOS.index = label
print(GOS)
GOS.to_csv("../data/dm_Methit_new.txt", sep="\t")
O2.UC-11 O2.UC-12 O2.UC-13 O2.UC-14 O2.UC-16 O2.UC-17 \ O2.UC-11 0.000000 0.984968 0.975543 0.991098 0.990754 0.990321 O2.UC-12 0.984968 0.000000 0.981335 0.985827 0.977879 0.979988 O2.UC-13 0.975543 0.981335 0.000000 0.986988 0.990266 0.985479 O2.UC-14 0.991098 0.985827 0.986988 0.000000 0.977179 0.957241 O2.UC-16 0.990754 0.977879 0.990266 0.977179 0.000000 0.969994 O2.UC-17 0.990321 0.979988 0.985479 0.957241 0.969994 0.000000 O2.UC-18 0.991366 0.988925 0.994523 0.992617 0.994952 0.986233 O2.UC-19 0.971941 0.964062 0.979273 0.986381 0.986598 0.985815 O2.UC-1 0.983234 0.972836 0.977122 0.966148 0.957323 0.940492 O2.UC-20 0.969764 0.976001 0.979541 0.985309 0.992730 0.981898 O2.UC-21 0.997622 0.998699 0.996403 0.997704 0.998753 0.997758 O2.UC-22 0.981130 0.980406 0.981299 0.978463 0.992812 0.977620 O2.UC-23 0.984447 0.972965 0.980964 0.988268 0.994213 0.988335 O2.UC-24 0.982351 0.972010 0.981508 0.957615 0.962040 0.919765 O2.UC-4 0.988579 0.994717 0.985271 0.990194 0.995350 0.993924 V1.CD-11 0.989466 0.994130 0.987616 0.989969 0.997060 0.986563 V1.CD-12 0.995049 0.991574 0.994829 0.983964 0.997119 0.989323 V1.CD-13 0.946449 0.947449 0.971230 0.981234 0.985573 0.985884 V1.CD-14 0.980329 0.966711 0.974045 0.976920 0.992546 0.981639 V1.CD-15 0.940929 0.979535 0.981849 0.983622 0.995719 0.977142 V1.CD-1 0.993891 0.988433 0.988552 0.984780 0.996230 0.977126 V1.CD-2 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 V1.CD-3 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 V1.CD-4 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 V1.CD-6 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 V1.CD-8 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 V1.CD-9 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 V1.UC-10 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 V1.UC-13 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 V1.UC-14 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 V1.UC-15 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 
V1.UC-17 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 V1.UC-18 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 V1.UC-19 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 V1.UC-21 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 V1.UC-6 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 V1.UC-7 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 V1.UC-8 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 V1.UC-9 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 O2.UC-18 O2.UC-19 O2.UC-1 O2.UC-20 ... V1.UC-14 \ O2.UC-11 0.991366 0.971941 0.983234 0.969764 ... 1.000000 O2.UC-12 0.988925 0.964062 0.972836 0.976001 ... 1.000000 O2.UC-13 0.994523 0.979273 0.977122 0.979541 ... 1.000000 O2.UC-14 0.992617 0.986381 0.966148 0.985309 ... 1.000000 O2.UC-16 0.994952 0.986598 0.957323 0.992730 ... 1.000000 O2.UC-17 0.986233 0.985815 0.940492 0.981898 ... 1.000000 O2.UC-18 0.000000 0.989288 0.988595 0.987390 ... 1.000000 O2.UC-19 0.989288 0.000000 0.965407 0.964807 ... 1.000000 O2.UC-1 0.988595 0.965407 0.000000 0.981508 ... 1.000000 O2.UC-20 0.987390 0.964807 0.981508 0.000000 ... 1.000000 O2.UC-21 0.998396 0.998250 0.998785 0.997055 ... 1.000000 O2.UC-22 0.978923 0.974594 0.986353 0.949049 ... 1.000000 O2.UC-23 0.986989 0.968038 0.980521 0.954648 ... 1.000000 O2.UC-24 0.992487 0.976860 0.940634 0.981117 ... 1.000000 O2.UC-4 0.995078 0.987368 0.992522 0.980808 ... 1.000000 V1.CD-11 0.986087 0.991634 0.995343 0.986433 ... 1.000000 V1.CD-12 0.956587 0.983880 0.992616 0.976638 ... 1.000000 V1.CD-13 0.982174 0.960176 0.978126 0.959651 ... 1.000000 V1.CD-14 0.988111 0.958290 0.969359 0.961448 ... 1.000000 V1.CD-15 0.981263 0.969507 0.986944 0.944355 ... 1.000000 V1.CD-1 0.986633 0.986580 0.994292 0.975247 ... 1.000000 V1.CD-2 1.000000 1.000000 1.000000 1.000000 ... 0.966567 V1.CD-3 1.000000 1.000000 1.000000 1.000000 ... 0.960166 V1.CD-4 1.000000 1.000000 1.000000 1.000000 ... 0.977162 V1.CD-6 1.000000 1.000000 1.000000 1.000000 ... 
0.990094 V1.CD-8 1.000000 1.000000 1.000000 1.000000 ... 0.976741 V1.CD-9 1.000000 1.000000 1.000000 1.000000 ... 0.974807 V1.UC-10 1.000000 1.000000 1.000000 1.000000 ... 0.963936 V1.UC-13 1.000000 1.000000 1.000000 1.000000 ... 0.968598 V1.UC-14 1.000000 1.000000 1.000000 1.000000 ... 0.000000 V1.UC-15 1.000000 1.000000 1.000000 1.000000 ... 0.975791 V1.UC-17 1.000000 1.000000 1.000000 1.000000 ... 0.967284 V1.UC-18 1.000000 1.000000 1.000000 1.000000 ... 0.985336 V1.UC-19 1.000000 1.000000 1.000000 1.000000 ... 0.990910 V1.UC-21 1.000000 1.000000 1.000000 1.000000 ... 0.986948 V1.UC-6 1.000000 1.000000 1.000000 1.000000 ... 0.977532 V1.UC-7 1.000000 1.000000 1.000000 1.000000 ... 0.971271 V1.UC-8 1.000000 1.000000 1.000000 1.000000 ... 0.955809 V1.UC-9 1.000000 1.000000 1.000000 1.000000 ... 0.971796 V1.UC-15 V1.UC-17 V1.UC-18 V1.UC-19 V1.UC-21 V1.UC-6 \ O2.UC-11 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 O2.UC-12 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 O2.UC-13 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 O2.UC-14 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 O2.UC-16 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 O2.UC-17 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 O2.UC-18 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 O2.UC-19 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 O2.UC-1 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 O2.UC-20 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 O2.UC-21 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 O2.UC-22 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 O2.UC-23 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 O2.UC-24 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 O2.UC-4 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 V1.CD-11 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 V1.CD-12 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 V1.CD-13 1.000000 1.000000 1.000000 1.000000 1.000000 
1.000000 V1.CD-14 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 V1.CD-15 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 V1.CD-1 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 V1.CD-2 0.958694 0.972513 0.984443 0.990661 0.984929 0.979776 V1.CD-3 0.969740 0.951027 0.968895 0.991520 0.992930 0.930383 V1.CD-4 0.992468 0.977674 0.989636 0.992787 0.980246 0.970508 V1.CD-6 0.992652 0.989768 0.989560 0.995195 0.989052 0.985580 V1.CD-8 0.983809 0.983220 0.987564 0.988551 0.987281 0.979601 V1.CD-9 0.974492 0.980131 0.988173 0.987686 0.969512 0.981129 V1.UC-10 0.973027 0.953053 0.982558 0.989918 0.987053 0.964775 V1.UC-13 0.971909 0.984319 0.976985 0.992932 0.987816 0.971534 V1.UC-14 0.975791 0.967284 0.985336 0.990910 0.986948 0.977532 V1.UC-15 0.000000 0.980196 0.976490 0.986267 0.987606 0.985863 V1.UC-17 0.980196 0.000000 0.980102 0.986288 0.984708 0.967099 V1.UC-18 0.976490 0.980102 0.000000 0.983417 0.987396 0.963790 V1.UC-19 0.986267 0.986288 0.983417 0.000000 0.991847 0.987864 V1.UC-21 0.987606 0.984708 0.987396 0.991847 0.000000 0.989299 V1.UC-6 0.985863 0.967099 0.963790 0.987864 0.989299 0.000000 V1.UC-7 0.984048 0.983253 0.984856 0.991273 0.985895 0.974708 V1.UC-8 0.970740 0.981986 0.973331 0.986927 0.984929 0.964166 V1.UC-9 0.956262 0.961695 0.985179 0.982452 0.985725 0.973366 V1.UC-7 V1.UC-8 V1.UC-9 O2.UC-11 1.000000 1.000000 1.000000 O2.UC-12 1.000000 1.000000 1.000000 O2.UC-13 1.000000 1.000000 1.000000 O2.UC-14 1.000000 1.000000 1.000000 O2.UC-16 1.000000 1.000000 1.000000 O2.UC-17 1.000000 1.000000 1.000000 O2.UC-18 1.000000 1.000000 1.000000 O2.UC-19 1.000000 1.000000 1.000000 O2.UC-1 1.000000 1.000000 1.000000 O2.UC-20 1.000000 1.000000 1.000000 O2.UC-21 1.000000 1.000000 1.000000 O2.UC-22 1.000000 1.000000 1.000000 O2.UC-23 1.000000 1.000000 1.000000 O2.UC-24 1.000000 1.000000 1.000000 O2.UC-4 1.000000 1.000000 1.000000 V1.CD-11 1.000000 1.000000 1.000000 V1.CD-12 1.000000 1.000000 1.000000 V1.CD-13 1.000000 1.000000 1.000000 V1.CD-14 
1.000000 1.000000 1.000000 V1.CD-15 1.000000 1.000000 1.000000 V1.CD-1 1.000000 1.000000 1.000000 V1.CD-2 0.976809 0.968202 0.951865 V1.CD-3 0.982502 0.955651 0.960562 V1.CD-4 0.982373 0.987452 0.981684 V1.CD-6 0.991442 0.984396 0.991242 V1.CD-8 0.962464 0.972664 0.980977 V1.CD-9 0.980680 0.969399 0.971969 V1.UC-10 0.977011 0.971556 0.962640 V1.UC-13 0.986723 0.959647 0.973065 V1.UC-14 0.971271 0.955809 0.971796 V1.UC-15 0.984048 0.970740 0.956262 V1.UC-17 0.983253 0.981986 0.961695 V1.UC-18 0.984856 0.973331 0.985179 V1.UC-19 0.991273 0.986927 0.982452 V1.UC-21 0.985895 0.984929 0.985725 V1.UC-6 0.974708 0.964166 0.973366 V1.UC-7 0.000000 0.973391 0.974878 V1.UC-8 0.973391 0.000000 0.966591 V1.UC-9 0.974878 0.966591 0.000000 [39 rows x 39 columns]
# Parse the sample annotation file into `labels`.
#
# The file is read as a flat sequence of lines grouped in records of six:
# the first line of each record is the sample name (used as the dict key),
# and the whole record is stored as one string with its lines joined by '>'.
#
# splitlines() recognises '\r', '\n' and '\r\n', so the file is parsed the
# same way whether or not the runtime translates newlines on read; the
# `with` block guarantees the handle is closed.
with open('../data/MH_label.txt', 'r') as f_label:
    lines = f_label.read().splitlines()

labels = {}
for count, line in enumerate(lines, 1):
    line = line.rstrip()
    if count % 6 == 1:
        # First line of a six-line record: start a new entry.
        sample = line
        labels[sample] = line
    else:
        # Remaining lines of the record are appended to the current entry.
        labels[sample] = labels[sample] + '>' + line
#print labels
# For every matrix column, fetch its '>'-joined annotation record.
# Anything from '_out' onwards in the column name is ignored for the lookup;
# a column with no matching record raises KeyError.
label = GOS.columns
label
L = [labels[name.split('_out')[0]] for name in label]
L
# Build two identical per-sample metadata tables from the '>'-joined
# annotation records in L, then export one of them as a TSV file.
sample_md = {}
new_sample = {}
for each_L in L:
    fields = each_L.split(">")
    #label = fields[0]+'_out.list.freq_table'
    label = fields[0]
    # Disease code is embedded in the sample name, e.g. "V1.CD-11" -> "CD".
    disease = label.split(".")[1].split('-')[0]
    # A sixth field of 'N' overrides the code taken from the name
    # (presumably it marks a non-affected subject -- confirm with the data).
    ibd = 'N' if fields[5] == 'N' else disease
    for table in (sample_md, new_sample):
        table[label] = {'gender': fields[2], 'age': fields[3], 'BMI': fields[4], 'IBD': ibd}

sample_md = pd.DataFrame.from_dict(sample_md, orient='index')
print(sample_md)
new_sample = pd.DataFrame.from_dict(new_sample, orient='index')
new_sample
new_sample.to_csv("../data/metahit_map_original.txt", sep="\t")
gender age BMI IBD O2.UC-1 male 37 31.02 UC O2.UC-11 female 34 18.68 UC O2.UC-12 male 43 21.60 UC O2.UC-13 female 68 23.38 UC O2.UC-14 male 31 32.65 UC O2.UC-16 male 47 26.42 UC O2.UC-17 male 56 21.87 UC O2.UC-18 male 48 25.72 UC O2.UC-19 male 42 24.15 UC O2.UC-20 female 51 24.03 UC O2.UC-21 female 49 30.46 UC O2.UC-22 male 44 25.39 UC O2.UC-23 female 44 28.16 UC O2.UC-24 female 55 28.76 UC O2.UC-4 female 57 28.53 UC V1.CD-1 female 25 17.93 CD V1.CD-11 female 62 35.46 N V1.CD-12 female 41 20.20 CD V1.CD-13 male 68 25.69 N V1.CD-14 female 41 23.12 N V1.CD-15 female 34 19.00 CD V1.CD-2 male 49 27.76 N V1.CD-3 female 18 21.51 N V1.CD-4 female 46 29.69 N V1.CD-6 female 36 18.52 CD V1.CD-8 male 51 29.38 N V1.CD-9 female 48 27.55 N V1.UC-10 male 45 27.31 UC V1.UC-13 female 51 28.51 UC V1.UC-14 female 53 20.25 UC V1.UC-15 female 25 22.77 UC V1.UC-17 female 41 24.46 UC V1.UC-18 female 63 28.67 N V1.UC-19 female 37 21.19 N V1.UC-21 male 62 25.21 UC V1.UC-6 female 38 23.18 N V1.UC-7 female 19 23.05 N V1.UC-8 male 22 25.40 N V1.UC-9 male 32 30.37 N
# Replace every entry x of GOS with 1 - x and draw an average-linkage
# dendrogram from the result.
# NOTE(review): this rebinds GOS, so re-running the cell flips the matrix
# back; the following cell computes GOS2 = 1 - GOS and depends on this.
# NOTE(review): linkage() is given the full square matrix here, which SciPy
# interprets as one observation vector per row -- confirm that is intended.
# NOTE(review): a later cell saves to the same PNG path and overwrites
# this figure.
GOS = 1 - GOS
figure(num=None, figsize=(12, 12))
similarity_linkage = linkage(GOS, method='average')
R = dendrogram(
    similarity_linkage,
    labels=L,
    leaf_font_size=13,
    leaf_rotation=90,
    orientation='right',
)
ylabel('points')
xlabel('Height')
#xlim(1,1.6)
suptitle('MetHit: average', fontweight='bold', fontsize=20)
savefig("../figure/metahit_cluster.png")
# Cluster the complement (1 - v) of whatever GOS currently holds.
# NOTE(review): if GOS has already been replaced by 1 - GOS, GOS2 equals the
# values GOS had before that flip -- TODO confirm which matrix is intended.
GOS2 = 1 - GOS
figure(num=None, figsize=(12, 12))
average_tree = linkage(GOS2, method='average')
R = dendrogram(
    average_tree,
    labels=L,
    leaf_font_size=13,
    leaf_rotation=90,
    orientation='right',
)
ylabel('points')
xlabel('Height')
suptitle('MetHit: average', fontweight='bold', fontsize=20)
# NOTE(review): same output path as the dendrogram drawn from GOS, so this
# overwrites that file.
savefig("../figure/metahit_cluster.png")
from skbio.stats.distance import DistanceMatrix
from skbio.stats.ordination import PCoA

# Run PCoA on the distance matrix stored on disk.
dm = DistanceMatrix.from_file("../data/dm_Methit_new.txt")
bc_pc = PCoA(dm).scores()
# Transposed site scores; later code indexes coord_matrix by axis number.
coord_matrix = bc_pc.site.T
# Proportion explained per axis, as reported by the ordination results.
pro = bc_pc.proportion_explained
pro
array([ 0.04860185, 0.03370058, 0.03014182, 0.02909193, 0.02830664, 0.02775359, 0.02769056, 0.02751342, 0.02719024, 0.02699702, 0.02677763, 0.02675273, 0.02647042, 0.02640769, 0.02595732, 0.02584498, 0.02579477, 0.02559587, 0.02549789, 0.02542969, 0.02540548, 0.02534444, 0.02512487, 0.02500185, 0.02497194, 0.02474767, 0.02444456, 0.02436035, 0.02425501, 0.02419137, 0.02399349, 0.02377779, 0.02366412, 0.02351445, 0.02286941, 0.02248857, 0.0224366 , 0.0218914 , 0. ])
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from pylab import scatter
#ord_plot = scatter(coord_matrix[0], coord_matrix[1], s=40)
# Matplotlib colour code per IBD status: N -> blue, UC -> red, CD -> green.
color_map = {'N': 'b', 'UC': 'r', 'CD': 'g'}
print(sample_md)
print(bc_pc.site_ids)
# Colours are listed in the order of bc_pc.site_ids, which is the order the
# ordination uses (it differs from the row order of sample_md).
ibd_by_sample = sample_md['IBD']
colors = [color_map[ibd_by_sample[site_id]] for site_id in bc_pc.site_ids]
#colors
gender age BMI IBD O2.UC-1 male 37 31.02 UC O2.UC-11 female 34 18.68 UC O2.UC-12 male 43 21.60 UC O2.UC-13 female 68 23.38 UC O2.UC-14 male 31 32.65 UC O2.UC-16 male 47 26.42 UC O2.UC-17 male 56 21.87 UC O2.UC-18 male 48 25.72 UC O2.UC-19 male 42 24.15 UC O2.UC-20 female 51 24.03 UC O2.UC-21 female 49 30.46 UC O2.UC-22 male 44 25.39 UC O2.UC-23 female 44 28.16 UC O2.UC-24 female 55 28.76 UC O2.UC-4 female 57 28.53 UC V1.CD-1 female 25 17.93 CD V1.CD-11 female 62 35.46 N V1.CD-12 female 41 20.20 CD V1.CD-13 male 68 25.69 N V1.CD-14 female 41 23.12 N V1.CD-15 female 34 19.00 CD V1.CD-2 male 49 27.76 N V1.CD-3 female 18 21.51 N V1.CD-4 female 46 29.69 N V1.CD-6 female 36 18.52 CD V1.CD-8 male 51 29.38 N V1.CD-9 female 48 27.55 N V1.UC-10 male 45 27.31 UC V1.UC-13 female 51 28.51 UC V1.UC-14 female 53 20.25 UC V1.UC-15 female 25 22.77 UC V1.UC-17 female 41 24.46 UC V1.UC-18 female 63 28.67 N V1.UC-19 female 37 21.19 N V1.UC-21 male 62 25.21 UC V1.UC-6 female 38 23.18 N V1.UC-7 female 19 23.05 N V1.UC-8 male 22 25.40 N V1.UC-9 male 32 30.37 N ('O2.UC-11', 'O2.UC-12', 'O2.UC-13', 'O2.UC-14', 'O2.UC-16', 'O2.UC-17', 'O2.UC-18', 'O2.UC-19', 'O2.UC-1', 'O2.UC-20', 'O2.UC-21', 'O2.UC-22', 'O2.UC-23', 'O2.UC-24', 'O2.UC-4', 'V1.CD-11', 'V1.CD-12', 'V1.CD-13', 'V1.CD-14', 'V1.CD-15', 'V1.CD-1', 'V1.CD-2', 'V1.CD-3', 'V1.CD-4', 'V1.CD-6', 'V1.CD-8', 'V1.CD-9', 'V1.UC-10', 'V1.UC-13', 'V1.UC-14', 'V1.UC-15', 'V1.UC-17', 'V1.UC-18', 'V1.UC-19', 'V1.UC-21', 'V1.UC-6', 'V1.UC-7', 'V1.UC-8', 'V1.UC-9')
# 2-D scatter of the first two ordination axes, one colour per sample.
first_axis, second_axis = coord_matrix[0], coord_matrix[1]
ord_plot = scatter(first_axis, second_axis, s=40, c=colors)
savefig("../figure/metahit_pca_2d.png")
def scatter_3d(ord_results, df, column, color_map, title='', axis1=0,
               axis2=1, axis3=2):
    """Draw a 3-D scatter plot of ordination site scores coloured by metadata.

    Parameters
    ----------
    ord_results : object
        Must expose ``site`` (whose transpose is indexed by axis number) and
        ``site_ids`` (iterable of sample identifiers).
        Presumably a scikit-bio ordination result -- confirm against caller.
    df : mapping or DataFrame
        ``df[column][sample_id]`` must give the category of every sample
        listed in ``ord_results.site_ids``.
    column : str
        Name of the metadata column used to choose colours.
    color_map : dict
        Maps each category found in ``df[column]`` to a matplotlib colour.
    title : str, optional
        Title placed on the axes.
    axis1, axis2, axis3 : int, optional
        Zero-based indices of the ordination axes drawn on x, y and z.
        Axis labels are shown one-based ('PC 1', 'PC 2', ...).

    Returns
    -------
    matplotlib figure
        The newly created figure holding the 3-D axes.

    Notes
    -----
    A sample id missing from ``df[column]`` or a category missing from
    ``color_map`` propagates the lookup error of the underlying container
    (``KeyError`` for plain dicts).
    """
    coord_matrix = ord_results.site.T
    # Look the column up once instead of once per sample.
    categories = df[column]
    colors = [color_map[categories[id_]] for id_ in ord_results.site_ids]

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    xs = coord_matrix[axis1]
    ys = coord_matrix[axis2]
    zs = coord_matrix[axis3]
    ax.scatter(xs, ys, zs, c=colors, s=40)

    ax.set_xlabel('PC %d' % (axis1 + 1))
    ax.set_ylabel('PC %d' % (axis2 + 1))
    ax.set_zlabel('PC %d' % (axis3 + 1))
    # Hide the tick labels on all three axes.
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    ax.set_zticklabels([])
    ax.set_title(title)
    return fig
- RED: 21 ulcerative colitis
- GREEN: 4 Crohn’s disease
- BLUE: 14 healthy individuals
# 3-D ordination coloured by IBD status: N -> blue, UC -> red, CD -> green.
ibd_colors = {'N': 'b', 'UC': 'r', 'CD': 'g'}
fig = scatter_3d(bc_pc, sample_md, 'IBD', ibd_colors,
                 'Samples colored by patient or not')
# Save through the returned figure so the file does not depend on which
# figure pyplot currently treats as active.
fig.savefig("../figure/metahit_pca_3d.png")
Image(filename="MetHit.png")
# ANOSIM on the distance matrix, grouping samples by the 'IBD' column.
from skbio.stats.distance import ANOSIM
anosim = ANOSIM(dm, sample_md, column='IBD')
# presumably 999 is the number of permutations -- TODO confirm against the
# scikit-bio version in use
results = anosim(999)
# Display the test statistic of the run.
results.statistic
0.17299869406625745
# Display the p-value of the ANOSIM run stored in `results`.
results.p_value
0.0030000000000000001
R statistic: an R value near +1 means that the groups are dissimilar, while an R value near 0 indicates no significant dissimilarity between the groups. So here R=0.11 is not that promising, and I am not sure whether it is large enough to be meaningful. (Note: the output shown above reports R ≈ 0.173 and p = 0.003, so the figures quoted here presumably come from an earlier run.)
P-value = 0.045 (using ANOSIM)
Compared to the 0.031 reported in the MetaHIT paper above (the method used to obtain that p-value may be different; I will check it), this is not bad.
# 3-D ordination coloured by gender: male -> blue, female -> red.
gender_colors = {'male': 'b', 'female': 'r'}
fig = scatter_3d(bc_pc, sample_md, 'gender', gender_colors,
                 'Samples colored by gender')
# ANOSIM on the distance matrix, grouping samples by the 'gender' column.
anosim = ANOSIM(dm, sample_md, column='gender')
# presumably 999 is the number of permutations -- TODO confirm against the
# scikit-bio version in use
results = anosim(999)
# Display the test statistic of the run.
results.statistic
-0.020049577136191276
# Display the p-value of the ANOSIM run stored in `results`.
results.p_value
0.65600000000000003
No significant difference between genders (R ≈ -0.02, p = 0.656).
python ~/Dropbox/Manuscript/2013-diversity/scripts/seperate_IGS_for_alpha.py GOS.freq GOS_MAP.txt
biom convert -i GOS.freq.IGS.alpha -o GOS.freq.IGS.alpha.biom --table-type="OTU table"
biom summarize-table -i GOS.freq.IGS.alpha.biom -o GOS.freq.IGS.alpha.biom.summary.txt