import numpy as np import pylab as pb import GPy import Bio.Affy.CelFile as ss from xlrd import open_workbook import pickle %pylab inline num_genes=10000 from xlrd import open_workbook wb = open_workbook('ann1.xlsx') ann=np.repeat("ccccccccccccc",num_genes*2).flatten() ann=ann.reshape(num_genes,2) for ss in wb.sheets(): for row in range(1,num_genes+1): ann[row-1,0] = ss.cell(row,0).value ann[row-1,1] = ss.cell(row,1).value #print ann[row,1] #print ann[36,0], ann[36,1] def maxx(odds): if odds[0]<0.5 : if odds[3]<0.5: if odds[5]<0.5: return 4 else : return 3 elif odds[4]<0.5: return 4 else : return 2 elif odds[1]<0.5: if odds[5]<0.5: return 4 else : return 3 elif odds[2]<0.5: return 4 else : return 1 max1=np.zeros(num_genes).flatten() num_genes=1000 import pickle kk=np.repeat('cccccccccccccc',(num_genes+1)*25).reshape(num_genes+1,25) odds=np.zeros(6) kk[0,:]=['Prob_ID','Gene symbol', 'likelihood value C1','likelihood value C2','likelihood value C3','likelihood value C4','propab. C1','propab.C2','propab. C3' ,'propab. C4','ODDS C1/c2','ODDS C1/c3','ODDS C1/c4' ,'ODDS . C2/c3',' ODDS . C2/c4','ODDS . C3/c4', ' logit C1/c2',' logit C1/c3',' logit C1/c4' ,' logit C2/c3',' logit C2/c4',' logit C3/c4','best model','Gene No.','model 1 or 2' ] new=[] m=[0,1,2,3] for j in range(0,4): new.append(numpy.load("Case{}_models.npy".format(j+1))) norm_case=np.zeros(4*num_genes).reshape(4,num_genes) for i in range(0,num_genes): for j in range(0,4): f= open('Case{}_models{}/model_{}.pickle'.format(j+1,int (new[j][i,0]),int (new[j][i,1])),'r' ) m[j]=pickle.load(f).log_likelihood() m1=np.exp(m) prop=m1/np.sum(m1) l=0 for k in range(0,3): for j in range(k+1,4): odds[l]=(prop[k]/prop[j])/(1+(prop[k]/prop[j])) l=l+1 logit=np.log(odds) max1[i]=int(maxx(odds)) # find the best model by it's Odds value kk[i+1,:]=[ann[i,0],ann[i,1],m1[0],m1[1],m1[2],m1[3],prop[0],prop[1],prop[2],prop[3], odds[0],odds[1],odds[2],odds[3],odds[4],odds[5],logit[0],logit[1],logit[2],logit[3] ,logit[4],logit[5] ,int(max1[i]),i ,int(new[int(max1[i])-1][i,0])] for i in range(0,25): print kk[0,i], '\t:\t', kk[1,i] numpy.save("All_Prop_models.npy", kk) import csv with open('All_Prop_models.csv', 'wb') as csvfile: spamwriter = csv.writer(csvfile, delimiter=' ') for rowx in kk: spamwriter.writerow(rowx) kk=numpy.load("All_Prop_models.npy") s=np.repeat('ccccccccc',25) for i in range(1,500): for j in range(i+1,num_genes+1): if kk[i,int(kk[i,22])+5]