from pandas import DataFrame
import sqlite3 as sql
conn = sql.connect("/ipython_notebooks/germ_database.db3")
cursor = conn.cursor()
cursor_execute_long = cursor.execute('''
SELECT
one.struct_id,
one.resNum,
score_types.score_type_name,
one.score_value,
rpi.pdb_residue_number,
rpi.chain_id
hc.pdbnum as "mutant_res",
hc.germ,
hc.wt
FROM
residue_scores_1b one
INNER JOIN score_types ON score_types.score_type_id = one.score_type_id
inner join residues r on r.resNum = one.resNum and one.struct_id = r.struct_id
inner join residue_pdb_identification rpi on rpi.residue_number = r.resNum and rpi.struct_id = r.struct_id
left outer join heavy_chain_mutation_shift hc on hc.chain_id = rpi.chain_id and hc.pdbnum = rpi.pdb_residue_number
where rpi.chain_id = "H"
limit 10000
''')
cursor_execute_short = cursor.execute('''
SELECT
one.struct_id,
one.resNum,
score_types.score_type_name,
one.score_value
-- rpi.pdb_residue_number,
-- rpi.chain_id
FROM
residue_scores_1b one
INNER JOIN score_types ON score_types.score_type_id = one.score_type_id
inner join residues r on r.resNum = one.resNum and one.struct_id = r.struct_id
inner join residue_pdb_identification rpi on rpi.residue_number = r.resNum and rpi.struct_id = r.struct_id
limit 10000
''')
df = DataFrame([i for i in cursor_execute_short])
df.columns = [i[0] for i in cursor.description]
print df.columns
print df.head(30)
Index([u'struct_id', u'resNum', u'score_type_name', u'score_value'], dtype='object') struct_id resNum score_type_name score_value 0 4294967297 1 omega 0.064840 1 4294967297 1 fa_dun 2.185618 2 4294967297 1 fa_dun_dev 0.000027 3 4294967297 1 fa_dun_semi 2.185591 4 4294967297 1 ref -1.191180 5 4294967297 2 rama -0.795161 6 4294967297 2 omega 0.222345 7 4294967297 2 fa_dun 1.378923 8 4294967297 2 fa_dun_dev 0.028560 9 4294967297 2 fa_dun_rot 1.350362 10 4294967297 2 p_aa_pp -0.442467 11 4294967297 2 ref 0.249477 12 4294967297 3 rama 0.267443 13 4294967297 3 omega 0.005106 14 4294967297 3 fa_dun 0.020352 15 4294967297 3 fa_dun_dev 0.025507 16 4294967297 3 fa_dun_rot -0.005156 17 4294967297 3 p_aa_pp -0.096847 18 4294967297 3 ref 0.979644 19 4294967297 4 rama -1.403292 20 4294967297 4 omega 0.212160 21 4294967297 4 fa_dun 4.218029 22 4294967297 4 fa_dun_dev 0.003712 23 4294967297 4 fa_dun_semi 4.214317 24 4294967297 4 p_aa_pp -0.462765 25 4294967297 4 ref -1.960940 26 4294967297 5 rama -0.600053 27 4294967297 5 omega 0.061867 28 4294967297 5 fa_dun 3.663050 29 4294967297 5 fa_dun_dev 0.004953
df.pivot(columns='score_type_name',values='score_value',index=['struct_id','resNum'])
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-149-1377278b33f3> in <module>() ----> 1 df.pivot(columns='score_type_name',values='score_value',index=['struct_id','resNum']) /dnas/apps/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in pivot(self, index, columns, values) 3390 """ 3391 from pandas.core.reshape import pivot -> 3392 return pivot(self, index=index, columns=columns, values=values) 3393 3394 def stack(self, level=-1, dropna=True): /dnas/apps/anaconda/lib/python2.7/site-packages/pandas/core/reshape.pyc in pivot(self, index, columns, values) 368 indexed = Series(self[values].values, 369 index=MultiIndex.from_arrays([self[index], --> 370 self[columns]])) 371 return indexed.unstack(columns) 372 /dnas/apps/anaconda/lib/python2.7/site-packages/pandas/core/series.pyc in __init__(self, data, index, dtype, name, copy, fastpath) 211 raise_cast_failure=True) 212 --> 213 data = SingleBlockManager(data, index, fastpath=True) 214 215 generic.NDFrame.__init__(self, data, fastpath=True) /dnas/apps/anaconda/lib/python2.7/site-packages/pandas/core/internals.pyc in __init__(self, block, axis, do_integrity_check, fastpath) 3369 block = make_block(block, 3370 placement=slice(0, len(axis)), -> 3371 ndim=1, fastpath=True) 3372 3373 self.blocks = [block] /dnas/apps/anaconda/lib/python2.7/site-packages/pandas/core/internals.pyc in make_block(values, placement, klass, ndim, dtype, fastpath) 2097 2098 return klass(values, ndim=ndim, fastpath=fastpath, -> 2099 placement=placement) 2100 2101 /dnas/apps/anaconda/lib/python2.7/site-packages/pandas/core/internals.pyc in __init__(self, values, placement, ndim, fastpath) 74 raise ValueError('Wrong number of items passed %d,' 75 ' placement implies %d' % ( ---> 76 len(self.values), len(self.mgr_locs))) 77 78 @property ValueError: Wrong number of items passed 10000, placement implies 2
pivoted = df.pivot_table(columns='score_type_name',values='score_value',index=['struct_id','resNum'])
score_type_name | fa_dun | fa_dun_dev | fa_dun_rot | fa_dun_semi | omega | p_aa_pp | rama | ref | yhh_planarity | |
---|---|---|---|---|---|---|---|---|---|---|
struct_id | resNum | |||||||||
4294967297 | 1 | 2.185618 | 0.000027 | NaN | 2.185591 | 0.064840 | NaN | NaN | -1.191180 | NaN |
2 | 1.378923 | 0.028560 | 1.350362 | NaN | 0.222345 | -0.442467 | -0.795161 | 0.249477 | NaN | |
3 | 0.020352 | 0.025507 | -0.005156 | NaN | 0.005106 | -0.096847 | 0.267443 | 0.979644 | NaN | |
4 | 4.218029 | 0.003712 | NaN | 4.214317 | 0.212160 | -0.462765 | -1.403292 | -1.960940 | NaN | |
5 | 3.663050 | 0.004953 | NaN | 3.658097 | 0.061867 | -0.275890 | -0.600053 | -1.517170 | NaN | |
6 | 1.517452 | 0.220623 | 1.296829 | NaN | 0.050372 | -0.375813 | -0.741903 | 0.249477 | NaN | |
7 | 3.698745 | 0.018365 | NaN | 3.680381 | 0.054732 | 0.401857 | -0.148868 | 0.388298 | NaN | |
8 | 0.104812 | 0.013364 | 0.091448 | NaN | 0.000280 | -0.796285 | -0.403267 | 0.201340 | NaN | |
9 | 2.362573 | 0.000532 | NaN | 2.362040 | 0.052061 | 0.195547 | -0.349949 | -1.630020 | NaN | |
10 | 1.844669 | 0.027023 | 1.817646 | NaN | 0.097104 | 0.167615 | -0.407074 | 1.080600 | NaN | |
11 | 0.127562 | 0.009247 | 0.118316 | NaN | 0.020841 | -0.402554 | -0.007522 | 1.080600 | NaN | |
12 | 0.292078 | 0.034249 | 0.257829 | NaN | 0.034202 | -0.242636 | -1.393017 | 0.165383 | NaN | |
13 | 0.648313 | 0.001905 | 0.646408 | NaN | 0.084531 | -0.356526 | -0.235599 | 0.761128 | NaN | |
14 | 2.596477 | 0.150729 | NaN | 2.445748 | 0.514282 | -0.136966 | -0.537316 | 1.234130 | NaN | |
15 | 2.644818 | 0.159300 | NaN | 2.485518 | 0.050967 | 0.150995 | -0.501137 | -1.630020 | NaN | |
16 | 3.561031 | 0.003633 | NaN | 3.557399 | 0.002421 | -0.361212 | -0.957757 | -1.517170 | NaN | |
17 | 0.326576 | 0.056354 | 0.270222 | NaN | 0.207658 | -0.477727 | -1.271940 | 0.165383 | NaN | |
18 | 1.028985 | 0.044087 | 0.984898 | NaN | 1.246412 | -0.189764 | 2.146958 | 0.761128 | NaN | |
19 | 1.423473 | 0.012313 | 1.411160 | NaN | 0.491884 | -0.123693 | -0.486693 | -0.358574 | NaN | |
20 | 0.038964 | 0.018734 | 0.020230 | NaN | 0.000119 | -0.358959 | 1.757088 | -0.250485 | NaN | |
21 | 0.394884 | 0.012919 | 0.381965 | NaN | 0.612963 | -0.260295 | 0.388244 | 0.443793 | NaN | |
22 | 0.037251 | 0.015745 | 0.021506 | NaN | 0.016037 | -0.238985 | -0.747880 | 0.979644 | NaN | |
23 | 2.303942 | 0.012627 | 2.291315 | NaN | 0.033129 | -0.087818 | -0.832301 | -0.358574 | NaN | |
24 | 0.383950 | 0.095711 | 0.288238 | NaN | 0.098923 | -0.011932 | 1.308046 | 0.761128 | NaN | |
25 | 0.130075 | 0.003686 | 0.126389 | NaN | 0.084236 | 0.129601 | -0.032645 | 0.201340 | NaN | |
26 | 0.255410 | 0.104805 | 0.150605 | NaN | 0.168313 | -0.430657 | -0.215267 | -0.250485 | NaN | |
27 | 0.448255 | 0.064404 | 0.383851 | NaN | 0.068534 | -0.176108 | 0.710819 | 0.761128 | NaN | |
28 | 0.482777 | 0.075748 | 0.407029 | NaN | 0.061439 | -0.507403 | 0.493257 | 0.443793 | NaN | |
29 | 0.583948 | 0.073615 | 0.510333 | NaN | 0.040075 | 0.158187 | 0.161241 | 0.979644 | NaN | |
30 | 0.055166 | 0.003266 | 0.051899 | NaN | 0.065008 | -0.025786 | -0.071459 | 0.201340 | NaN | |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
4294967298 | 240 | 4.740174 | 0.449317 | NaN | 4.290857 | 0.344661 | -0.269404 | -0.268549 | 0.619370 | NaN |
241 | 3.978024 | 0.118481 | NaN | 3.859542 | 0.976365 | -0.407884 | -0.831550 | 0.619370 | NaN | |
242 | 3.452059 | 0.186282 | NaN | 3.265777 | 0.828436 | -0.508247 | 0.253198 | 0.162496 | 0.191642 | |
243 | 0.417471 | 0.208754 | 0.208717 | NaN | 0.012376 | -0.504630 | -0.517383 | 0.443793 | NaN | |
244 | 2.443289 | 0.019956 | NaN | 2.423333 | 0.309896 | -0.847634 | -0.596147 | -1.191180 | NaN | |
245 | 0.019082 | 0.008125 | 0.010956 | NaN | 0.013838 | 0.079334 | 0.284252 | 0.201340 | NaN | |
246 | 0.962801 | 0.007121 | 0.955679 | NaN | 0.000004 | 0.145112 | -0.666483 | 0.165383 | NaN | |
247 | NaN | NaN | NaN | NaN | 0.004413 | 0.540807 | -0.122814 | 0.173326 | NaN | |
248 | 0.227479 | 0.107047 | 0.120432 | NaN | 0.073280 | -0.333456 | 0.159415 | 0.761128 | NaN | |
249 | 2.470933 | 0.059415 | NaN | 2.411518 | 0.034304 | -1.534370 | 0.442499 | 0.619370 | NaN | |
250 | 2.301918 | 0.000482 | NaN | 2.301436 | 0.485663 | -1.949321 | 1.919370 | -1.191180 | NaN | |
251 | 0.636847 | 0.000030 | 0.636817 | NaN | 0.033697 | -0.780162 | -0.259157 | 0.165383 | NaN | |
252 | 0.052720 | 0.012307 | 0.040413 | NaN | 0.099188 | -0.111913 | -0.033058 | 0.201340 | NaN | |
253 | 3.427858 | 0.000125 | NaN | 3.427732 | 0.206410 | -0.182746 | -0.111733 | 1.234130 | NaN | |
254 | 0.138352 | 0.008968 | 0.129384 | NaN | 0.283981 | 0.638144 | 0.926719 | 1.080600 | NaN | |
255 | 0.271421 | 0.002089 | 0.269331 | NaN | 0.061324 | -0.459537 | -1.105778 | 0.165383 | NaN | |
256 | 2.644857 | 0.000002 | NaN | 2.644855 | 0.034885 | -1.218309 | 0.336546 | -1.191180 | NaN | |
257 | 2.441294 | 0.000089 | NaN | 2.441205 | 0.043497 | -1.430593 | 0.495951 | -1.191180 | NaN | |
258 | 2.722745 | 0.035252 | NaN | 2.687493 | 0.042097 | -1.603858 | 0.674321 | -1.630020 | NaN | |
259 | 0.430576 | 0.000168 | 0.430408 | NaN | 0.032542 | -0.470244 | -0.166085 | 0.165383 | NaN | |
260 | 0.313999 | 0.026600 | 0.287399 | NaN | 0.026820 | -1.261095 | -0.979981 | 1.080600 | NaN | |
261 | 0.091507 | 0.032129 | 0.059379 | NaN | 0.031812 | 0.191408 | -0.415607 | 0.201340 | NaN | |
262 | 1.355400 | 0.402955 | 0.952445 | NaN | 0.021373 | -0.315569 | 0.513188 | 0.761128 | NaN | |
263 | 0.128155 | 0.007633 | 0.120522 | NaN | 0.044180 | -1.523705 | -0.601065 | -0.250485 | NaN | |
264 | 0.209232 | 0.003300 | 0.205933 | NaN | 0.170198 | -0.507001 | -0.512832 | 0.443793 | NaN | |
265 | 3.156909 | 0.508314 | 2.648594 | NaN | 0.109325 | -0.298296 | -0.552645 | -0.324360 | NaN | |
266 | 1.328400 | 0.016535 | 1.311864 | NaN | 0.013423 | -0.552644 | -0.746187 | 1.080600 | NaN | |
267 | 3.462925 | 0.734232 | 2.728692 | NaN | 0.014817 | -0.020396 | -0.372945 | -0.358574 | NaN | |
268 | 3.180541 | 0.205535 | NaN | 2.975006 | 0.021037 | -0.276916 | 0.299735 | -1.517170 | NaN | |
269 | NaN | NaN | NaN | NaN | NaN | NaN | -0.691644 | NaN | NaN |
1505 rows × 9 columns