Load DB

In [2]:
from pandas import DataFrame
In [5]:
import sqlite3 as sql
conn = sql.connect("/ipython_notebooks/germ_database.db3")
cursor = conn.cursor()

Query and convert to dataframe

In [75]:
cursor_execute_long = cursor.execute('''
SELECT
	one.struct_id,
    one.resNum,
    score_types.score_type_name,
    one.score_value,
    rpi.pdb_residue_number,
    rpi.chain_id
    hc.pdbnum as "mutant_res",
    hc.germ,
    hc.wt
FROM
	residue_scores_1b one
INNER JOIN score_types ON score_types.score_type_id = one.score_type_id
inner join residues r on r.resNum = one.resNum and one.struct_id = r.struct_id
inner join residue_pdb_identification rpi on rpi.residue_number = r.resNum and rpi.struct_id = r.struct_id
left outer join heavy_chain_mutation_shift hc on hc.chain_id = rpi.chain_id and hc.pdbnum = rpi.pdb_residue_number
where rpi.chain_id = "H"
limit 10000
''')
In [133]:
cursor_execute_short = cursor.execute('''
SELECT
	one.struct_id,
    one.resNum,
    score_types.score_type_name,
    one.score_value
    -- rpi.pdb_residue_number,
    -- rpi.chain_id
FROM
	residue_scores_1b one
INNER JOIN score_types ON score_types.score_type_id = one.score_type_id
inner join residues r on r.resNum = one.resNum and one.struct_id = r.struct_id
inner join residue_pdb_identification rpi on rpi.residue_number = r.resNum and rpi.struct_id = r.struct_id
limit 10000
''')
In [134]:
df = DataFrame([i for i in cursor_execute_short])
In [140]:
df.columns = [i[0] for i in cursor.description]
print df.columns
print df.head(30)
Index([u'struct_id', u'resNum', u'score_type_name', u'score_value'], dtype='object')
     struct_id  resNum score_type_name  score_value
0   4294967297       1           omega     0.064840
1   4294967297       1          fa_dun     2.185618
2   4294967297       1      fa_dun_dev     0.000027
3   4294967297       1     fa_dun_semi     2.185591
4   4294967297       1             ref    -1.191180
5   4294967297       2            rama    -0.795161
6   4294967297       2           omega     0.222345
7   4294967297       2          fa_dun     1.378923
8   4294967297       2      fa_dun_dev     0.028560
9   4294967297       2      fa_dun_rot     1.350362
10  4294967297       2         p_aa_pp    -0.442467
11  4294967297       2             ref     0.249477
12  4294967297       3            rama     0.267443
13  4294967297       3           omega     0.005106
14  4294967297       3          fa_dun     0.020352
15  4294967297       3      fa_dun_dev     0.025507
16  4294967297       3      fa_dun_rot    -0.005156
17  4294967297       3         p_aa_pp    -0.096847
18  4294967297       3             ref     0.979644
19  4294967297       4            rama    -1.403292
20  4294967297       4           omega     0.212160
21  4294967297       4          fa_dun     4.218029
22  4294967297       4      fa_dun_dev     0.003712
23  4294967297       4     fa_dun_semi     4.214317
24  4294967297       4         p_aa_pp    -0.462765
25  4294967297       4             ref    -1.960940
26  4294967297       5            rama    -0.600053
27  4294967297       5           omega     0.061867
28  4294967297       5          fa_dun     3.663050
29  4294967297       5      fa_dun_dev     0.004953

Pivot

Why doesn't pivot work here?

In [149]:
df.pivot(columns='score_type_name',values='score_value',index=['struct_id','resNum'])
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-149-1377278b33f3> in <module>()
----> 1 df.pivot(columns='score_type_name',values='score_value',index=['struct_id','resNum'])

/dnas/apps/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in pivot(self, index, columns, values)
   3390         """
   3391         from pandas.core.reshape import pivot
-> 3392         return pivot(self, index=index, columns=columns, values=values)
   3393 
   3394     def stack(self, level=-1, dropna=True):

/dnas/apps/anaconda/lib/python2.7/site-packages/pandas/core/reshape.pyc in pivot(self, index, columns, values)
    368         indexed = Series(self[values].values,
    369                          index=MultiIndex.from_arrays([self[index],
--> 370                                                        self[columns]]))
    371         return indexed.unstack(columns)
    372 

/dnas/apps/anaconda/lib/python2.7/site-packages/pandas/core/series.pyc in __init__(self, data, index, dtype, name, copy, fastpath)
    211                                        raise_cast_failure=True)
    212 
--> 213                 data = SingleBlockManager(data, index, fastpath=True)
    214 
    215         generic.NDFrame.__init__(self, data, fastpath=True)

/dnas/apps/anaconda/lib/python2.7/site-packages/pandas/core/internals.pyc in __init__(self, block, axis, do_integrity_check, fastpath)
   3369             block = make_block(block,
   3370                                placement=slice(0, len(axis)),
-> 3371                                ndim=1, fastpath=True)
   3372 
   3373         self.blocks = [block]

/dnas/apps/anaconda/lib/python2.7/site-packages/pandas/core/internals.pyc in make_block(values, placement, klass, ndim, dtype, fastpath)
   2097 
   2098     return klass(values, ndim=ndim, fastpath=fastpath,
-> 2099                  placement=placement)
   2100 
   2101 

/dnas/apps/anaconda/lib/python2.7/site-packages/pandas/core/internals.pyc in __init__(self, values, placement, ndim, fastpath)
     74             raise ValueError('Wrong number of items passed %d,'
     75                              ' placement implies %d' % (
---> 76                                  len(self.values), len(self.mgr_locs)))
     77 
     78     @property

ValueError: Wrong number of items passed 10000, placement implies 2

Pivot Table

Does work, but not ideal

In [152]:
pivoted = df.pivot_table(columns='score_type_name',values='score_value',index=['struct_id','resNum'])
Out[152]:
score_type_name fa_dun fa_dun_dev fa_dun_rot fa_dun_semi omega p_aa_pp rama ref yhh_planarity
struct_id resNum
4294967297 1 2.185618 0.000027 NaN 2.185591 0.064840 NaN NaN -1.191180 NaN
2 1.378923 0.028560 1.350362 NaN 0.222345 -0.442467 -0.795161 0.249477 NaN
3 0.020352 0.025507 -0.005156 NaN 0.005106 -0.096847 0.267443 0.979644 NaN
4 4.218029 0.003712 NaN 4.214317 0.212160 -0.462765 -1.403292 -1.960940 NaN
5 3.663050 0.004953 NaN 3.658097 0.061867 -0.275890 -0.600053 -1.517170 NaN
6 1.517452 0.220623 1.296829 NaN 0.050372 -0.375813 -0.741903 0.249477 NaN
7 3.698745 0.018365 NaN 3.680381 0.054732 0.401857 -0.148868 0.388298 NaN
8 0.104812 0.013364 0.091448 NaN 0.000280 -0.796285 -0.403267 0.201340 NaN
9 2.362573 0.000532 NaN 2.362040 0.052061 0.195547 -0.349949 -1.630020 NaN
10 1.844669 0.027023 1.817646 NaN 0.097104 0.167615 -0.407074 1.080600 NaN
11 0.127562 0.009247 0.118316 NaN 0.020841 -0.402554 -0.007522 1.080600 NaN
12 0.292078 0.034249 0.257829 NaN 0.034202 -0.242636 -1.393017 0.165383 NaN
13 0.648313 0.001905 0.646408 NaN 0.084531 -0.356526 -0.235599 0.761128 NaN
14 2.596477 0.150729 NaN 2.445748 0.514282 -0.136966 -0.537316 1.234130 NaN
15 2.644818 0.159300 NaN 2.485518 0.050967 0.150995 -0.501137 -1.630020 NaN
16 3.561031 0.003633 NaN 3.557399 0.002421 -0.361212 -0.957757 -1.517170 NaN
17 0.326576 0.056354 0.270222 NaN 0.207658 -0.477727 -1.271940 0.165383 NaN
18 1.028985 0.044087 0.984898 NaN 1.246412 -0.189764 2.146958 0.761128 NaN
19 1.423473 0.012313 1.411160 NaN 0.491884 -0.123693 -0.486693 -0.358574 NaN
20 0.038964 0.018734 0.020230 NaN 0.000119 -0.358959 1.757088 -0.250485 NaN
21 0.394884 0.012919 0.381965 NaN 0.612963 -0.260295 0.388244 0.443793 NaN
22 0.037251 0.015745 0.021506 NaN 0.016037 -0.238985 -0.747880 0.979644 NaN
23 2.303942 0.012627 2.291315 NaN 0.033129 -0.087818 -0.832301 -0.358574 NaN
24 0.383950 0.095711 0.288238 NaN 0.098923 -0.011932 1.308046 0.761128 NaN
25 0.130075 0.003686 0.126389 NaN 0.084236 0.129601 -0.032645 0.201340 NaN
26 0.255410 0.104805 0.150605 NaN 0.168313 -0.430657 -0.215267 -0.250485 NaN
27 0.448255 0.064404 0.383851 NaN 0.068534 -0.176108 0.710819 0.761128 NaN
28 0.482777 0.075748 0.407029 NaN 0.061439 -0.507403 0.493257 0.443793 NaN
29 0.583948 0.073615 0.510333 NaN 0.040075 0.158187 0.161241 0.979644 NaN
30 0.055166 0.003266 0.051899 NaN 0.065008 -0.025786 -0.071459 0.201340 NaN
... ... ... ... ... ... ... ... ... ... ...
4294967298 240 4.740174 0.449317 NaN 4.290857 0.344661 -0.269404 -0.268549 0.619370 NaN
241 3.978024 0.118481 NaN 3.859542 0.976365 -0.407884 -0.831550 0.619370 NaN
242 3.452059 0.186282 NaN 3.265777 0.828436 -0.508247 0.253198 0.162496 0.191642
243 0.417471 0.208754 0.208717 NaN 0.012376 -0.504630 -0.517383 0.443793 NaN
244 2.443289 0.019956 NaN 2.423333 0.309896 -0.847634 -0.596147 -1.191180 NaN
245 0.019082 0.008125 0.010956 NaN 0.013838 0.079334 0.284252 0.201340 NaN
246 0.962801 0.007121 0.955679 NaN 0.000004 0.145112 -0.666483 0.165383 NaN
247 NaN NaN NaN NaN 0.004413 0.540807 -0.122814 0.173326 NaN
248 0.227479 0.107047 0.120432 NaN 0.073280 -0.333456 0.159415 0.761128 NaN
249 2.470933 0.059415 NaN 2.411518 0.034304 -1.534370 0.442499 0.619370 NaN
250 2.301918 0.000482 NaN 2.301436 0.485663 -1.949321 1.919370 -1.191180 NaN
251 0.636847 0.000030 0.636817 NaN 0.033697 -0.780162 -0.259157 0.165383 NaN
252 0.052720 0.012307 0.040413 NaN 0.099188 -0.111913 -0.033058 0.201340 NaN
253 3.427858 0.000125 NaN 3.427732 0.206410 -0.182746 -0.111733 1.234130 NaN
254 0.138352 0.008968 0.129384 NaN 0.283981 0.638144 0.926719 1.080600 NaN
255 0.271421 0.002089 0.269331 NaN 0.061324 -0.459537 -1.105778 0.165383 NaN
256 2.644857 0.000002 NaN 2.644855 0.034885 -1.218309 0.336546 -1.191180 NaN
257 2.441294 0.000089 NaN 2.441205 0.043497 -1.430593 0.495951 -1.191180 NaN
258 2.722745 0.035252 NaN 2.687493 0.042097 -1.603858 0.674321 -1.630020 NaN
259 0.430576 0.000168 0.430408 NaN 0.032542 -0.470244 -0.166085 0.165383 NaN
260 0.313999 0.026600 0.287399 NaN 0.026820 -1.261095 -0.979981 1.080600 NaN
261 0.091507 0.032129 0.059379 NaN 0.031812 0.191408 -0.415607 0.201340 NaN
262 1.355400 0.402955 0.952445 NaN 0.021373 -0.315569 0.513188 0.761128 NaN
263 0.128155 0.007633 0.120522 NaN 0.044180 -1.523705 -0.601065 -0.250485 NaN
264 0.209232 0.003300 0.205933 NaN 0.170198 -0.507001 -0.512832 0.443793 NaN
265 3.156909 0.508314 2.648594 NaN 0.109325 -0.298296 -0.552645 -0.324360 NaN
266 1.328400 0.016535 1.311864 NaN 0.013423 -0.552644 -0.746187 1.080600 NaN
267 3.462925 0.734232 2.728692 NaN 0.014817 -0.020396 -0.372945 -0.358574 NaN
268 3.180541 0.205535 NaN 2.975006 0.021037 -0.276916 0.299735 -1.517170 NaN
269 NaN NaN NaN NaN NaN NaN -0.691644 NaN NaN

1505 rows × 9 columns

In [ ]: