import random
import string
import pandas as pd
import numpy as np
def randStr(str_type, length):
    """Return a random string of ``length`` characters.

    ``str_type`` is Python source text that evaluates to the pool of
    characters to draw from (for example ``"string.ascii_uppercase"``).
    Characters are drawn with ``np.random.choice`` and joined together.
    """
    # NOTE(review): eval() runs arbitrary code -- only pass trusted text.
    alphabet = np.array(list(eval(str_type)))
    picked = np.random.choice(alphabet, length)
    return "".join(picked)
randStr("string.ascii_uppercase",9)
'KXDIWQKMP'
def mkRandDf(length=None, width=None, str_kind=None, str_len=None):
    """
    Build a DataFrame of random floats with random-string column names.

    Parameters
    ----------
    length : int
        Number of rows.
    width : int
        Number of columns.
    str_kind : str
        Name of a character-set constant in the ``string`` module, e.g.
        "ascii_lowercase", "ascii_uppercase", "digits", etc.
        Defaults to "ascii_uppercase".
    str_len : int
        Number of characters in each generated column name.
        Defaults to 10.

    Returns
    -------
    pandas.DataFrame
        A ``length`` x ``width`` frame filled by ``np.random.rand``.

    Raises
    ------
    TypeError
        If ``length`` or ``width`` is not supplied.
    AttributeError
        If ``str_kind`` is not an attribute of the ``string`` module.
    """
    if length is None or width is None:
        raise TypeError("mkRandDf() requires both 'length' and 'width'")

    str_kind = str_kind or "ascii_uppercase"
    # Plain attribute lookup instead of eval(): same result for valid names,
    # without executing arbitrary text.
    alphabet = list(getattr(string, str_kind))

    # The name length comes from str_len (previously it was silently
    # overwritten with width, so the argument had no effect).
    str_len = 10 if str_len is None else str_len

    column_names = [
        "".join(np.random.choice(alphabet, str_len)) for _ in range(width)
    ]
    rand_df = pd.DataFrame(np.random.rand(length, width))
    rand_df.columns = column_names
    return rand_df
# Example: a 5-row, 6-column random frame whose column names are drawn
# from lowercase ASCII letters.
my_df = mkRandDf(5, 6, str_kind="ascii_lowercase")
my_df
iovrrx | nfinsu | mvdfjc | idjges | fubmrg | lvuhfv | |
---|---|---|---|---|---|---|
0 | 0.987654 | 0.206104 | 0.802920 | 0.011157 | 0.860618 | 0.575871 |
1 | 0.706397 | 0.860083 | 0.939230 | 0.436194 | 0.557081 | 0.706964 |
2 | 0.043139 | 0.729435 | 0.597488 | 0.700998 | 0.974193 | 0.917758 |
3 | 0.316080 | 0.461547 | 0.844540 | 0.510143 | 0.908475 | 0.877330 |
4 | 0.828839 | 0.177670 | 0.610833 | 0.328238 | 0.327697 | 0.689756 |
Use `ix` to slice up like a numpy array (note: `.ix` was removed in pandas 1.0; `.iloc` is the positional equivalent).
my_df.ix[:,0]
0 0.046928 1 0.736357 2 0.020734 3 0.973336 4 0.827972 Name: aekbyu, dtype: float64
Use `to_frame` to make a sliced series into a data frame of its own.
my_df.ix[:,0].to_frame()
aekbyu | |
---|---|
0 | 0.046928 |
1 | 0.736357 |
2 | 0.020734 |
3 | 0.973336 |
4 | 0.827972 |
# 3x3 frame of uniform random values with three named columns.
data_array = np.random.rand(3,3)
data = pd.DataFrame(data_array,columns = ["left","middle","right"])
data
left | middle | right | |
---|---|---|---|
0 | 0.912992 | 0.544838 | 0.489038 |
1 | 0.977553 | 0.265588 | 0.342345 |
2 | 0.266270 | 0.121333 | 0.871679 |
def superGroup(dataframe=None, new_level=None):
    """Return a copy of ``dataframe`` with one extra outermost column level.

    Every existing column is placed under the single label ``new_level``,
    so a flat column index becomes a two-level ``MultiIndex`` and an
    n-level ``MultiIndex`` becomes (n + 1)-level.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame whose columns should be grouped. It is not modified.
    new_level : hashable
        Label used for the new top level.

    Returns
    -------
    pandas.DataFrame
        Same data and index as the input, in the same column order, with
        the widened column index. Existing column level names are kept;
        the new level is unnamed.
    """
    columns = dataframe.columns

    # Build the new index column-by-column from the existing labels.
    # Rebuilding it as a product of the (sorted) levels would relabel the
    # data whenever the columns are not in sorted, full-product order.
    arrays = [[new_level] * len(columns)]
    arrays.extend(
        columns.get_level_values(depth) for depth in range(columns.nlevels)
    )
    names = [None] + list(columns.names)
    new_columns = pd.MultiIndex.from_arrays(arrays, names=names)

    # Copy rather than round-tripping through .values so that per-column
    # dtypes survive.
    out_df = dataframe.copy()
    out_df.columns = new_columns
    return out_df
# Stack three extra column levels on top of `data`, one call at a time.
hey = superGroup(data,"New")
works = superGroup(hey,"Newest")
superGroup(works,"Most New")
# hey.columns.labels
Most New | |||
---|---|---|---|
Newest | |||
New | |||
left | middle | right | |
0 | 0.394329 | 0.298836 | 0.683910 |
1 | 0.847183 | 0.630419 | 0.171109 |
2 | 0.747384 | 0.058720 | 0.129397 |
code review question
import re
import pandas as pd
import numpy as np
# Sample data: each entry is a ';'-separated list of "<count>x<Colour> " items.
color = ["1xYellow ; 2xRed ",
         "2xYellow ; 1xOrange ",
         "3xYellow ; 2xGreen ",
         "1xYellow ; 1xRed ",
         "2xYellow ; 1xRed "]
numbers = np.random.rand(len(color))
# Build the frame from a dict so "numbers" stays float; stacking both lists
# into one numpy array would coerce the floats to strings.
ex_df = pd.DataFrame({"numbers": numbers, "colors": color},
                     columns=["numbers", "colors"])
# Raw string: \w and \s are regex escapes, not Python string escapes.
rx = re.compile(r"x(\w+)\s")
# One list of colour names per row.
just_colors = ex_df.colors.apply(rx.findall)
%timeit present_colors = set(sum(just_colors,[]))
10000 loops, best of 3: 25.3 µs per loop
# present_colors
# Collect the distinct colour names with an explicit loop.
present_colors = set()
for value in ex_df['colors'].values:
    # `entry` (not `color`) so the module-level `color` list is not clobbered.
    for entry in value.split(';'):
        # "2xRed " -> "2xRed" -> "Red"
        present_colors.add(entry.strip().split('x')[-1])
# Distinct colour names as a single set comprehension. Each item is stripped
# before splitting so names carry no trailing whitespace ('Red', not 'Red '),
# and no intermediate lists are concatenated.
present_colors = {
    entry.strip().split('x')[-1]
    for value in ex_df['colors'].values
    for entry in value.split(';')
}
present_colors
{'Green ', 'Orange ', 'Red ', 'Yellow '}
print(my_df)
iovrrx nfinsu mvdfjc idjges fubmrg lvuhfv 0 0.987654 0.206104 0.802920 0.011157 0.860618 0.575871 1 0.706397 0.860083 0.939230 0.436194 0.557081 0.706964 2 0.043139 0.729435 0.597488 0.700998 0.974193 0.917758 3 0.316080 0.461547 0.844540 0.510143 0.908475 0.877330 4 0.828839 0.177670 0.610833 0.328238 0.327697 0.689756
# Split the frame by column position: first three columns / the rest.
# `.iloc` replaces `.ix`, which was removed from pandas; with non-integer
# column labels `.ix` resolved these integer slices positionally as well.
section_A = my_df.iloc[:, :3]
section_B = my_df.iloc[:, 3:]
# section_B
# log2 of every value minus the mean log2 of its row. The log transform is
# computed once and the row means are broadcast as a column vector, which
# avoids the double transpose.
log_df = my_df.apply(np.log2)
row_means = log_df.mean(axis=1).values
log_div_ave = pd.DataFrame(log_df.values - row_means[:, None],
                           columns=my_df.columns)
print(log_div_ave)
iovrrx nfinsu mvdfjc idjges fubmrg lvuhfv 0 1.667378 -0.593258 1.368628 -4.800610 1.468744 0.889117 1 0.056992 0.340988 0.467991 -0.638518 -0.285601 0.058149 2 -3.467018 0.612699 0.324830 0.555330 1.030127 0.944032 3 -0.941776 -0.395590 0.476099 -0.251165 0.581380 0.531053 4 0.933714 -1.288174 0.493400 -0.402633 -0.405015 0.668708