import numpy as np
import pandas as pd
import itertools
df = pd.DataFrame(np.random.randn(3, 4) * 10, columns=[f"column {i}" for i in range(4)])
df
column 0 | column 1 | column 2 | column 3 | |
---|---|---|---|---|
0 | -2.514103 | -11.355411 | -7.881233 | 2.462776 |
1 | -8.846382 | -3.083167 | -14.698746 | -2.668292 |
2 | -1.551953 | 0.827107 | -4.770237 | -17.009419 |
df.values # this is numpy array
array([[ -2.51410318, -11.35541057, -7.8812329 , 2.46277592], [ -8.84638153, -3.08316687, -14.69874602, -2.66829197], [ -1.55195347, 0.82710666, -4.77023707, -17.00941874]])
group_list = [3 * ["portfolio_1"], 3 * ["portfolio_2"], 3 * ["portfolio_3"]]
flat_group = list(itertools.chain(*group_list))
subgroup_list = [[f"strategy_{i}" for i in range(1, 4)] for _ in range(3)]
flat_subgroup = list(itertools.chain(*subgroup_list))
df = pd.DataFrame(np.random.randn(9, 4))
df.index = pd.MultiIndex.from_arrays(
[flat_group, flat_subgroup], names=("portfolio", "strategy")
)
df
0 | 1 | 2 | 3 | ||
---|---|---|---|---|---|
portfolio | strategy | ||||
portfolio_1 | strategy_1 | -1.666313 | -1.535478 | 1.192467 | -0.221651 |
strategy_2 | -0.173095 | -0.213803 | 0.235330 | 0.861566 | |
strategy_3 | 0.374859 | 1.990981 | 0.107066 | -1.670760 | |
portfolio_2 | strategy_1 | -0.084183 | 0.288425 | 1.052083 | -0.146290 |
strategy_2 | 0.326947 | 0.027266 | 0.222703 | 0.308883 | |
strategy_3 | 1.563305 | 1.280715 | 0.685003 | 2.202861 | |
portfolio_3 | strategy_1 | 0.464870 | -1.077632 | 0.000620 | 1.331755 |
strategy_2 | -1.627832 | -0.361344 | 1.080016 | -0.599627 | |
strategy_3 | 0.348844 | 0.780083 | -0.679279 | -0.716045 |
df.loc[[("portfolio_2", "strategy_3")]] # select with a tuple
0 | 1 | 2 | 3 | ||
---|---|---|---|---|---|
portfolio | strategy | ||||
portfolio_2 | strategy_3 | -1.44679 | -1.263631 | 0.45624 | -1.349913 |
top_columns = [f"PM {i}" for i in range(1, 3) for _ in range(2)]
bottom_columns = 2 * ["Opportunistic", "Niches"]
df.columns = pd.MultiIndex.from_arrays([top_columns, bottom_columns])
df
PM 1 | PM 2 | ||||
---|---|---|---|---|---|
Opportunistic | Niches | Opportunistic | Niches | ||
portfolio | strategy | ||||
portfolio_1 | strategy_1 | -1.666313 | -1.535478 | 1.192467 | -0.221651 |
strategy_2 | -0.173095 | -0.213803 | 0.235330 | 0.861566 | |
strategy_3 | 0.374859 | 1.990981 | 0.107066 | -1.670760 | |
portfolio_2 | strategy_1 | -0.084183 | 0.288425 | 1.052083 | -0.146290 |
strategy_2 | 0.326947 | 0.027266 | 0.222703 | 0.308883 | |
strategy_3 | 1.563305 | 1.280715 | 0.685003 | 2.202861 | |
portfolio_3 | strategy_1 | 0.464870 | -1.077632 | 0.000620 | 1.331755 |
strategy_2 | -1.627832 | -0.361344 | 1.080016 | -0.599627 | |
strategy_3 | 0.348844 | 0.780083 | -0.679279 | -0.716045 |
# select PM 2 and Niche, return a series
df.loc[:, ("PM 2", "Niches")]
portfolio strategy portfolio_1 strategy_1 -0.221651 strategy_2 0.861566 strategy_3 -1.670760 portfolio_2 strategy_1 -0.146290 strategy_2 0.308883 strategy_3 2.202861 portfolio_3 strategy_1 1.331755 strategy_2 -0.599627 strategy_3 -0.716045 Name: (PM 2, Niches), dtype: float64
df.loc[:, ("PM 2", "Niches")].to_frame() # return a dataframe
PM 2 | ||
---|---|---|
Niches | ||
portfolio | strategy | |
portfolio_1 | strategy_1 | -0.221651 |
strategy_2 | 0.861566 | |
strategy_3 | -1.670760 | |
portfolio_2 | strategy_1 | -0.146290 |
strategy_2 | 0.308883 | |
strategy_3 | 2.202861 | |
portfolio_3 | strategy_1 | 1.331755 |
strategy_2 | -0.599627 | |
strategy_3 | -0.716045 |
# An example of reindexing
df = pd.DataFrame(
np.random.randn(3, 4),
columns=[f"column {i}" for i in range(4)],
index=pd.date_range("20240101", periods=3, freq="B"),
)
df
column 0 | column 1 | column 2 | column 3 | |
---|---|---|---|---|
2024-01-01 | 1.604649 | 0.923969 | -0.627469 | 0.625340 |
2024-01-02 | 0.587526 | 1.938590 | -1.275731 | 0.112374 |
2024-01-03 | -0.082560 | 0.964331 | 1.170139 | 0.474678 |
index_2 = pd.date_range("20231229", periods=8)
df = df.reindex(index_2)
df
column 0 | column 1 | column 2 | column 3 | |
---|---|---|---|---|
2023-12-29 | NaN | NaN | NaN | NaN |
2023-12-30 | NaN | NaN | NaN | NaN |
2023-12-31 | NaN | NaN | NaN | NaN |
2024-01-01 | 1.604649 | 0.923969 | -0.627469 | 0.625340 |
2024-01-02 | 0.587526 | 1.938590 | -1.275731 | 0.112374 |
2024-01-03 | -0.082560 | 0.964331 | 1.170139 | 0.474678 |
2024-01-04 | NaN | NaN | NaN | NaN |
2024-01-05 | NaN | NaN | NaN | NaN |
new_columns = [f"column {i}" for i in range(-2, 5)]
df = df.reindex(columns=new_columns)
df
column -2 | column -1 | column 0 | column 1 | column 2 | column 3 | column 4 | |
---|---|---|---|---|---|---|---|
2023-12-29 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2023-12-30 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2023-12-31 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2024-01-01 | NaN | NaN | 1.604649 | 0.923969 | -0.627469 | 0.625340 | NaN |
2024-01-02 | NaN | NaN | 0.587526 | 1.938590 | -1.275731 | 0.112374 | NaN |
2024-01-03 | NaN | NaN | -0.082560 | 0.964331 | 1.170139 | 0.474678 | NaN |
2024-01-04 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2024-01-05 | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
# demonstrate how to use .pivot() to create a dataframe
df = pd.DataFrame(
{
"A": ["apple", "apple", "pear", "banana"] * 3,
"B": ["one", "two", "one", "two"] * 3,
"C": ["small", "large", "large", "small"] * 3,
"D": np.random.randn(12),
"E": np.random.randn(12),
}
); df
A | B | C | D | E | |
---|---|---|---|---|---|
0 | apple | one | small | -0.596805 | 1.722010 |
1 | apple | two | large | 0.009945 | 0.832135 |
2 | pear | one | large | 0.387427 | -0.118612 |
3 | banana | two | small | -0.361440 | 0.431291 |
4 | apple | one | small | -3.021230 | 0.905683 |
5 | apple | two | large | -1.515054 | 2.379202 |
6 | pear | one | large | -0.536045 | -1.021149 |
7 | banana | two | small | 1.498246 | 0.444942 |
8 | apple | one | small | -1.286598 | 1.124035 |
9 | apple | two | large | -0.481828 | -0.273580 |
10 | pear | one | large | -0.967426 | -0.463726 |
11 | banana | two | small | -0.400804 | 0.282053 |
df.pivot_table(index="A", columns="B", values=["D"]) # default aggregation is mean
D | ||
---|---|---|
B | one | two |
A | ||
apple | -0.475195 | 0.837590 |
banana | NaN | -0.678793 |
pear | -0.693141 | NaN |
df.pivot_table(index="A", columns="B", values=["D"], aggfunc="sum") # sum aggregation
D | ||
---|---|---|
B | one | two |
A | ||
apple | -4.904634 | -1.986937 |
banana | NaN | 0.736002 |
pear | -1.116043 | NaN |
df_pivoted = df.pivot_table(index="A", columns="B", values=["D", "E"], aggfunc="max", fill_value=0); df_pivoted
D | E | |||
---|---|---|---|---|
B | one | two | one | two |
A | ||||
apple | -0.596805 | 0.009945 | 1.722010 | 2.379202 |
banana | 0.000000 | 1.498246 | 0.000000 | 0.444942 |
pear | 0.387427 | 0.000000 | -0.118612 | 0.000000 |
df_pivoted.columns
MultiIndex([('D', 'one'), ('D', 'two'), ('E', 'one'), ('E', 'two')], names=[None, 'B'])