import pandas as pd
df1 = pd.DataFrame({
"a":[40, 70, 10],
"b":[50, 80, 20],
"c":[60, 90, 30]},
index=[1, 2, 3])
print(df1)
a b c 1 40 50 60 2 70 80 90 3 10 20 30
df2 = pd.DataFrame([
[41, 51, 61],
[71, 81, 91],
[11, 21, 31]],
index=[1, 2, 3],
columns=['a', 'b', 'c'])
print(df2)
a b c 1 41 51 61 2 71 81 91 3 11 21 31
df3 = pd.concat( [ df1, df2 ] )
print(df3)
a b c 1 40 50 60 2 70 80 90 3 10 20 30 1 41 51 61 2 71 81 91 3 11 21 31
pd.concat([df1,df2], axis=1)
a | b | c | a | b | c | |
---|---|---|---|---|---|---|
1 | 40 | 50 | 60 | 41 | 51 | 61 |
2 | 70 | 80 | 90 | 71 | 81 | 91 |
3 | 10 | 20 | 30 | 11 | 21 | 31 |
df3 = df3.reset_index()
print(df3)
index a b c 0 1 40 50 60 1 2 70 80 90 2 3 10 20 30 3 1 41 51 61 4 2 71 81 91 5 3 11 21 31
df3 = df3.drop(columns=['index'])
print(df3)
a b c 0 40 50 60 1 70 80 90 2 10 20 30 3 41 51 61 4 71 81 91 5 11 21 31
df4 = pd.melt(df3)
print(df4)
variable value 0 a 40 1 a 70 2 a 10 3 a 41 4 a 71 5 a 11 6 b 50 7 b 80 8 b 20 9 b 51 10 b 81 11 b 21 12 c 60 13 c 90 14 c 30 15 c 61 16 c 91 17 c 31
df4.sort_values('value')
variable | value | |
---|---|---|
2 | a | 10 |
5 | a | 11 |
8 | b | 20 |
11 | b | 21 |
14 | c | 30 |
17 | c | 31 |
0 | a | 40 |
3 | a | 41 |
6 | b | 50 |
9 | b | 51 |
12 | c | 60 |
15 | c | 61 |
1 | a | 70 |
4 | a | 71 |
7 | b | 80 |
10 | b | 81 |
13 | c | 90 |
16 | c | 91 |
df4.sort_values('value', ascending=False)
variable | value | |
---|---|---|
16 | c | 91 |
13 | c | 90 |
10 | b | 81 |
7 | b | 80 |
4 | a | 71 |
1 | a | 70 |
15 | c | 61 |
12 | c | 60 |
9 | b | 51 |
6 | b | 50 |
3 | a | 41 |
0 | a | 40 |
17 | c | 31 |
14 | c | 30 |
11 | b | 21 |
8 | b | 20 |
5 | a | 11 |
2 | a | 10 |
df4.query('value>50')
variable | value | |
---|---|---|
1 | a | 70 |
4 | a | 71 |
7 | b | 80 |
9 | b | 51 |
10 | b | 81 |
12 | c | 60 |
13 | c | 90 |
15 | c | 61 |
16 | c | 91 |
df4.query('value>50').query('variable=="b"')
variable | value | |
---|---|---|
7 | b | 80 |
9 | b | 51 |
10 | b | 81 |
df4.rename(columns={'variable':'var', 'value':'val'})
var | val | |
---|---|---|
0 | a | 40 |
1 | a | 70 |
2 | a | 10 |
3 | a | 41 |
4 | a | 71 |
5 | a | 11 |
6 | b | 50 |
7 | b | 80 |
8 | b | 20 |
9 | b | 51 |
10 | b | 81 |
11 | b | 21 |
12 | c | 60 |
13 | c | 90 |
14 | c | 30 |
15 | c | 61 |
16 | c | 91 |
17 | c | 31 |
df4['variable'].drop_duplicates()
0 a 6 b 12 c Name: variable, dtype: object
df4.head(5)
variable | value | |
---|---|---|
0 | a | 40 |
1 | a | 70 |
2 | a | 10 |
3 | a | 41 |
4 | a | 71 |
df4.tail(5)
variable | value | |
---|---|---|
13 | c | 90 |
14 | c | 30 |
15 | c | 61 |
16 | c | 91 |
17 | c | 31 |
import pandas as pd
leftDF = pd.DataFrame({'key': ['A', 'B', 'C', 'D'], 'value': [1,2,3,4]})
rightDF = pd.DataFrame({'key': ['B', 'D', 'E', 'F'], 'value': [5,6,7,8]})
print(leftDF)
print(rightDF)
key value 0 A 1 1 B 2 2 C 3 3 D 4 key value 0 B 5 1 D 6 2 E 7 3 F 8
# leftDF.merge(rightDF, on='key')
# leftDF.merge(rightDF, on='key', how='inner')
pd.merge(leftDF, rightDF, on='key', how='inner')
key | value_x | value_y | |
---|---|---|---|
0 | B | 2 | 5 |
1 | D | 4 | 6 |
# leftDF.merge(rightDF, on='key', how='left')
pd.merge(leftDF, rightDF, on='key', how='left')
key | value_x | value_y | |
---|---|---|---|
0 | A | 1 | NaN |
1 | B | 2 | 5.0 |
2 | C | 3 | NaN |
3 | D | 4 | 6.0 |
# leftDF.merge(rightDF, on='key', how='right')
pd.merge(leftDF, rightDF, on='key', how='right')
key | value_x | value_y | |
---|---|---|---|
0 | B | 2.0 | 5 |
1 | D | 4.0 | 6 |
2 | E | NaN | 7 |
3 | F | NaN | 8 |
# leftDF.merge(rightDF, on='key', how='outer')
pd.merge(leftDF, rightDF, on='key', how='outer')
key | value_x | value_y | |
---|---|---|---|
0 | A | 1.0 | NaN |
1 | B | 2.0 | 5.0 |
2 | C | 3.0 | NaN |
3 | D | 4.0 | 6.0 |
4 | E | NaN | 7.0 |
5 | F | NaN | 8.0 |