import numpy as np
import seaborn as sns
import os
import pandas as pd
# Build a DataFrame with no rows and no columns and print its representation.
df = pd.DataFrame(data=None)
print(df)
Empty DataFrame Columns: [] Index: []
import pandas as pd

# A flat list becomes one column (labelled 0) with a default integer index.
data = list(range(1, 6))
df = pd.DataFrame(data=data)
print(df)
0 0 1 1 2 2 3 3 4 4 5
import pandas as pd

# Each inner list is one row; `columns` names the positions within a row.
data = [
    ['Mouse', 10],
    ['KBD', 12],
    ['Monitor', 13],
]
column_names = ['Item', 'Quantity']
df = pd.DataFrame(data=data, columns=column_names)
print(df)
Item Quantity 0 Mouse 10 1 KBD 12 2 Monitor 13
import pandas as pd

# Rows of [name, quantity]; the quantity should be stored as float.
data = [['Mouse', 10], ['KBD', 12], ['Monitor', 13]]
# Cast only the numeric column. A frame-wide dtype=float cannot convert the
# strings in 'Name': older pandas silently skipped that column, while
# pandas >= 2.0 raises ValueError for the whole constructor call.
df = pd.DataFrame(data, columns=['Name', 'Quantity']).astype({'Quantity': float})
print(df)
Name Quantity 0 Mouse 10.0 1 KBD 12.0 2 Monitor 13.0
import pandas as pd

# A dict of equal-length lists: keys become column labels, lists become the columns.
data = {
    'Name': ['Tom', 'Jack', 'Steve', 'Ricky'],
    'Age': [28, 34, 29, 42],
}
df = pd.DataFrame(data=data)
print(df)
Name Age 0 Tom 28 1 Jack 34 2 Steve 29 3 Ricky 42
import pandas as pd

# Same dict-of-lists input, but with explicit row labels instead of 0..n-1.
data = {
    'Name': ['Tom', 'Jack', 'Steve', 'Ricky'],
    'Age': [28, 34, 29, 42],
}
row_labels = ['rank1', 'rank2', 'rank3', 'rank4']
df = pd.DataFrame(data=data, index=row_labels)
print(df)
Name Age rank1 Tom 28 rank2 Jack 34 rank3 Steve 29 rank4 Ricky 42
# Show the entries of the current working directory.
print(os.listdir("."))
['.ipynb_checkpoints', 'box office for teaching.ipynb', 'cancer diagnosis for teaching.ipynb', 'data2.csv', 'Day 8', 'DPA - Day 1.ipynb', 'DPA-Class 2-13-09-2020.ipynb', 'gapminder.ipynb', 'temp.csv', 'test.csv', 'train.csv']
# Load temp.csv with pandas' default missing-value markers and display it.
df = pd.read_csv(filepath_or_buffer="temp.csv")
df
A | B | |
---|---|---|
0 | 1 | 1.0 |
1 | NaN | NaN |
2 | 3 | 31.0 |
3 | 2 | 22.0 |
4 | 3 | 33.0 |
5 | 1 | 11.0 |
6 | 2 | 21.0 |
7 | NaN | 24.0 |
8 | 1 | 12.0 |
9 | na | 32.0 |
# Boolean mask of the same shape as df: True where a cell is missing.
df.isna()
A | B | |
---|---|---|
0 | False | False |
1 | True | True |
2 | False | False |
3 | False | False |
4 | False | False |
5 | False | False |
6 | False | False |
7 | True | False |
8 | False | False |
9 | False | False |
# Count missing cells per column (True sums as 1).
df.isna().sum()
A 2 B 1 dtype: int64
# Extra markers to parse as missing, in addition to pandas' defaults.
missing_values = ["N/a", "na", np.nan]
df = pd.read_csv(filepath_or_buffer="temp.csv", na_values=missing_values)
df
A | B | |
---|---|---|
0 | 1.0 | 1.0 |
1 | NaN | NaN |
2 | 3.0 | 31.0 |
3 | 2.0 | 22.0 |
4 | 3.0 | 33.0 |
5 | 1.0 | 11.0 |
6 | 2.0 | 21.0 |
7 | NaN | 24.0 |
8 | 1.0 | 12.0 |
9 | NaN | 32.0 |
# Boolean mask of the same shape as df: True where a cell is missing.
df.isna()
A | B | |
---|---|---|
0 | False | False |
1 | True | True |
2 | False | False |
3 | False | False |
4 | False | False |
5 | False | False |
6 | False | False |
7 | True | False |
8 | False | False |
9 | True | False |
# Count missing cells per column (True sums as 1).
df.isna().sum()
A 3 B 1 dtype: int64
# Per column: does it contain at least one missing cell?
df.isna().any()
A True B True dtype: bool
# Plot the missing-value mask as a heatmap; row tick labels are hidden.
sns.heatmap(data=df.isna(), yticklabels=False)
<matplotlib.axes._subplots.AxesSubplot at 0x2086359aa00>
# Same missing-value heatmap, with each cell's value written inside it.
sns.heatmap(data=df.isna(), yticklabels=False, annot=True)
<matplotlib.axes._subplots.AxesSubplot at 0x20863d5cc40>
# Small frame with gaps: row 1 is missing in both columns, row 3 only in B.
df11 = pd.DataFrame(
    {
        "A": [1, np.nan, 1, 2, 3],
        "B": [2, np.nan, 4, np.nan, 22],
    }
)
df11
A | B | |
---|---|---|
0 | 1.0 | 2.0 |
1 | NaN | NaN |
2 | 1.0 | 4.0 |
3 | 2.0 | NaN |
4 | 3.0 | 22.0 |
# Drop every row that has at least one missing value (returns a new frame).
df11.dropna(axis="index", how="any")
A | B | |
---|---|---|
0 | 1.0 | 2.0 |
2 | 1.0 | 4.0 |
4 | 3.0 | 22.0 |
# Drop only the rows in which every value is missing.
df11.dropna(axis="index", how="all")
A | B | |
---|---|---|
0 | 1.0 | 2.0 |
2 | 1.0 | 4.0 |
3 | 2.0 | NaN |
4 | 3.0 | 22.0 |
# Replace every missing cell with the constant 0 (returns a new frame).
df11.fillna(value=0)
A | B | |
---|---|---|
0 | 1.0 | 2.0 |
1 | 0.0 | 0.0 |
2 | 1.0 | 4.0 |
3 | 2.0 | 0.0 |
4 | 3.0 | 22.0 |
# Forward fill: carry the last valid value in each column down into the gaps.
# Uses DataFrame.ffill() because fillna(method='ffill') is deprecated in
# pandas 2.1 and removed in pandas 3.0.
df11.ffill()
A | B | |
---|---|---|
0 | 1.0 | 2.0 |
1 | 1.0 | 2.0 |
2 | 1.0 | 4.0 |
3 | 2.0 | 4.0 |
4 | 3.0 | 22.0 |
# Backward fill: pull the next valid value in each column up into the gaps.
# Uses DataFrame.bfill() because fillna(method='bfill') is deprecated in
# pandas 2.1 and removed in pandas 3.0.
df11.bfill()
A | B | |
---|---|---|
0 | 1.0 | 2.0 |
1 | 1.0 | 4.0 |
2 | 1.0 | 4.0 |
3 | 2.0 | 22.0 |
4 | 3.0 | 22.0 |
# Fill gaps by linear interpolation between the neighbouring valid values.
df11.interpolate(method="linear")
A | B | |
---|---|---|
0 | 1.0 | 2.0 |
1 | 1.0 | 3.0 |
2 | 1.0 | 4.0 |
3 | 2.0 | 13.0 |
4 | 3.0 | 22.0 |
# Re-display df11: none of the calls above assigned their result back or used
# inplace=True, so the frame still contains its NaN cells.
df11
A | B | |
---|---|---|
0 | 1.0 | 2.0 |
1 | NaN | NaN |
2 | 1.0 | 4.0 |
3 | 2.0 | NaN |
4 | 3.0 | 22.0 |
# Keep the linearly interpolated copy of df11 under a new name, then display
# the CSV-backed frame df.
df12 = df11.interpolate(method="linear")
df
A | B | |
---|---|---|
0 | 1.0 | 1.0 |
1 | NaN | NaN |
2 | 3.0 | 31.0 |
3 | 2.0 | 22.0 |
4 | 3.0 | 33.0 |
5 | 1.0 | 11.0 |
6 | 2.0 | 21.0 |
7 | NaN | 24.0 |
8 | 1.0 | 12.0 |
9 | NaN | 32.0 |
# Display the interpolated frame stored in df12.
df12
A | B | |
---|---|---|
0 | 1.0 | 2.0 |
1 | 1.0 | 3.0 |
2 | 1.0 | 4.0 |
3 | 2.0 | 13.0 |
4 | 3.0 | 22.0 |
# Per-column fill values: only column 'A' is filled; other columns keep their NaN.
df.fillna(value={'A': 99999})
A | B | |
---|---|---|
0 | 1.0 | 1.0 |
1 | 99999.0 | NaN |
2 | 3.0 | 31.0 |
3 | 2.0 | 22.0 |
4 | 3.0 | 33.0 |
5 | 1.0 | 11.0 |
6 | 2.0 | 21.0 |
7 | 99999.0 | 24.0 |
8 | 1.0 | 12.0 |
9 | 99999.0 | 32.0 |