![]() |
![]() |
# import required packages
import pandas as pd
# Load the GDP dataset from the local data directory into a DataFrame.
data_path = './data/gdp_china_renamed.csv'
df = pd.read_csv(data_path)
# Show the column labels of the loaded table.
df.columns
Index(['prov', 'gdpr', 'year', 'gdp', 'pop', 'finv', 'trade', 'fexpen', 'uinc'], dtype='object')
# Listwise deletion: discard every row that has at least one missing value.
df_new = df.dropna(axis=0, how='any')
# Total number of missing cells left in the cleaned frame.
missing_per_column = df_new.isna().sum()
missing_per_column.sum()
0
# Row labels used throughout to inspect how each method fills the gaps
# in the 'pop' column.
idx_l = [3, 4, 5, 22, 23, 24]
df.loc[idx_l, ['pop']]
pop | |
---|---|
3 | 8.963 |
4 | NaN |
5 | 9.194 |
22 | 7.458 |
23 | NaN |
24 | 7.588 |
# Mean imputation: drop the 'prov' and 'gdpr' columns (presumably the
# non-numeric ones -- confirm against the data), then replace each missing
# value with the mean of its own column.
df_num = df.drop(columns=['prov', 'gdpr'])
col_means = df_num.mean()
df_new = df_num.fillna(value=col_means)
df_new.loc[idx_l, ['pop']]
pop | |
---|---|
3 | 8.963000 |
4 | 8.321032 |
5 | 9.194000 |
22 | 7.458000 |
23 | 8.321032 |
24 | 7.588000 |
# forward fill: propagate the last valid observation down into each gap.
# DataFrame.ffill() replaces fillna(method='ffill'), which is deprecated
# in recent pandas releases.
df_new = df.ffill()
df_new.loc[idx_l, ['pop']]
pop | |
---|---|
3 | 8.963 |
4 | 8.963 |
5 | 9.194 |
22 | 7.458 |
23 | 7.458 |
24 | 7.588 |
# backward fill: pull the next valid observation up into each gap.
# DataFrame.bfill() replaces fillna(method='bfill'), which is deprecated
# in recent pandas releases.
df_new = df.bfill()
df_new.loc[idx_l, ['pop']]
pop | |
---|---|
3 | 8.963 |
4 | 9.194 |
5 | 9.194 |
22 | 7.458 |
23 | 7.588 |
24 | 7.588 |
# default is linear
# Interpolate the numeric columns only: calling interpolate() on a frame
# that contains object-dtype columns is deprecated in recent pandas.
# (Assumes the remaining columns, e.g. 'prov', are non-numeric labels
# that should be carried over unchanged -- confirm against the data.)
num_cols = df.select_dtypes(include='number').columns
df_new = df.copy()
df_new[num_cols] = df[num_cols].interpolate()  # method='linear'
df_new.loc[idx_l, ['pop']]
pop | |
---|---|
3 | 8.9630 |
4 | 9.0785 |
5 | 9.1940 |
22 | 7.4580 |
23 | 7.5230 |
24 | 7.5880 |
# polynomial method (order=2 -> quadratic interpolation)
# Interpolate the numeric columns only: calling interpolate() on a frame
# that contains object-dtype columns is deprecated in recent pandas.
# (Assumes the remaining columns, e.g. 'prov', are non-numeric labels
# that should be carried over unchanged -- confirm against the data.)
num_cols = df.select_dtypes(include='number').columns
df_new = df.copy()
df_new[num_cols] = df[num_cols].interpolate(method='polynomial', order=2)
df_new.loc[idx_l, ['pop']]
pop | |
---|---|
3 | 8.963000 |
4 | 9.054257 |
5 | 9.194000 |
22 | 7.458000 |
23 | 7.522919 |
24 | 7.588000 |
# cubicspline
# The `order` argument is only consulted by the 'polynomial' and 'spline'
# methods; a cubic spline is always piecewise cubic, so the misleading
# order=2 has been dropped (results are unchanged).
# Interpolate the numeric columns only: calling interpolate() on a frame
# that contains object-dtype columns is deprecated in recent pandas.
# (Assumes the remaining columns, e.g. 'prov', are non-numeric labels
# that should be carried over unchanged -- confirm against the data.)
num_cols = df.select_dtypes(include='number').columns
df_new = df.copy()
df_new[num_cols] = df[num_cols].interpolate(method='cubicspline')
df_new.loc[idx_l, ['pop']]
pop | |
---|---|
3 | 8.963000 |
4 | 9.052298 |
5 | 9.194000 |
22 | 7.458000 |
23 | 7.532276 |
24 | 7.588000 |
# Persist the most recently computed df_new to disk, without the row index.
output_path = './data/gdp_china_mis_cl.csv'
df_new.to_csv(output_path, index=False)