import pandas as pd
df = pd.DataFrame(data = [[2018, "a", "b", 500, 1500, 700],
[2018, "a", "c", 500, 400, 50],
[2018, "b", "a", 1500, 500, 700],
[2018, "b", "d", 1500, 750, 200],
[2018, "c", "d", 400, 750, 375]],
columns = ["year", "exporter", "importer", "importer_gdp", "exporter_gdp", "distance"])
df
year | exporter | importer | importer_gdp | exporter_gdp | distance | |
---|---|---|---|---|---|---|
0 | 2018 | a | b | 500 | 1500 | 700 |
1 | 2018 | a | c | 500 | 400 | 50 |
2 | 2018 | b | a | 1500 | 500 | 700 |
3 | 2018 | b | d | 1500 | 750 | 200 |
4 | 2018 | c | d | 400 | 750 | 375 |
df['check_string'] = df.apply(lambda row: ''.join(sorted([row['exporter'], row['importer']])), axis=1)
df
year | exporter | importer | importer_gdp | exporter_gdp | distance | check_string | |
---|---|---|---|---|---|---|---|
0 | 2018 | a | b | 500 | 1500 | 700 | ab |
1 | 2018 | a | c | 500 | 400 | 50 | ac |
2 | 2018 | b | a | 1500 | 500 | 700 | ab |
3 | 2018 | b | d | 1500 | 750 | 200 | bd |
4 | 2018 | c | d | 400 | 750 | 375 | cd |
df.drop_duplicates(subset="check_string", inplace=True)
df
year | exporter | importer | importer_gdp | exporter_gdp | distance | check_string | |
---|---|---|---|---|---|---|---|
0 | 2018 | a | b | 500 | 1500 | 700 | ab |
1 | 2018 | a | c | 500 | 400 | 50 | ac |
3 | 2018 | b | d | 1500 | 750 | 200 | bd |
4 | 2018 | c | d | 400 | 750 | 375 | cd |
export_group = df.groupby("exporter")
df2 = pd.DataFrame(
1/(export_group.importer_gdp.sum()/export_group.distance.sum())
).reset_index().rename(columns={0:"remoteness"})
df2
exporter | remoteness | |
---|---|---|
0 | a | 0.750000 |
1 | b | 0.133333 |
2 | c | 0.937500 |
df = df.merge(df2, how="left", on="exporter")
df
year | exporter | importer | importer_gdp | exporter_gdp | distance | check_string | remoteness | |
---|---|---|---|---|---|---|---|---|
0 | 2018 | a | b | 500 | 1500 | 700 | ab | 0.750000 |
1 | 2018 | a | c | 500 | 400 | 50 | ac | 0.750000 |
2 | 2018 | b | d | 1500 | 750 | 200 | bd | 0.133333 |
3 | 2018 | c | d | 400 | 750 | 375 | cd | 0.937500 |