#!/usr/bin/env python # coding: utf-8 # In[1]: from IPython.core.display import display, HTML display(HTML("")) # In[2]: import pandas as pd import pprint pd.options.display.float_format = '{:20,.4f}'.format pd.set_option('display.max_rows', None) pd.set_option('display.max_columns', None) pd.set_option('display.width', 1000) pd.set_option('display.colheader_justify', 'center') pd.set_option('display.precision', 3) # open file x_rates = pd.read_csv("euro-daily-hist_1999_2020.csv") # strip brackets and trailing space from country names # replace remaining space with underscore # lower case all column names x_rates.columns = x_rates.columns.str.replace("[","").str.replace("]","") x_rates.columns = x_rates.columns.str.rstrip() x_rates.columns = x_rates.columns.str.replace(" ","_") x_rates.columns = x_rates.columns.str.lower() # rename columns x_rates.rename(columns={"period\\unit:":"date", "chinese_yuan_renminbi":"chinese_yuan", "uk_pound_sterling":"uk_pound"}, inplace=True) # convert datetime x_rates["date"] = pd.to_datetime(x_rates["date"]) # resort and reindex x_rates.sort_values("date", inplace=True) x_rates.reset_index(drop=True, inplace=True) # convert hyphens in currency columns to NaN import numpy as np x_rates = x_rates.replace("-", np.nan) # convert exchange rate values to float x_rates.iloc[:,1:] = x_rates.iloc[:,1:].astype(float) display(x_rates.head(2)) display(x_rates.tail(2)) # In[3]: # miniaturize x_rates x_rates = x_rates.iloc[:,:6] # array with names of currencies currency_names = x_rates.columns.values currency_names = currency_names[1:] # create a list & dictionary of data frames for each currency with log rate of the exchange rate, 30 day rolling mean, and year col_names = [] # list with names of currencies df_list = [] # list of 40 dataframes df_dict = {} # dictionary with 40 currency:currency_dataframe pairs for currency in x_rates.columns[1:]: df_name = currency col_names.append(df_name) df = x_rates[["date", currency]].copy() df = df[df[currency].notna()] df["log_rate"] = np.log(df.iloc[:,1]/df.iloc[:,1].shift()) # getting the log of the exchange rate # double check this is the correct way to get log df["rolling_mean_30"] = df[currency].rolling(30).mean() df["year"] = df["date"].dt.year df_dict[currency] = df df_list.append(df) # In[4]: # function to create df with year and annual volatility for every row # i think i could use aggregation here but don't know how def volatizer(currency): annual_df_list = [currency[currency['year'] == y] for y in currency['year'].unique()] # list of annual dfs c_name = currency.columns[1] row_dict = {} for frame in annual_df_list: year_name = frame.iat[0,4] # first cell of the "year" column, becomes the "year" key for row_dict annual_volatility = frame["log_rate"].std()*253**.5 # volatility measured by standard deviation * 253 trading days per year raised to the 0.5 power row_dict[year_name] = annual_volatility df = pd.DataFrame.from_dict(row_dict, orient="index", columns=[c_name+"_annual_vol"]) # indexing on year, not sure if this is cool return df # apply volatizer to each currency df for key in df_dict: df_dict[key] = volatizer(df_dict[key]) display(df_dict["bulgarian_lev"])