#!/usr/bin/env python # coding: utf-8 # In[1]: from IPython.core.display import display, HTML display(HTML("")) # In[2]: import pandas as pd import pprint pd.options.display.float_format = '{:20,.4f}'.format pd.set_option('display.max_rows', None) pd.set_option('display.max_columns', None) pd.set_option('display.width', 1000) pd.set_option('display.colheader_justify', 'center') pd.set_option('display.precision', 3) # open file x_rates = pd.read_csv("euro-daily-hist_1999_2020.csv") # strip brackets and trailing space from country names # replace remaining space with underscore # lower case all column names x_rates.columns = x_rates.columns.str.replace("[","").str.replace("]","") x_rates.columns = x_rates.columns.str.rstrip() x_rates.columns = x_rates.columns.str.replace(" ","_") x_rates.columns = x_rates.columns.str.lower() # rename columns x_rates.rename(columns={"period\\unit:":"date", "chinese_yuan_renminbi":"chinese_yuan", "uk_pound_sterling":"uk_pound"}, inplace=True) # convert datetime x_rates["date"] = pd.to_datetime(x_rates["date"]) # resort and reindex x_rates.sort_values("date", inplace=True) x_rates.reset_index(drop=True, inplace=True) # convert hyphens in currency columns to NaN import numpy as np x_rates = x_rates.replace("-", np.nan) # convert exchange rate values to float x_rates.iloc[:,1:] = x_rates.iloc[:,1:].astype(float) mini = x_rates.iloc[:5000] display(mini.head(2)) display(mini.tail(2)) # In[3]: # create a list of data frame names # not sure this is neccessary but trying df = df_list.index(currency) in the framer function doesn't seem to work # create a list of data frames for each currency with log rate of the exchange rate, 30 day rolling mean, and year col_names = [] df_list = [] df_dict = {} for column in mini.columns[1:]: df_name = column col_names.append(df_name) df = mini[["date", column]].copy() df = df[df[column].notna()] df["log_rate"] = np.log(df.iloc[:,1]/df.iloc[:,1].shift()) # getting the log of the exchange rate # double check this is the correct way to get log df["rolling_mean_30"] = df[column].rolling(30).mean() df["year"] = df["date"].dt.year # df_dict[column] = df df_list.append(df) # In[4]: # functions to build annual volatility given string of currency name # function to assign dataframe to variable name # i could include this in volatizer, but for now keep separate because these are frames of all the original data def framer(currency): index = col_names.index(currency) df = df_list[index] # this is a dataframe containing a single currency and the columns built in cell 3 return df # function to create df with year and annual volatility for every row # i think i could use aggregation here but don't know how def volatizer(currency): all_the_years = [currency[currency['year'] == y] for y in currency['year'].unique()] # list of dataframes for each year c_name = currency.columns[1] df_dict = {} for frame in all_the_years: year_name = frame.iat[0,4] # the year for each df, becomes the "year" cell for annual volatility df annual_volatility = frame["log_rate"].std()*253**.5 # volatility measured by standard deviation * 253 trading days per year raised to the 0.5 power df_dict[year_name] = annual_volatility df = pd.DataFrame.from_dict(df_dict, orient="index", columns=[c_name+"_annual_vol"]) # indexing on year, not sure if this is cool return df brazilian_real = framer("brazilian_real") br_vol = volatizer(brazilian_real) # now i need to run this on all 40 df's and get a new df with all of them joined # then build an hbar chart with the eight from above # maybe two hbar charts for cell phones? # In[5]: display(br_vol) # In[10]: # use string of currency to get index position and assign it to variable name index = col_names.index("chinese_yuan") print(index) chinese_yuan = df_list[5] # function to create df for each year in currency df def split_years(df): return [df[df['year'] == y] for y in df['year'].unique()] all_the_years = split_years(chinese_yuan) # see if I can make split_years split and heads # # determine annual volatility for euro to currency # for frame in all_the_years: # year_name = frame.iat[0,4] # annual_volatility = frame["log_rate"].std()*253**.5 # volatility measured by standard deviation * 253 trading days per year # print("The Euro to Chinese yuan volatility for", year_name, "is" ,annual_volatility) # In[11]: display(all_the_years[0])