#!/usr/bin/env python
# coding: utf-8

# Open In Colab
# Source of data: FactSets (All ETFs with some cleaning)
#
# Notebook-style script: loads the ETF data set, derives AUM features,
# summarises it by asset class, one-hot encodes the asset class and builds
# the sample (`data6`) used by the regression section that follows.

import os

import numpy as np
import pandas as pd
from pylab import mpl, plt
import statsmodels.formula.api as smf
import statsmodels.api as sm

# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and
# 3.8 removed the old names, so pick whichever name this install provides.
if 'seaborn-v0_8' in plt.style.available:
    plt.style.use('seaborn-v0_8')
else:
    plt.style.use('seaborn')
mpl.rcParams['font.family'] = 'serif'

try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    # `get_ipython` only exists inside IPython/Jupyter; when run as a plain
    # script the default matplotlib backend is used instead.
    pass

os.getcwd()

# ### Load data

filename = 'https://raw.githubusercontent.com/cyrus723/my-first-binder/main/factsets_etf.csv'
data = pd.read_csv(filename)
data.info()

# Natural log of AUM. Non-positive AUM is mapped to NaN (instead of the
# -inf / NaN-with-warning that np.log would produce) so that the later
# `dropna()` removes those rows rather than letting -inf reach the fits.
data['scaled_aum'] = np.log(data['aum'].where(data['aum'] > 0))
data['aum_million'] = data['aum'] / 1000000

data.head()
data.tail()

# ### Summary Statistics

data.info()
data.describe().round(2)

# Restrict to numeric columns and use string aggregator names: passing
# builtins / NumPy callables is deprecated and mean/std/median raise on
# string columns in pandas >= 2.0.
data.select_dtypes(include='number').agg(
    ['min', 'mean', 'std', 'median', 'max']
).round(2)

data.info()

# Per-class means. `numeric_only=True` keeps the historical behaviour of
# skipping non-numeric columns; pandas >= 2.0 raises TypeError without it.
data2 = data.groupby(by='class').mean(numeric_only=True)
data2

# Replace spaces in class labels so the dummy column names created below are
# valid identifiers inside a regression formula.
data['class'] = data['class'].replace(
    ['Asset Allocation', 'Fixed Income'],
    ['Asset_Allocation', "Fixed_Income"],
)

data2 = data.groupby(by='class').mean(numeric_only=True)
data2

data3 = data.rename(columns={'class': 'asset_class'})
data.info()
data3.info()

# One-hot encode the asset class. `dtype=float` is explicit because
# pandas >= 2.0 returns bool dummies by default, which turns a mixed
# float/bool design matrix into object dtype and breaks sm.OLS / np.linalg.
data4 = pd.get_dummies(data3, columns=['asset_class'], dtype=float)
data4.info()

# ## Regression Analysis

# Keep only the modelling columns and drop rows with a missing value in any
# of them.
data5 = data4[['exp', 'scaled_aum', 'beta',
               'asset_class_Alternatives', 'asset_class_Commodities',
               'asset_class_Currency', 'asset_class_Equity',
               'asset_class_Fixed_Income']].dropna()
data5.describe()

# Drop observations with `exp` above 3.
# NOTE(review): presumably an outlier cut on the expense ratio (in percent);
# confirm the unit and the threshold against the data source.
data6 = data5.loc[data5['exp'] <= 3]
data6.describe()

# Pairwise scatter / histogram view of `exp` against log AUM.
data7 = data6[['exp', 'scaled_aum']].dropna()
pd.plotting.scatter_matrix(data7, alpha=0.2, diagonal='hist',
                           hist_kwds={'bins': 35}, figsize=(10, 6));

# Same view for `exp` against `beta`; the plot call is the next statement.
data7 = data6[['exp', 'beta']].dropna()
# This section relies on names created earlier in the script:
# `pd`, `np`, `smf`, `sm`, and the frames `data2`, `data6`, `data7`.

# Pairwise scatter / histogram view of the columns held in `data7`.
pd.plotting.scatter_matrix(data7, alpha=0.2, diagonal='hist',
                           hist_kwds={'bins': 35}, figsize=(10, 6));

# ### OLS Regression

# Degree-1 fit of `exp` on `scaled_aum`. np.polyfit takes (x, y): the line is
# evaluated and drawn below with `scaled_aum` on the x-axis, so `scaled_aum`
# must be the x argument here (the arguments were previously swapped, which
# plotted the coefficients of the reverse regression).
reg = np.polyfit(data6['scaled_aum'], data6['exp'], deg=1)

ax = data6.plot(kind='scatter', x='scaled_aum', y='exp', figsize=(10, 6))
ax.plot(data6['scaled_aum'], np.polyval(reg, data6['scaled_aum']),
        'r', lw=2);

# Formula-API regression of `exp` on log AUM, beta and asset-class dummies.
# The asset_class_Alternatives dummy is left out of the formula, so it is
# absorbed into the intercept.
formula = ('exp ~ scaled_aum + beta + asset_class_Commodities+ asset_class_Currency'
           '+ asset_class_Equity +asset_class_Fixed_Income')
results = smf.ols(formula, data6).fit()
print(results.summary())

# ### Correlation

# NOTE(review): `data2` holds per-class means, so this correlates class-level
# averages rather than individual observations — confirm that is intended.
data2.corr()

# Same model through the array API. Cast to float so the design matrix is
# numeric even when the dummy columns are bool/uint8 (their dtype depends on
# the pandas version); a mixed float/bool frame becomes object dtype and is
# rejected by sm.OLS and np.linalg.
X = data6[['scaled_aum', 'beta',
           'asset_class_Commodities', 'asset_class_Currency',
           'asset_class_Equity', 'asset_class_Fixed_Income']].astype(float)
y = data6['exp']

X_ols = sm.add_constant(X)
model = sm.OLS(y, X_ols).fit()
print(model.summary())

# Closed-form OLS check: solve the normal equations (X'X) b = X'y.
# np.linalg.solve avoids forming the explicit inverse, which is numerically
# less stable, and yields the same coefficients.
xtx = X_ols.T.dot(X_ols).to_numpy()
xty = X_ols.T.dot(y).to_numpy()
beta = np.linalg.solve(xtx, xty)
pd.Series(beta, index=X_ols.columns)