Image generated by Midjourney AI
Background: ESG investing is becoming one of the shiniest jewels in the global equity market, driven by rising demand for investments that foster sustainability. But what is ESG, and do investors really care about it? This paper examines whether ESG scores can explain stock volatility and whether they are useful for stock prediction. We use visualizations of correlations and time series to examine stock volatility, and we introduce a factor methodology to test whether ESG helps predict the performance of S&P 500 stocks.

Methodology: We constructed an ESG+ factor portfolio, consisting of ESG factors together with other commonly used factors (profitability, leverage ratio, etc.), which we reduce to three factors via PCA. We then applied linear regression, support vector regression, random forest, and gradient boosting to those factors to predict the future returns of S&P 500 stocks. Judging by the p-values in the linear regression model, the ESG factors are not significant predictors, except for the S-score, which is negatively correlated with next-quarter stock returns. In terms of model performance on the ESG+ factors, linear regression performs best by mean squared error, while random forest performs best for stock portfolio construction. We conclude that it is important to find the balance point between over-investing and under-investing in ESG, and we offer advice to policymakers, corporations, and investors in our discussion and conclusion section.
ESG scores are a set of metrics used to evaluate and quantify a company's performance in three key areas: Environmental, Social, and Governance. These scores are designed to help investors and stakeholders assess the sustainability and ethical impact of a company, as well as its long-term resilience and performance. ESG scores are increasingly being incorporated into the decision-making processes of asset management firms (such as BlackRock, J.P. Morgan, and Goldman Sachs) as awareness grows about the importance of responsible and sustainable business practices.
The three components of ESG scores are:

1) Environmental - how a company performs as a steward of the natural environment (e.g., emissions, resource use, waste).

2) Social - how a company manages relationships with its employees, suppliers, customers, and the communities in which it operates.

3) Governance - how a company is run, covering leadership, executive pay, audits, internal controls, and shareholder rights.
ESG scores are calculated using a combination of publicly available information and proprietary research. Various organizations and rating agencies, such as Bloomberg, provide ESG scores, and their methodologies and weightings can differ. Bloomberg is a well-regarded provider of financial data, analytics, and research, and its ESG scores are considered reliable and credible. Bloomberg has offered ESG data and analytics since 2016, and these are now widely used by financial professionals.
ESG investing is on fire (Eccles and Stroehle, 2018). Over 3,000 institutional investors and service providers have formally endorsed the Principles for Responsible Investment (PRI), thereby pledging to integrate Environmental, Social, and Governance (ESG) or Corporate Social Responsibility (CSR) considerations into their investment analysis and decision-making procedures (Gillan et al., 2021). However, whether ESG is priced into stocks remains hotly disputed.
The ESG investment performance literature highlights three primary perspectives: that ESG improves risk-adjusted returns, that it harms them, and that it has no significant relationship with performance.
!pip install plotly_express
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly_express as px
import plotly as py
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import seaborn as sns
In this section, we use two main datasets to investigate those questions:
1) ESG_Disclosure_across_world - from Sustainable Stock Exchange Initiative (SSE)
2) Industry - from Refinitiv
ESG_Disclosure_across_world = pd.read_csv("https://raw.githubusercontent.com/KOM-1020/Harvey-ECON-323/main/ESG%20Disclosure%20Guidance%20Database%20(1)%20(1).csv", skiprows = [291, 290])
Industry = pd.read_csv("https://raw.githubusercontent.com/KOM-1020/Harvey-ECON-323/main/GridExport_April_8_2023_21_38_44%20(1).csv", skiprows = [1])
We use ESG_Disclosure_across_world to track the progress of stock exchanges worldwide in establishing ESG guidance. Columns include: Market, continent, iso_alpha, STOCK EXCHANGE, Year, and ESG GUIDANCE.
# Combine descriptive information from the "STOCK EXCHANGE" and "ESG GUIDANCE" columns
ESG_Disclosure_across_world["Stock exchage & ESG guidance"] = ESG_Disclosure_across_world["STOCK EXCHANGE"] + ": " + ESG_Disclosure_across_world["ESG GUIDANCE"]
ESG_Disclosure_across_world
| | Market | continent | iso_alpha | STOCK EXCHANGE | Year | ESG GUIDANCE | Stock exchage & ESG guidance |
|---|---|---|---|---|---|---|---|
| 0 | China | Asia | CHN | Shenzhen Stock Exchange | 2006 | Social Responsibility Instructions to Listed C... | Shenzhen Stock Exchange: Social Responsibility... |
| 1 | China | Asia | CHN | Shenzhen Stock Exchange | 2007 | Social Responsibility Instructions to Listed C... | Shenzhen Stock Exchange: Social Responsibility... |
| 2 | China | Asia | CHN | Shenzhen Stock Exchange | 2008 | Social Responsibility Instructions to Listed C... | Shenzhen Stock Exchange: Social Responsibility... |
| 3 | China | Asia | CHN | Shenzhen Stock Exchange | 2009 | Social Responsibility Instructions to Listed C... | Shenzhen Stock Exchange: Social Responsibility... |
| 4 | China | Asia | CHN | Shenzhen Stock Exchange | 2010 | Social Responsibility Instructions to Listed C... | Shenzhen Stock Exchange: Social Responsibility... |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 284 | Netherlands | Europe | NLD | Euronext Amsterdam | 2022 | ESG Reporting Guide - Target 1.5°C | Euronext Amsterdam: ESG Reporting Guide - Targ... |
| 285 | Norway | Europe | NOR | Oslo Bors (Euronext) | 2022 | ESG Reporting Guide - Target 1.5°C | Oslo Bors (Euronext): ESG Reporting Guide - Ta... |
| 286 | Portugal | Europe | PRT | Euronext Lisbon | 2022 | ESG Reporting Guide - Target 1.5°C | Euronext Lisbon: ESG Reporting Guide - Target ... |
| 287 | Russian Federation | Asia | RUS | Moscow Exchange | 2022 | ESG Best Practice Guide (in Russian) | Moscow Exchange: ESG Best Practice Guide (in R... |
| 288 | UK | Europe | GBR | Euronext London | 2022 | ESG Reporting Guide - Target 1.5°C | Euronext London: ESG Reporting Guide - Target ... |

289 rows × 7 columns
We use Refinitiv to compare market cap, ESG score, and the correlation with Beta across the different sectors of the S&P 500. We generated a customized dataset in the Refinitiv workspace by selecting the relevant columns and indicators. Columns include:

- Company Name: the name of the company for which the ESG scores are reported.
- Sector: the sector in which the company operates.
- Ticker Symbol: the company's stock ticker symbol.
- Ticker Symbol (Simplified): a simplified version of the company's stock ticker symbol.
- ESG Score (FY0): the company's ESG (Environmental, Social, and Governance) score for the current fiscal year (FY0).
- YoY Change in ESG Score in FY0 (%): the year-over-year percentage change in the company's ESG score for the current fiscal year (FY0).
- ESG Score (FY-1): the company's ESG score for the previous fiscal year (FY-1).
- ESG Score Latest Update Date (FY0): the date on which the company's ESG score for the current fiscal year (FY0) was last updated.
- Market Cap: the company's market capitalization.
- Beta: the beta coefficient of the company's stock, measuring its volatility relative to the overall market.
- Is S&P500: a binary indicator of whether the company is a constituent of the S&P 500 index (True if yes, False if no).
(References: https://library.tu.ac.th/uload/userfiles/files/Refinitv%20Eikon%20manual.pdf , https://community.developers.refinitiv.com/questions/85483/how-to-find-documentation-on-sdg-data-items-fy0-an.html)
# Rename columns to cleaner, consistent names
Industry = Industry.rename(columns={
    "ESG Score\n(FY0)\n(Σ=Avg)": "ESG Score (FY0)",
    "ESG Period Last Update Date\n(FY0)": "ESG Score Latest Update Date (FY0)",
    "ESG Score\n(FY-1)\n(Σ=Avg)": "ESG Score (FY-1)",
    "Original Announcement Date Time\n(FY0)": "Latest Financial Statement Announcement Time (FY0)",
    "Ticker Symbol": "Ticker Symbol (Simplified)",
    "Identifier (RIC)": "Ticker Symbol",
    "TRBC Economic Sector Name": "Sector",
    "Market Cap\n(Σ=Avg)": "Market Cap",
    "Beta\n(Σ=Avg)": "Beta",
})
# Strip the letter rating and keep only the numeric score
Industry['ESG Score (FY0)'] = Industry['ESG Score (FY0)'].str.extract(r'(\d+\.\d+)').astype(float)
Industry['ESG Score (FY-1)'] = Industry['ESG Score (FY-1)'].str.extract(r'(\d+\.\d+)').astype(float)
# Make the data in the 'Market Cap' column become numeric and get rid of the ","
Industry['Market Cap'] = pd.to_numeric(Industry['Market Cap'].str.replace(',', ''))
# Drop the provided YoY change (rounded to two decimals) and recompute it at full precision
Industry['Yoy Change in ESG Score in FY0 (%)'] = ((Industry['ESG Score (FY0)']/Industry['ESG Score (FY-1)'])-1)*100
Industry = Industry.drop("YoY % Chg in\nESG Score\n(FY0)\n(Σ=Avg)", axis=1)
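# Quick sanity check (values taken from the table displayed below): Baker Hughes has
# ESG Score (FY0) = 92.13 and ESG Score (FY-1) = 85.06, so the recomputed change is
# (92.13/85.06 - 1)*100 ≈ 8.311780, matching the value shown for BKR.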
# Reorder columns
new_column_order = ["Company Name", "Sector", "Ticker Symbol", "Ticker Symbol (Simplified)", "ESG Score (FY0)", "Yoy Change in ESG Score in FY0 (%)","ESG Score (FY-1)","ESG Score Latest Update Date (FY0)", "Market Cap", "Beta"]
Industry = Industry[new_column_order]
# Drop the rows that contain NA in the ESG Score (FY0) - Latest ESG Score column
Industry = Industry.dropna(subset=['ESG Score (FY0)'])
# Load the list of S&P 500 companies
list_of_SP500 = pd.read_csv("https://raw.githubusercontent.com/KOM-1020/Harvey-ECON-323/main/constituents.csv")
list_of_SP500 = list_of_SP500.rename(columns={"Symbol": "Ticker Symbol (Simplified)"})
list_of_SP500
# Merge and keep only the data of S&P 500 companies
merged_data_Industry = pd.merge(Industry, list_of_SP500, on='Ticker Symbol (Simplified)', how='left')
merged_data_Industry['Is S&P500'] = merged_data_Industry['Ticker Symbol (Simplified)'].isin(list_of_SP500['Ticker Symbol (Simplified)'])
Industry_SP500 = merged_data_Industry.loc[merged_data_Industry['Is S&P500'] == True]
# Reorder columns
Industry_SP500 = Industry_SP500[["Company Name", "Sector", "Ticker Symbol", "Ticker Symbol (Simplified)", "ESG Score (FY0)", "Yoy Change in ESG Score in FY0 (%)","ESG Score (FY-1)","ESG Score Latest Update Date (FY0)", "Market Cap", "Beta", "Is S&P500"]]
Industry_SP500
| | Company Name | Sector | Ticker Symbol | Ticker Symbol (Simplified) | ESG Score (FY0) | Yoy Change in ESG Score in FY0 (%) | ESG Score (FY-1) | ESG Score Latest Update Date (FY0) | Market Cap | Beta | Is S&P500 |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Baker Hughes Co | Energy | BKR.OQ | BKR | 92.13 | 8.311780 | 85.06 | 29/03/2023 | 2.884841e+10 | 1.47 | True |
| 1 | Chevron Corp | Energy | CVX.N | CVX | 86.20 | 2.999164 | 83.69 | 29/03/2023 | 3.196539e+11 | 1.15 | True |
| 2 | Kinder Morgan Inc | Energy | KMI.N | KMI | 85.90 | -1.614935 | 87.31 | 29/03/2023 | 3.981249e+10 | 0.92 | True |
| 3 | Halliburton Co | Energy | HAL.N | HAL | 85.78 | -4.284758 | 89.62 | 29/03/2023 | 2.953318e+10 | 2.12 | True |
| 4 | Schlumberger NV | Energy | SLB.N | SLB | 85.47 | 5.206795 | 81.24 | 29/03/2023 | 7.046975e+10 | 1.78 | True |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3099 | Camden Property Trust | Real Estate | CPT.N | CPT | 62.87 | -7.666324 | 68.09 | 29/03/2023 | 1.115660e+10 | 0.80 | True |
| 3104 | Equity Residential | Real Estate | EQR.N | EQR | 61.69 | 2.526176 | 60.17 | 09/03/2023 | 2.276917e+10 | 0.83 | True |
| 3113 | VICI Properties Inc | Real Estate | VICI.N | VICI | 56.91 | -3.981778 | 59.27 | 29/03/2023 | 3.245590e+10 | 0.96 | True |
| 3116 | Invitation Homes Inc | Real Estate | INVH.N | INVH | 54.85 | 21.081678 | 45.30 | 29/03/2023 | 1.926749e+10 | 0.90 | True |
| 3185 | Extra Space Storage Inc | Real Estate | EXR.N | EXR | 33.06 | -3.727432 | 34.34 | 29/03/2023 | 2.197784e+10 | 0.56 | True |

475 rows × 11 columns
Globally, there has been an upward trend in the development of ESG guidance in stock markets. With a number of important exchanges and regulatory organizations actively encouraging ESG integration in investment decisions and disclosures (such as the New York Stock Exchange and NASDAQ), the American market in particular has been a prominent driver of this movement.

Additionally, the US has a relatively well-developed framework for ESG reporting, with businesses receiving guidance from groups like the Global Reporting Initiative (GRI) and the Sustainability Accounting Standards Board (SASB).
We concentrate on the US stock market for several reasons:

1) The US Securities and Exchange Commission (SEC) requires publicly listed firms to disclose certain ESG-related information in their annual reports, such as climate risks, human capital management, and board diversity. This makes it simpler for researchers to acquire and examine ESG data for US-listed corporations.
2) The US market is the biggest in the world, according to the World Federation of Exchanges (2021), with a total market capitalization of more than $47 trillion as of 2020. It comprises a major share of the world's investment opportunities and has the potential to significantly affect both the environment and the global economy.
3) According to MSCI (2017), the US market is home to a number of sizable multinational enterprises that operate in a variety of industries, giving researchers a diversified group of businesses to examine from an ESG standpoint.
4) The S&P 500 is under intense scrutiny from regulators and investors, so businesses listed on the index are more likely to be transparent and accountable in their ESG reporting.
fig = px.scatter_geo(ESG_Disclosure_across_world, locations="iso_alpha",
hover_name="Stock exchage & ESG guidance",
text = "Market",
scope = "world",
animation_frame = "Year",
projection="natural earth",
title='Progression of Establishing ESG Guidance across World')
fig.show()
da = ESG_Disclosure_across_world.query("Year == 2022")
fig = px.scatter_geo(da, locations="iso_alpha",
hover_name="Stock exchage & ESG guidance",
color = "continent",
text = "Market",
scope = "world",
animation_frame = "Year",
projection="equirectangular",
title='Progression of Establishing ESG Guidance till 2022')
fig.show()
fig = px.treemap(Industry_SP500, path=[px.Constant("S&P 500"), 'Sector', 'Company Name'], values='Market Cap',
color='Market Cap', hover_data=['Ticker Symbol'],
color_continuous_scale='dense')
fig.update_layout(margin = dict(t=50, l=25, r=25, b=25), title = "Market Cap Treemap of S&P 500")
fig.show()
fig = px.treemap(Industry_SP500, path=[px.Constant("S&P 500"), 'Sector', 'Company Name'], values='ESG Score (FY0)',
color='ESG Score (FY0)', hover_data=['Ticker Symbol'],
color_continuous_scale='RdYlGn')
fig.update_layout(margin = dict(t=50, l=25, r=25, b=25), title = "ESG Score Treemap of S&P 500")
fig.show()
# Compute the average ESG Score by Sector
sector_scores_SP500only = Industry_SP500.groupby('Sector')['ESG Score (FY0)'].mean()
# Compute the total Market Cap by Sector
sector_market_cap_SP500only = Industry_SP500.groupby('Sector')['Market Cap'].sum()
# Combine into one Dataframe
combined_data = pd.concat([sector_scores_SP500only, sector_market_cap_SP500only], axis=1)
combined_data.columns = ['Average ESG Score across sectors (S&P 500 only)', "Total Market Cap (S&P 500 only)"]
combined_data = combined_data.sort_values("Average ESG Score across sectors (S&P 500 only)", ascending=True)
combined_data
# Convert to lists for plotting
sector = combined_data.index.tolist()
sc = combined_data['Average ESG Score across sectors (S&P 500 only)'].tolist()
cap = combined_data['Total Market Cap (S&P 500 only)'].tolist()
# Create a figure with two side-by-side subplots
fig = make_subplots(rows=1, cols=2, specs=[[{}, {}]], shared_xaxes=True,
shared_yaxes=False, vertical_spacing=0.001)
# Create Bar Plot on the left
fig.append_trace(go.Bar(
x=sc,
y=sector,
marker=dict(
color='rgba(50, 171, 96, 0.6)',
line=dict(
color='rgba(50, 171, 96, 1.0)',
width=1),
),
name='Average ESG Score for Each Sector',
orientation='h',
), 1, 1)
# Create Scatter Plot on the right
fig.append_trace(go.Scatter(
x=cap, y=sector,
mode='lines+markers',
line_color='rgb(116, 116, 212)',
name='Total Market Cap for Each Sector',
), 1, 2)
# Adjusting graph layout
fig.update_layout(
title='Average ESG Score and Total Market Cap for Each Sector within S&P 500',
yaxis=dict(
showgrid=True,
showline=False,
showticklabels=True,
domain=[0, 0.85],
),
yaxis2=dict(
showgrid=True,
showline=True,
showticklabels=False,
linecolor='rgba(102, 102, 102, 0.8)',
linewidth=2,
domain=[0, 0.85],
),
xaxis=dict(
zeroline=False,
showline=False,
showticklabels=False,
showgrid=True,
domain=[0, 0.42],
),
xaxis2=dict(
zeroline=False,
showline=False,
showticklabels=False,
showgrid=True,
domain=[0.47, 1],
side='bottom',
dtick=1000,
),
legend=dict(x=0.029, y=1.038, font_size=10),
margin=dict(l=100, r=20, t=70, b=70),
paper_bgcolor='rgb(248, 248, 255)',
plot_bgcolor='rgb(248, 248, 255)',
)
annotations = []
y_s = np.round(sc, decimals=2)
ccap = [float(s) / 1000000000 for s in cap]
y_nw = np.rint(ccap)
# Adding labels
for ydn, yd, xd in zip(y_nw, y_s, sector):
annotations.append(dict(xref='x2', yref='y2',
y=xd, x=ydn,
text='{:,}'.format(int(ydn)) + 'B',
font=dict(family='Arial', size=12,
color='rgb(116, 116, 212)'),
showarrow=True))
annotations.append(dict(xref='x1', yref='y1',
y=xd, x=yd + 10,
text=str(yd),
font=dict(family='Arial', size=12,
color='rgb(50, 171, 96)'),
showarrow=False))
fig.update_layout(annotations=annotations)
fig.show()
Consumer Non-Cyclicals has the highest average ESG score (75.2), followed by Healthcare and Real Estate, while Industrials has the lowest (63.9), followed by Consumer Cyclicals and Financials.

Sector market capitalizations in the S&P 500 also vary widely, ranging from 2.67 trillion for Basic Materials to 157.72 trillion for Technology. Healthcare, Consumer Non-Cyclicals, and Technology have the largest market capitalizations, in ascending order, while Basic Materials has the lowest.

Some sectors consistently score higher on ESG than others. This demonstrates the need to take ESG considerations into account when investing across different areas of the economy.

In the following part, we further investigate how ESG scores relate to volatility (Beta).
Beta is a measure of the volatility, or systematic risk, of a security or portfolio relative to the overall market. It is a statistical indicator of how an asset's price has moved relative to a benchmark such as the S&P 500 index. When an asset's Beta equals 1, it moves in lockstep with the market; when it is greater than 1, the asset is more volatile than the market; and when it is less than 1, the asset is less volatile than the market.

Investors frequently use Beta to judge an investment's risk relative to the market as a whole. Higher-beta assets typically offer greater return potential but carry more risk, since they are more exposed to market swings; lower-beta assets offer lower potential returns but also lower risk and volatility.
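To make this concrete, Beta can be estimated from historical prices as the covariance of the asset's returns with the market's returns divided by the variance of the market's returns. The sketch below is illustrative only and is not part of our pipeline; stock_close and sp500_close stand for hypothetical closing-price series:

def estimate_beta(asset_close, market_close):
    # Beta = Cov(r_asset, r_market) / Var(r_market), using simple daily returns
    r_asset = asset_close.pct_change().dropna()
    r_market = market_close.pct_change().dropna()
    r_asset, r_market = r_asset.align(r_market, join="inner")  # keep shared dates only
    return r_asset.cov(r_market) / r_market.var()

# Example (hypothetical inputs): estimate_beta(stock_close, sp500_close)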
Overall, we expect to see a mixed relationship between ESG scores and Beta across different sectors. Some sectors may show a positive correlation, while others may show a negative correlation or no correlation at all.
fig = px.scatter(Industry_SP500, x="ESG Score (FY0)", y="Beta", facet_col_wrap=5, facet_col="Sector", color= "Sector",
width = 1050, trendline= "ols")
fig.update_layout(title='Relationship between Beta and ESG Score across Sectors', showlegend=False)
fig.show()
# Build one ESG-score column per sector alongside Beta, then convert to a correlation matrix
sector_names = ["Energy", "Basic Materials", "Industrials", "Consumer Cyclicals",
                "Consumer Non-Cyclicals", "Financials", "Healthcare", "Technology",
                "Utilities", "Real Estate"]
Corr = None
for sector in sector_names:
    # Keep Beta plus this sector's ESG scores, renamed so each sector gets its own column
    sub = Industry_SP500.loc[Industry_SP500["Sector"] == sector, ["Beta", "ESG Score (FY0)"]]
    sub = sub.rename(columns={"ESG Score (FY0)": "ESG Score in " + sector})
    # Outer-merge the sector frames on the shared "Beta" column
    Corr = sub if Corr is None else pd.merge(Corr, sub, how="outer")
# Convert the DataFrame into a correlation matrix
Corr = Corr.corr()
Corr
# Visualization
plt.figure(figsize=(16, 6))
sns.heatmap(Corr, vmin=-1, vmax=1, annot=True, cmap='BrBG',
annot_kws={'fontsize':7, 'fontweight':'bold'},
square=True)
plt.title('Correlation between ESG Score and Beta across Sectors')
plt.show()
There is a positive correlation between ESG Score and Beta in the Financials and Real Estate sectors, with correlation coefficients of 0.45 and 0.50 on the correlation matrix and R-squared values of 0.21 and 0.20, meaning that roughly 21% and 20% of the variation in Beta (Y) is explained by variation in the Financials and Real Estate ESG scores (X), respectively. This suggests that companies in these sectors with higher ESG scores tend to have higher Betas and are therefore more sensitive to market fluctuations.

On the other hand, there is a negative correlation between ESG Score and Beta in the Healthcare sector, with a correlation coefficient of -0.27 and an R-squared of 0.15, meaning that 15% of the variation in Beta (Y) is explained by variation in the Healthcare ESG score (X). This suggests that companies in this sector with higher ESG scores tend to have lower Betas and are less sensitive to market fluctuations, possibly because healthcare companies tend to be less cyclical and less affected by economic swings, which could result in lower volatility.
To further investigate the relationship between market volatility and ESG scores, we select the top three and bottom three ESG-score companies in each of these three sectors. We anticipate that the time-series plots will highlight the association between stock volatility and ESG score, and that stock trends will shift around periods in which the ESG score changes.
Financials = Industry_SP500.loc[Industry_SP500["Sector"] == "Financials"]
Financials = Financials.sort_values("ESG Score (FY0)", ascending=False)
Financials.head(3)
| | Company Name | Sector | Ticker Symbol | Ticker Symbol (Simplified) | ESG Score (FY0) | Yoy Change in ESG Score in FY0 (%) | ESG Score (FY-1) | ESG Score Latest Update Date (FY0) | Market Cap | Beta | Is S&P500 |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1265 | Citigroup Inc | Financials | C.N | C | 87.25 | -1.867057 | 88.91 | 31/03/2023 | 8.926487e+10 | 1.56 | True |
| 1266 | Goldman Sachs Group Inc | Financials | GS.N | GS | 84.44 | -2.987132 | 87.04 | 31/03/2023 | 1.076154e+11 | 1.40 | True |
| 1267 | JPMorgan Chase & Co | Financials | JPM.N | JPM | 83.09 | -1.981833 | 84.77 | 29/03/2023 | 3.736733e+11 | 1.08 | True |
Financials = Industry_SP500.loc[Industry_SP500["Sector"] == "Financials"]
Financials = Financials.sort_values("ESG Score (FY0)", ascending=True)
Financials.head(3)
| | Company Name | Sector | Ticker Symbol | Ticker Symbol (Simplified) | ESG Score (FY0) | Yoy Change in ESG Score in FY0 (%) | ESG Score (FY-1) | ESG Score Latest Update Date (FY0) | Market Cap | Beta | Is S&P500 |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1564 | Brown & Brown Inc | Financials | BRO.N | BRO | 36.28 | 14.088050 | 31.80 | 29/03/2023 | 1.681318e+10 | 0.74 | True |
| 1457 | Loews Corp | Financials | L.N | L | 44.06 | 4.929745 | 41.99 | 29/03/2023 | 1.301680e+10 | 0.83 | True |
| 1430 | First Republic Bank | Financials | FRC.N | FRC | 46.86 | 3.741421 | 45.17 | 09/03/2023 | 2.566443e+09 | 1.01 | True |
1 - Read each CSV file
2 - Convert the "Date" column to datetime format
3 - Rename the "Close" column to the company name
4 - Merge all the price series
5 - Separate them into a "high score" dataframe and a "low score" dataframe
# Helper: load a ticker's price CSV, index it by date, and keep the closing
# price under the company's name
def load_close(file_name, company):
    df = pd.read_csv("https://raw.githubusercontent.com/KOM-1020/Harvey-ECON-323/main/" + file_name)
    df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')
    df = df.set_index('Date')
    return df[['Close']].rename(columns={"Close": company})

C = load_close("C%20(1).csv", "Citigroup")
GS = load_close("GS%20(1).csv", "Goldman Sachs Group")
JPM = load_close("JPM%20(1).csv", "JPMorgan Chase & Co")
BRO = load_close("BRO%20(1).csv", "Brown & Brown")
L = load_close("L%20(1).csv", "Loews Corp")
FRC = load_close("FRC%20(1).csv", "First Republic Bank")
# Inner-join all six price series on their shared dates
merged_df = pd.merge(C, GS, left_index=True, right_index=True)
for prices in (JPM, BRO, L, FRC):
    merged_df = pd.merge(merged_df, prices, left_index=True, right_index=True)
high_score_df = merged_df[["Citigroup", "Goldman Sachs Group", "JPMorgan Chase & Co"]]
low_score_df = merged_df[["Brown & Brown", "Loews Corp", "First Republic Bank"]]
1 - Read each CSV file
2 - Set the index
3 - Turn the "period end date" and "ESG score" rows into lists
(The purpose is to loop over these lists during visualization and draw vertical time bands marking each change in the ESG grade)
# Helper: load a ticker's ESG table and return its period end dates and ESG letter grades as lists
def load_esg_table(file_name):
    score = pd.read_csv("https://raw.githubusercontent.com/KOM-1020/Harvey-ECON-323/main/" + file_name, skiprows=[0, 1])
    score = score.set_index('Unnamed: 0')
    return score.iloc[0].tolist(), score.iloc[7].tolist()

C_period_end_date, C_esg_scores = load_esg_table("ESG%20Table%20for%20C.N%20(1).csv")
GS_period_end_date, GS_esg_scores = load_esg_table("ESG%20Table%20for%20GS.N%20(1).csv")
JPM_period_end_date, JPM_esg_scores = load_esg_table("ESG%20Table%20for%20JPM.N%20(1).csv")
BRO_period_end_date, BRO_esg_scores = load_esg_table("ESG%20Table%20for%20BRO.N%20(1).csv")
L_period_end_date, L_esg_scores = load_esg_table("ESG%20Table%20for%20L.N%20(1).csv")
FRC_period_end_date, FRC_esg_scores = load_esg_table("ESG%20Table%20for%20FRC.N%20(1).csv")
# Map each ESG letter grade to a color on a red (low) to green (high) scale
GRADE_COLORS = {
    "D-": "#FF0000", "D": "#FF1100", "D+": "#FF3300",
    "C-": "#FF6600", "C": "#FF9900", "C+": "#FFCC00",
    "B-": "#FFFF00", "B": "#CCFF00", "B+": "#99FF00",
    "A-": "#66FF00", "A": "#33FF00", "A+": "#00FF00",
}

def color_score(x):
    # Unrecognized grades fall back to a yellow-green
    return GRADE_COLORS.get(x, "#77FF00")
1 - Create time-series plots (one for the top 3 and one for the bottom 3 companies)
2 - For each company, loop over its reporting periods and draw vertical time bands describing the change in ESG grade
3 - Color each band according to its ESG grade
4 - Label each company's Beta
5 - Set the title, line colors, axis ranges, etc.
# Helper: shade each ESG reporting period in one facet column with the color of its
# letter grade, and label the company's Beta inside the first band
def add_esg_bands(fig, period_end_dates, esg_scores, col, beta_label):
    for i in range(len(period_end_dates) - 1):
        fig.add_vrect(x0=period_end_dates[i], x1=period_end_dates[i+1], col=col,
                      annotation_text=esg_scores[i], annotation_position="top left",
                      fillcolor=color_score(esg_scores[i]), opacity=0.15, line_width=1)
    fig.add_vrect(x0=period_end_dates[0], x1=period_end_dates[1], col=col,
                  annotation_text=beta_label, annotation_position="inside bottom right",
                  opacity=0.15, line_width=0)

fig = px.line(high_score_df, facet_col="variable", facet_col_wrap=3, labels=dict(value="Stock Price", variable="Company"),
              color_discrete_sequence=["#eb663b", "#511cfb", "#00a08b"], range_y=[0,450])
add_esg_bands(fig, C_period_end_date, C_esg_scores, col=1, beta_label="Beta = 1.56")
add_esg_bands(fig, GS_period_end_date, GS_esg_scores, col=2, beta_label="Beta = 1.40")
add_esg_bands(fig, JPM_period_end_date, JPM_esg_scores, col=3, beta_label="Beta = 1.08")
fig.update_layout(title='Stock Price and ESG Rating of the Three Highest ESG Score Financial Companies in S&P500',
                  height=400, legend=dict(y=0.5, font_size=10))
fig.show()
fig = px.line(low_score_df, facet_col="variable", facet_col_wrap=3, labels=dict(value="Stock Price", variable="Company"),
              color_discrete_sequence=["#b2828d", "#6c7c32", "#778aae"], range_y=[0,450])
add_esg_bands(fig, BRO_period_end_date, BRO_esg_scores, col=1, beta_label="Beta = 0.74")
add_esg_bands(fig, L_period_end_date, L_esg_scores, col=2, beta_label="Beta = 0.83")
add_esg_bands(fig, FRC_period_end_date, FRC_esg_scores, col=3, beta_label="Beta = 1.01")
fig.update_layout(title='Stock Price and ESG Rating of the Three Lowest ESG Score Financial Companies in S&P500',
                  height=400, legend=dict(y=0.5, font_size=10))
fig.show()
We observe that the top three ESG-rated Financial companies generally have larger Betas, with more volatility and growth potential than the bottom three. In contrast, the bottom three ESG-rated Financial companies generally have lower Betas and less growth potential. Notably, although First Republic Bank appears to have delivered strong returns before 2022, its ESG rating does reflect weaknesses in its risk management practices: "First Republic Bank's wealthy depositors have been moving their money to larger institutions seen as less likely to go under. The bank's depositors have withdrawn roughly $70 billion since SVB's collapse earlier this month, the Wall Street Journal reported." This supports the view that companies with higher ESG scores are more likely to be well governed and to have better risk management practices.
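As a rough numerical cross-check of this visual impression (a sketch reusing the merged_df built above; annualizing by the square root of 252 assumes roughly 252 trading days per year), we can compare the annualized realized volatility of daily returns across the six banks. The same check can be repeated for the Real Estate and Healthcare groups below:

# Annualized realized volatility of daily close-to-close returns for each company
daily_returns = merged_df.pct_change().dropna()
realized_vol = daily_returns.std() * np.sqrt(252)  # assumes ~252 trading days per year
print(realized_vol.sort_values(ascending=False))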
Real_Estate = Industry_SP500.loc[Industry_SP500["Sector"] == "Real Estate"]
Real_Estate = Real_Estate.sort_values("ESG Score (FY0)", ascending=False)
Real_Estate.head(3)
| | Company Name | Sector | Ticker Symbol | Ticker Symbol (Simplified) | ESG Score (FY0) | Yoy Change in ESG Score in FY0 (%) | ESG Score (FY-1) | ESG Score Latest Update Date (FY0) | Market Cap | Beta | Is S&P500 |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 3038 | Healthpeak Properties Inc | Real Estate | PEAK.N | PEAK | 88.59 | -1.325462 | 89.78 | 31/03/2023 | 1.172752e+10 | 0.88 | True |
| 3039 | CBRE Group Inc | Real Estate | CBRE.N | CBRE | 85.53 | -0.650482 | 86.09 | 29/03/2023 | 2.185135e+10 | 1.36 | True |
| 3040 | Realty Income Corp | Real Estate | O.N | O | 85.43 | 10.118587 | 77.58 | 09/03/2023 | 4.132990e+10 | 0.80 | True |
Real_Estate = Industry_SP500.loc[Industry_SP500["Sector"] == "Real Estate"]
Real_Estate = Real_Estate.sort_values("ESG Score (FY0)", ascending=True)
Real_Estate.head(3)
| | Company Name | Sector | Ticker Symbol | Ticker Symbol (Simplified) | ESG Score (FY0) | Yoy Change in ESG Score in FY0 (%) | ESG Score (FY-1) | ESG Score Latest Update Date (FY0) | Market Cap | Beta | Is S&P500 |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 3185 | Extra Space Storage Inc | Real Estate | EXR.N | EXR | 33.06 | -3.727432 | 34.34 | 29/03/2023 | 2.197784e+10 | 0.56 | True |
| 3116 | Invitation Homes Inc | Real Estate | INVH.N | INVH | 54.85 | 21.081678 | 45.30 | 29/03/2023 | 1.926749e+10 | 0.90 | True |
| 3113 | VICI Properties Inc | Real Estate | VICI.N | VICI | 56.91 | -3.981778 | 59.27 | 29/03/2023 | 3.245590e+10 | 0.96 | True |
1 - Read each CSV file
2 - Convert the "Date" column to datetime format
3 - Rename the "Close" column to the company name
4 - Merge all the price series
5 - Separate them into a "high score" dataframe and a "low score" dataframe
# Reuse the load_close helper defined above
PEAK = load_close("PEAK%20(2).csv", "Healthpeak Properties")
CBRE = load_close("CBRE%20(1).csv", "CBRE Group")
O = load_close("O%20(1).csv", "Realty Income Corp")
EXR = load_close("EXR%20(1).csv", "Extra Space Storage")
INVH = load_close("INVH%20(2).csv", "Invitation Homes")
VICI = load_close("VICI%20(2).csv", "VICI Properties")
# Outer-join the high-score and low-score groups on their dates
high_score_df = pd.merge(PEAK, CBRE, how="outer", left_index=True, right_index=True)
high_score_df = pd.merge(high_score_df, O, how="outer", left_index=True, right_index=True)
low_score_df = pd.merge(EXR, INVH, how="outer", left_index=True, right_index=True)
low_score_df = pd.merge(low_score_df, VICI, how="outer", left_index=True, right_index=True)
1 - Read each CSV file
2 - Set the index
3 - Turn the "period end date" and "ESG score" rows into lists
(The purpose is to loop over these lists during visualization and draw vertical time bands marking each change in the ESG grade)
# Reuse the load_esg_table helper defined above
PEAK_period_end_date, PEAK_esg_scores = load_esg_table("ESG%20Table%20for%20PEAK.N%20(1).csv")
CBRE_period_end_date, CBRE_esg_scores = load_esg_table("ESG%20Table%20for%20CBRE.N%20(1).csv")
O_period_end_date, O_esg_scores = load_esg_table("ESG%20Table%20for%20O.N%20(1).csv")
EXR_period_end_date, EXR_esg_scores = load_esg_table("ESG%20Table%20for%20EXR.N%20(1).csv")
INVH_period_end_date, INVH_esg_scores = load_esg_table("ESG%20Table%20for%20INVH.N%20(1).csv")
VICI_period_end_date, VICI_esg_scores = load_esg_table("ESG%20Table%20for%20VICI.N%20(1).csv")
1 - Create time-series plots (one for the top 3 and one for the bottom 3 companies)
2 - For each company, loop over its reporting periods and draw vertical time bands describing the change in ESG grade (reusing the add_esg_bands helper)
3 - Color each band according to its ESG grade
4 - Label each company's Beta
5 - Set the title, line colors, axis ranges, etc.
fig = px.line(high_score_df, facet_col="variable", facet_col_wrap=3, labels=dict(value="Stock Price", variable="Company"),
              color_discrete_sequence=["#2e91e5", "#e15f99", "#1ca71c"], range_y=[0,250])
add_esg_bands(fig, PEAK_period_end_date, PEAK_esg_scores, col=1, beta_label="Beta = 0.88")
add_esg_bands(fig, CBRE_period_end_date, CBRE_esg_scores, col=2, beta_label="Beta = 1.36")
add_esg_bands(fig, O_period_end_date, O_esg_scores, col=3, beta_label="Beta = 0.80")
fig.update_layout(title='Stock Price and ESG Rating of the Three Highest ESG Score Real Estate Companies in S&P500',
                  height=400, legend=dict(y=0.5, font_size=10))
fig.show()
fig = px.line(low_score_df, facet_col="variable", facet_col_wrap=3, labels=dict(value="Stock Price", variable="Company"),
              color_discrete_sequence=["#fb0d0d", "#da16ff", "#750d86"], range_y=[0,250])
add_esg_bands(fig, EXR_period_end_date, EXR_esg_scores, col=1, beta_label="Beta = 0.56")
add_esg_bands(fig, INVH_period_end_date, INVH_esg_scores, col=2, beta_label="Beta = 0.90")
add_esg_bands(fig, VICI_period_end_date, VICI_esg_scores, col=3, beta_label="Beta = 0.96")
fig.update_layout(title='Stock Price and ESG Rating of the Three Lowest ESG Score Real Estate Companies in S&P500',
                  height=400, legend=dict(y=0.5, font_size=10))
fig.show()
We observe no clear difference between the top three and bottom three ESG-rated Real Estate companies in terms of stock volatility or Beta. This differs from what we expected and from the reference paper.
Healthcare_Sector = Industry_SP500.loc[Industry_SP500["Sector"] == "Healthcare"]
Healthcare_Sector_high = Healthcare_Sector.sort_values("ESG Score (FY0)", ascending=False)
Healthcare_Sector_high.head(3)
| | Company Name | Sector | Ticker Symbol | Ticker Symbol (Simplified) | ESG Score (FY0) | Yoy Change in ESG Score in FY0 (%) | ESG Score (FY-1) | ESG Score Latest Update Date (FY0) | Market Cap | Beta | Is S&P500 |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1784 | Johnson & Johnson | Healthcare | JNJ.N | JNJ | 92.20 | 6.824238 | 86.31 | 31/03/2023 | 4.291241e+11 | 0.53 | True |
| 1785 | CVS Health Corp | Healthcare | CVS.N | CVS | 88.89 | 1.414718 | 87.65 | 29/03/2023 | 9.957002e+10 | 0.60 | True |
| 1786 | Agilent Technologies Inc | Healthcare | A.N | A | 88.74 | 1.533181 | 87.40 | 09/03/2023 | 4.081573e+10 | 1.03 | True |
Healthcare_Sector = Industry_SP500.loc[Industry_SP500["Sector"] == "Healthcare"]
Healthcare_Sector_low = Healthcare_Sector.sort_values("ESG Score (FY0)", ascending=True)
Healthcare_Sector_low.head(3)
| | Company Name | Sector | Ticker Symbol | Ticker Symbol (Simplified) | ESG Score (FY0) | Yoy Change in ESG Score in FY0 (%) | ESG Score (FY-1) | ESG Score Latest Update Date (FY0) | Market Cap | Beta | Is S&P500 |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1935 | Universal Health Services Inc | Healthcare | UHS.N | UHS | 45.53 | 30.533257 | 34.88 | 31/03/2023 | 8.972948e+09 | 1.25 | True |
| 1916 | Dexcom Inc | Healthcare | DXCM.OQ | DXCM | 47.81 | 8.634401 | 44.01 | 29/03/2023 | 4.359738e+10 | 1.16 | True |
| 1899 | Bio Rad Laboratories Inc | Healthcare | BIO.N | BIO | 51.78 | 6.171827 | 48.77 | 29/03/2023 | 1.378785e+10 | 0.91 | True |
1 - Read each CSV file
2 - Convert the "Date" column to datetime format
3 - Rename the "Close" column to the company name
4 - Merge all the price series
5 - Separate them into a "high score" dataframe and a "low score" dataframe
# Reuse the load_close helper defined above
JNJ = load_close("JNJ%20(1).csv", "Johnson & Johnson")
CVS = load_close("CVS%20(1).csv", "CVS Health Corp")
A = load_close("A%20(1).csv", "Agilent Technologies")
UHS = load_close("UHS%20(1).csv", "Universal Health Services Inc")
DXCM = load_close("DXCM%20(1).csv", "Dexcom Inc")
BIO = load_close("BIO%20(1).csv", "Bio Rad Laboratories Inc")
# Inner-join all six price series on their shared dates
merged_df = pd.merge(JNJ, CVS, left_index=True, right_index=True)
for prices in (A, UHS, DXCM, BIO):
    merged_df = pd.merge(merged_df, prices, left_index=True, right_index=True)
high_score_df = merged_df[["Johnson & Johnson", "CVS Health Corp", "Agilent Technologies"]]
low_score_df = merged_df[["Universal Health Services Inc", "Dexcom Inc", "Bio Rad Laboratories Inc"]]
1 - Read each CSV file
2 - Set the index
3 - Turn the "period end date" and "ESG score" rows into lists
(The purpose is to loop over these lists during visualization and draw vertical time bands marking each change in the ESG grade)
# Reuse the load_esg_table helper defined above
JNJ_period_end_date, JNJ_esg_scores = load_esg_table("ESG%20Table%20for%20JNJ.N%20(1).csv")
CVS_period_end_date, CVS_esg_scores = load_esg_table("ESG%20Table%20for%20CVS.N%20(1).csv")
A_period_end_date, A_esg_scores = load_esg_table("ESG%20Table%20for%20A.N%20(1).csv")
UHS_period_end_date, UHS_esg_scores = load_esg_table("ESG%20Table%20for%20UHS.N%20(1).csv")
DXCM_period_end_date, DXCM_esg_scores = load_esg_table("ESG%20Table%20for%20DXCM.OQ%20(1).csv")
BIO_period_end_date, BIO_esg_scores = load_esg_table("ESG%20Table%20for%20BIO.N%20(1).csv")
1 - Create time-series plots (one for the top 3 and one for the bottom 3 companies)
2 - For each company, loop over its reporting periods and draw vertical time bands describing the change in ESG grade (reusing the add_esg_bands helper)
3 - Color each band according to its ESG grade
4 - Label each company's Beta
5 - Set the title, line colors, axis ranges, etc.
fig = px.line(high_score_df, facet_col="variable", facet_col_wrap=3, labels=dict(value="Stock Price", variable="Company"),
              color_discrete_sequence=["#862a16", "#a777f1", "#620042"], range_y=[0,900])
add_esg_bands(fig, JNJ_period_end_date, JNJ_esg_scores, col=1, beta_label="Beta = 0.53")
add_esg_bands(fig, CVS_period_end_date, CVS_esg_scores, col=2, beta_label="Beta = 0.60")
add_esg_bands(fig, A_period_end_date, A_esg_scores, col=3, beta_label="Beta = 1.03")
fig.update_layout(title='Stock Price and ESG Rating of the Three Highest ESG Score Healthcare Companies in S&P500',
                  height=400, legend=dict(y=0.5, font_size=10))
fig.show()
fig = px.line(low_score_df, facet_col="variable", facet_col_wrap=3, labels=dict(value="Stock Price", variable="Company"),
              color_discrete_sequence=["#1616a7", "#0d2a63", "#af0038"], range_y=[0,900])
add_esg_bands(fig, UHS_period_end_date, UHS_esg_scores, col=1, beta_label="Beta = 1.25")
add_esg_bands(fig, DXCM_period_end_date, DXCM_esg_scores, col=2, beta_label="Beta = 1.16")
add_esg_bands(fig, BIO_period_end_date, BIO_esg_scores, col=3, beta_label="Beta = 0.91")
fig.update_layout(title='Stock Price and ESG Rating of the Three Lowest ESG Score Healthcare Companies in S&P500',
                  height=400, legend=dict(y=0.5, font_size=10))
fig.show()
It appears that the top 3 ESG-rated healthcare companies have smaller betas and lower volatility, and are more stable than the bottom 3. The Healthcare sector differs from the Financial and Real Estate sectors: the negative correlation is mainly due to healthcare being a "defensive sector," as investors seek companies with stable returns during bad times.
Among the bottom 3 ESG-rated healthcare companies, Universal Health Services and Dexcom show higher fluctuation and uncertainty than the top 3; however, they do not reward investors with any excess return. Bio-Rad Laboratories is the exception, as it is not characterized as a "defensive stock": it has experienced both a large run-up and a slump over the past three years.
In the next section, we move beyond sector-level visualization to multi-factor machine learning modelling, to answer questions such as: are ESG factors significant in stock prediction? For this we switch to Bloomberg ESG scores, since Bloomberg applies an industry-specific grading framework for each industry, while Refinitiv's framework is less comprehensive in this respect; we consider Bloomberg more accurate and reliable for this purpose. Accordingly, the machine learning analysis below uses Bloomberg ESG scores.
Financial markets are rife with uncertainty, making it extremely hard to capture excess stock returns; otherwise everyone in the world would be rich. One famous approach to predicting stock returns is factor construction, which has proven extremely effective in interpreting the factors that drive the market and is widely used by big asset management firms like D.E. Shaw. Multi-factor models attempt to describe asset returns and their covariance matrix as a function of a limited number of factor attributes.
Many scholars now treat ESG as a new risk factor alongside the original FF factors (Hübel and Scholz, 2020; Jin, 2018; Maiti, 2020). We apply factor construction to ESG scores to test whether they are valid new factors that help predict stock returns (FF, 1996, 2015, 2017). We also incorporate the most commonly used fundamental factors (net income, PE, PB, momentum, ROA, ROE, revenue, debt ratio). Portfolio managers who integrate sustainability in their investment portfolios undertake a dual optimisation process that combines ESG strategies with fundamental valuation, which is why our factor portfolio combines ESG factors with fundamental factors.
Traditionally, scholars use linear regression for multi-factor modelling, as follows. This builds on one of the most famous models in asset pricing, the CAPM:

$$ R = \alpha + \sum_{j=1}^{m} \beta_{1_j}\,\text{ESG Factor}_j + \sum_{i=1}^{n} \beta_{2_i}\,\text{PCA Factor}_i + \beta_3\,\text{Covid Factor} + \varepsilon $$

where $R$ represents the return, $\alpha$ is the intercept, $\beta_{1_j}$, $\beta_{2_i}$, and $\beta_3$ are the factor loadings, and $\varepsilon$ is the error term. The factors considered are a series of $m$ ESG (Environmental, Social, and Governance) factors denoted $\text{ESG Factor}_j$, a series of $n$ PCA (Principal Component Analysis) factors denoted $\text{PCA Factor}_i$, and a Covid factor.
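As a minimal illustration of this specification (purely synthetic data and hypothetical loadings; the actual estimation on real factors appears in the modelling section below), such a regression can be fit with statsmodels:
import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
n = 500
# synthetic exposures: three ESG factors, three PCA factors, one Covid dummy
X = np.column_stack([rng.normal(size=(n, 6)), rng.integers(0, 2, size=n)])
true_beta = np.array([0.0, -0.01, 0.0, 0.002, 0.004, -0.001, 0.12])  # hypothetical loadings
y = 0.001 + X @ true_beta + rng.normal(scale=0.05, size=n)  # returns = alpha + exposures + noise
model = sm.OLS(y, sm.add_constant(X)).fit()
print(model.params)  # estimated alpha followed by the seven factor loadings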
However, with the development of quantitative finance, it is increasingly popular to combine factors with more complex machine learning models to help capture non-linear trends.
A) Principal Component Analysis (PCA)
Principal Component Analysis (PCA) is a statistical technique widely employed by financial scholars (Yu et al., 2018) to reduce the dimensionality of variables in a given dataset. This method typically involves applying covariance analysis among various attributes. The original data is transformed into a new coordinate system based on the variance within the data. PCA uses a mathematical procedure to convert correlated variables into linearly uncorrelated variables called principal components.
To reduce the dimensionality of the transformed data, only the first few components are considered, as the first principal component accounts for the largest variance in the data, and each subsequent component accounts for as much of the remaining variability as possible. PCA is particularly useful when there are numerous correlated dimensions, which often contain significant data redundancy. PCA can be utilized to decrease this redundancy, resulting in the reduction of highly correlated data into a smaller number of uncorrelated principal components that account for most of the variance in the original data.
In our model, several fundamental factors, such as ROE and PE, are correlated. PCA can help us reduce dimensionality, thereby decreasing complexity and redundancy.
In Principal Component Analysis (PCA), the covariance matrix is computed, denoted as $\mathbf{S}$:
$$ \mathbf{S} = \frac{1}{N-1} \mathbf{X}^T \mathbf{X} $$Where $\mathbf{X}$ is the original data matrix, and $N$ is the number of observations.
Next, the eigenvectors and eigenvalues of the covariance matrix $\mathbf{S}$ are computed. Let $\mathbf{W}$ be the matrix of eigenvectors and $\boldsymbol{\Lambda}$ the diagonal matrix of eigenvalues:
$$ \mathbf{SW} = \mathbf{W \boldsymbol{\Lambda}} $$The eigenvectors are sorted in descending order based on their corresponding eigenvalues. The eigenvector associated with the highest eigenvalue corresponds to the first principal component, which accounts for the largest variance in the data. Subsequent eigenvectors are chosen to be orthogonal to the previous ones and account for the maximum remaining variance.
The matrix $\mathbf{Z}$ of principal components is obtained by projecting the original data matrix $\mathbf{X}$ onto the eigenvectors:
$$ \mathbf{Z} = \mathbf{XW} $$

Financial experts often apply machine learning techniques to make accurate predictions and optimize investment strategies. It is worth noting that some studies find ESG scores and fundamental variables relate non-linearly to stock performance (Pu, 2022; Ahmah et al., 2021), while others find linear relationships (Han, 2016). This is why, in what follows, we use both linear and non-linear models to see how the factors perform in predicting stock returns. In this paper, linear regression (linear), and support vector regression with an RBF kernel, random forest, and gradient boosting (non-linear) are utilized.
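Before turning to the models, here is a small sketch (synthetic data only) showing that the eigendecomposition described above reproduces sklearn's PCA up to a sign flip per component:
import numpy as np
from sklearn.decomposition import PCA

rng = np.random.default_rng(1)
X = rng.normal(size=(200, 5))
Xc = X - X.mean(axis=0)                # center the data

S = Xc.T @ Xc / (len(Xc) - 1)          # covariance matrix S
eigvals, eigvecs = np.linalg.eigh(S)   # eigh returns ascending eigenvalues
order = np.argsort(eigvals)[::-1]      # sort descending by explained variance
W = eigvecs[:, order[:3]]              # top-3 principal directions
Z_manual = Xc @ W                      # projected components, Z = XW

Z_sklearn = PCA(n_components=3).fit_transform(X)
# identical up to a possible sign flip of each component
print(np.allclose(np.abs(Z_manual), np.abs(Z_sklearn)))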
B) Linear Regression
Linear Regression is a simple machine learning technique that models the relationship between a dependent variable and one or more independent variables (the "Hello World" of quantitative finance), and it is the easiest model for multi-factor modelling. The function minimized in Linear Regression is the Mean Squared Error (MSE):
$$ MSE = \frac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y}_i)^2 $$

where $y_i$ is the actual value, $\hat{y}_i$ is the predicted value, and $n$ is the number of observations.
Minimizing the MSE ensures that the model's predictions are as close as possible to the actual data points.
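For instance, the MSE of a set of predictions can be computed by hand or with sklearn's mean_squared_error, which we also use later for model comparison (toy numbers):
import numpy as np
from sklearn.metrics import mean_squared_error

y_true = np.array([0.05, -0.02, 0.10, 0.01])  # actual quarterly returns
y_pred = np.array([0.04, 0.00, 0.08, 0.03])   # model predictions

mse_manual = np.mean((y_true - y_pred) ** 2)
print(mse_manual, mean_squared_error(y_true, y_pred))  # the two values are identical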
C) Support Vector Regression (SVR)
Support Vector Regression is an advanced machine learning technique derived from Support Vector Machines (SVMs). It aims to find the best-fitting line (or hyperplane) while allowing a certain degree of error for data points. The main idea behind SVR is to minimize the model's complexity while maintaining a certain level of accuracy. In this paper, we use the RBF kernel to capture the non-linear attributes of the data.
For the RBF kernel, the Support Vector Regression (SVR) model still aims to minimize the same objective function as mentioned before. However, the main difference lies in the transformation of the input data using the RBF kernel. Here is the objective function for SVR with the RBF kernel:
Objective Function (SVR with RBF Kernel):
$$ \min_{w,b} \frac{1}{2} \|w\|^2 + C \sum_{i=1}^{n} L_\epsilon(y_i, \hat{y}_i) $$

where $w$ is the weight vector, $b$ is the bias, $C$ is the regularization parameter trading off flatness against training error, $\hat{y}_i$ is the model prediction, and $L_\epsilon$ is the $\epsilon$-insensitive loss.
Minimizing this objective function ensures that the model strikes a balance between accuracy and complexity, avoiding overfitting (Rohmah, 2021).
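The $\epsilon$-insensitive loss $L_\epsilon$ in this objective ignores errors smaller than $\epsilon$ and grows linearly beyond it; a tiny numpy sketch (toy values):
import numpy as np

def epsilon_insensitive_loss(y_true, y_pred, eps=0.1):
    # errors inside the epsilon tube cost nothing; outside, the cost grows linearly
    return np.maximum(np.abs(y_true - y_pred) - eps, 0.0)

y_true = np.array([0.05, 0.30, -0.20])
y_pred = np.array([0.00, 0.00, 0.00])
print(epsilon_insensitive_loss(y_true, y_pred))  # [0.  0.2 0.1]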
D) Random Forest
Random Forest is an ensemble learning technique that builds multiple Decision Trees and combines their predictions through a majority vote or averaging. This model aims to minimize the variance and reduce overfitting by averaging the predictions of individual trees. It uses bagging (Bootstrap Aggregating) to create different training subsets for each tree, ensuring diversity.
The primary function that Random Forest minimizes at each split is the Gini impurity or the entropy, depending on the chosen criterion. (Strictly, these criteria apply to classification trees; for the regression task in this paper, sklearn's regression trees minimize the squared error at each split.)
Gini Impurity:
$$ Gini(p) = 1 - \sum_{i=1}^{c} p_i^2 $$

Entropy:

$$ Entropy(p) = -\sum_{i=1}^{c} p_i \log_2 p_i $$

where $p_i$ is the proportion of samples belonging to class $i$ at the node and $c$ is the number of classes.
By minimizing these impurity measures, Random Forest ensures that each split in the Decision Trees is as pure as possible, leading to accurate predictions.
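A short sketch of the two impurity measures for a vector of class proportions:
import numpy as np

def gini(p):
    # Gini impurity: 1 - sum(p_i^2)
    return 1 - np.sum(np.asarray(p) ** 2)

def entropy(p):
    # Shannon entropy: -sum(p_i * log2(p_i)), ignoring zero-probability classes
    p = np.asarray(p)
    p = p[p > 0]
    return -np.sum(p * np.log2(p))

print(gini([0.5, 0.5]), entropy([0.5, 0.5]))  # 0.5, 1.0 (maximally impure split)
print(gini([1.0, 0.0]), entropy([1.0, 0.0]))  # both ~0 (a pure node)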
E) Gradient Boost
Gradient Boost is another ensemble learning technique that focuses on building Decision Trees sequentially. Each new tree aims to correct the errors made by the previous trees in the ensemble. Gradient Boost minimizes a loss function by applying the gradient descent algorithm and sequentially fitting weak learners (shallow Decision Trees) to the negative gradients of the loss function.
The objective function that Gradient Boost attempts to minimize is the sum of the loss function and a regularization term:
Objective Function (Gradient Boost):
$$ \min_{F} \sum_{i=1}^{n} L(y_i, F(x_i)) + \sum_{m=1}^{M} \Omega(f_m) $$

where $L$ is the loss function, $F$ is the ensemble prediction built from the $M$ weak learners $f_m$, and $\Omega$ is a regularization term that penalizes the complexity of each tree.
In general, gradient boosting gives more accurate predictions, as its sequential tree-fitting tends to match the data better (Ran, 2018). The difference lies in the method each model uses. Random Forests operate under the assumption that, on average, individual decision trees produce accurate predictions, with random noise being the primary factor hindering optimal performance; the ensemble is employed to average out this noise. In contrast, Gradient Boosting posits that a single decision tree may systematically deviate from the target, and uses the ensemble to steer the prediction towards the correct objective step by step (Ran, 2018).
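A minimal sketch of this sequential idea on toy data: each shallow tree is fit to the residuals of the current ensemble, which are the negative gradients of the squared-error loss (this illustrates the mechanism only; our actual model uses sklearn's GradientBoostingRegressor below):
import numpy as np
from sklearn.tree import DecisionTreeRegressor

rng = np.random.default_rng(2)
X = rng.normal(size=(300, 3))
y = np.sin(X[:, 0]) + 0.1 * rng.normal(size=300)

learning_rate, n_rounds = 0.1, 50
F = np.full_like(y, y.mean())           # start from the constant prediction
for _ in range(n_rounds):
    residuals = y - F                   # negative gradient of 1/2 * (y - F)^2
    tree = DecisionTreeRegressor(max_depth=2).fit(X, residuals)
    F += learning_rate * tree.predict(X)

print(np.mean((y - F) ** 2))            # training MSE shrinks round by round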
In this project, we are trying to answer two research questions: (1) are ESG factors significant in predicting stock returns? and (2) can machine learning models incorporating ESG factors construct a portfolio that beats the S&P 500?
We retrieve each S&P 500 stock's quarterly E-score, S-score, G-score, price-earnings ratio, price-to-book ratio, relative share price momentum, ROE, ROA, net income, revenue, debt ratio, market capitalization, industry, and last price from Bloomberg, covering 2016-01 to 2022-10. We calculate quarterly returns and remove outliers in the factors. Because factors are not comparable across industry sectors (Naffa, 2022), we follow MorningStar's approach and standardize each factor within its peer group (Justice and Hale, 2016):
$$ \text{z-score}_{\text{factor}_i} = \frac{\text{factor}_i - \mu_{\text{peer}}}{\sigma_{\text{peer}}} $$

where $\text{factor}_i$ is the raw factor value for stock $i$, and $\mu_{\text{peer}}$ and $\sigma_{\text{peer}}$ are the mean and standard deviation of that factor among the stock's industry peers.
We then neutralize the effects of industry (as shown in the visualization part, industry is strongly associated with ESG factors, and we want to remove its effect from our analysis) and market cap, following the literature (Naffa, 2022). In other words, market cap and industry are kept as control variables.
Next, we use PCA to reduce the fundamental factors (price-earnings ratio, price-to-book ratio, relative share price momentum, ROE, ROA, net income, revenue, debt ratio) to three components, corresponding roughly to profitability, leverage, and momentum. We also add a Covid dummy variable to capture the effect of the pandemic on stock returns.
This gives 7 factors in total: E-score, S-score, G-score, PCA 1, PCA 2, PCA 3, and Covid.
Lastly, we apply linear regression, support vector regression, random forest, and gradient boosting to construct stock portfolios based on these factors. Our purpose is to see whether ESG scores are significant in stock prediction, and how machine learning models incorporating ESG factors perform at portfolio construction relative to the S&P 500 index. Specifically, we select the top 5 stocks by each model's predicted return, buy them at the start of the quarter, and sell at the start of the next quarter. These 5 stocks constitute an equal-weighted portfolio whose return is compared with the S&P 500 to see whether it beats the index. In this way, we can judge whether an investment strategy built on machine learning models incorporating ESG factors is worth pursuing.
I will first retrieve the data from an Excel file containing data exported from Bloomberg. Then I do some data cleaning and manipulation, in accordance with the methodology above, to obtain the final dataset used for modelling.
import pandas as pd
import numpy as np
pd.set_option('mode.chained_assignment', None)
data = pd.read_excel("./data.xlsx")
data.head(14)
Instrument | columns | 2016-01-31 00:00:00 | 2016-04-30 00:00:00 | 2016-07-31 00:00:00 | 2016-10-31 00:00:00 | 2017-01-31 00:00:00 | 2017-04-30 00:00:00 | 2017-07-31 00:00:00 | 2017-10-31 00:00:00 | ... | 2020-07-31 00:00:00 | 2020-10-31 00:00:00 | 2021-01-31 00:00:00 | 2021-04-30 00:00:00 | 2021-07-31 00:00:00 | 2021-10-31 00:00:00 | 2022-01-31 00:00:00 | 2022-04-30 00:00:00 | 2022-07-31 00:00:00 | 2022-10-31 00:00:00 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | A UN Equity | BESG Environmental Pillar Score | NaN | NaN | NaN | 1.5000 | 1.5000 | 1.5000 | 1.5000 | 1.5000 | ... | 1.5000 | 1.5000 | 1.5000 | 1.5000 | 1.5000 | 3.0000 | 3.0000 | 3.0000 | 3.0000 | 3.0000 |
1 | NaN | BESG Social Pillar Score | NaN | NaN | NaN | 1.6400 | 1.6400 | 1.6400 | 1.6400 | 1.6000 | ... | 1.8000 | 5.1400 | 5.1400 | 5.1400 | 5.1400 | 7.6600 | 7.6600 | 7.6600 | 7.6600 | 7.6600 |
2 | NaN | BESG Governance Pillar Score | NaN | NaN | NaN | 5.9200 | 5.9200 | 5.9200 | 5.9200 | 6.3500 | ... | 6.6800 | 6.5600 | 6.5600 | 6.5600 | 6.5600 | 6.7300 | 6.7300 | 6.7300 | 6.7300 | 6.7300 |
3 | NaN | Price Earnings Ratio (P/E) | 24.9317 | 26.7484 | 30.7221 | 27.0396 | 27.9601 | 28.5429 | 31.6935 | 34.5991 | ... | 37.4695 | 38.6665 | 41.1288 | 41.5718 | 44.3922 | 42.2991 | 35.1326 | 28.7858 | 30.5493 | 29.4270 |
4 | NaN | Price to Book Ratio | 3.0530 | 3.1953 | 3.5891 | 3.3271 | 3.6696 | 4.0419 | 4.1780 | 4.5344 | ... | 5.9677 | 6.4107 | 7.6294 | 8.4185 | 9.3871 | 8.8258 | 8.1094 | 6.9625 | 7.7968 | 7.6934 |
5 | NaN | Relative Share Price Momentum | 4.0554 | 9.8799 | 8.5025 | 7.9585 | -2.4319 | 6.5499 | 16.6943 | 14.3360 | ... | 9.2830 | 12.1779 | 12.6658 | 4.8849 | 4.6442 | 8.1828 | -9.3599 | -16.2913 | 0.4189 | 21.4459 |
6 | NaN | Return on Common Equity | 11.0843 | 11.1298 | 11.2716 | 10.9869 | 12.2033 | 13.6396 | 14.1437 | 15.0760 | ... | 14.2064 | 14.9465 | 16.7841 | 19.3151 | 19.9456 | 23.5821 | 24.2016 | 25.4329 | 26.4621 | 23.4524 |
7 | NaN | Return on Assets | 6.1939 | 6.1520 | 6.3530 | 6.0499 | 6.7088 | 7.4348 | 7.9150 | 8.4340 | ... | 7.6055 | 7.5371 | 8.4485 | 9.3185 | 9.8817 | 11.9024 | 12.0494 | 12.1134 | 12.6627 | 11.8096 |
8 | NaN | Net Income/Net Profit (Losses) | 121.0000 | 91.0000 | 124.0000 | 126.0000 | 168.0000 | 164.0000 | 175.0000 | 177.0000 | ... | 199.0000 | 222.0000 | 288.0000 | 216.0000 | 264.0000 | 442.0000 | 283.0000 | 274.0000 | 329.0000 | 368.0000 |
9 | NaN | Revenue | 1028.0000 | 1019.0000 | 1044.0000 | 1111.0000 | 1067.0000 | 1102.0000 | 1114.0000 | 1189.0000 | ... | 1261.0000 | 1483.0000 | 1548.0000 | 1525.0000 | 1586.0000 | 1660.0000 | 1674.0000 | 1607.0000 | 1718.0000 | 1849.0000 |
10 | NaN | Total Debt to Total Equity | 42.8113 | 45.3541 | 43.4192 | 44.8422 | 46.3256 | 46.6971 | 45.1312 | 41.5926 | ... | 50.3513 | 52.0624 | 55.6828 | 64.9272 | 61.5447 | 54.0174 | 52.9686 | 56.7161 | 60.3614 | 55.0613 |
11 | NaN | Last Price | 37.6500 | 40.9200 | 48.1100 | 43.5700 | 48.9700 | 55.0500 | 59.7900 | 68.0300 | ... | 96.3300 | 102.0900 | 120.1700 | 133.6400 | 153.2300 | 157.4900 | 139.3200 | 119.2700 | 134.1000 | 138.3500 |
12 | NaN | Current Market Cap | 12340.4987 | 13412.3517 | 15660.8489 | 14133.4442 | 15755.9944 | 17742.6647 | 19212.7657 | 21893.9590 | ... | 29744.5266 | 31475.3306 | 36874.1075 | 40719.7759 | 46496.6054 | 47675.7930 | 42074.7510 | 35794.5225 | 40056.7503 | 40957.2129 |
13 | AAL UW Equity | BESG Environmental Pillar Score | NaN | NaN | NaN | 2.1200 | 2.1200 | 2.1200 | 2.1200 | 2.1800 | ... | 4.1800 | 2.8400 | 2.8400 | 2.8400 | 2.8400 | 5.3200 | 5.3200 | 5.3200 | 5.3200 | 5.3200 |
14 rows × 30 columns
data['Instrument'] = data['Instrument'].fillna(method='ffill')
data.head(3)
Instrument | columns | 2016-01-31 00:00:00 | 2016-04-30 00:00:00 | 2016-07-31 00:00:00 | 2016-10-31 00:00:00 | 2017-01-31 00:00:00 | 2017-04-30 00:00:00 | 2017-07-31 00:00:00 | 2017-10-31 00:00:00 | ... | 2020-07-31 00:00:00 | 2020-10-31 00:00:00 | 2021-01-31 00:00:00 | 2021-04-30 00:00:00 | 2021-07-31 00:00:00 | 2021-10-31 00:00:00 | 2022-01-31 00:00:00 | 2022-04-30 00:00:00 | 2022-07-31 00:00:00 | 2022-10-31 00:00:00 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | A UN Equity | BESG Environmental Pillar Score | NaN | NaN | NaN | 1.50 | 1.50 | 1.50 | 1.50 | 1.50 | ... | 1.50 | 1.50 | 1.50 | 1.50 | 1.50 | 3.00 | 3.00 | 3.00 | 3.00 | 3.00 |
1 | A UN Equity | BESG Social Pillar Score | NaN | NaN | NaN | 1.64 | 1.64 | 1.64 | 1.64 | 1.60 | ... | 1.80 | 5.14 | 5.14 | 5.14 | 5.14 | 7.66 | 7.66 | 7.66 | 7.66 | 7.66 |
2 | A UN Equity | BESG Governance Pillar Score | NaN | NaN | NaN | 5.92 | 5.92 | 5.92 | 5.92 | 6.35 | ... | 6.68 | 6.56 | 6.56 | 6.56 | 6.56 | 6.73 | 6.73 | 6.73 | 6.73 | 6.73 |
3 rows × 30 columns
# the last column is an empty trailing column, so remove it
data = data.drop(columns=data.columns[-1])
# Reshape the DataFrame using melt
data_melted = data.melt(id_vars=['Instrument', 'columns'], var_name='Date', value_name='Value')
# Pivot the DataFrame
data_melted['Instrument'] = data_melted['Instrument'].fillna(method='ffill')
data_pivoted = data_melted.pivot_table(index=['Instrument', 'Date'], columns='columns', values='Value')
# Reset the index to have 'Instrument' and 'Timestamp' as columns again
data_pivoted.reset_index(inplace=True)
data_pivoted
columns | Instrument | Date | BESG Environmental Pillar Score | BESG Governance Pillar Score | BESG Social Pillar Score | Current Market Cap | Last Price | Net Income/Net Profit (Losses) | Price Earnings Ratio (P/E) | Price to Book Ratio | Relative Share Price Momentum | Return on Assets | Return on Common Equity | Revenue | Total Debt to Total Equity |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | A UN Equity | 2016-01-31 | NaN | NaN | NaN | 12340.4987 | 37.65 | 121.0 | 24.9317 | 3.0530 | 4.0554 | 6.1939 | 11.0843 | 1028.0 | 42.8113 |
1 | A UN Equity | 2016-04-30 | NaN | NaN | NaN | 13412.3517 | 40.92 | 91.0 | 26.7484 | 3.1953 | 9.8799 | 6.1520 | 11.1298 | 1019.0 | 45.3541 |
2 | A UN Equity | 2016-07-31 | NaN | NaN | NaN | 15660.8489 | 48.11 | 124.0 | 30.7221 | 3.5891 | 8.5025 | 6.3530 | 11.2716 | 1044.0 | 43.4192 |
3 | A UN Equity | 2016-10-31 | 1.50 | 5.92 | 1.64 | 14133.4442 | 43.57 | 126.0 | 27.0396 | 3.3271 | 7.9585 | 6.0499 | 10.9869 | 1111.0 | 44.8422 |
4 | A UN Equity | 2017-01-31 | 1.50 | 5.92 | 1.64 | 15755.9944 | 48.97 | 168.0 | 27.9601 | 3.6696 | -2.4319 | 6.7088 | 12.2033 | 1067.0 | 46.3256 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
13427 | ZTS UN Equity | 2021-07-31 | 4.59 | 7.25 | 3.47 | 92011.5256 | 194.14 | 552.0 | 42.1919 | 19.6411 | 16.4668 | 14.4513 | 47.8686 | 1990.0 | 144.2427 |
13428 | ZTS UN Equity | 2021-10-31 | 4.59 | 7.19 | 3.47 | 115456.8943 | 244.03 | 414.0 | 52.1302 | 25.3846 | 15.3516 | 14.8097 | 49.0135 | 1967.0 | 149.2958 |
13429 | ZTS UN Equity | 2022-01-31 | 4.59 | 7.19 | 3.47 | 88976.7529 | 188.59 | 595.0 | 40.1436 | 19.0780 | -4.4566 | 14.9913 | 47.3991 | 1986.0 | 144.6114 |
13430 | ZTS UN Equity | 2022-04-30 | 4.59 | 7.19 | 3.47 | 80896.3595 | 171.89 | 529.0 | 36.5053 | 17.5873 | -14.7083 | 15.0149 | 46.8085 | 2052.0 | 147.0306 |
13431 | ZTS UN Equity | 2022-07-31 | 4.59 | 7.19 | 3.47 | 69420.2682 | 148.29 | 529.0 | 31.7942 | 14.8335 | -6.0819 | 15.0992 | 44.2518 | 2002.0 | 144.7662 |
13432 rows × 15 columns
Now, because the industry column is missing, I will merge another dataset containing each instrument's name and industry into the data_pivoted dataset.
# strip the ' UN Equity' suffix so tickers match those in the industry dataset
data_pivoted['Instrument']=data_pivoted['Instrument'].replace(' UN Equity', '', regex=True)
data_pivoted.head()
columns | Instrument | Date | BESG Environmental Pillar Score | BESG Governance Pillar Score | BESG Social Pillar Score | Current Market Cap | Last Price | Net Income/Net Profit (Losses) | Price Earnings Ratio (P/E) | Price to Book Ratio | Relative Share Price Momentum | Return on Assets | Return on Common Equity | Revenue | Total Debt to Total Equity |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | A | 2016-01-31 | NaN | NaN | NaN | 12340.4987 | 37.65 | 121.0 | 24.9317 | 3.0530 | 4.0554 | 6.1939 | 11.0843 | 1028.0 | 42.8113 |
1 | A | 2016-04-30 | NaN | NaN | NaN | 13412.3517 | 40.92 | 91.0 | 26.7484 | 3.1953 | 9.8799 | 6.1520 | 11.1298 | 1019.0 | 45.3541 |
2 | A | 2016-07-31 | NaN | NaN | NaN | 15660.8489 | 48.11 | 124.0 | 30.7221 | 3.5891 | 8.5025 | 6.3530 | 11.2716 | 1044.0 | 43.4192 |
3 | A | 2016-10-31 | 1.5 | 5.92 | 1.64 | 14133.4442 | 43.57 | 126.0 | 27.0396 | 3.3271 | 7.9585 | 6.0499 | 10.9869 | 1111.0 | 44.8422 |
4 | A | 2017-01-31 | 1.5 | 5.92 | 1.64 | 15755.9944 | 48.97 | 168.0 | 27.9601 | 3.6696 | -2.4319 | 6.7088 | 12.2033 | 1067.0 | 46.3256 |
industry = pd.read_csv('/Users/daltondu/Documents/UBC/323/project/data/industry.csv')
merged_df = data_pivoted.merge(industry, on='Instrument')
merged_df.head(3)
Instrument | Date | BESG Environmental Pillar Score | BESG Governance Pillar Score | BESG Social Pillar Score | Current Market Cap | Last Price | Net Income/Net Profit (Losses) | Price Earnings Ratio (P/E) | Price to Book Ratio | Relative Share Price Momentum | Return on Assets | Return on Common Equity | Revenue | Total Debt to Total Equity | Industry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | A | 2016-01-31 | NaN | NaN | NaN | 12340.4987 | 37.65 | 121.0 | 24.9317 | 3.0530 | 4.0554 | 6.1939 | 11.0843 | 1028.0 | 42.8113 | Health Care |
1 | A | 2016-04-30 | NaN | NaN | NaN | 13412.3517 | 40.92 | 91.0 | 26.7484 | 3.1953 | 9.8799 | 6.1520 | 11.1298 | 1019.0 | 45.3541 | Health Care |
2 | A | 2016-07-31 | NaN | NaN | NaN | 15660.8489 | 48.11 | 124.0 | 30.7221 | 3.5891 | 8.5025 | 6.3530 | 11.2716 | 1044.0 | 43.4192 | Health Care |
merged_df['Date'] = pd.to_datetime(merged_df['Date'])
# Format the 'Date' column as year-month
merged_df['Date'] = merged_df['Date'].dt.strftime('%Y-%m')
merged_df.head(1)
Instrument | Date | BESG Environmental Pillar Score | BESG Governance Pillar Score | BESG Social Pillar Score | Current Market Cap | Last Price | Net Income/Net Profit (Losses) | Price Earnings Ratio (P/E) | Price to Book Ratio | Relative Share Price Momentum | Return on Assets | Return on Common Equity | Revenue | Total Debt to Total Equity | Industry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | A | 2016-01 | NaN | NaN | NaN | 12340.4987 | 37.65 | 121.0 | 24.9317 | 3.053 | 4.0554 | 6.1939 | 11.0843 | 1028.0 | 42.8113 | Health Care |
Here we have the rough shape of the dataframe we want. Next, we will fill in missing data in all columns. It is important to note that a missing value should only be filled with a previous or later quarter's value from the same instrument. We accomplish this as follows:
#see how many is missing
pd.isna(merged_df).sum()
Instrument 0 Date 0 BESG Environmental Pillar Score 1227 BESG Governance Pillar Score 744 BESG Social Pillar Score 1227 Current Market Cap 24 Last Price 22 Net Income/Net Profit (Losses) 1 Price Earnings Ratio (P/E) 63 Price to Book Ratio 199 Relative Share Price Momentum 34 Return on Assets 14 Return on Common Equity 249 Revenue 1 Total Debt to Total Equity 170 Industry 0 dtype: int64
#fill up missing cells
fill_df = pd.DataFrame()
for instrument in merged_df.Instrument.unique():
    # fill forward then backward, but only within the same instrument, so that
    # values are never carried across different instruments
    s_df = merged_df[merged_df['Instrument'] == instrument]
    s_df = s_df.fillna(method="ffill").fillna(method="bfill")
    fill_df = pd.concat([fill_df, s_df])
fill_df.head()
Instrument | Date | BESG Environmental Pillar Score | BESG Governance Pillar Score | BESG Social Pillar Score | Current Market Cap | Last Price | Net Income/Net Profit (Losses) | Price Earnings Ratio (P/E) | Price to Book Ratio | Relative Share Price Momentum | Return on Assets | Return on Common Equity | Revenue | Total Debt to Total Equity | Industry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | A | 2016-01 | 1.5 | 5.92 | 1.64 | 12340.4987 | 37.65 | 121.0 | 24.9317 | 3.0530 | 4.0554 | 6.1939 | 11.0843 | 1028.0 | 42.8113 | Health Care |
1 | A | 2016-04 | 1.5 | 5.92 | 1.64 | 13412.3517 | 40.92 | 91.0 | 26.7484 | 3.1953 | 9.8799 | 6.1520 | 11.1298 | 1019.0 | 45.3541 | Health Care |
2 | A | 2016-07 | 1.5 | 5.92 | 1.64 | 15660.8489 | 48.11 | 124.0 | 30.7221 | 3.5891 | 8.5025 | 6.3530 | 11.2716 | 1044.0 | 43.4192 | Health Care |
3 | A | 2016-10 | 1.5 | 5.92 | 1.64 | 14133.4442 | 43.57 | 126.0 | 27.0396 | 3.3271 | 7.9585 | 6.0499 | 10.9869 | 1111.0 | 44.8422 | Health Care |
4 | A | 2017-01 | 1.5 | 5.92 | 1.64 | 15755.9944 | 48.97 | 168.0 | 27.9601 | 3.6696 | -2.4319 | 6.7088 | 12.2033 | 1067.0 | 46.3256 | Health Care |
fill_df = fill_df.fillna(0)
fill_df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 7717 entries, 0 to 7716 Data columns (total 16 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Instrument 7717 non-null object 1 Date 7717 non-null object 2 BESG Environmental Pillar Score 7717 non-null float64 3 BESG Governance Pillar Score 7717 non-null float64 4 BESG Social Pillar Score 7717 non-null float64 5 Current Market Cap 7717 non-null float64 6 Last Price 7717 non-null float64 7 Net Income/Net Profit (Losses) 7717 non-null float64 8 Price Earnings Ratio (P/E) 7717 non-null float64 9 Price to Book Ratio 7717 non-null float64 10 Relative Share Price Momentum 7717 non-null float64 11 Return on Assets 7717 non-null float64 12 Return on Common Equity 7717 non-null float64 13 Revenue 7717 non-null float64 14 Total Debt to Total Equity 7717 non-null float64 15 Industry 7717 non-null object dtypes: float64(13), object(3) memory usage: 1.0+ MB
#change last price to quarterly return
fill_df.sort_values(by='Date', inplace=True)
def calculate_quarterly_return(group):
    group['Quarterly Return'] = group['Last Price'].pct_change(periods=1)
    return group
# Group by 'Instrument' and apply the custom function to each group
fill_df = fill_df.groupby('Instrument').apply(calculate_quarterly_return)
fill_df.head()
/var/folders/xz/y00l9pw56txcfllknld9w1rr0000gn/T/ipykernel_81675/1960402510.py:10: FutureWarning: Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object. To preserve the previous behavior, use >>> .groupby(..., group_keys=False) To adopt the future behavior and silence this warning, use >>> .groupby(..., group_keys=True) fill_df = fill_df.groupby('Instrument').apply(calculate_quarterly_return)
Instrument | Date | BESG Environmental Pillar Score | BESG Governance Pillar Score | BESG Social Pillar Score | Current Market Cap | Last Price | Net Income/Net Profit (Losses) | Price Earnings Ratio (P/E) | Price to Book Ratio | Relative Share Price Momentum | Return on Assets | Return on Common Equity | Revenue | Total Debt to Total Equity | Industry | Quarterly Return | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | A | 2016-01 | 1.50 | 5.92 | 1.64 | 12340.4987 | 37.65 | 121.000 | 24.9317 | 3.0530 | 4.0554 | 6.1939 | 11.0843 | 1028.000 | 42.8113 | Health Care | NaN |
6313 | SHW | 2016-01 | 3.29 | 7.00 | 0.69 | 26247.3745 | 94.89 | 164.876 | 23.1355 | 26.3089 | 12.1670 | 18.2879 | 147.8293 | 2574.024 | 203.7951 | Materials | NaN |
3343 | HIG | 2016-01 | 0.06 | 7.80 | 2.07 | 18278.8242 | 46.08 | 323.000 | 11.8106 | 1.0065 | -4.9065 | 0.6483 | 8.2713 | 4391.000 | 29.3894 | Financials | NaN |
2777 | FCX | 2016-01 | 5.22 | 7.31 | 3.95 | 12944.1275 | 10.34 | -4184.000 | 9.4625 | 3.5064 | -9.5850 | -28.2511 | -143.1460 | 3242.000 | 237.8047 | Materials | NaN |
216 | AES | 2016-01 | 2.24 | 8.23 | 5.02 | 7784.8534 | 11.80 | 126.000 | 11.2824 | 2.4293 | 1.6587 | 0.7744 | 8.0299 | 3271.000 | 299.8396 | Utilities | NaN |
#drop NaN rows (each instrument's first quarter has no prior price to compute a return from), and drop Last Price
fill_df = fill_df.drop(columns='Last Price')
fill_df = fill_df.dropna()
fill_df.head()
Instrument | Date | BESG Environmental Pillar Score | BESG Governance Pillar Score | BESG Social Pillar Score | Current Market Cap | Net Income/Net Profit (Losses) | Price Earnings Ratio (P/E) | Price to Book Ratio | Relative Share Price Momentum | Return on Assets | Return on Common Equity | Revenue | Total Debt to Total Equity | Industry | Quarterly Return | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
6071 | RHI | 2016-04 | 0.00 | 6.67 | 1.71 | 5011.1179 | 91.616 | 13.6774 | 4.6433 | -18.3080 | 20.8316 | 35.0449 | 1344.160 | 0.1014 | Industrials | -0.180764 |
6476 | SPGI | 2016-04 | 4.69 | 8.08 | 1.94 | 28402.4467 | 383.000 | 23.8403 | 75.2817 | 9.7340 | 16.4569 | 206.6725 | 1482.000 | 280.9665 | Financials | 0.083653 |
5369 | ORCL | 2016-04 | 4.68 | 6.04 | 0.83 | 166824.5824 | 2814.000 | 18.6499 | 3.5117 | 2.0127 | 7.9791 | 18.5509 | 10594.000 | 91.7661 | Information Technology | 0.092985 |
4343 | LOW | 2016-04 | 1.63 | 6.94 | 1.68 | 68223.3128 | 884.000 | 21.7946 | 9.4234 | 2.2649 | 7.7012 | 32.9439 | 15234.000 | 213.6023 | Consumer Discretionary | 0.060843 |
4451 | MAA | 2016-04 | 0.00 | 7.07 | 1.50 | 8034.2374 | 45.144 | 48.5020 | 2.7187 | 10.5362 | 3.2565 | 7.5275 | 272.236 | 111.4943 | Real Estate | 0.040994 |
fill_df.to_csv('./final.csv')
import numpy as np
import pandas as pd
data = pd.read_csv('./final.csv')
data.head(3)
Unnamed: 0 | Instrument | Date | BESG Environmental Pillar Score | BESG Governance Pillar Score | BESG Social Pillar Score | Current Market Cap | Net Income/Net Profit (Losses) | Price Earnings Ratio (P/E) | Price to Book Ratio | Relative Share Price Momentum | Return on Assets | Return on Common Equity | Revenue | Total Debt to Total Equity | Industry | Quarterly Return | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 6071 | RHI | 2016-04 | 0.00 | 6.67 | 1.71 | 5011.1179 | 91.616 | 13.6774 | 4.6433 | -18.3080 | 20.8316 | 35.0449 | 1344.16 | 0.1014 | Industrials | -0.180764 |
1 | 6476 | SPGI | 2016-04 | 4.69 | 8.08 | 1.94 | 28402.4467 | 383.000 | 23.8403 | 75.2817 | 9.7340 | 16.4569 | 206.6725 | 1482.00 | 280.9665 | Financials | 0.083653 |
2 | 5369 | ORCL | 2016-04 | 4.68 | 6.04 | 0.83 | 166824.5824 | 2814.000 | 18.6499 | 3.5117 | 2.0127 | 7.9791 | 18.5509 | 10594.00 | 91.7661 | Information Technology | 0.092985 |
data = data.drop(data.columns[0], axis=1)
data.head(3)
Instrument | Date | BESG Environmental Pillar Score | BESG Governance Pillar Score | BESG Social Pillar Score | Current Market Cap | Net Income/Net Profit (Losses) | Price Earnings Ratio (P/E) | Price to Book Ratio | Relative Share Price Momentum | Return on Assets | Return on Common Equity | Revenue | Total Debt to Total Equity | Industry | Quarterly Return | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | RHI | 2016-04 | 0.00 | 6.67 | 1.71 | 5011.1179 | 91.616 | 13.6774 | 4.6433 | -18.3080 | 20.8316 | 35.0449 | 1344.16 | 0.1014 | Industrials | -0.180764 |
1 | SPGI | 2016-04 | 4.69 | 8.08 | 1.94 | 28402.4467 | 383.000 | 23.8403 | 75.2817 | 9.7340 | 16.4569 | 206.6725 | 1482.00 | 280.9665 | Financials | 0.083653 |
2 | ORCL | 2016-04 | 4.68 | 6.04 | 0.83 | 166824.5824 | 2814.000 | 18.6499 | 3.5117 | 2.0127 | 7.9791 | 18.5509 | 10594.00 | 91.7661 | Information Technology | 0.092985 |
#change the name of the columns.
data.rename(columns={'Instrument': 'Instrument', 'Date': 'Date', 'BESG Environmental Pillar Score' : 'E Score', 'BESG Social Pillar Score' : 'S Score', 'BESG Governance Pillar Score':'G Score', 'Net Income/Net Profit (Losses)' : 'NI',
'Price Earnings Ratio (P/E)': 'PE', 'Price to Book Ratio': 'PB', 'Relative Share Price Momentum': 'Momentum', 'Return on Assets': 'ROA',
'Return on Common Equity': 'ROE', 'Total Debt to Total Equity': 'Debt Ratio', 'Industry': 'Industry'}, inplace = True)
data.tail()
Instrument | Date | E Score | G Score | S Score | Current Market Cap | NI | PE | PB | Momentum | ROA | ROE | Revenue | Debt Ratio | Industry | Quarterly Return | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
7426 | DHR | 2022-07 | 0.00 | 6.84 | 3.49 | 187891.8607 | 1572.000 | 28.4515 | 4.1488 | 11.7926 | 8.2738 | 15.2472 | 7663.000 | 43.0851 | Health Care | 0.000930 |
7427 | ACN | 2022-07 | 6.42 | 7.83 | 2.11 | 191868.5670 | 1665.128 | 26.6385 | 8.2426 | -0.2166 | 15.2084 | 33.0351 | 15423.656 | 14.6206 | Information Technology | -0.033505 |
7428 | SJM | 2022-07 | 4.59 | 7.78 | 2.76 | 14099.7891 | 109.800 | 19.0025 | 1.7313 | 8.7050 | 3.6173 | 7.1923 | 1873.000 | 59.0744 | Consumer Staples | -0.033667 |
7429 | CFG | 2022-07 | 0.49 | 6.85 | 4.53 | 17030.3073 | 636.000 | 7.2331 | 0.8076 | -13.2617 | 0.9473 | 8.6437 | 2481.000 | 71.4983 | Financials | -0.037265 |
7430 | ZTS | 2022-07 | 4.59 | 7.19 | 3.47 | 69420.2682 | 529.000 | 31.7942 | 14.8335 | -6.0819 | 15.0992 | 44.2518 | 2002.000 | 144.7662 | Health Care | -0.137297 |
The standardizing process involves transforming the data to have a mean of 0 and a standard deviation of 1. The formula for standardizing a variable $x$ is as follows:
$$ z = \frac{x - \mu}{\sigma} $$

where $x$ is the raw value, $\mu$ is the mean of the variable, and $\sigma$ is its standard deviation.
Neutralizing is a process used to remove the impact of unwanted factors (e.g., industry and marketcap) from a set of variables. This is often done using a multiple linear regression, where the variable of interest (e.g., a factor) is regressed on the unwanted factors (e.g., industry dummies). The residuals from this regression represent the neutralized variable.
Given a dependent variable $y$ (the factor to be neutralized) and a set of independent variables $X$ (the unwanted factors, e.g., industry dummies), the linear regression model can be represented as:
$$ y = X\beta + \epsilon $$

where $y$ is the factor to be neutralized, $X$ contains the unwanted factors (e.g., industry dummies and log market cap), $\beta$ is the vector of coefficients, and $\epsilon$ is the residual.
By fitting this linear regression, we obtain the residual $\epsilon$, which represents the neutralized factor.
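As a compact illustration of this step on toy data (the project's full implementation appears in the function below; all names here are hypothetical), a factor can be neutralized by regressing it on industry dummies plus log market cap and keeping the residual:
import numpy as np
import pandas as pd
import statsmodels.api as sm

rng = np.random.default_rng(3)
toy = pd.DataFrame({
    "factor": rng.normal(size=100),
    "industry": rng.choice(["Tech", "Health", "Energy"], size=100),
    "marketcap": rng.uniform(1e3, 1e5, size=100),
})

# regressors: log market cap plus one dummy per industry
X = pd.concat([np.log(toy["marketcap"]),
               pd.get_dummies(toy["industry"], dtype=float)], axis=1)
neutralized = sm.OLS(toy["factor"], X).fit().resid  # residual = neutralized factor
print(neutralized.corr(np.log(toy["marketcap"])))    # ~0 by construction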
norm_df = pd.DataFrame()
norm_df['Date'] = data['Date']
norm_df['Instrument'] = data['Instrument']
norm_df['Quarterly Return'] = data['Quarterly Return']
import math
import statsmodels.api as sm

# Globals referenced by the default arguments below (reconstructed here: the factor
# columns, the industry dummies, and the market cap series used as controls)
factor_column = ['E Score', 'S Score', 'G Score', 'NI', 'PE', 'PB', 'Momentum', 'ROA', 'ROE', 'Revenue', 'Debt Ratio']
dummy_industry = pd.get_dummies(data['Industry'])
marketcap = data['Current Market Cap']

def removeoutlier_standardizing_neutralizing(data, factor_column=factor_column, dummy_industry=dummy_industry, marketcap=marketcap, std=3):
    factor = ['E Score', 'S Score', 'G Score', 'NI', 'PE', 'PB', 'Momentum', 'ROA', 'ROE', 'Revenue', 'Debt Ratio', 'Industry']
    a = data[factor]
    a = a.dropna().copy()
    # Clip outliers to mean +/- std standard deviations within each industry
    for col in factor_column:
        edge_up = a.groupby('Industry')[col].transform(lambda x: x.mean() + std * x.std())
        edge_low = a.groupby('Industry')[col].transform(lambda x: x.mean() - std * x.std())
        a.loc[a[col] > edge_up, col] = edge_up
        a.loc[a[col] < edge_low, col] = edge_low
    # Apply group-wise standardization using transform()
    a[factor_column] = a.groupby('Industry')[factor_column].transform(lambda x: (x - x.mean()) / x.std())

    def neutralizing(factor, dummy_industry, marketcap):
        '''
        This function helps to isolate the impact of individual factors on the returns of a portfolio, thereby reducing the impact of biases or unintended exposures to other factors.
        It ensures that the model is focused on the factors of interest and not driven by hidden or unwanted exposures, and in this case, the unwanted exposure is the industry.
        '''
        y = factor
        LNmarketcap = marketcap.apply(lambda x: math.log(x))
        x = pd.concat([LNmarketcap, dummy_industry], axis=1)
        result = sm.OLS(y.astype(float), x.astype(float)).fit()
        return result.resid

    # neutralize each factor column (OLS needs a 1-D dependent variable, so the
    # regression is run column by column)
    for col in factor_column:
        a[col] = neutralizing(a[col], dummy_industry.loc[a.index], marketcap.loc[a.index])
    a = a.drop('Industry', axis=1)
    return a
norm_df = norm_df.merge(removeoutlier_standardizing_neutralizing(data), left_index=True, right_index=True)
norm_df
Date | Instrument | Quarterly Return | E Score | S Score | G Score | NI | PE | PB | Momentum | ROA | ROE | Revenue | Debt Ratio | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2016-04 | RHI | -0.180764 | -0.794609 | -0.417906 | -0.682521 | 0.646252 | -0.238948 | 0.039841 | -1.142926 | 2.899639 | 0.345103 | 0.342765 | -0.242635 |
1 | 2016-04 | SPGI | 0.083653 | 2.124629 | -0.078859 | 1.207612 | -0.310351 | 0.904303 | 2.276875 | 0.638833 | 3.835884 | 3.231890 | -0.637843 | 1.226947 |
2 | 2016-04 | ORCL | 0.092985 | 0.085032 | -1.244272 | -2.352521 | 1.116564 | -0.402256 | -0.546795 | -0.066309 | -0.226945 | -0.478035 | 0.203074 | -0.280510 |
3 | 2016-04 | LOW | 0.060843 | -0.635830 | -0.462489 | -0.462001 | -0.198123 | -0.109821 | -0.388520 | 0.086307 | -0.235228 | -0.303302 | 0.238386 | -0.327309 |
4 | 2016-04 | MAA | 0.040994 | -1.278139 | -0.720425 | 0.298226 | -0.158611 | -0.109704 | -0.336812 | 1.045642 | -0.116065 | -0.194657 | -0.352459 | -0.208183 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
7426 | 2022-07 | DHR | 0.000930 | -1.569813 | 0.443858 | -0.681898 | -0.304783 | 0.296278 | -0.459524 | 0.585317 | -0.094883 | -0.627106 | -1.138502 | -0.434464 |
7427 | 2022-07 | ACN | -0.033505 | 0.755874 | 0.251713 | 1.085647 | -0.075711 | -0.308817 | -0.249107 | -0.251594 | 0.790317 | -0.078159 | 1.096902 | -0.664381 |
7428 | 2022-07 | SJM | -0.033667 | 0.727459 | 0.150126 | 0.930223 | 0.078841 | -0.151280 | -0.219080 | 0.845005 | -0.790318 | -0.176290 | 0.157363 | -0.194284 |
7429 | 2022-07 | CFG | -0.037265 | -0.139193 | 1.445985 | -0.677444 | 0.179862 | -0.834155 | -0.081724 | -0.820258 | -0.393494 | -0.083843 | -0.178153 | -0.286319 |
7430 | 2022-07 | ZTS | -0.137297 | 0.620567 | 0.572084 | -0.003925 | -0.494337 | 0.661950 | -0.068215 | -0.638368 | 1.321123 | 0.299901 | -0.879869 | -0.212003 |
7431 rows × 14 columns
Also, we compute next quarter's return, which serves as the dependent variable for our modelling.
#find next quarter return
final = pd.DataFrame()
for instr in norm_df.Instrument.unique():
    s = norm_df[norm_df['Instrument'] == instr]
    s['Next Q Return'] = s['Quarterly Return'].shift(-1)
    final = pd.concat([final, s])
final.head()
Date | Instrument | Quarterly Return | E Score | S Score | G Score | NI | PE | PB | Momentum | ROA | ROE | Revenue | Debt Ratio | Next Q Return | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2016-04 | RHI | -0.180764 | -0.794609 | -0.417906 | -0.682521 | 0.646252 | -0.238948 | 0.039841 | -1.142926 | 2.899639 | 0.345103 | 0.342765 | -0.242635 | -0.007862 |
562 | 2016-07 | RHI | -0.007862 | -0.788871 | -0.415521 | -0.679219 | 0.655998 | -0.238228 | 0.040337 | -1.159616 | 2.742283 | 0.340419 | 0.352340 | -0.241018 | 0.288431 |
840 | 2016-10 | RHI | 0.288431 | -0.871597 | -0.449900 | -0.726823 | 0.478711 | -0.119151 | 0.017910 | 1.211123 | 2.674064 | 0.264757 | 0.187422 | -0.264439 | 0.001025 |
996 | 2017-01 | RHI | 0.001025 | -0.869063 | -0.448847 | -0.725365 | 0.484592 | -0.114168 | 0.019109 | 1.291428 | 2.541265 | 0.256264 | 0.195962 | -0.263726 | -0.018431 |
1199 | 2017-04 | RHI | -0.018431 | -0.861106 | -0.445540 | -0.720786 | 0.502390 | -0.108408 | 0.019690 | -0.556969 | 2.318636 | 0.244288 | 0.214258 | -0.261481 | 0.050282 |
final = final.reset_index(drop=True).dropna()
final
Date | Instrument | Quarterly Return | E Score | S Score | G Score | NI | PE | PB | Momentum | ROA | ROE | Revenue | Debt Ratio | Next Q Return | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2016-04 | RHI | -0.180764 | -0.794609 | -0.417906 | -0.682521 | 0.646252 | -0.238948 | 0.039841 | -1.142926 | 2.899639 | 0.345103 | 0.342765 | -0.242635 | -0.007862 |
1 | 2016-07 | RHI | -0.007862 | -0.788871 | -0.415521 | -0.679219 | 0.655998 | -0.238228 | 0.040337 | -1.159616 | 2.742283 | 0.340419 | 0.352340 | -0.241018 | 0.288431 |
2 | 2016-10 | RHI | 0.288431 | -0.871597 | -0.449900 | -0.726823 | 0.478711 | -0.119151 | 0.017910 | 1.211123 | 2.674064 | 0.264757 | 0.187422 | -0.264439 | 0.001025 |
3 | 2017-01 | RHI | 0.001025 | -0.869063 | -0.448847 | -0.725365 | 0.484592 | -0.114168 | 0.019109 | 1.291428 | 2.541265 | 0.256264 | 0.195962 | -0.263726 | -0.018431 |
4 | 2017-04 | RHI | -0.018431 | -0.861106 | -0.445540 | -0.720786 | 0.502390 | -0.108408 | 0.019690 | -0.556969 | 2.318636 | 0.244288 | 0.214258 | -0.261481 | 0.050282 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
7425 | 2021-04 | DXC | 0.245681 | 1.439174 | -0.277825 | 0.212136 | 0.545462 | 0.210870 | -0.231891 | 3.262569 | -0.633070 | -0.109188 | 0.653665 | 0.113160 | -0.136877 |
7426 | 2021-07 | DXC | -0.136877 | 1.493021 | -0.255447 | 0.243122 | 0.203621 | -0.074785 | -0.219816 | 0.762283 | -0.554010 | -0.020808 | 0.729533 | 0.166225 | -0.042249 |
7427 | 2021-10 | DXC | -0.042249 | 1.507222 | -0.249545 | 0.251294 | 0.505048 | -0.073831 | -0.217727 | -1.989807 | -1.196390 | -0.617624 | 0.768243 | 0.148156 | 0.013669 |
7428 | 2022-01 | DXC | 0.013669 | 1.513221 | -0.247052 | 0.254746 | 0.930386 | -0.106554 | -0.227228 | -0.535099 | -0.296670 | 0.169376 | 0.762821 | 0.121962 | -0.071100 |
7429 | 2022-04 | DXC | -0.071100 | 1.559481 | -0.227828 | 0.281367 | 0.610223 | -0.266300 | -0.211008 | 1.944347 | -0.365724 | 0.105921 | 0.786748 | 0.141913 | -0.192346 |
7145 rows × 15 columns
final.to_csv('./final.csv')
For our research, the E score, S score, and G score are the main factors we want to interpret. To reduce complexity and remove redundant information among the remaining factors, we use PCA to reduce them to 3 components. Following fundamental analysis, these factors naturally divide into three groups: profitability (NI, PE, PB, ROA, ROE, Revenue), momentum (Momentum), and leverage (Debt Ratio). This is why we choose n=3 for PCA; a quick sanity check of this choice is sketched below.
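As that sanity check (a sketch; `fundamental` is simply a local name for the fundamental factor columns), one can inspect how much variance the first three components retain:
from sklearn.decomposition import PCA

fundamental = ['NI', 'PE', 'PB', 'Momentum', 'ROA', 'ROE', 'Revenue', 'Debt Ratio']
pca_check = PCA(n_components=3).fit(final[fundamental].values)
print(pca_check.explained_variance_ratio_)        # variance share of each component
print(pca_check.explained_variance_ratio_.sum())  # total variance retained with n=3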
import seaborn as sns
import matplotlib.pyplot as plt
plt.rcParams['axes.unicode_minus'] = False
from sklearn.decomposition import PCA
pca_factor_name = ['NI', 'PE', 'PB', 'Momentum', 'ROA', 'ROE', 'Revenue', 'Debt Ratio']
com_df = pd.DataFrame()
date_list = list(final['Date'].unique())
# PCA analysis: fit PCA on the past four quarters (1 year) of factors, then apply the
# fitted weights to the current quarter
for quar in range(4, len(final['Date'].unique())):
    s_df = final[(final['Date'] >= date_list[quar - 4]) & (final['Date'] < date_list[quar])]
    pca = PCA(n_components=3)
    pca.fit(np.array(s_df[pca_factor_name].values))
    weights = pca.components_
    curr = final[final['Date'] == date_list[quar]]
    split_com = pd.DataFrame()
    split_com['Date'] = curr['Date']
    split_com['Instrument'] = curr['Instrument']
    # Calculate PCA factors by projecting current-quarter factors onto the weights
    pca_factors = np.dot(curr[pca_factor_name].values, weights.T)
    for i in range(3):
        split_com[f'PCA Factor {i + 1}'] = pca_factors[:, i]
    com_df = pd.concat([com_df, split_com])
com_df['Next Q Return'] = final['Next Q Return']
com_df['Quarterly Return'] = final['Quarterly Return']
com_df['E Score'] = final['E Score']
com_df['S Score'] = final['S Score']
com_df['G Score'] = final['G Score']
com_df
Date | Instrument | PCA Factor 1 | PCA Factor 2 | PCA Factor 3 | Next Q Return | Quarterly Return | E Score | S Score | G Score | |
---|---|---|---|---|---|---|---|---|---|---|
1199 | 2017-04 | RHI | 0.852194 | -1.940168 | 0.434228 | 0.050282 | -0.018431 | -0.861106 | -0.445540 | -0.720786 |
1297 | 2017-04 | SPGI | 4.612328 | -2.510695 | 3.296044 | 0.070690 | 0.116644 | 2.029091 | -0.118562 | 1.152635 |
1208 | 2017-04 | ORCL | -0.743416 | -0.238669 | -0.603372 | 0.108835 | 0.065743 | 0.121018 | -1.189149 | -2.045449 |
1413 | 2017-04 | LOW | -0.560841 | 0.274964 | -0.065589 | -0.088721 | 0.173421 | -0.594675 | -0.473439 | -0.360427 |
1389 | 2017-04 | MAA | -1.021647 | 0.370841 | 0.204787 | 0.014234 | 0.035777 | -1.413413 | -0.776643 | 0.220382 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
7262 | 2022-07 | WM | -0.452703 | -0.290607 | 1.159184 | NaN | 0.047261 | 0.499012 | 0.381293 | 1.670612 |
7252 | 2022-07 | CAH | 0.433156 | -1.607065 | -1.295089 | NaN | 0.275684 | 1.153382 | -0.063716 | 0.859551 |
7407 | 2022-07 | AVY | 0.548646 | -0.349065 | 0.162362 | NaN | -0.060558 | 0.994782 | -0.558506 | -0.350338 |
7249 | 2022-07 | CMS | 1.001439 | -0.688955 | 0.923604 | NaN | -0.137185 | 0.305028 | 0.946862 | 2.218818 |
7368 | 2022-07 | DXC | 0.106735 | -0.544495 | -0.999375 | NaN | -0.192346 | 1.631655 | -0.197834 | 0.322900 |
6291 rows × 10 columns
com_df.to_csv('./com_df.csv')
import seaborn as sns
import matplotlib.pyplot as plt
df = com_df.drop(com_df.columns[6], axis =1)
df = df.dropna()
df
Date | Instrument | PCA Factor 1 | PCA Factor 2 | PCA Factor 3 | Next Q Return | E Score | S Score | G Score | |
---|---|---|---|---|---|---|---|---|---|
1199 | 2017-04 | RHI | 0.852194 | -1.940168 | 0.434228 | 0.050282 | -0.861106 | -0.445540 | -0.720786 |
1297 | 2017-04 | SPGI | 4.612328 | -2.510695 | 3.296044 | 0.070690 | 2.029091 | -0.118562 | 1.152635 |
1208 | 2017-04 | ORCL | -0.743416 | -0.238669 | -0.603372 | 0.108835 | 0.121018 | -1.189149 | -2.045449 |
1413 | 2017-04 | LOW | -0.560841 | 0.274964 | -0.065589 | -0.088721 | -0.594675 | -0.473439 | -0.360427 |
1389 | 2017-04 | MAA | -1.021647 | 0.370841 | 0.204787 | 0.014234 | -1.413413 | -0.776643 | 0.220382 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
7107 | 2022-04 | WM | -0.530063 | 0.432105 | -0.444899 | 0.047261 | 0.513191 | 0.387185 | 1.678771 |
6970 | 2022-04 | CAH | 0.382502 | -1.287591 | 2.399141 | 0.275684 | 1.222790 | -0.034871 | 0.899493 |
6998 | 2022-04 | AVY | 0.463544 | -0.144213 | -0.221545 | -0.060558 | 0.970929 | -0.568420 | -0.364065 |
7078 | 2022-04 | CMS | 1.250524 | -0.300628 | -1.373276 | -0.137185 | 0.255026 | 0.926082 | 2.190044 |
6889 | 2022-04 | DXC | 0.073055 | -0.924805 | 0.293358 | -0.192346 | 1.559481 | -0.227828 | 0.281367 |
6005 rows × 9 columns
Next, we add Covid-19 into consideration.
# Create a dummy variable for Covid: 1 if '2020-01' <= Date < '2022-01', otherwise 0.
# We assume Covid affected the stock market in 2020 and 2021.
# We set 2022 as unaffected because (1) a single event that has a start must have an end, and
# (2) the test dataset only contains dates after 2021-01; if every date after 2021-01 had
# Covid = 1, the dummy would be collinear with the intercept and confuse the linear regression.
df['Covid'] = ((df['Date'] >= '2020-01') & (df['Date'] < '2022-01')).astype(int)
The final dataset consists of seven factors: 'E Score', 'S Score', 'G Score', 'PCA Factor 1', 'PCA Factor 2', 'PCA Factor 3', and 'Covid'.
import seaborn as sns
import matplotlib.pyplot as plt
factor_col = ['E Score', 'S Score', 'G Score', 'PCA Factor 1', 'PCA Factor 2', 'PCA Factor 3', 'Covid']
# Assuming your DataFrame is named df
correlation_matrix = df[factor_col].corr()
# Create a heatmap using Seaborn
plt.figure(figsize=(12, 8))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", linewidths=0.5)
# Customize the plot
plt.title("Correlation Matrix", fontsize=20)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
# Show the plot
plt.show()
df.describe()
PCA Factor 1 | PCA Factor 2 | PCA Factor 3 | Next Q Return | E Score | S Score | G Score | Covid | |
---|---|---|---|---|---|---|---|---|
count | 6005.000000 | 6005.000000 | 6005.000000 | 6005.000000 | 6005.000000 | 6005.000000 | 6005.000000 | 6005.000000 |
mean | -0.003506 | 0.029440 | -0.014076 | 0.024194 | 0.053216 | 0.050056 | 0.015121 | 0.381016 |
std | 1.494748 | 1.131171 | 1.034577 | 0.158069 | 0.950474 | 1.000145 | 0.979416 | 0.485677 |
min | -5.313446 | -5.422237 | -4.328957 | -0.812361 | -2.681774 | -3.031672 | -3.047624 | 0.000000 |
25% | -0.703778 | -0.607372 | -0.612219 | -0.060558 | -0.623677 | -0.680531 | -0.575794 | 0.000000 |
50% | -0.339823 | -0.070666 | -0.077146 | 0.025691 | -0.016866 | -0.134695 | 0.109163 | 0.000000 |
75% | 0.146190 | 0.509475 | 0.462595 | 0.109748 | 0.654640 | 0.669262 | 0.697301 | 1.000000 |
max | 16.026433 | 9.816880 | 8.207443 | 0.957227 | 3.337515 | 3.264825 | 2.825667 | 1.000000 |
Here, we first divide the dataset into a training set (dates up to and including 2021-01) and a test set (dates after 2021-01).
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
train_df = df[df['Date']<='2021-01']
test_df = df[df['Date']>'2021-01']
#factor_col = ['E Score', 'S Score', 'G Score', 'NI', 'PE', 'PB', 'Momentum', 'ROA', 'ROE', 'Revenue', 'Debt Ratio', 'Covid']
X_train = train_df[factor_col].values
Y_train = train_df['Next Q Return']
X_test = test_df[factor_col].values
Y_test = test_df['Next Q Return']
Linear Regression
X_train1 = sm.add_constant(X_train, prepend = True)
LR1 = sm.OLS(Y_train, X_train1)
LR = LR1.fit()
X_test1 = sm.add_constant(X_test, prepend = True)
LR_test = LR.predict(X_test1)
Random Forest
randomforest_regressor = RandomForestRegressor(n_estimators=300)
model_RF = randomforest_regressor.fit(X_train, Y_train)
RF_test = model_RF.predict(X_test)
SVR
svr = SVR(kernel='rbf')
model_SVR = svr.fit(X_train, Y_train)
svr_test = model_SVR.predict(X_test)
Gradient Boost
gbdt = GradientBoostingRegressor(n_estimators=100, max_depth=5)
model_GB = gbdt.fit(X_train, Y_train)
gbdt_test = model_GB.predict(X_test)
Below is the result of our linear regression model; variables x1 to x7 represent 'E Score', 'S Score', 'G Score', 'PCA Factor 1', 'PCA Factor 2', 'PCA Factor 3', and 'Covid', respectively.
In the linear regression model, among the ESG scores only the S-score (x2) is significant, with a negative coefficient (p = 0.008).
LR.summary()
Dep. Variable: | Next Q Return | R-squared: | 0.112 |
---|---|---|---|
Model: | OLS | Adj. R-squared: | 0.111 |
Method: | Least Squares | F-statistic: | 82.20 |
Date: | Sun, 23 Apr 2023 | Prob (F-statistic): | 6.56e-113 |
Time: | 12:40:27 | Log-Likelihood: | 2111.9 |
No. Observations: | 4576 | AIC: | -4208. |
Df Residuals: | 4568 | BIC: | -4156. |
Df Model: | 7 | ||
Covariance Type: | nonrobust |
coef | std err | t | P>|t| | [0.025 | 0.975] | |
---|---|---|---|---|---|---|
const | -0.0004 | 0.003 | -0.136 | 0.892 | -0.006 | 0.005 |
x1 | 0.0001 | 0.003 | 0.050 | 0.960 | -0.005 | 0.005 |
x2 | -0.0069 | 0.003 | -2.663 | 0.008 | -0.012 | -0.002 |
x3 | 0.0001 | 0.002 | 0.043 | 0.966 | -0.005 | 0.005 |
x4 | 0.0009 | 0.002 | 0.598 | 0.550 | -0.002 | 0.004 |
x5 | 0.0036 | 0.002 | 1.737 | 0.083 | -0.000 | 0.008 |
x6 | -0.0014 | 0.002 | -0.623 | 0.533 | -0.006 | 0.003 |
x7 | 0.1174 | 0.005 | 23.012 | 0.000 | 0.107 | 0.127 |
Omnibus: | 428.428 | Durbin-Watson: | 1.472 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 1585.733 |
Skew: | -0.425 | Prob(JB): | 0.00 |
Kurtosis: | 5.756 | Cond. No. | 3.49 |
As for the random forest: the E-score, S-score, and G-score importances are all slightly above the uniform baseline of 1/7 ≈ 0.143.
# Get feature importances
RF_importances = model_RF.feature_importances_
# Print the feature importances
for feature, importance in zip(factor_col, RF_importances):
print(f"{feature}: {importance}")
E Score: 0.15306876895814198 S Score: 0.1456452103484826 G Score: 0.14323780823727802 PCA Factor 1: 0.13957025117922972 PCA Factor 2: 0.15753055992669504 PCA Factor 3: 0.1513779006058136 Covid: 0.10956950074435894
As for Gradient Boosting: only the E-score importance is slightly above the 1/7 baseline.
importances2 = model_GB.feature_importances_
# Print the feature importances
for feature, importance in zip(factor_col, importances2):
print(f"{feature}: {importance}")
E Score: 0.14613749118630107 S Score: 0.12413907467663009 G Score: 0.12878085983271992 PCA Factor 1: 0.08660388796214366 PCA Factor 2: 0.11543424855423633 PCA Factor 3: 0.13374951829969456 Covid: 0.2651549194882743
We first merge all results into one dataframe. Then we compute the mean squared error of each model on the test dataset to see how they perform.
pred_df = pd.DataFrame()
pred_df['Instrument'] = test_df['Instrument']
pred_df['Date'] = test_df['Date']
pred_df['LR'] = LR_test
pred_df['RF'] = RF_test
pred_df['SVR'] = svr_test
pred_df['GB'] = gbdt_test
pred_df['Actual Return'] = test_df['Next Q Return']
pred_df
Instrument | Date | LR | RF | SVR | GB | Actual Return | |
---|---|---|---|---|---|---|---|
5764 | RHI | 2021-04 | 0.123420 | 0.168349 | 0.158057 | 0.175005 | 0.127683 |
5929 | SPGI | 2021-04 | 0.123906 | 0.129902 | 0.138656 | 0.106414 | 0.035181 |
5883 | ORCL | 2021-04 | 0.126573 | 0.167482 | 0.105094 | 0.077113 | 0.131953 |
5972 | LOW | 2021-04 | 0.118705 | 0.120991 | 0.282981 | 0.113571 | -0.018140 |
5946 | MAA | 2021-04 | 0.114827 | 0.143250 | 0.112849 | 0.124268 | 0.108835 |
... | ... | ... | ... | ... | ... | ... | ... |
7107 | WM | 2022-04 | -0.001145 | -0.000266 | -0.015315 | -0.004667 | 0.047261 |
6970 | CAH | 2022-04 | -0.007403 | -0.027068 | 0.175226 | -0.030647 | 0.275684 |
6998 | AVY | 2022-04 | 0.003846 | -0.006445 | 0.004178 | 0.018158 | -0.060558 |
7078 | CMS | 2022-04 | -0.004518 | 0.001014 | 0.066760 | 0.021595 | -0.137185 |
6889 | DXC | 2022-04 | -0.002212 | 0.004876 | 0.000648 | 0.004377 | -0.192346 |
1429 rows × 7 columns
pred_df = pred_df.sort_values(by=['Instrument', 'Date'])
pred_df
Instrument | Date | LR | RF | SVR | GB | Actual Return | |
---|---|---|---|---|---|---|---|
5960 | A | 2021-04 | 0.110206 | 0.160231 | 0.115178 | 0.178601 | 0.146588 |
6059 | A | 2021-07 | 0.112734 | 0.131651 | 0.160899 | 0.163970 | 0.027801 |
6293 | A | 2021-10 | 0.101839 | 0.184062 | 0.105382 | 0.069218 | -0.115372 |
6764 | A | 2022-01 | -0.017033 | -0.088556 | -0.001623 | -0.084798 | -0.143913 |
7015 | A | 2022-04 | -0.019506 | -0.075513 | -0.019761 | -0.098354 | 0.124340 |
... | ... | ... | ... | ... | ... | ... | ... |
5820 | ZTS | 2021-04 | 0.117553 | 0.057486 | -0.009100 | 0.071518 | 0.041747 |
6193 | ZTS | 2021-07 | 0.120532 | 0.098244 | 0.146891 | 0.123646 | 0.256979 |
6572 | ZTS | 2021-10 | 0.123629 | 0.235019 | 0.231796 | 0.210828 | -0.227185 |
6768 | ZTS | 2022-01 | 0.002745 | 0.018562 | 0.012963 | 0.034367 | -0.088552 |
6882 | ZTS | 2022-04 | 0.000819 | 0.070902 | 0.029919 | 0.063524 | -0.137297 |
1429 rows × 7 columns
from sklearn.metrics import mean_squared_error
Y_test = Y_test.dropna() #drop out the last row for next return which is NAN
mse_lr = mean_squared_error(Y_test, LR_test)
mse_rf = mean_squared_error(Y_test, RF_test)
mse_svr = mean_squared_error(Y_test, svr_test)
mse_gb = mean_squared_error(Y_test, gbdt_test)
print(f"Mean Squared Error for Linear Regression: {mse_lr}")
print(f"Mean Squared Error for Random Forest: {mse_rf}")
print(f"Mean Squared Error for Support Vector Regression: {mse_svr}")
print(f"Mean Squared Error for Gradient Boosting: {mse_gb}")
Mean Squared Error for Linear Regression: 0.023257777000994646 Mean Squared Error for Random Forest: 0.024504062986203536 Mean Squared Error for Support Vector Regression: 0.024717368400565383 Mean Squared Error for Gradient Boosting: 0.02392708622907875
Linear regression performs best on this metric. This is reasonable: linear regression's objective solely minimizes the mean squared error, while the other models incorporate additional elements, such as regularization, designed to avoid overfitting. Moreover, the number of factors in our models is small compared with real asset management firms, which use hundreds or even thousands of factors; such complex models are far more exposed to overfitting, whereas our much simpler models are less prone to it. In this setting, linear regression can indeed come out ahead on the MSE metric.
Our goal is not just to predict stock returns but to create a portfolio that beats the market. At the start of each quarter, we select the top 5 stocks ranked by each model's predicted next-quarter return and sell them at the start of the following quarter. We construct an equal-weight portfolio of these 5 stocks (i.e., the same dollar amount in each) and compare the portfolios' performance against one another and against the S&P 500 index.
def get_ret(name):
    """Equal-weight realized return of the top-K stocks ranked by the
    given model's predicted return, for each quarter in pred_df."""
    K = 5
    ret = []
    for quar in pred_df.Date.unique():
        s_df = pred_df[pred_df['Date'] == quar]
        # Rank stocks by this model's predicted next-quarter return
        s_df = s_df.sort_values(by=name, ascending=False)
        # Equal weight: average the realized return of the top K picks
        ret.append(s_df.iloc[:K]['Actual Return'].mean())
    return ret
LR_ret = get_ret(name="LR")
RF_ret = get_ret(name="RF")
SVR_ret = get_ret(name="SVR")
GBDT_ret = get_ret(name='GB')
all_ret = pd.DataFrame()
all_ret['Date'] = pred_df.Date.unique()
all_ret['Date'] = pd.to_datetime(all_ret['Date'])
# Shift each date forward one quarter: predictions made at quarter t are realized at t+1
all_ret['Date'] = all_ret['Date'] + pd.DateOffset(months=3)
all_ret['LR'] = LR_ret
all_ret['RF'] = RF_ret
all_ret['SVR'] = SVR_ret
all_ret['GBDT'] = GBDT_ret
all_ret_backup = all_ret.copy()  # .copy() so the backup is not just a reference to the same frame
all_ret.head()
 | Date | LR | RF | SVR | GBDT |
---|---|---|---|---|---|
0 | 2021-07-01 | -0.023292 | -0.041212 | 0.008082 | 0.020397 |
1 | 2021-10-01 | 0.088594 | 0.166775 | 0.115003 | 0.166775 |
2 | 2022-01-01 | -0.098635 | 0.121873 | -0.033019 | 0.028023 |
3 | 2022-04-01 | -0.205070 | -0.112781 | -0.099513 | -0.048980 |
4 | 2022-07-01 | -0.070502 | -0.028746 | -0.017714 | -0.094978 |
Below, we add in the S&P 500 index return and merge it with our models' results.
sp500 = pd.read_excel('./sp500return.xlsx')
sp500 = sp500.T  # transpose so the dates run down the rows
sp5001 = sp500.iloc[2:, :]  # drop the header rows carried over from the spreadsheet
sp5001 = sp5001.reset_index()
sp5001.columns = ['Date', 'Last Price']
sp5001
 | Date | Last Price |
---|---|---|
0 | 03/31/2016 | 2059.74 |
1 | 06/30/2016 | 2098.86 |
2 | 09/30/2016 | 2168.27 |
3 | 12/30/2016 | 2238.83 |
4 | 03/31/2017 | 2362.72 |
5 | 06/30/2017 | 2423.41 |
6 | 09/29/2017 | 2519.36 |
7 | 12/29/2017 | 2673.61 |
8 | 03/30/2018 | 2640.87 |
9 | 06/29/2018 | 2718.37 |
10 | 09/28/2018 | 2913.98 |
11 | 12/31/2018 | 2506.85 |
12 | 03/29/2019 | 2834.4 |
13 | 06/28/2019 | 2941.76 |
14 | 09/30/2019 | 2976.74 |
15 | 12/31/2019 | 3230.78 |
16 | 03/31/2020 | 2584.59 |
17 | 06/30/2020 | 3100.29 |
18 | 09/30/2020 | 3363 |
19 | 12/31/2020 | 3756.07 |
20 | 03/31/2021 | 3972.89 |
21 | 06/30/2021 | 4297.5 |
22 | 09/30/2021 | 4307.54 |
23 | 12/31/2021 | 4766.18 |
24 | 03/31/2022 | 4530.41 |
25 | 06/30/2022 | 3785.38 |
26 | 09/30/2022 | 3585.62 |
27 | 12/30/2022 | 3839.5 |
# Compute each quarter's index return as the percentage change in the closing level
sp5002 = sp5001.set_index('Date').pct_change()
# Keep the last 7 quarters, which cover our out-of-sample prediction window
sp5003 = sp5002.tail(7)
sp5003
Date | Last Price |
---|---|
06/30/2021 | 0.081706 |
09/30/2021 | 0.002336 |
12/31/2021 | 0.106474 |
03/31/2022 | -0.049467 |
06/30/2022 | -0.164451 |
09/30/2022 | -0.052771 |
12/30/2022 | 0.070805 |
sp5004 = sp5003.reset_index()['Last Price']
# Both frames start at the same quarter, so align them by row position;
# the inner merge keeps only the quarters present in both
all_return = all_ret.merge(sp5004, left_index=True, right_index=True)
all_return = all_return.dropna()
all_return = all_return.set_index('Date')
all_return
Date | LR | RF | SVR | GBDT | Last Price |
---|---|---|---|---|---|
2021-07-01 | -0.023292 | -0.041212 | 0.008082 | 0.020397 | 0.081706 |
2021-10-01 | 0.088594 | 0.166775 | 0.115003 | 0.166775 | 0.002336 |
2022-01-01 | -0.098635 | 0.121873 | -0.033019 | 0.028023 | 0.106474 |
2022-04-01 | -0.205070 | -0.112781 | -0.099513 | -0.048980 | -0.049467 |
2022-07-01 | -0.070502 | -0.028746 | -0.017714 | -0.094978 | -0.164451 |
Lastly, we plot the quarterly return of each model's portfolio alongside the actual S&P 500 index return.
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import pandas as pd
# Set figure size
plt.figure(figsize=(12,6))
# Plot LR
plt.plot(all_return['LR'], linestyle='--', lw=1.5, color='purple', label='LR')
# Plot RF
plt.plot(all_return['RF'], linestyle='-', lw=4, color='blue', label='RF')
# Plot SVR
plt.plot(all_return['SVR'], linestyle='--', lw=1.5, color='orange', label='SVR')
# Plot GBDT (green, so it is distinguishable from the purple LR line)
plt.plot(all_return['GBDT'], linestyle='-', lw=4, color='green', label='GBDT')
# Plot S&P500 Index
plt.plot(all_return['Last Price'], linestyle='-.', lw=1.5, color='gray', label='S&P500 Index')
# Add shaded area between -0.2 and the S&P500 line
plt.fill_between(all_return.index, -0.2, all_return['Last Price'], color='red', alpha=0.2)
# Customize legend
plt.legend(fontsize=14)
# Create a list of quarter-start dates so the ticks align with the data points
start_date = pd.Timestamp('2021-07-01')
end_date = pd.Timestamp('2022-07-01')
date_range = pd.date_range(start_date, end_date, freq='3MS')  # '3MS' = every 3 months, month start
# Set custom x-axis tickers
ax = plt.gca()
ax.set_xticks(date_range)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
# Show the plot
plt.show()
We can see that random forest and gradient boosting are better at constructing stock portfolios. However, we do not see significant extra returns captured by our models compared to the S&P 500.
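As a rough check on that claim, we can compound the quarterly returns into a growth path and compare terminal wealth; a minimal sketch using the all_return frame built above:

# Compound quarterly returns: growth of $1 invested at the start of the test window
cum_growth = (1 + all_return).cumprod()
# Terminal wealth per strategy, and each strategy's spread over the index
print(cum_growth.iloc[-1])
print(cum_growth.iloc[-1] - cum_growth.iloc[-1]['Last Price'])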
In this project, we try to answer four questions:
It is reasonable to suspect that the relationship between ESG scores and returns is non-linear; think of the balance point between over-investing and necessary investing in ESG activities. This is also why we use random forest, gradient boosting, and support vector regression (RBF kernel), models that are non-linear in nature. In the gradient boosting model, the E-score's importance is slightly above average. This is expected, as the E-score is normally the factor best known to ordinary investors, given how prominent the green transition has become in daily life. Every company talks about the green transition, driven both by a genuine willingness to protect the environment and by government regulation.
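For reference, the importance figures quoted here are the fitted ensemble's impurity-based feature importances; a minimal sketch, assuming the fitted gradient boosting regressor is named gbdt and the training factor matrix X_train is a DataFrame (both names are assumptions based on the surrounding code):

import pandas as pd
# Impurity-based importances sum to 1, so "above average" means above 1/n_features
importances = pd.Series(gbdt.feature_importances_, index=X_train.columns)
print(importances.sort_values(ascending=False))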
Are machine learning models incorporating ESG factors useful in stock prediction? According to the mean-squared-error comparison, linear regression performs best on the test dataset, followed by gradient boosting.
Can machine learning models incorporating ESG factors form a good investment strategy? None of the four models' portfolios dramatically improves on the S&P 500 index. Random forest and gradient boosting do perform better than SVR and linear regression, but they still outperform the S&P 500 only 3 times out of 5 periods, a 60% hit rate. As the averages below show, random forest outperforms the S&P 500 by around 2% and gradient boosting by around 1.4% across the 5-quarter horizon. Can it be a good investment strategy? I personally would not put my money in these models this time. :)
all_return.mean()
LR -0.061781 RF 0.021182 SVR -0.005432 GBDT 0.014248 Last Price -0.004680 dtype: float64
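The 60% hit rate quoted above can be computed directly from the merged quarterly returns; a small sketch:

# Fraction of quarters in which each model's portfolio beat the index
models = ['LR', 'RF', 'SVR', 'GBDT']
hit_rate = all_return[models].gt(all_return['Last Price'], axis=0).mean()
print(hit_rate)  # RF and GBDT beat the index in 3 of 5 quarters, i.e. 0.6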
Put plainly, none of the four models yields an outstanding investment strategy: no outsized returns are captured. Arbitrage theory offers one explanation. Our model is simple compared with those built by top asset management firms, whose teams of PhDs do this for a living and have long since built models that harvest whatever benefit our study's factors offer. This is why the adjusted R-squared of the linear regression is 0.111 (we worked to improve it: the original was 0.001, a truly astonishing number), and why our strategies, for example the one derived from random forest, beat the S&P 500 by only about 2%. Still, this project taught us a great deal about quantitative finance and has inspired us to consider quantitative research as a career path.
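As an aside, the adjusted R-squared quoted above comes straight from an OLS fit; a minimal sketch using statsmodels, assuming X_train and Y_train hold the factor matrix and next-quarter returns (names assumed from the surrounding code):

import statsmodels.api as sm
# Fit OLS with an intercept and read off the adjusted R-squared
ols = sm.OLS(Y_train, sm.add_constant(X_train), missing='drop').fit()
print(ols.rsquared_adj)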
Our research offers companies insight into how their performance on ESG activities may affect their market capitalization. It may also serve as a caution for companies that over-invest in ESG, or pour so much into the S-score that their business models fall apart. It is increasingly important to find the balance point between over-investing in ESG and simply neglecting it.
We can also see that the E-score and G-score are not significant in the linear regression. This might be because many people simply do not know what the E-score and G-score are: everyone talks about ESG, yet few actually read, for example, SASB's ESG disclosure standards to figure out what ESG really is. One insight from this research is that policymakers should be aware of how important it is for investors, and even the general public, to know what ESG scores really are and what they mean for their own and their children's future.
Moreover, for quantitative researchers, our study suggests that non-linear multifactor modelling of ESG may build better investment strategies, probably because of the non-linear nature of ESG scores.
One future research direction stems from the variety of ESG rating agencies. This time, we used only Bloomberg ESG scores in our modelling, but there are many other well-known agencies, such as MSCI, S&P Global, and Sustainalytics, all of which use different criteria and weighting methods to calculate ESG scores. For example, Bloomberg's framework covers air quality, climate change, water, energy management, material waste, health and safety, audit risk, diversity, compensation, board structure, and shareholders' rights (Bloomberg, 2023), while Refinitiv (2023) considers resource use, emissions, and innovation; workforce, human rights, community, and product responsibility for social scores; and management, shareholders, and corporate social responsibility, together with controversy scores, in its analysis. Bloomberg, unlike some other agencies, does not incorporate innovation, product responsibility, and the like.
Thus, future research could incorporate ESG scores from other agencies into the modelling to see whether they are effective in predicting stock returns. Another direction is to repeat the study in other markets, such as China or Europe, to see whether investors there care about ESG.
Bauer, R., Koedijk, K., & Otten, R. (2005). International evidence on ethical mutual fund performance and investment style. Journal of Banking and Finance, 29(7), 1751–1767. https://doi.org/10.1016/j.jbankfin.2004.06.035
Bloomberg ESG. (n.d.). Retrieved April 9, 2023, from https://www.bloomberg.com/professional/dataset/global-environmental-social-governance-data/
Chen, Y., Lo, M. C., Wang, S., & Lin, Y. (2020). ESG disclosure, REIT debt financing, and firm value. Journal of Real Estate Research, 42(3), 349–378. https://doi.org/10.1080/08965882.2018.1560396
Consolandi, C., Jaiswal-Dale, A., Poggiani, E., & Vercelli, A. (2009). Global Standards and Ethical Stock Indexes: The Case of the Dow Jones Sustainability Stoxx Index. Journal of Business Ethics, 87(S1), 185–197. https://doi.org/10.1007/s10551-008-9793-1
Gavrilakis, N., & Floros, C. (2023). ESG performance, herding behavior and stock market returns: evidence from Europe. Operational Research, 23(1). https://doi.org/10.1007/s12351-023-00745-1
Ghorbani, M. (2018, March 13). Stock Price Prediction using Principle Components. arXiv.org. https://arxiv.org/abs/1803.05075
Gilad-Bachrach, R. (n.d.). Why does gradient boosting generally outperform random forests? Quora. https://www.quora.com/Why-does-gradient-boosting-generally-outperform-random-forests
Gregory, R. L. (2021a). ESG activities and firm cash flow. Global Finance Journal, 52, 100698. https://doi.org/10.1016/j.gfj.2021.100698
He, F., Qin, S., Liu, Y., & Wu, J. (2022). CSR and idiosyncratic risk: Evidence from ESG information disclosure. Finance Research Letters, 49, 102936. https://doi.org/10.1016/j.frl.2022.102936
Henrique, B. M., Sobreiro, V. A., & Kimura, H. (2018). Stock price prediction using support vector regression on daily and up to the minute prices. The Journal of Finance and Data Science, 4(3), 183–201. https://doi.org/10.1016/j.jfds.2018.04.003
Jiang, S., & Jin, X. (2021). Effects of investor sentiment on stock return volatility: A spatio-temporal dynamic panel model. Economic Modelling, 97, 298–306. https://doi.org/10.1016/j.econmod.2020.04.002
Jin, I. H. (2018). Is ESG a systematic risk factor for US equity mutual funds? Journal of Sustainable Finance & Investment, 8(1), 72–93. https://doi.org/10.1080/20430795.2017.1395251
Justice, P., & Hale, J. (2016). MorningStar. https://s21.q4cdn.com/198919461/files/doc_downloads/press_kits/2016/Morningstar-Sustainability-Rating-Methodology.pdf
Lok, L. (2022). Decision Trees, Random Forests and Gradient Boosting: What’s the Difference? Leon Lok. https://leonlok.co.uk/blog/decision-trees-random-forests-gradient-boosting-whats-the-difference/
MSCI. (2017). ESG Investing: Considerations for the U.S. Market. https://www.msci.com/documents/10199/cbc27309-8157-4589-9cc0-00734bca6a6b
Murata, R., & Hamori, S. (2021). ESG Disclosures and Stock Price Crash Risk. Journal of Risk and Financial Management, 14(2), 70. https://doi.org/10.3390/jrfm14020070
Naffa, H., & Fain, M. (2020). Performance measurement of ESG-themed megatrend investments in global equity markets using pure factor portfolios methodology. PLOS ONE, 15(12), e0244225. https://doi.org/10.1371/journal.pone.0244225
Nazir, M., Akbar, A., Akbar, A., Poulovo, P., Hussain, A., & Qureshi, M. I. (2021a). The nexus between corporate environment, social, and governance performance and cost of capital: evidence from top global tech leaders. Environmental Science and Pollution Research, 29(15), 22623–22636. https://doi.org/10.1007/s11356-021-17362-0
Refinitiv. (2021). ESG Scores Methodology. Retrieved April 24, 2023, from https://www.refinitiv.com/content/dam/marketing/en_us/documents/methodology/refinitiv-esg-scores-methodology.pdf
Renneboog, L., Ter Horst, J., & Zhang, C. C. (2008). Socially responsible investments: Institutional aspects, performance, and investor behavior. Journal of Banking and Finance, 32(9), 1723–1742. https://doi.org/10.1016/j.jbankfin.2007.12.039
Spirova, T. (2023). ESG Scores and Stock Price Returns: How Do ESG Scores Impact Stock Returns in Netherlands. University of Twente. http://essay.utwente.nl/94564/1/Spirova_MA_BMS.pdf
SSE Initiative. (n.d.). ESG guidance database. Retrieved from https://sseinitiative.org/esg-guidance-database/
SVR modeling and parameter optimization for financial time series forecasting. (2022, December 11). IEEE Xplore. https://ieeexplore.ieee.org/document/10016054
Triguero, Á., Moreno-Mondéjar, L., & Davia, M. A. (2016). Leaders and Laggards in Environmental Innovation: An Empirical Analysis of SMEs in Europe. Business Strategy and the Environment, 25(1), 28–39. https://doi.org/10.1002/bse.1854
US Securities and Exchange Commission. (2021). Climate and ESG risks and opportunities. https://www.sec.gov/sec-response-climate-and-esg-risks-and-opportunities
Weber, O. (2014). The financial sector's impact on sustainable development. Journal of Sustainable Finance & Investment, 4(1), 1–8. https://doi.org/10.1080/20430795.2014.887345
World Federation of Exchanges. (2021). Monthly Statistics. https://www.world-exchanges.org/home/index.php/statistics/monthly-reports