from google.cloud import bigquery
# BigQuery client bound to the project that hosts the FRED tables
client = bigquery.Client(project='fred-employment-initial-test')

# SQL selecting every column of the rows whose series_id is MANEMP
query = """
SELECT *
FROM `fred-employment-initial-test.fred_data.fred_data_withnotes`
WHERE series_id IN ('MANEMP')
"""

# Submit the query, then materialise its result set as a pandas DataFrame
query_job = client.query(query)
data = query_job.to_dataframe()

# Preview the leading rows
print(data.head())

import pandas_datareader.data as web
from datetime import datetime

# Observation window requested from FRED
start = datetime(year=1939, month=1, day=1)
end = datetime(year=2024, month=1, day=1)

# Download the MFGOPH (manufacturing productivity) series via the 'fred' data source
productivity_data = web.DataReader('MFGOPH', data_source='fred', start=start, end=end)

# Preview the leading rows of the download
print(productivity_data.head())

import pandas as pd

# Align the BigQuery employment frame and the FRED productivity frame on a
# common, timezone-naive datetime index, then combine them.

# Parse both date keys as datetimes so the two frames share one key type
data['date'] = pd.to_datetime(data['date'])
productivity_data.index = pd.to_datetime(productivity_data.index)

# Use the date as the index of both frames
data.set_index('date', inplace=True)
productivity_data.index.name = 'date'

# Strip timezone info so the two indexes compare equal on matching dates
data.index = data.index.tz_localize(None)
productivity_data.index = productivity_data.index.tz_localize(None)

# Inner-join on date, then sort chronologically. The SQL query has no
# ORDER BY, so the row order of `data` is not guaranteed, and join() keeps the
# left frame's order. The later diff(), ADF and VAR steps all assume the rows
# are in time order, so enforce it here.
merged_data = data.join(productivity_data, how='inner').sort_index()

# Display the first few rows of the merged DataFrame
print(merged_data.head())

import matplotlib.pyplot as plt

# Dual-axis line chart: employment on the left axis, the productivity index on
# a twinned right axis, both against the shared date index.
fig, ax1 = plt.subplots(figsize=(12, 6))

color = 'tab:blue'
ax1.set_xlabel('Date')
ax1.set_ylabel('Manufacturing Employment (Thousands of Persons)', color=color)
ax1.plot(merged_data.index, merged_data['value'], color=color)
ax1.tick_params(axis='y', labelcolor=color)
ax1.set_ylim(8000, 20000)  # Fixed y-axis limits for employment

ax2 = ax1.twinx()
color = 'tab:orange'
ax2.set_ylabel('Manufacturing Labor Productivity Index', color=color)
ax2.plot(merged_data.index, merged_data['MFGOPH'], color=color)
ax2.tick_params(axis='y', labelcolor=color)
ax2.set_ylim(30, 110)  # Fixed y-axis limits for labor productivity

# Add the title before tight_layout so the layout pass reserves room for it,
# and attach it to an explicit axes rather than whichever one is "current".
ax1.set_title('Trends in Manufacturing Employment and Labor Productivity')
fig.tight_layout()
plt.show()

# Correlation between the employment levels and the productivity index,
# using Series.corr with its default method
employment_series = merged_data['value']
productivity_series = merged_data['MFGOPH']
correlation = employment_series.corr(productivity_series)

# Report the coefficient
print(f"Correlation coefficient between manufacturing employment and labor productivity: {correlation}")

from scipy import stats

# Paired t-test on the employment and productivity columns.
#
# Keep only the dates on which both series are observed: ttest_rel propagates
# NaN by default, and a NaN p-value would fall through to the "Fail to reject"
# branch below and report a misleading conclusion.
# NOTE(review): the two series appear to be in different units (thousands of
# persons vs. an index), so a test on their mean difference may not be the
# intended comparison - confirm.
paired_obs = merged_data[['value', 'MFGOPH']].dropna()
t_stat, p_value = stats.ttest_rel(paired_obs['value'], paired_obs['MFGOPH'])

# Display the t-statistic and p-value
print(f"T-statistic: {t_stat}")
print(f"P-value: {p_value}")

# Conclusion at the 5% significance level
if p_value < 0.05:
    print("Reject the null hypothesis: There is a significant difference between manufacturing employment and labor productivity.")
else:
    print("Fail to reject the null hypothesis: No significant difference between manufacturing employment and labor productivity.")

from statsmodels.tsa.stattools import adfuller

# Augmented Dickey-Fuller test on the employment levels (NaNs removed first)
employment_level_obs = merged_data['value'].dropna()
adf_test_employment = adfuller(employment_level_obs)
employment_adf_stat, employment_adf_pvalue = adf_test_employment[:2]
print("ADF Statistic for Manufacturing Employment:", employment_adf_stat)
print("p-value for Manufacturing Employment:", employment_adf_pvalue)

# Same test on the productivity index levels
productivity_level_obs = merged_data['MFGOPH'].dropna()
adf_test_productivity = adfuller(productivity_level_obs)
productivity_adf_stat, productivity_adf_pvalue = adf_test_productivity[:2]
print("ADF Statistic for Manufacturing Labor Productivity:", productivity_adf_stat)
print("p-value for Manufacturing Labor Productivity:", productivity_adf_pvalue)

# First-order differences of both series. The first row of each is NaN by
# construction; calling .dropna() before assigning would not help, because
# column assignment realigns on the index and restores the NaN.
merged_data['employment_diff'] = merged_data['value'].diff()
merged_data['productivity_diff'] = merged_data['MFGOPH'].diff()

# Drop only the rows where a differenced value is missing. A blanket dropna()
# would also discard rows that have NaN in any unrelated column pulled in by
# the SELECT * query.
merged_data.dropna(subset=['employment_diff', 'productivity_diff'], inplace=True)

# Display the first few rows of the differenced data
print(merged_data[['employment_diff', 'productivity_diff']].head())

from statsmodels.tsa.stattools import adfuller

# Augmented Dickey-Fuller test on first-differenced employment
adf_test_employment_diff = adfuller(merged_data['employment_diff'])
employment_diff_stat, employment_diff_pvalue = adf_test_employment_diff[:2]
print("ADF Statistic for differenced Manufacturing Employment:", employment_diff_stat)
print("p-value for differenced Manufacturing Employment:", employment_diff_pvalue)

# Augmented Dickey-Fuller test on first-differenced productivity
adf_test_productivity_diff = adfuller(merged_data['productivity_diff'])
productivity_diff_stat, productivity_diff_pvalue = adf_test_productivity_diff[:2]
print("ADF Statistic for differenced Manufacturing Labor Productivity:", productivity_diff_stat)
print("p-value for differenced Manufacturing Labor Productivity:", productivity_diff_pvalue)

# Second-order differences: difference the first-differenced columns once more.
# No .dropna() before assigning - column assignment realigns on the index, so
# the leading NaN would be restored anyway. The NaN rows are removed at the
# point of use below instead.
merged_data['employment_diff_2'] = merged_data['employment_diff'].diff()
merged_data['productivity_diff_2'] = merged_data['productivity_diff'].diff()

# Display the first few rows of the differenced data (the first row is NaN)
print(merged_data[['employment_diff_2', 'productivity_diff_2']].head())

# ADF test for second-order differenced manufacturing employment
adf_test_employment_diff_2 = adfuller(merged_data['employment_diff_2'].dropna())
print("ADF Statistic for second-order differenced Manufacturing Employment:", adf_test_employment_diff_2[0])
print("p-value for second-order differenced Manufacturing Employment:", adf_test_employment_diff_2[1])

# ADF test for second-order differenced manufacturing labor productivity
adf_test_productivity_diff_2 = adfuller(merged_data['productivity_diff_2'].dropna())
print("ADF Statistic for second-order differenced Manufacturing Labor Productivity:", adf_test_productivity_diff_2[0])
print("p-value for second-order differenced Manufacturing Labor Productivity:", adf_test_productivity_diff_2[1])

from statsmodels.tsa.stattools import grangercausalitytests

# Granger causality test on the second-differenced series.
# The test takes a two-column frame: column one is the dependent variable and
# column two the candidate cause. With the column order used here it asks
# whether labor productivity Granger-causes employment.

# Largest lag order to test
max_lag = 4

# Two-column input with incomplete rows removed
gc_columns = ['employment_diff_2', 'productivity_diff_2']
gc_data = merged_data[gc_columns].dropna()

# Run the test, printing the results as it goes
granger_test_result = grangercausalitytests(gc_data, maxlag=max_lag, verbose=True)

from statsmodels.tsa.api import VAR


# VAR input: the two second-differenced series with incomplete rows removed
var_columns = ['employment_diff_2', 'productivity_diff_2']
var_data = merged_data[var_columns].dropna()

# Build the model and fit it with maxlags=3
model = VAR(var_data)
var_result = model.fit(maxlags=3)

# Print the fitted model's summary
print(var_result.summary())

# Report whether the fitted VAR is stable
stability_flag = var_result.is_stable()
print("Is the model stable? ", stability_flag)

# Impulse response functions, plotted non-orthogonalised
irf = var_result.irf()
irf.plot(orth=False)

from statsmodels.tsa.api import VAR

# Fit the VAR model on the second-differenced series (incomplete rows removed)
model = VAR(merged_data[['employment_diff_2', 'productivity_diff_2']].dropna())
model_fitted = model.fit(maxlags=3)  # Use the appropriate number of lags based on your previous analysis

# Forecast error variance decomposition over 10 periods (adjust as needed)
fevd = model_fitted.fevd(10)

# FEVD.summary() prints the decomposition tables itself and returns None, so
# its result is not bound to a name.
fevd.summary()

import matplotlib.pyplot as plt

# Convert the FEVD results to one DataFrame per equation.
# fevd.decomp is laid out as (equation, horizon, shock), so indexing the FIRST
# axis gives the horizon-by-shock table for one equation. Slicing the second
# axis (decomp[:, 0, :]) would instead pick a single horizon across equations
# and plot only two points.
fevd_columns = ['employment_diff_2', 'productivity_diff_2']
fevd_df_employment = pd.DataFrame(fevd.decomp[0], columns=fevd_columns)
fevd_df_productivity = pd.DataFrame(fevd.decomp[1], columns=fevd_columns)

# Two stacked-area panels sharing the horizon axis
fig, axs = plt.subplots(2, 1, figsize=(10, 8), sharex=True)

# Stacked area plot for employment_diff_2: share of its forecast error
# variance attributed to each shock at every horizon
axs[0].stackplot(fevd_df_employment.index, fevd_df_employment.T, labels=['Employment_diff_2', 'Productivity_diff_2'], colors=['#1f77b4', '#ff7f0e'])
axs[0].set_title('FEVD of Employment (employment_diff_2)')
axs[0].set_ylabel('Variance Explained')
axs[0].legend(loc='upper right')

# Stacked area plot for productivity_diff_2
axs[1].stackplot(fevd_df_productivity.index, fevd_df_productivity.T, labels=['Employment_diff_2', 'Productivity_diff_2'], colors=['#1f77b4', '#ff7f0e'])
axs[1].set_title('FEVD of Productivity (productivity_diff_2)')
axs[1].set_xlabel('Lags')
axs[1].set_ylabel('Variance Explained')
axs[1].legend(loc='upper right')

plt.tight_layout()
plt.show()

import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec

# Summary dashboard: trend chart, embedded IRF image, key metrics and a VAR
# summary panel on a dark background.

# Define colors and background settings
background_color = '#000000'
text_color = '#FFFFFF'
highlight_color = '#FF00FF'  # Magenta
secondary_color = '#00FFFF'  # Cyan

# Create the figure and set its background color
fig = plt.figure(figsize=(14, 10), facecolor=background_color)
gs = GridSpec(3, 2, figure=fig, height_ratios=[3, 1, 1])

# Title
fig.suptitle('Manufacturing Employment and Productivity Analysis', fontsize=24, color=text_color, weight='bold')

# Manufacturing Employment vs Labor Productivity plot (top left)
ax1 = fig.add_subplot(gs[0, 0])
ax1.set_facecolor(background_color)
ax1.plot(merged_data.index, merged_data['value'], color=secondary_color, label='Employment (Thousands of Persons)')
ax1.set_ylabel('Employment (Thousands of Persons)', color=secondary_color)
ax1.tick_params(axis='y', labelcolor=secondary_color)
# Without this the x tick labels keep their default dark colour and vanish
# against the black background.
ax1.tick_params(axis='x', colors=text_color)

ax2 = ax1.twinx()
ax2.plot(merged_data.index, merged_data['MFGOPH'], color=highlight_color, label='Labor Productivity Index')
ax2.set_ylabel('Labor Productivity Index', color=highlight_color)
ax2.tick_params(axis='y', labelcolor=highlight_color)

ax1.set_title('Trends in Manufacturing Employment and Labor Productivity', color=text_color)
ax1.set_xlabel('Year', color=text_color)

# IRF plot - drawn on its own figure, then embedded as an image because
# irf.plot() cannot draw into an existing axes
irf = var_result.irf()

# Customize the IRF plot colors
fig_irf = irf.plot(orth=False, subplot_params={'facecolor': background_color})
for ax in fig_irf.axes:
    for line in ax.get_lines():
        line.set_color(secondary_color)  # Set IRF lines to cyan
    ax.set_facecolor(background_color)
    for side in ('top', 'bottom', 'left', 'right'):
        ax.spines[side].set_color(text_color)
    ax.xaxis.label.set_color(text_color)
    ax.yaxis.label.set_color(text_color)
    ax.tick_params(axis='x', colors=text_color)
    ax.tick_params(axis='y', colors=text_color)

# Save the IRF plot, then close the temporary figure so plt.show() does not
# display it a second time as a separate window
fig_irf.savefig('irf_plot.png', facecolor=background_color)
plt.close(fig_irf)
img = plt.imread('irf_plot.png')

# Display the IRF plot as an image within the main figure (top right).
# The title is set on this same axes; adding a second subplot at gs[0, 1]
# just to hold the title would stack two axes on one grid cell.
ax_irf = fig.add_subplot(gs[0, 1])
ax_irf.imshow(img)
ax_irf.set_title('Impulse Response Functions (IRF)', fontsize=14, color=text_color, weight='bold')
ax_irf.axis('off')  # Turn off axis for the image

# Key Metrics (Bottom Left) - rendered from the values computed earlier in the
# script so the panel cannot drift out of sync with the data
ax3 = fig.add_subplot(gs[1, 0])
ax3.set_facecolor(background_color)
ax3.text(0, 0.9, 'Key Metrics', fontsize=18, color=text_color, weight='bold')
ax3.text(0, 0.7, f'Correlation Coefficient: {correlation:.2f}', fontsize=14, color=highlight_color)
ax3.text(0, 0.55, f'T-Statistic: {t_stat:.2f}', fontsize=14, color=highlight_color)
ax3.text(0, 0.4, f'P-Value: {p_value:.2e}', fontsize=14, color=highlight_color)
ax3.axis('off')  # Hide the axis

# VAR Model Summary (Bottom Right)
# NOTE(review): these significance statements are static text, not read from
# var_result - re-check them against the printed VAR summary whenever the
# data or lag order changes.
ax4 = fig.add_subplot(gs[1, 1])
ax4.set_facecolor(background_color)
ax4.text(0, 0.9, 'VAR Model Summary', fontsize=18, color=text_color, weight='bold')
ax4.text(0, 0.7, 'L1 Employment to Productivity: p < 0.05', fontsize=12, color=highlight_color)
ax4.text(0, 0.55, 'L1 Productivity to Employment: p < 0.05', fontsize=12, color=highlight_color)
ax4.text(0, 0.4, 'L2 Employment to Productivity: p > 0.05', fontsize=12, color=highlight_color)
ax4.text(0, 0.25, 'L2 Productivity to Employment: p > 0.05', fontsize=12, color=highlight_color)
ax4.axis('off')  # Hide the axis

# Adjust layout on the dashboard figure explicitly; plt.tight_layout() acts on
# the "current" figure, which need not be this one after irf.plot().
fig.tight_layout(rect=[0, 0, 1, 0.92])
plt.show()