"""Manufacturing employment vs. labor productivity: exploratory time-series analysis.

The script runs top to bottom as a flat notebook-style workflow:

1. Load the MANEMP series from BigQuery and the MFGOPH series from FRED.
2. Align both on a timezone-naive date index and inner-join them.
3. Plot both series on a dual-axis chart.
4. Compute the correlation, a paired t-test and ADF stationarity tests.
5. Difference the series (first and second order) and re-test stationarity.
6. Run Granger causality tests, fit a VAR(3) model, and plot IRFs and FEVD.
7. Render a dark-themed summary dashboard.

Side effects: prints intermediate results, shows matplotlib figures and
writes ``irf_plot.png`` to the current working directory.
"""

from datetime import datetime

import matplotlib.pyplot as plt
import pandas as pd
import pandas_datareader.data as web
from google.cloud import bigquery
from matplotlib.gridspec import GridSpec
from scipy import stats
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller, grangercausalitytests

# Significance level used for every "p < alpha" decision in this script.
ALPHA = 0.05

# ---------------------------------------------------------------------------
# 1. Load the data
# ---------------------------------------------------------------------------
client = bigquery.Client(project='fred-employment-initial-test')

# Your query string
query = """
SELECT *
FROM `fred-employment-initial-test.fred_data.fred_data_withnotes`
WHERE series_id IN ('MANEMP')
"""

# Run the query and convert the results to a pandas DataFrame
query_job = client.query(query)
data = query_job.to_dataframe()
print(data.head())

# Define the date range
start = datetime(1939, 1, 1)
end = datetime(2024, 1, 1)

# Fetch the Manufacturing Productivity data (MFGOPH) from FRED
productivity_data = web.DataReader('MFGOPH', 'fred', start, end)
print(productivity_data.head())

# ---------------------------------------------------------------------------
# 2. Align and merge
# ---------------------------------------------------------------------------
# Make sure both date axes are real datetimes before joining on them.
data['date'] = pd.to_datetime(data['date'])
productivity_data.index = pd.to_datetime(productivity_data.index)

# Use the date as the index for both DataFrames
data.set_index('date', inplace=True)
productivity_data.index.name = 'date'

# Strip timezone info so both indexes are tz-naive and therefore joinable.
data.index = data.index.tz_localize(None)
productivity_data.index = productivity_data.index.tz_localize(None)

# Keep only the dates present in both sources.
merged_data = data.join(productivity_data, how='inner')
print(merged_data.head())

# ---------------------------------------------------------------------------
# 3. Dual-axis trend plot
# ---------------------------------------------------------------------------
fig, ax1 = plt.subplots(figsize=(12, 6))

color = 'tab:blue'
ax1.set_xlabel('Date')
ax1.set_ylabel('Manufacturing Employment (Thousands of Persons)', color=color)
ax1.plot(merged_data.index, merged_data['value'], color=color)
ax1.tick_params(axis='y', labelcolor=color)
ax1.set_ylim(8000, 20000)  # Set the y-axis limits for employment

ax2 = ax1.twinx()
color = 'tab:orange'
ax2.set_ylabel('Manufacturing Labor Productivity Index', color=color)
ax2.plot(merged_data.index, merged_data['MFGOPH'], color=color)
ax2.tick_params(axis='y', labelcolor=color)
ax2.set_ylim(30, 110)  # Set the y-axis limits for labor productivity

# Set the title before tight_layout so the layout pass reserves room for it.
plt.title('Trends in Manufacturing Employment and Labor Productivity')
fig.tight_layout()
plt.show()

# ---------------------------------------------------------------------------
# 4. Correlation, paired t-test and ADF tests on the levels
# ---------------------------------------------------------------------------
correlation = merged_data['value'].corr(merged_data['MFGOPH'])
print(f"Correlation coefficient between manufacturing employment and labor productivity: {correlation}")

# NOTE(review): the two series are in different units (the plot labels them
# as thousands of persons vs. an index), so a paired t-test on the raw levels
# mostly reflects the scale difference -- confirm this test is intended.
t_stat, p_value = stats.ttest_rel(merged_data['value'], merged_data['MFGOPH'])
print(f"T-statistic: {t_stat}")
print(f"P-value: {p_value}")

if p_value < ALPHA:
    print("Reject the null hypothesis: There is a significant difference between manufacturing employment and labor productivity.")
else:
    print("Fail to reject the null hypothesis: No significant difference between manufacturing employment and labor productivity.")

# Augmented Dickey-Fuller test for stationarity on manufacturing employment
adf_test_employment = adfuller(merged_data['value'].dropna())
print("ADF Statistic for Manufacturing Employment:", adf_test_employment[0])
print("p-value for Manufacturing Employment:", adf_test_employment[1])

# Augmented Dickey-Fuller test for stationarity on manufacturing labor productivity
adf_test_productivity = adfuller(merged_data['MFGOPH'].dropna())
print("ADF Statistic for Manufacturing Labor Productivity:", adf_test_productivity[0])
print("p-value for Manufacturing Labor Productivity:", adf_test_productivity[1])

# ---------------------------------------------------------------------------
# 5. Differencing and ADF tests on the differenced series
# ---------------------------------------------------------------------------
# Column assignment aligns on the index, so the leading NaN produced by
# diff() is kept in the column; it is removed by the dropna() call below.
merged_data['employment_diff'] = merged_data['value'].diff()
merged_data['productivity_diff'] = merged_data['MFGOPH'].diff()

# Drop rows only where the analysis columns are missing. The query selects
# every column of the source table, and a NaN in an unrelated column must not
# remove observations from the analysis.
merged_data.dropna(
    subset=['value', 'MFGOPH', 'employment_diff', 'productivity_diff'],
    inplace=True,
)
print(merged_data[['employment_diff', 'productivity_diff']].head())

# ADF test for differenced manufacturing employment
adf_test_employment_diff = adfuller(merged_data['employment_diff'])
print("ADF Statistic for differenced Manufacturing Employment:", adf_test_employment_diff[0])
print("p-value for differenced Manufacturing Employment:", adf_test_employment_diff[1])

# ADF test for differenced manufacturing labor productivity
adf_test_productivity_diff = adfuller(merged_data['productivity_diff'])
print("ADF Statistic for differenced Manufacturing Labor Productivity:", adf_test_productivity_diff[0])
print("p-value for differenced Manufacturing Labor Productivity:", adf_test_productivity_diff[1])

# Second-order differencing (the first row of each new column is NaN and is
# dropped wherever these columns are consumed below).
merged_data['employment_diff_2'] = merged_data['employment_diff'].diff()
merged_data['productivity_diff_2'] = merged_data['productivity_diff'].diff()
print(merged_data[['employment_diff_2', 'productivity_diff_2']].head())

# ADF test for second-order differenced manufacturing employment
adf_test_employment_diff_2 = adfuller(merged_data['employment_diff_2'].dropna())
print("ADF Statistic for second-order differenced Manufacturing Employment:", adf_test_employment_diff_2[0])
print("p-value for second-order differenced Manufacturing Employment:", adf_test_employment_diff_2[1])

# ADF test for second-order differenced manufacturing labor productivity
adf_test_productivity_diff_2 = adfuller(merged_data['productivity_diff_2'].dropna())
print("ADF Statistic for second-order differenced Manufacturing Labor Productivity:", adf_test_productivity_diff_2[0])
print("p-value for second-order differenced Manufacturing Labor Productivity:", adf_test_productivity_diff_2[1])

# ---------------------------------------------------------------------------
# 6. Granger causality, VAR, IRF and FEVD
# ---------------------------------------------------------------------------
# The Granger causality test expects two columns: the first is the dependent
# variable and the second is the candidate cause. Here: does labor
# productivity Granger-cause employment?
gc_data = merged_data[['employment_diff_2', 'productivity_diff_2']].dropna()

# max_lag is the maximum number of lags to test for causality.
# `verbose` is deliberately not passed: it is deprecated in recent statsmodels
# and results are printed by default.
max_lag = 4
granger_test_result = grangercausalitytests(gc_data, max_lag)

# Prepare the data for the VAR model and fit it with 3 lags
var_data = merged_data[['employment_diff_2', 'productivity_diff_2']].dropna()
model = VAR(var_data)
var_result = model.fit(maxlags=3)
print(var_result.summary())

# Check the stability of the VAR model
print("Is the model stable? ", var_result.is_stable())

# Generate Impulse Response Functions (IRFs)
irf = var_result.irf()
irf.plot(orth=False)

# The FEVD uses the same data and lag order as the VAR above, so reuse that
# fit instead of estimating an identical model a second time.
model_fitted = var_result

# Forecast error variance decomposition over 10 periods
fevd = model_fitted.fevd(10)
fevd.summary()  # prints the decomposition tables

# fevd.decomp is indexed as (equation, horizon, shock): decomp[i] is the
# horizon-by-shock table for equation i, in the column order of var_data.
fevd_columns = ['employment_diff_2', 'productivity_diff_2']
fevd_df_employment = pd.DataFrame(fevd.decomp[0], columns=fevd_columns)
fevd_df_productivity = pd.DataFrame(fevd.decomp[1], columns=fevd_columns)

# Stacked area plots, one panel per equation
fig, axs = plt.subplots(2, 1, figsize=(10, 8), sharex=True)

axs[0].stackplot(
    fevd_df_employment.index,
    fevd_df_employment.T.to_numpy(),
    labels=['Employment_diff_2', 'Productivity_diff_2'],
    colors=['#1f77b4', '#ff7f0e'],
)
axs[0].set_title('FEVD of Employment (employment_diff_2)')
axs[0].set_ylabel('Variance Explained')
axs[0].legend(loc='upper right')

axs[1].stackplot(
    fevd_df_productivity.index,
    fevd_df_productivity.T.to_numpy(),
    labels=['Employment_diff_2', 'Productivity_diff_2'],
    colors=['#1f77b4', '#ff7f0e'],
)
axs[1].set_title('FEVD of Productivity (productivity_diff_2)')
axs[1].set_xlabel('Lags')
axs[1].set_ylabel('Variance Explained')
axs[1].legend(loc='upper right')

plt.tight_layout()
plt.show()

# ---------------------------------------------------------------------------
# 7. Summary dashboard
# ---------------------------------------------------------------------------
# Colors and background settings
background_color = '#000000'
text_color = '#FFFFFF'
highlight_color = '#FF00FF'  # Magenta
secondary_color = '#00FFFF'  # Cyan

# Create the figure and set its background color
fig = plt.figure(figsize=(14, 10), facecolor=background_color)
gs = GridSpec(3, 2, figure=fig, height_ratios=[3, 1, 1])

fig.suptitle('Manufacturing Employment and Productivity Analysis',
             fontsize=24, color=text_color, weight='bold')

# Manufacturing Employment vs Labor Productivity plot (top left)
ax1 = fig.add_subplot(gs[0, 0])
ax1.set_facecolor(background_color)
ax1.plot(merged_data.index, merged_data['value'], color=secondary_color,
         label='Employment (Thousands of Persons)')
ax1.set_ylabel('Employment (Thousands of Persons)', color=secondary_color)
ax1.tick_params(axis='y', labelcolor=secondary_color)
# Default tick colors are dark and would vanish on the black background.
ax1.tick_params(axis='x', colors=text_color)

ax2 = ax1.twinx()
ax2.plot(merged_data.index, merged_data['MFGOPH'], color=highlight_color,
         label='Labor Productivity Index')
ax2.set_ylabel('Labor Productivity Index', color=highlight_color)
ax2.tick_params(axis='y', labelcolor=highlight_color)

ax1.set_title('Trends in Manufacturing Employment and Labor Productivity', color=text_color)
ax1.set_xlabel('Year', color=text_color)

# IRF plot: statsmodels draws it on its own figure, so it is styled there,
# saved to disk and then embedded into the dashboard as an image.
fig_irf = irf.plot(orth=False, subplot_params={'facecolor': background_color})
for ax in fig_irf.axes:
    for line in ax.get_lines():
        line.set_color(secondary_color)  # Set IRF lines to cyan
    ax.set_facecolor(background_color)
    for side in ('top', 'bottom', 'left', 'right'):
        ax.spines[side].set_color(text_color)
    ax.xaxis.label.set_color(text_color)
    ax.yaxis.label.set_color(text_color)
    ax.tick_params(axis='x', colors=text_color)
    ax.tick_params(axis='y', colors=text_color)

# Save the IRF plot and embed it into the main figure
fig_irf.savefig('irf_plot.png', facecolor=background_color)
img = plt.imread('irf_plot.png')

# Display the IRF plot as an image within the main figure (top right). The
# title goes on this same axes rather than on a second axes stacked on top.
ax_irf = fig.add_subplot(gs[0, 1])
ax_irf.imshow(img)
ax_irf.axis('off')  # Turn off axis for the image
ax_irf.set_title('Impulse Response Functions (IRF)', fontsize=14,
                 color=text_color, weight='bold')

# Key Metrics (bottom left): rendered from the values computed in section 4
# so the dashboard cannot drift from the data that was actually loaded.
ax3 = fig.add_subplot(gs[1, 0])
ax3.set_facecolor(background_color)
ax3.text(0, 0.9, 'Key Metrics', fontsize=18, color=text_color, weight='bold')
ax3.text(0, 0.7, f'Correlation Coefficient: {correlation:.2f}', fontsize=14, color=highlight_color)
ax3.text(0, 0.55, f'T-Statistic: {t_stat:.2f}', fontsize=14, color=highlight_color)
ax3.text(0, 0.4, f'P-Value: {p_value:.2e}', fontsize=14, color=highlight_color)
ax3.axis('off')  # Hide the axis

# VAR Model Summary (bottom right): significance of the cross-variable lag
# coefficients, read from the fitted model. "X to Y" is taken to mean the
# coefficient on lagged X in the equation for Y.
# NOTE(review): confirm that reading matches the labels' original intent.
ax4 = fig.add_subplot(gs[1, 1])
ax4.set_facecolor(background_color)
ax4.text(0, 0.9, 'VAR Model Summary', fontsize=18, color=text_color, weight='bold')

var_pvalues = var_result.pvalues  # rows: regressors, columns: equations
summary_rows = [
    ('L1 Employment to Productivity', 'L1.employment_diff_2', 'productivity_diff_2'),
    ('L1 Productivity to Employment', 'L1.productivity_diff_2', 'employment_diff_2'),
    ('L2 Employment to Productivity', 'L2.employment_diff_2', 'productivity_diff_2'),
    ('L2 Productivity to Employment', 'L2.productivity_diff_2', 'employment_diff_2'),
]
row_heights = (0.7, 0.55, 0.4, 0.25)
for (label, regressor, equation), y_pos in zip(summary_rows, row_heights):
    coef_p_value = var_pvalues.loc[regressor, equation]
    relation = '<' if coef_p_value < ALPHA else '>='
    ax4.text(0, y_pos, f'{label}: p {relation} {ALPHA}', fontsize=12, color=highlight_color)
ax4.axis('off')  # Hide the axis

# Adjust layout to ensure there's no overlap
plt.tight_layout(rect=[0, 0, 1, 0.92])
plt.show()