# Compare the AHEMAN series (hourly earnings, pulled from BigQuery) with the
# CPIAUCSL series (consumer price index, pulled from FRED): merge them on
# date, plot both over time, compute their Pearson correlation, fit an OLS
# regression of CPIAUCSL on AHEMAN, and render a 2x2 summary dashboard.
#
# Prerequisite (run once in a shell, or as `%pip install ...` in a notebook):
#     pip install mplcyberpunk

from datetime import datetime

import matplotlib.pyplot as plt
import mplcyberpunk  # must be imported before plt.style.use("cyberpunk")
import pandas as pd
import pandas_datareader.data as web
import seaborn as sns  # noqa: F401  (kept: imported by the original script)
import statsmodels.api as sm
from google.cloud import bigquery
from IPython.display import HTML  # noqa: F401  (kept: imported by the original script)

# ---------------------------------------------------------------------------
# 1. Load the AHEMAN series from BigQuery
# ---------------------------------------------------------------------------
client = bigquery.Client(project='fred-employment-initial-test')

# Your query string
query = """
SELECT *
FROM `fred-employment-initial-test.fred_data.fred_data_withnotes`
WHERE series_id IN ('AHEMAN')
"""

# Run the query and convert the results to a pandas DataFrame
query_job = client.query(query)
data = query_job.to_dataframe()

# Display the first few rows
print(data.head())

# ---------------------------------------------------------------------------
# 2. Load the CPIAUCSL series from FRED
# ---------------------------------------------------------------------------
# Set the date range
start = datetime(1939, 1, 1)
end = datetime(2024, 1, 1)

# Fetch CPI data from FRED
cpi_data = web.DataReader('CPIAUCSL', 'fred', start, end)

# Display the first few rows to verify
print(cpi_data.head())

# ---------------------------------------------------------------------------
# 3. Normalise the date keys and merge
# ---------------------------------------------------------------------------
# The FRED reader returns the observation dates as the frame's index rather
# than as a column, so promote it to a real 'DATE' column before it is read
# and used as a merge key (indexing cpi_data['DATE'] directly raises KeyError).
if 'DATE' not in cpi_data.columns:
    cpi_data = cpi_data.rename_axis('DATE').reset_index()

# Remove timezone information so both keys are comparable naive datetimes
cpi_data['DATE'] = pd.to_datetime(cpi_data['DATE']).dt.tz_localize(None)
data['date'] = pd.to_datetime(data['date']).dt.tz_localize(None)

# Inner-join the two series on their date columns
merged_data = pd.merge(
    data,
    cpi_data,
    left_on='date',
    right_on='DATE',
    suffixes=('_AHEMAN', '_CPIAUCSL'),
)

# The query has no ORDER BY, so rows may arrive in any order; sort
# chronologically so the line plots against 'date' do not zig-zag.
merged_data = merged_data.sort_values('date').reset_index(drop=True)

# Display the merged data
print(merged_data.head())
print(merged_data.columns)

# ---------------------------------------------------------------------------
# 4. Dual-axis time-series plot (cyberpunk theme)
# ---------------------------------------------------------------------------
plt.style.use("cyberpunk")

fig, ax1 = plt.subplots(figsize=(10, 6))

# AHEMAN on the left axis
ax1.plot(merged_data['date'], merged_data['value'], color='blue', label='AHEMAN')
ax1.set_xlabel('Date', fontsize=14, color='blue')
ax1.set_ylabel('AHEMAN (Dollars per Hour)', color='blue')
ax1.tick_params(axis='y', labelcolor='blue')

# CPIAUCSL on a second y-axis sharing the same x-axis
ax2 = ax1.twinx()
ax2.plot(merged_data['date'], merged_data['CPIAUCSL'], color='orange', label='CPIAUCSL')
ax2.set_ylabel('CPIAUCSL', color='orange')
ax2.tick_params(axis='y', labelcolor='orange')

# Set the title and show the plot with cyberpunk effects
plt.title('AHEMAN vs. CPIAUCSL Over Time')
mplcyberpunk.add_glow_effects()
plt.tight_layout()  # Adjust layout to prevent overlap
plt.show()

print(merged_data.columns)

# ---------------------------------------------------------------------------
# 5. Pearson correlation and OLS regression
# ---------------------------------------------------------------------------
correlation = merged_data['value'].corr(merged_data['CPIAUCSL'])
print(f'Pearson Correlation Coefficient: {correlation}')

# NOTE(review): assumes the 'value' column is already numeric -- confirm
# against the BigQuery table schema.
X = merged_data['value']     # Independent variable (Average Hourly Earnings)
y = merged_data['CPIAUCSL']  # Dependent variable (Consumer Price Index)

# Add a constant (intercept term) to the independent variable
X = sm.add_constant(X)

# Run the OLS regression and print its summary
model = sm.OLS(y, X).fit()
print(model.summary())

# In-sample predictions; reused below for the dashboard
y_pred = model.predict(X)

# Plot the data points and the regression line
plt.figure(figsize=(10, 6))
plt.scatter(merged_data['value'], merged_data['CPIAUCSL'], color='blue', label='Actual Data')
plt.plot(merged_data['value'], y_pred, color='red', label='Regression Line')
plt.xlabel('Average Hourly Earnings (AHEMAN)')
plt.ylabel('Consumer Price Index (CPIAUCSL)')
plt.title('Scatter Plot of AHEMAN vs. CPIAUCSL with Regression Line')
plt.legend()
plt.show()

# ---------------------------------------------------------------------------
# 6. 2x2 dashboard
# ---------------------------------------------------------------------------
fig, axs = plt.subplots(2, 2, figsize=(18, 14), facecolor='#000000')
plt.subplots_adjust(hspace=0.4, wspace=0.4)

# Plot 1: Time Series - AHEMAN vs CPIAUCSL (Dual Axis)
ax1 = axs[0, 0]
ax1.plot(
    merged_data['date'],
    merged_data['value'],
    color='#00FFFF',
    label='AHEMAN (Hourly Earnings)',
    zorder=3,
)
ax1.set_xlabel('Date', fontsize=18, color='white')
ax1.set_ylabel('AHEMAN (Dollars per Hour)', fontsize=18, color='#00FFFF')
ax1.tick_params(axis='y', labelsize=14, colors='#00FFFF')
ax1.tick_params(axis='x', labelsize=14, colors='white')
ax1.set_facecolor('#000000')
ax1.spines['left'].set_color('#00FFFF')
ax1.spines['bottom'].set_color('white')

# Second y-axis for CPIAUCSL
ax2 = ax1.twinx()
ax2.plot(
    merged_data['date'],
    merged_data['CPIAUCSL'],
    color='#FF00FF',
    label='CPIAUCSL (Consumer Price Index)',
    zorder=3,
)
ax2.set_ylabel('CPIAUCSL (CPI)', fontsize=18, color='#FF00FF')
ax2.tick_params(axis='y', labelsize=14, colors='#FF00FF')
ax2.set_ylim(0, 350)  # Adjusted to fit the CPI data range
ax2.spines['right'].set_color('#FF00FF')

# Titles and layout adjustments
ax1.set_title('AHEMAN vs CPIAUCSL Over Time', fontsize=22, color='white', pad=35)
ax1.spines['top'].set_color('white')
ax2.spines['top'].set_color('white')

# Plot 2: Scatter Plot with Regression Line
scatter_ax = axs[0, 1]
scatter_ax.scatter(
    merged_data['value'],
    merged_data['CPIAUCSL'],
    color='#FF00FF',
    label='Actual Data',
    edgecolor='white',
)
scatter_ax.plot(
    merged_data['value'],
    y_pred,
    color='#00FFFF',
    label='Regression Line',
    zorder=4,
)
scatter_ax.set_facecolor('#000000')
scatter_ax.set_xlabel('AHEMAN (Hourly Earnings)', fontsize=18, color='white')
scatter_ax.set_ylabel('CPIAUCSL (Consumer Price Index)', fontsize=18, color='white')
scatter_ax.tick_params(axis='both', labelsize=14, colors='white')
for side in ('left', 'bottom', 'top', 'right'):
    scatter_ax.spines[side].set_color('white')
scatter_ax.set_title('Scatter Plot of AHEMAN vs CPIAUCSL', fontsize=22, color='white', pad=35)
scatter_ax.legend(
    loc='upper left',
    fontsize=14,
    facecolor='#181818',
    edgecolor='white',
    labelcolor=['#00FFFF', '#FF00FF'],
)

# Plot 3: Pearson Correlation Coefficient (text-only panel)
corr_ax = axs[1, 0]
corr_ax.text(
    0.5,
    0.5,
    f'Pearson Correlation Coefficient (Adjusted Data): {correlation:.2f}',
    transform=corr_ax.transAxes,
    fontsize=20,
    verticalalignment='center',
    horizontalalignment='center',
    bbox=dict(boxstyle='round', facecolor='#000000', edgecolor='#FF00FF'),
)
corr_ax.axis('off')

# Plot 4: Linear Regression Summary (text-only panel)
summary_text = model.summary().as_text()
summary_ax = axs[1, 1]
summary_ax.text(
    0.5,
    0.5,
    f'Linear Regression Summary:\n\n{summary_text}',
    transform=summary_ax.transAxes,
    fontsize=10,
    color='white',
    horizontalalignment='center',
    verticalalignment='center',
    bbox=dict(facecolor='#000000', edgecolor='#FF00FF', boxstyle='round,pad=0.5'),
)
summary_ax.axis('off')

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the complete dashboard
plt.show()