"""Compare growth rates of the USSERV and USGOOD series stored in BigQuery.

The script downloads both series, computes period-over-period growth rates,
pairs the two series by date, and then reports means, a paired t-test,
correlations (raw, standardized, and after outlier removal) and several plots.

All work happens in ``main()``, which only runs when the file is executed as
a script; importing the module just makes the helper functions available.
"""

import numpy as np
import pandas as pd
from scipy import stats

PROJECT_ID = 'fred-employment-initial-test'
SERVICE_SERIES_ID = 'USSERV'
GOODS_SERIES_ID = 'USGOOD'

# Your query string
QUERY = """
    SELECT *
    FROM `fred-employment-initial-test.fred_data.fred_data_withnotes`
    WHERE series_id IN ('USSERV' , 'USGOOD')
"""

SERVICE_COLOR = '#00FFFF'
GOODS_COLOR = '#FF00FF'


def fetch_data(project=PROJECT_ID, query=QUERY):
    """Run ``query`` against BigQuery and return the result as a DataFrame.

    Args:
        project: Project id passed to ``bigquery.Client``.
        query: SQL text to execute.

    Returns:
        The query result converted with ``to_dataframe()``.
    """
    # Imported lazily so the pure-pandas helpers below can be imported (and
    # tested) on machines without the BigQuery client library.
    from google.cloud import bigquery

    client = bigquery.Client(project=project)
    return client.query(query).to_dataframe()


def compute_growth(data, series_id):
    """Return the rows of one series, date-ordered, with a ``growth_rate`` column.

    Args:
        data: DataFrame with at least ``series_id``, ``date`` and ``value``.
        series_id: Value of ``series_id`` to select.

    Returns:
        A copy of the selected rows sorted by ``date``. ``growth_rate`` is the
        fractional change of ``value`` versus the previous row; rows without
        a finite growth rate (the first row, division by zero) are dropped.
    """
    series = data[data['series_id'] == series_id].copy()
    # The query has no ORDER BY, so row order is not guaranteed; pct_change
    # is only meaningful on chronologically ordered observations.
    series = series.sort_values('date')
    growth = series['value'].pct_change()
    # A zero previous value yields +/-inf, which would poison every statistic.
    series['growth_rate'] = growth.replace([np.inf, -np.inf], np.nan)
    return series.dropna(subset=['growth_rate'])


def align_growth(service_data, goods_data):
    """Pair the two growth series by date.

    Args:
        service_data: Output of ``compute_growth`` for the service series.
        goods_data: Output of ``compute_growth`` for the goods series.

    Returns:
        DataFrame with columns ``date``, ``service`` and ``goods`` holding
        only the dates present in both inputs, sorted by date.
    """
    service = service_data[['date', 'growth_rate']].rename(
        columns={'growth_rate': 'service'})
    goods = goods_data[['date', 'growth_rate']].rename(
        columns={'growth_rate': 'goods'})
    # Join on the date, not on the row index: the two series come from
    # different rows of the same frame, so their indexes never overlap.
    paired = service.merge(goods, on='date', how='inner')
    return paired.sort_values('date').reset_index(drop=True)


def standardize(series):
    """Return ``series`` rescaled to mean 0 and standard deviation 1."""
    return (series - series.mean()) / series.std()


def remove_outliers(aligned, n_std=3.0):
    """Drop observations where either growth rate is an outlier.

    Args:
        aligned: Output of ``align_growth``.
        n_std: Half-width of the accepted band, in standard deviations
            around each column's mean.

    Returns:
        The rows of ``aligned`` where both ``service`` and ``goods`` lie
        strictly inside their band. Rows are removed as a whole so the two
        columns stay paired by date.
    """
    keep = pd.Series(True, index=aligned.index)
    for column in ('service', 'goods'):
        values = aligned[column]
        center = values.mean()
        half_width = n_std * values.std()
        keep &= (values > center - half_width) & (values < center + half_width)
    return aligned[keep].reset_index(drop=True)


def plot_dual_axis(service_data, goods_data):
    """Plot both growth rates over time on twin y-axes with equal limits."""
    import matplotlib.pyplot as plt

    fig, ax1 = plt.subplots(figsize=(10, 6))

    # Plot the service sector growth rate on the left y-axis
    ax1.set_xlabel('Date')
    ax1.set_ylabel('Service Sector Growth Rate (%)', color='tab:blue')
    ax1.plot(service_data['date'], service_data['growth_rate'] * 100,
             color='tab:blue', label='Service Sector Growth Rate')
    ax1.tick_params(axis='y', labelcolor='tab:blue')

    # Create a second y-axis for the goods-producing sector growth rate
    ax2 = ax1.twinx()
    ax2.set_ylabel('Goods-Producing Sector Growth Rate (%)', color='tab:orange')
    ax2.plot(goods_data['date'], goods_data['growth_rate'] * 100,
             color='tab:orange', label='Goods-Producing Sector Growth Rate')
    ax2.tick_params(axis='y', labelcolor='tab:orange')

    # Same limits on both axes so the two curves are directly comparable.
    ax1.set_ylim([-12, 12])  # You can adjust the range based on your data
    ax2.set_ylim([-12, 12])

    ax1.set_title('Synchronized Growth Rates of Service Sector vs. '
                  'Goods-Producing Sector (in %)')
    fig.tight_layout()
    plt.show()


def plot_scatter(aligned):
    """Scatter the date-paired growth rates (as fractions) against each other."""
    import matplotlib.pyplot as plt

    plt.figure(figsize=(10, 6))
    plt.scatter(aligned['service'], aligned['goods'], alpha=0.5)
    plt.title('Scatter Plot of Service Sector Growth vs. Goods-Producing Sector Growth')
    plt.xlabel('Service Sector Growth')
    plt.ylabel('Goods-Producing Sector Growth')
    plt.xlim([-0.05, 0.05])  # Adjust these limits based on the data
    plt.ylim([-0.05, 0.05])
    plt.show()


def plot_dashboard(aligned, t_stat, p_value):
    """Draw the dark-themed 2x2 summary figure.

    Args:
        aligned: Output of ``align_growth``.
        t_stat: Paired t-test statistic to display.
        p_value: Paired t-test p-value to display.
    """
    import matplotlib.pyplot as plt
    from matplotlib.ticker import PercentFormatter

    dates = aligned['date']
    service_growth = aligned['service'] * 100  # converting to percentage
    goods_growth = aligned['goods'] * 100  # converting to percentage

    dark_theme = {
        'axes.facecolor': 'black',
        'savefig.facecolor': 'black',
        'text.color': 'white',
    }
    # The theme must be active *before* the axes are created, and is scoped
    # with rc_context so it does not leak into later figures.
    with plt.rc_context(dark_theme):
        fig, axs = plt.subplots(2, 2, figsize=(14, 10), facecolor='black')
        for ax in axs.flat:
            ax.tick_params(axis='x', colors='white')
            ax.tick_params(axis='y', colors='white')
            for spine in ax.spines.values():
                spine.set_edgecolor('white')

        # 1. Line plot of both growth rates on their shared dates
        ax_lines = axs[0, 0]
        ax_lines.plot(dates, service_growth,
                      label="Service Sector Growth Rate", color=SERVICE_COLOR)
        ax_lines.plot(dates, goods_growth,
                      label="Goods-Producing Sector Growth Rate", color=GOODS_COLOR)
        ax_lines.set_xlabel('Date', color='white')
        # Both series share this axis, so the label must not name only one.
        ax_lines.set_ylabel('Growth Rate (%)', color='white')
        ax_lines.set_title('Synchronized Growth Rates of Service Sector vs. '
                           'Goods-Producing Sector (%)', color='white')
        ax_lines.legend(facecolor='black', edgecolor='white')

        # 2. Bar chart of mean growth rates
        ax_bars = axs[1, 0]
        ax_bars.bar(['Service Sector', 'Goods-Producing Sector'],
                    [service_growth.mean(), goods_growth.mean()],
                    color=[SERVICE_COLOR, GOODS_COLOR])
        ax_bars.set_title('Mean Growth Rate of Service vs Goods-Producing Sector',
                          color='white')
        ax_bars.set_ylabel('Mean Growth Rate', color='white')
        # Let matplotlib choose limits (negative means stay visible) and
        # format whatever ticks it picks as percentages.
        ax_bars.yaxis.set_major_formatter(PercentFormatter(xmax=100, decimals=2))

        # 3. Box plot of the two distributions
        ax_box = axs[0, 1]
        ax_box.boxplot([service_growth, goods_growth], patch_artist=True,
                       boxprops=dict(facecolor=SERVICE_COLOR, color=GOODS_COLOR),
                       whiskerprops=dict(color=GOODS_COLOR),
                       capprops=dict(color=GOODS_COLOR),
                       medianprops=dict(color=GOODS_COLOR))
        # Narrow range so the boxes are not flattened by extreme observations.
        ax_box.set_ylim(-1, 1)
        ax_box.set_xticklabels(['Service Sector', 'Goods-Producing Sector'],
                               color='white')
        ax_box.set_title('Distribution of Growth Rates: Service vs '
                         'Goods-Producing Sector', color='white')
        ax_box.set_ylabel('Growth Rate', color='white')

        # 4. Scatter plot (correlation between growth rates)
        ax_scatter = axs[1, 1]
        ax_scatter.scatter(service_growth, goods_growth, alpha=0.5,
                           color=SERVICE_COLOR)
        ax_scatter.set_title('Scatter Plot: Service vs Goods-Producing Sector '
                             'Growth (%)', color='white')
        ax_scatter.set_xlabel('Service Sector Growth (%)', color='white')
        ax_scatter.set_ylabel('Goods-Producing Sector Growth (%)', color='white')

        # T-statistic and p-value in a neon-styled box
        fig.text(0.75, 0.35,
                 f'T-statistic: {t_stat:.4f}\nP-value: {p_value:.4e}',
                 ha='center', va='center', fontsize=14,
                 bbox=dict(facecolor='none', edgecolor=GOODS_COLOR,
                           boxstyle='round,pad=1'),
                 color='white')

        # Adjust layout to fit everything
        plt.tight_layout()
        plt.show()


def main():
    """Fetch the data, print the statistics and show every plot."""
    data = fetch_data()
    print(data.head())

    service_data = compute_growth(data, SERVICE_SERIES_ID)
    goods_data = compute_growth(data, GOODS_SERIES_ID)
    print(service_data.head())
    print(goods_data.head())

    plot_dual_axis(service_data, goods_data)

    # Every paired statistic below needs observations matched by date.
    aligned = align_growth(service_data, goods_data)
    if aligned.empty:
        raise ValueError(
            'No dates are shared by the two series; cannot compare them.')
    service_growth = aligned['service']
    goods_growth = aligned['goods']

    print(f"Mean Service Sector Growth Rate: {service_growth.mean() * 100:.4f}%")
    print(f"Mean Goods-Producing Sector Growth Rate: {goods_growth.mean() * 100:.4f}%")

    # Paired t-test on the date-matched growth rates
    t_stat, p_value = stats.ttest_rel(service_growth, goods_growth)
    print(f"T-statistic: {t_stat}")
    print(f"P-value: {p_value}")

    correlation = service_growth.corr(goods_growth)
    print(f"Correlation Coefficient: {correlation}")

    # Sanity checks on the inputs of the correlation
    print(f"Remaining NaN in service sector growth: {service_growth.isna().sum()}")
    print(f"Remaining NaN in goods sector growth: {goods_growth.isna().sum()}")
    print(f"Is service sector growth constant? {service_growth.nunique() == 1}")
    print(f"Is goods sector growth constant? {goods_growth.nunique() == 1}")

    print("Service Sector Growth Summary:")
    print(service_growth.describe())
    print("\nGoods Sector Growth Summary:")
    print(goods_growth.describe())

    # Pearson correlation is unchanged by standardization; this is kept as a
    # cross-check of the value printed above.
    correlation = standardize(service_growth).corr(standardize(goods_growth))
    print(f"Correlation Coefficient: {correlation}")

    plot_scatter(aligned)

    filtered = remove_outliers(aligned)
    correlation = filtered['service'].corr(filtered['goods'])
    print(f"Correlation Coefficient after outlier removal: {correlation}")

    plot_dashboard(aligned, t_stat, p_value)


if __name__ == "__main__":
    main()