from google.cloud import bigquery
client = bigquery.Client(project='fred-employment-initial-test')

# Fetch every column for the two series analysed below: 'USSERV' is used for
# the service-sector figures and 'USGOOD' for the goods-producing ones.
# ORDER BY makes the row order deterministic; without it the query gives no
# ordering guarantee, and the growth rates computed later are derived from
# consecutive rows.
query = """
SELECT *
FROM `fred-employment-initial-test.fred_data.fred_data_withnotes`
WHERE series_id IN ('USSERV' , 'USGOOD')
ORDER BY series_id, `date`
"""

# Submit the query; the job handle is kept so the result can be fetched below.
query_job = client.query(query)

# Materialise the full result set as a pandas DataFrame.
data = query_job.to_dataframe()

# Quick visual sanity check of the loaded rows.
print(data.head())

def _growth_by_series(frame, series_id):
    """Return the rows of *frame* for *series_id* with a ``growth_rate`` column.

    The rows are sorted by ``date`` before the period-over-period change of
    ``value`` is computed, because ``pct_change`` compares each row with the
    row directly above it. The first row of the series has no predecessor,
    so its NaN growth rate is dropped. Original row labels are preserved.
    """
    subset = frame[frame['series_id'] == series_id].sort_values('date').copy()
    subset['growth_rate'] = subset['value'].pct_change()
    return subset.dropna(subset=['growth_rate'])


# One frame per series, each carrying its own growth-rate column (as a
# fraction, not a percentage).
service_data = _growth_by_series(data, 'USSERV')
goods_data = _growth_by_series(data, 'USGOOD')

# Display the first few rows to verify
print(service_data.head())
print(goods_data.head())

import matplotlib.pyplot as plt

# Line chart of both growth rates (in percent) on twin y-axes that share the
# date axis.
fig, ax1 = plt.subplots(figsize=(10, 6))

# Left y-axis: service sector.
ax1.set_xlabel('Date')
ax1.set_ylabel('Service Sector Growth Rate (%)', color='tab:blue')
service_lines = ax1.plot(service_data['date'], service_data['growth_rate'] * 100,
                         color='tab:blue', label='Service Sector Growth Rate')
ax1.tick_params(axis='y', labelcolor='tab:blue')

# Right y-axis: goods-producing sector.
ax2 = ax1.twinx()
ax2.set_ylabel('Goods-Producing Sector Growth Rate (%)', color='tab:orange')
goods_lines = ax2.plot(goods_data['date'], goods_data['growth_rate'] * 100,
                       color='tab:orange', label='Goods-Producing Sector Growth Rate')
ax2.tick_params(axis='y', labelcolor='tab:orange')

# Give both axes the same limits so the two curves are directly comparable.
# Values outside this window are clipped; widen it if the data requires.
growth_ylim = [-12, 12]
ax1.set_ylim(growth_ylim)
ax2.set_ylim(growth_ylim)

# The lines live on two different axes, so collect their handles by hand to
# show both labels in a single legend (drawn on the top-most axis).
legend_handles = service_lines + goods_lines
ax2.legend(legend_handles, [handle.get_label() for handle in legend_handles],
           loc='upper right')

# Add a title and show the plot
plt.title('Synchronized Growth Rates of Service Sector vs. Goods-Producing Sector (in %)')
fig.tight_layout()
plt.show()


# Average growth rate of each sector, kept as a fraction (scaled to percent
# only when printed).
mean_service_sector_growth, mean_goods_sector_growth = (
    sector_frame['growth_rate'].mean()
    for sector_frame in (service_data, goods_data)
)

# Report both means as percentages with four decimals.
for sector_label, sector_mean in (
    ("Service Sector", mean_service_sector_growth),
    ("Goods-Producing Sector", mean_goods_sector_growth),
):
    print(f"Mean {sector_label} Growth Rate: {sector_mean * 100:.4f}%")


from scipy import stats

# A paired t-test compares the two samples element by element, so each service
# observation must sit next to the goods observation for the same date.
# An inner merge on 'date' builds exactly those pairs and drops any date that
# is present in only one of the series.
ttest_pairs = service_data[['date', 'growth_rate']].merge(
    goods_data[['date', 'growth_rate']],
    on='date',
    suffixes=('_service', '_goods'),
).dropna()

t_stat, p_value = stats.ttest_rel(
    ttest_pairs['growth_rate_service'],
    ttest_pairs['growth_rate_goods'],
)

# Display the t-statistic and p-value
print(f"T-statistic: {t_stat}")
print(f"P-value: {p_value}")

import pandas as pd

# Growth-rate series with any NaN removed; these keep the row labels of the
# frames they were taken from.
cleaned_service_sector_growth = service_data['growth_rate'].dropna()
cleaned_goods_sector_growth = goods_data['growth_rate'].dropna()

# Pair the two series by date. They must not be aligned on their row labels:
# the service and goods rows are different rows of `data`, so the labels never
# coincide and a label-based alignment leaves nothing to correlate.
# After the merge, column 0 is the service rate and column 1 the goods rate.
aligned_growth = (
    service_data[['date', 'growth_rate']]
    .merge(
        goods_data[['date', 'growth_rate']],
        on='date',
        suffixes=('_service', '_goods'),
    )
    .set_index('date')
    .dropna()
)

# Pearson correlation between the date-matched growth rates.
correlation = aligned_growth.iloc[:, 0].corr(aligned_growth.iloc[:, 1])

# Display the correlation coefficient
print(f"Correlation Coefficient: {correlation}")

# Diagnostics on the two cleaned growth-rate series.
sector_series = {
    'service': cleaned_service_sector_growth,
    'goods': cleaned_goods_sector_growth,
}

# How many NaN values are left in each series.
for sector_name, growth in sector_series.items():
    nan_count = growth.isna().sum()
    print(f"Remaining NaN in {sector_name} sector growth: {nan_count}")

# Whether a series holds exactly one distinct value.
for sector_name, growth in sector_series.items():
    is_constant = growth.nunique() == 1
    print(f"Is {sector_name} sector growth constant? {is_constant}")

# Descriptive statistics, each preceded by its heading line.
print("Service Sector Growth Summary:",
      cleaned_service_sector_growth.describe(), sep="\n")
print("\nGoods Sector Growth Summary:",
      cleaned_goods_sector_growth.describe(), sep="\n")

# Standardize both growth rates to mean 0 and standard deviation 1.
# The rates are first paired by date so that the two standardized series share
# one index; `Series.corr` matches values by index label, and the row labels of
# the service and goods frames never overlap.
standardized_pairs = (
    service_data[['date', 'growth_rate']]
    .merge(
        goods_data[['date', 'growth_rate']],
        on='date',
        suffixes=('_service', '_goods'),
    )
    .set_index('date')
    .dropna()
)
standardized_pairs = (standardized_pairs - standardized_pairs.mean()) / standardized_pairs.std()

standardized_service_sector_growth = standardized_pairs['growth_rate_service']
standardized_goods_sector_growth = standardized_pairs['growth_rate_goods']

# Pearson correlation is unaffected by this rescaling, so the value serves as a
# cross-check of the correlation computed on the raw rates.
correlation = standardized_service_sector_growth.corr(standardized_goods_sector_growth)
print(f"Correlation Coefficient: {correlation}")

# Scatter plot of service vs. goods growth, one point per date.
# The points are built from date-matched pairs: scatter() pairs x and y by
# position and needs both to have the same length, which separately filtered
# series do not guarantee.
scatter_pairs = service_data[['date', 'growth_rate']].merge(
    goods_data[['date', 'growth_rate']],
    on='date',
    suffixes=('_service', '_goods'),
).dropna()

plt.figure(figsize=(10, 6))
plt.scatter(scatter_pairs['growth_rate_service'],
            scatter_pairs['growth_rate_goods'],
            alpha=0.5)
plt.title('Scatter Plot of Service Sector Growth vs. Goods-Producing Sector Growth')
plt.xlabel('Service Sector Growth')
plt.ylabel('Goods-Producing Sector Growth')
# Rates are fractions here, so this window is +/-5%; points outside it are not
# shown. Adjust these limits based on the data.
plt.xlim([-0.05, 0.05])
plt.ylim([-0.05, 0.05])
plt.show()

# Outlier thresholds: three standard deviations of each cleaned series,
# applied symmetrically around zero.
threshold_service = 3 * cleaned_service_sector_growth.std()
threshold_goods = 3 * cleaned_goods_sector_growth.std()

# Pair the two growth rates by date BEFORE filtering. Filtering each series on
# its own and then re-indexing/truncating would shift the series against each
# other whenever a value is removed from only one of them, so the correlation
# would compare different dates.
outlier_pairs = service_data[['date', 'growth_rate']].merge(
    goods_data[['date', 'growth_rate']],
    on='date',
    suffixes=('_service', '_goods'),
).dropna()

# Keep a date only when both of its values lie strictly inside the thresholds.
within_thresholds = (
    (outlier_pairs['growth_rate_service'].abs() < threshold_service)
    & (outlier_pairs['growth_rate_goods'].abs() < threshold_goods)
)
kept_pairs = outlier_pairs[within_thresholds].reset_index(drop=True)

filtered_service_growth = kept_pairs['growth_rate_service']
filtered_goods_growth = kept_pairs['growth_rate_goods']

# Both filtered series come from the same rows, so they share this length.
min_length = len(kept_pairs)

# Recalculate correlation
correlation = filtered_service_growth.corr(filtered_goods_growth)
print(f"Correlation Coefficient after outlier removal: {correlation}")

import matplotlib.pyplot as plt
import numpy as np
from scipy import stats

# Dark theme for the dashboard. These defaults are read when axes and text
# objects are created, so they must be set before plt.subplots() for the
# subplot backgrounds to pick them up.
plt.rcParams['axes.facecolor'] = 'black'
plt.rcParams['savefig.facecolor'] = 'black'
plt.rcParams['text.color'] = 'white'

# Date-matched growth rates shared by the panels below. Merging on 'date'
# guarantees that `dates`, `service_growth` and `goods_growth` have the same
# length and that position i refers to the same date in all three.
dashboard_pairs = (
    service_data[['date', 'growth_rate']]
    .merge(
        goods_data[['date', 'growth_rate']],
        on='date',
        suffixes=('_service', '_goods'),
    )
    .dropna()
    .sort_values('date')
)
dates = dashboard_pairs['date']
service_growth = dashboard_pairs['growth_rate_service'] * 100  # converting to percentage
goods_growth = dashboard_pairs['growth_rate_goods'] * 100  # converting to percentage

# Set up the overall figure size and layout (2x2 grid)
fig, axs = plt.subplots(2, 2, figsize=(14, 10), facecolor='black')

# 1. Line plot (top-left panel): both growth rates are drawn against the same
#    dates on a single shared y-axis.
ax1 = axs[0, 0]
for growth_series, legend_label, line_colour in (
    (service_growth, "Service Sector Growth Rate", "#00FFFF"),
    (goods_growth, "Goods-Producing Sector Growth Rate", "#FF00FF"),
):
    ax1.plot(dates, growth_series, label=legend_label, color=line_colour)

# White labels, title and ticks for the dark figure background.
ax1.set_xlabel('Date', color='white')
ax1.set_ylabel('Service Sector Growth Rate (%)', color='white')
ax1.set_title(
    'Synchronized Growth Rates of Service Sector vs. Goods-Producing Sector (%)',
    color='white',
)
for axis_name in ('x', 'y'):
    ax1.tick_params(axis=axis_name, colors='white')
ax1.legend(facecolor='black', edgecolor='white')

# 2. Bar chart of mean growth rates (bottom-left panel).
from matplotlib.ticker import FuncFormatter

# Means are expressed in percent here (note: this rebinds the names that held
# the fractional means earlier in the script).
mean_service_sector_growth = service_data['growth_rate'].mean() * 100
mean_goods_sector_growth = goods_data['growth_rate'].mean() * 100

mean_growth_ax = axs[1, 0]
mean_growth_ax.bar(['Service Sector', 'Goods-Producing Sector'],
                   [mean_service_sector_growth, mean_goods_sector_growth],
                   color=['#00FFFF', '#FF00FF'])
mean_growth_ax.set_title('Mean Growth Rate of Service vs Goods-Producing Sector', color='white')
mean_growth_ax.set_ylabel('Mean Growth Rate', color='white')
mean_growth_ax.tick_params(axis='x', colors='white')
mean_growth_ax.tick_params(axis='y', colors='white')

# Label the y ticks as percentages with a formatter rather than fixed label
# strings: the labels then stay correct when the ticks change, and small values
# such as 0.15 are not truncated to 0. The y-limits are left to autoscaling so
# that negative or larger means are never clipped.
mean_growth_ax.yaxis.set_major_formatter(
    FuncFormatter(lambda tick_value, _position: f'{tick_value:g}%')
)

# 3. Box plot (top-right panel): distribution of each sector's growth rate.
box_ax = axs[0, 1]
box_fill, box_accent = '#00FFFF', '#FF00FF'
box_ax.boxplot(
    [service_growth, goods_growth],
    patch_artist=True,
    boxprops={'facecolor': box_fill, 'color': box_accent},
    whiskerprops={'color': box_accent},
    capprops={'color': box_accent},
    medianprops={'color': box_accent},
)

# Restrict the visible y-range to [-1, 1] so the boxes are readable; anything
# outside this window is not shown.
box_ax.set_ylim(-1, 1)
sector_tick_labels = ['Service Sector', 'Goods-Producing Sector']
box_ax.set_xticklabels(sector_tick_labels, color='white')
box_ax.set_title('Distribution of Growth Rates: Service vs Goods-Producing Sector',
                 color='white')
box_ax.set_ylabel('Growth Rate', color='white')
box_ax.tick_params(axis='y', colors='white')


# 4. Scatter plot (bottom-right panel): service growth on x, goods growth on y.
scatter_ax = axs[1, 1]
scatter_ax.scatter(service_growth, goods_growth, alpha=0.5, color='#00FFFF')

# White title, axis labels and ticks to match the dark figure.
scatter_ax.set_title('Scatter Plot: Service vs Goods-Producing Sector Growth (%)',
                     color='white')
scatter_ax.set_xlabel('Service Sector Growth (%)', color='white')
scatter_ax.set_ylabel('Goods-Producing Sector Growth (%)', color='white')
for axis_name in ('x', 'y'):
    scatter_ax.tick_params(axis=axis_name, colors='white')

# Paired t-test shown in an outlined text box on the figure.
# The test compares the samples element by element, so the growth rates are
# first paired by date; dates present in only one series are left out.
annotation_pairs = service_data[['date', 'growth_rate']].merge(
    goods_data[['date', 'growth_rate']],
    on='date',
    suffixes=('_service', '_goods'),
).dropna()
t_stat, p_value = stats.ttest_rel(
    annotation_pairs['growth_rate_service'],
    annotation_pairs['growth_rate_goods'],
)

# Position is given in figure coordinates (fractions of the figure size).
fig.text(0.75, 0.35, f'T-statistic: {t_stat:.4f}\nP-value: {p_value:.4e}', ha='center', va='center',
         fontsize=14, bbox=dict(facecolor='none', edgecolor='#FF00FF', boxstyle='round,pad=1'),
         color='white')

# Adjust layout to fit everything
plt.tight_layout()
plt.show()