from google.cloud import bigquery
# BigQuery client bound to the project that is queried below
client = bigquery.Client(project='fred-employment-initial-test')

# SQL text: all columns of the rows whose series_id is LNU02300000
query = """
SELECT *
FROM `fred-employment-initial-test.fred_data.fred_data_withnotes`
WHERE series_id IN ('LNU02300000')
"""

# Submit the query and pull the result set into a pandas DataFrame
query_job = client.query(query)
data = query_job.to_dataframe()

# Quick look at the leading rows
print(data.head())

import pandas as pd

# Recession windows as (start, end) pairs of pandas Timestamps, built directly
# from their ISO date strings
recession_periods = [
    (pd.Timestamp(first_day), pd.Timestamp(last_day))
    for first_day, last_day in (
        ('1948-11-01', '1949-10-31'),
        ('1953-07-01', '1954-05-31'),
        ('1957-08-01', '1958-04-30'),
        ('1960-04-01', '1961-02-28'),
        ('1969-12-01', '1970-11-30'),
        ('1973-11-01', '1975-03-31'),
        ('1980-01-01', '1980-07-31'),
        ('1981-07-01', '1982-11-30'),
        ('1990-07-01', '1991-03-31'),
        ('2001-03-01', '2001-11-30'),
        ('2007-12-01', '2009-06-30'),
        ('2020-02-01', '2020-04-30'),
    )
]

# Parse the 'date' column as datetimes, then strip any timezone so the stored
# values are timezone-naive
parsed_dates = pd.to_datetime(data['date'])
data['date'] = parsed_dates.dt.tz_localize(None)

# Function to flag recession periods
def is_recession(date, recession_periods):
    """Return 1 if ``date`` lies inside any recession window, otherwise 0.

    ``recession_periods`` is an iterable of ``(start, end)`` pairs; both
    endpoints count as inside the window.
    """
    inside_any_window = any(
        window_start <= date <= window_end
        for window_start, window_end in recession_periods
    )
    return 1 if inside_any_window else 0

# Tag every observation: 1 when its date falls in a recession window, else 0
data['is_recession'] = data['date'].apply(is_recession, args=(recession_periods,))

# Preview the frame now that it carries the flag column
print(data.head())

# Average 'value' within each flag group (0 and 1)
mean_values = data['value'].groupby(data['is_recession']).mean()

print(mean_values)

from scipy import stats

# Split 'value' into the recession sample and the non-recession sample
recession_data = data.loc[data['is_recession'] == 1, 'value']
non_recession_data = data.loc[data['is_recession'] == 0, 'value']

# Independent two-sample t-test. nan_policy='omit' tells SciPy to ignore
# missing observations; with the default ('propagate') a single NaN in either
# sample turns both the statistic and the p-value into NaN.
t_stat, p_value = stats.ttest_ind(recession_data, non_recession_data, nan_policy='omit')

# Report the test result
print(f"T-statistic: {t_stat}")
print(f"P-value: {p_value}")

import numpy as np  # Import NumPy

# How many observations are missing in each sample?
missing_in_recession = recession_data.isna().sum()
missing_in_non_recession = non_recession_data.isna().sum()
print(f"Missing values in recession data: {missing_in_recession}")
print(f"Missing values in non-recession data: {missing_in_non_recession}")

# How many observations are +/- infinity in each sample?
infinite_in_recession = np.isinf(recession_data).sum()
infinite_in_non_recession = np.isinf(non_recession_data).sum()
print(f"Infinite values in recession data: {infinite_in_recession}")
print(f"Infinite values in non-recession data: {infinite_in_non_recession}")

# Summary statistics for each sample, each under its own heading
print("Recession Data Statistics:", recession_data.describe(), sep="\n")
print("\nNon-Recession Data Statistics:", non_recession_data.describe(), sep="\n")

# Keep only the non-missing observations of each sample
recession_data_clean = recession_data[recession_data.notna()]
non_recession_data_clean = non_recession_data[non_recession_data.notna()]

# Re-run the independent two-sample t-test on the NaN-free samples
clean_result = stats.ttest_ind(recession_data_clean, non_recession_data_clean)
t_stat, p_value = clean_result.statistic, clean_result.pvalue

# Report the test result
print(f"T-statistic: {t_stat}")
print(f"P-value: {p_value}")

# Drop only the rows whose 'value' is missing. A bare data.dropna() would also
# discard rows that are null in any other column returned by the query, so the
# means below could cover fewer rows than the t-test samples, which are
# cleaned on 'value' alone.
cleaned_data = data.dropna(subset=['value'])

# Mean 'value' inside (1) and outside (0) recessions on the cleaned rows
cleaned_mean_values = cleaned_data.groupby('is_recession')['value'].mean()

# Display the cleaned mean values
print(cleaned_mean_values)

from sklearn.metrics import r2_score

# Fit a simple linear regression model for the data
# Assuming 'recession_data_clean' and 'non_recession_data_clean' are defined as before

from sklearn.linear_model import LinearRegression

# Response vector: cleaned recession observations first, then the cleaned
# non-recession observations
combined_data = pd.concat([recession_data_clean, non_recession_data_clean])

# Matching 0/1 indicator in the same order (1 = recession, 0 = non-recession)
n_recession = len(recession_data_clean)
n_non_recession = len(non_recession_data_clean)
labels = pd.concat([pd.Series([1] * n_recession), pd.Series([0] * n_non_recession)])

# Single-column design matrix, built once and reused for fit and predict
indicator_column = labels.values.reshape(-1, 1)

# Regress the observed values on the recession indicator
model = LinearRegression()
model.fit(indicator_column, combined_data)

# Fitted values for every observation
predictions = model.predict(indicator_column)

# Share of variance in the values explained by the indicator
r2 = r2_score(combined_data, predictions)
print(f"R^2 value: {r2}")

import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.patches import Patch

def _style_dark_axes(ax, xlabel, ylabel, title):
    """Apply the dashboard's shared dark theme, axis labels and title to ``ax``.

    Sets a black background, white labels/ticks/spines and the given title.
    """
    ax.set_facecolor('#000000')
    ax.set_xlabel(xlabel, fontsize=18, color='white')
    ax.set_ylabel(ylabel, fontsize=18, color='white')
    ax.tick_params(axis='both', labelsize=14, colors='white')
    for side in ('left', 'bottom', 'top', 'right'):
        ax.spines[side].set_color('white')
    ax.set_title(title, fontsize=22, color='white', pad=35)


# Create a 2x2 figure for the dashboard on a black canvas
fig, axs = plt.subplots(2, 2, figsize=(18, 14), facecolor='#000000')
# NOTE(review): plt.tight_layout() at the end recomputes subplot spacing, so
# these values likely only matter if that call is removed - confirm.
plt.subplots_adjust(hspace=0.4, wspace=0.4)

trend_ax, box_ax = axs[0, 0], axs[0, 1]
bar_ax, stats_ax = axs[1, 0], axs[1, 1]

# Plot 1: Time Series Line Plot (with Shaded Recession Periods)
trend_ax.plot(data['date'], data['value'], color='#00FFFF', label='Employment-Population Ratio', zorder=3)

# Shade the recession periods. No per-span label is set: the legend below is
# built from an explicit proxy Patch, so span labels would never be shown.
for start, end in recession_periods:
    trend_ax.axvspan(start, end, color='#FF00FF', alpha=0.2)

_style_dark_axes(
    trend_ax,
    xlabel='Date',
    ylabel='Employment-Population Ratio (%)',
    title='Employment-Population Ratio Over Time (with Recessions)',
)

# Legend with a single proxy entry standing in for all shaded spans
legend_elements = [Patch(facecolor='#FF00FF', edgecolor='white', label='Recession')]
trend_ax.legend(handles=legend_elements, loc='upper right', fontsize=14, facecolor='#181818', edgecolor='white', labelcolor=['#FF00FF'])

# Plot 2: Boxplot of Recession vs. Non-Recession Periods
# NOTE(review): newer seaborn releases emit a FutureWarning when `palette` is
# passed without `hue` - revisit when pinning the seaborn version.
sns.boxplot(x='is_recession', y='value', data=data, ax=box_ax, palette=['#00FFFF', '#FF00FF'])
_style_dark_axes(
    box_ax,
    xlabel='Recession Status (0 = Non-Recession, 1 = Recession)',
    ylabel='Employment-Population Ratio (%)',
    title='Boxplot: Recession vs. Non-Recession Periods',
)

# Plot 3: Bar Chart of Mean Values in Recession and Non-Recession
# Select the group means by key (0 = non-recession, 1 = recession) so each bar
# is paired with its label explicitly rather than by position; a group that is
# absent from the data becomes NaN instead of causing a length mismatch.
bar_heights = mean_values.reindex([0, 1])
bar_ax.bar(['Non-Recession', 'Recession'], bar_heights, color=['#00FFFF', '#FF00FF'])
_style_dark_axes(
    bar_ax,
    xlabel='Period',
    ylabel='Mean Employment-Population Ratio (%)',
    title='Mean Employment-Population Ratio During Recession/Non-Recession',
)

# Plot 4: T-Test Results and R² Score (as text output)
t_test_text = f"T-statistic: {t_stat:.4f}\nP-value: {p_value:.4e}\nR² Value: {r2:.4f}"
stats_ax.text(0.5, 0.5, t_test_text, transform=stats_ax.transAxes, fontsize=16, color='white',
              verticalalignment='center', horizontalalignment='center',
              bbox=dict(boxstyle='round', facecolor='#000000', edgecolor='#FF00FF'))
# Text-only panel: hide the axis frame and ticks
stats_ax.axis('off')

# Adjust layout to prevent overlap
plt.tight_layout()

# Show the complete dashboard
plt.show()