"""Compare female and male FRED employment series across expansions and recessions.

The script downloads two monthly FRED series, flags every observation as
falling inside or outside a hard-coded list of recession windows, computes
compound and month-over-month growth rates, runs an independent two-sample
t-test on the expansion-period growth rates, and draws several charts
(including a final 2x2 summary dashboard).
"""
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_datareader.data as web
from matplotlib.ticker import PercentFormatter
from scipy import stats

# FRED series identifiers.
# NOTE(review): the original comments call these "labor force participation",
# but the chart titles below say "employment"; confirm on FRED what the
# LNS123... series actually measure and align the wording.
FEMALE_SERIES = 'LNS12300002'
MALE_SERIES = 'LNS12300001'

FEMALE_GROWTH = 'Female Growth Rate'
MALE_GROWTH = 'Male Growth Rate'


def is_expansion(date, recession_periods):
    """Return True when *date* lies outside every recession window.

    Args:
        date: A timestamp comparable with the window bounds.
        recession_periods: Iterable of ``(start, end)`` pairs; both bounds
            are inclusive.

    Returns:
        ``False`` if ``start <= date <= end`` for any window, else ``True``.
    """
    return not any(
        window_start <= date <= window_end
        for window_start, window_end in recession_periods
    )


def calculate_cagr(start_value, end_value, periods):
    """Return the compound growth rate per period between two values.

    Despite the name, nothing here annualises the result: the rate is per
    unit of whatever ``periods`` counts (months for the monthly data below).

    Args:
        start_value: Value at the beginning of the span.
        end_value: Value at the end of the span.
        periods: Number of compounding intervals between the two values.

    Returns:
        ``(end_value / start_value) ** (1 / periods) - 1``, or ``nan`` when
        the rate is undefined (no intervals, zero start, or negative ratio).
    """
    if periods <= 0 or start_value == 0:
        return np.nan
    ratio = end_value / start_value
    if ratio < 0:
        # A fractional power of a negative ratio has no real value.
        return np.nan
    return ratio ** (1 / periods) - 1


# ---------------------------------------------------------------------------
# Fetch the data
# ---------------------------------------------------------------------------
# Set the date range
start = datetime(1948, 1, 1)
end = datetime(2024, 1, 1)

# Fetch the female and male series from FRED
female_data = web.DataReader(FEMALE_SERIES, 'fred', start, end)
male_data = web.DataReader(MALE_SERIES, 'fred', start, end)

# Display the first few rows to verify
print(female_data.head())
print(male_data.head())

# ---------------------------------------------------------------------------
# Flag recession / expansion observations
# ---------------------------------------------------------------------------
# Recession windows as (start, end) date strings, both bounds inclusive.
# NOTE(review): presumably NBER business-cycle dates; confirm the source.
recession_periods = [
    ('1948-11-01', '1949-10-31'),
    ('1953-07-01', '1954-05-31'),
    ('1957-08-01', '1958-04-30'),
    ('1960-04-01', '1961-02-28'),
    ('1969-12-01', '1970-11-30'),
    ('1973-11-01', '1975-03-31'),
    ('1980-01-01', '1980-07-31'),
    ('1981-07-01', '1982-11-30'),
    ('1990-07-01', '1991-03-31'),
    ('2001-03-01', '2001-11-30'),
    ('2007-12-01', '2009-06-30'),
    ('2020-02-01', '2020-04-30'),
]

# Convert the recession periods to datetime format
recession_periods = [
    (pd.to_datetime(window_start), pd.to_datetime(window_end))
    for window_start, window_end in recession_periods
]

# Ensure the index is in datetime format and remove any timezone info so it
# can be compared with the timezone-naive recession bounds.
female_data.index = pd.to_datetime(female_data.index).tz_localize(None)
male_data.index = pd.to_datetime(male_data.index).tz_localize(None)

# Store the flag as 0 (recession) / 1 (expansion) on the raw frames.
female_data['is_expansion'] = (
    female_data.index.to_series()
    .apply(is_expansion, args=(recession_periods,))
    .astype(int)
)
male_data['is_expansion'] = (
    male_data.index.to_series()
    .apply(is_expansion, args=(recession_periods,))
    .astype(int)
)

# Display the updated data with expansion flag
print(female_data.head())
print(male_data.head())

# ---------------------------------------------------------------------------
# Compound growth per regime
# ---------------------------------------------------------------------------
# Pair each flag value with its label explicitly instead of relying on the
# order in which the flag values first appear in the data.
period_labels = {1: 'Expansion', 0: 'Recession'}

female_growth_rates = []
male_growth_rates = []

for flag in period_labels:
    # NOTE(review): each subset pools every observation carrying this flag,
    # so separate episodes are treated as one continuous run from the first
    # pooled observation to the last.
    female_subset = female_data.loc[
        female_data['is_expansion'] == flag, FEMALE_SERIES
    ].dropna()
    male_subset = male_data.loc[
        male_data['is_expansion'] == flag, MALE_SERIES
    ].dropna()

    # At least two observations are needed; n observations span n - 1
    # compounding intervals.
    if len(female_subset) > 1 and len(male_subset) > 1:
        female_cagr = calculate_cagr(
            female_subset.iloc[0], female_subset.iloc[-1], len(female_subset) - 1
        )
        male_cagr = calculate_cagr(
            male_subset.iloc[0], male_subset.iloc[-1], len(male_subset) - 1
        )
    else:
        female_cagr = np.nan
        male_cagr = np.nan

    female_growth_rates.append(female_cagr)
    male_growth_rates.append(male_cagr)

# Convert to a DataFrame for easier analysis
growth_rates_df = pd.DataFrame({
    'Period': list(period_labels.values()),
    FEMALE_GROWTH: female_growth_rates,
    MALE_GROWTH: male_growth_rates,
})

# Display the growth rates
print(growth_rates_df)

# ---------------------------------------------------------------------------
# Month-over-month growth and t-test during expansions
# ---------------------------------------------------------------------------
# Step 1: Period-over-period growth as a fraction (0.01 == 1 %).
female_data[FEMALE_GROWTH] = female_data[FEMALE_SERIES].pct_change()
male_data[MALE_GROWTH] = male_data[MALE_SERIES].pct_change()

# Step 2: Combine the growth rates into a single DataFrame. The two Series
# are aligned on their date index; 'Date' is then taken from that aligned
# index so it always matches the rows.
growth_data = pd.DataFrame({
    FEMALE_GROWTH: female_data[FEMALE_GROWTH],
    MALE_GROWTH: male_data[MALE_GROWTH],
})
growth_data.insert(0, 'Date', growth_data.index)

# Step 3: Flag expansion periods (boolean column).
growth_data['is_expansion'] = (
    growth_data['Date']
    .apply(is_expansion, args=(recession_periods,))
    .astype(bool)
)

# Step 4: Keep expansion rows only; copy so later edits never touch a view.
expansion_data = growth_data[growth_data['is_expansion']].copy()

# Step 5: Independent two-sample t-test between the female and male growth
# rates during expansion periods.
expansion_female_growth = expansion_data[FEMALE_GROWTH].dropna()
expansion_male_growth = expansion_data[MALE_GROWTH].dropna()

if len(expansion_female_growth) > 1 and len(expansion_male_growth) > 1:
    t_stat, p_value = stats.ttest_ind(expansion_female_growth, expansion_male_growth)
    print(f"T-statistic: {t_stat}")
    print(f"P-value: {p_value}")
else:
    # Keep the names defined so the summary panel below can still render.
    t_stat = p_value = np.nan
    print("Not enough data points for a t-test.")

# ---------------------------------------------------------------------------
# Individual charts
# ---------------------------------------------------------------------------
# Mean growth rates (fractions)
mean_female_growth = expansion_female_growth.mean()
mean_male_growth = expansion_male_growth.mean()

# Bar plot of the means
plt.figure(figsize=(8, 6))
plt.bar([FEMALE_GROWTH, MALE_GROWTH],
        [mean_female_growth, mean_male_growth],
        color=['blue', 'orange'])
plt.title('Average Growth Rates During Economic Expansions')
plt.ylabel('Average Growth Rate')
plt.show()

# Keep only rows where both growth rates are present so the series align.
# .copy() makes the new smoothed columns land on an independent frame.
expansion_data_aligned = expansion_data.dropna(
    subset=[FEMALE_GROWTH, MALE_GROWTH]
).copy()

# Rolling mean over 12 rows. Recession rows were removed above, so a window
# can straddle the gap left by a recession.
expansion_data_aligned['Female Growth Rate (Smoothed)'] = (
    expansion_data_aligned[FEMALE_GROWTH].rolling(window=12).mean()
)
expansion_data_aligned['Male Growth Rate (Smoothed)'] = (
    expansion_data_aligned[MALE_GROWTH].rolling(window=12).mean()
)

# Plot the smoothed data
plt.figure(figsize=(10, 6))
plt.plot(expansion_data_aligned['Date'],
         expansion_data_aligned['Female Growth Rate (Smoothed)'],
         label='Female Growth Rate (Smoothed)', color='blue')
plt.plot(expansion_data_aligned['Date'],
         expansion_data_aligned['Male Growth Rate (Smoothed)'],
         label='Male Growth Rate (Smoothed)', color='orange')
plt.title('Smoothed Growth Rates Over Time During Expansions')
plt.xlabel('Date')
plt.ylabel('Growth Rate')
plt.legend()
plt.show()

# Box plot of the two distributions. Tick labels are set on the axes rather
# than through boxplot's `labels` argument, which newer Matplotlib renames.
fig, ax = plt.subplots(figsize=(8, 6))
ax.boxplot([expansion_female_growth, expansion_male_growth])
ax.set_xticklabels([FEMALE_GROWTH, MALE_GROWTH])
ax.set_title('Distribution of Growth Rates During Expansions')
ax.set_ylabel('Growth Rate')
plt.show()

# From here on use the row-aligned series (same dates in both).
expansion_female_growth = expansion_data_aligned[FEMALE_GROWTH]
expansion_male_growth = expansion_data_aligned[MALE_GROWTH]

# Dual axis line plot
fig, ax1 = plt.subplots(figsize=(10, 6))

female_color = 'tab:blue'
ax1.set_xlabel('Date')
ax1.set_ylabel(FEMALE_GROWTH, color=female_color)
ax1.plot(expansion_data_aligned['Date'], expansion_female_growth, color=female_color)
ax1.tick_params(axis='y', labelcolor=female_color)

ax2 = ax1.twinx()
male_color = 'tab:orange'
ax2.set_ylabel(MALE_GROWTH, color=male_color)
ax2.plot(expansion_data_aligned['Date'], expansion_male_growth, color=male_color)
ax2.tick_params(axis='y', labelcolor=male_color)

# Set the title before tight_layout so the layout pass accounts for it.
ax1.set_title('Female vs. Male Growth Rates During Expansions')
fig.tight_layout()
plt.show()

# ---------------------------------------------------------------------------
# 2x2 summary dashboard
# ---------------------------------------------------------------------------
# Set the black background for the plots
plt.style.use('dark_background')

fig, axs = plt.subplots(2, 2, figsize=(15, 10))

# Plot 1: Time series with shaded recession periods. pct_change yields
# fractions, so scale by 100 to match the "(%)" axis label.
axs[0, 0].plot(female_data.index, female_data[FEMALE_GROWTH] * 100,
               color='#00FFFF', label=FEMALE_GROWTH, zorder=3)
axs[0, 0].plot(male_data.index, male_data[MALE_GROWTH] * 100,
               color='#FF00FF', label=MALE_GROWTH, zorder=3)

# Shade the recession periods. Distinct loop names keep the module-level
# `start` / `end` fetch range intact.
for recession_start, recession_end in recession_periods:
    axs[0, 0].axvspan(recession_start, recession_end,
                      color='#800080', alpha=0.5, lw=0)

axs[0, 0].set_title('Male vs Female Employment Growth Rates Over Time (with Recessions)',
                    fontsize=14, color='white')
axs[0, 0].set_xlabel('Date', color='white')
axs[0, 0].set_ylabel('Growth Rate (%)', color='white')
axs[0, 0].legend(facecolor='black', edgecolor='white')
axs[0, 0].tick_params(axis='x', colors='white')
axs[0, 0].tick_params(axis='y', colors='white')

# Plot 2: Neon box plot of expansion growth rates (in percent).
boxprops = dict(color='#FF00FF', linewidth=2)
whiskerprops = dict(color='#00FFFF', linewidth=2)
capprops = dict(color='#FF00FF', linewidth=2)
medianprops = dict(color='#00FFFF', linewidth=2)

axs[0, 1].boxplot([expansion_female_growth * 100, expansion_male_growth * 100],
                  boxprops=boxprops, whiskerprops=whiskerprops,
                  capprops=capprops, medianprops=medianprops)
axs[0, 1].set_xticklabels([FEMALE_GROWTH, MALE_GROWTH])
axs[0, 1].set_title('Boxplot: Male vs. Female Growth Rates During Expansions',
                    color='white')
axs[0, 1].set_ylabel('Growth Rate (%)', color='white')
axs[0, 1].tick_params(axis='x', colors='white')
axs[0, 1].tick_params(axis='y', colors='white')

# Plot 3: Bar chart of mean growth rates, converted to percent.
mean_female_growth = expansion_female_growth.mean() * 100
mean_male_growth = expansion_male_growth.mean() * 100

axs[1, 0].bar(['Female', 'Male'], [mean_female_growth, mean_male_growth],
              color=['#00FFFF', '#FF00FF'])
axs[1, 0].set_title('Mean Employment Growth Rate During Expansions (Male vs Female)',
                    color='white')
axs[1, 0].set_ylabel('Mean Growth Rate (%)', color='white')
axs[1, 0].tick_params(axis='x', colors='white')
axs[1, 0].tick_params(axis='y', colors='white')

# Values are already in percent units (xmax=100). The formatter chooses its
# own precision, so small tick values are not all rounded to "0%".
axs[1, 0].yaxis.set_major_formatter(PercentFormatter(xmax=100))

# Plot 4: Show the statistics computed above instead of placeholders.
# NOTE(review): the original displayed a hard-coded R² with no stated
# definition; here it is the squared Pearson correlation between the
# aligned female and male growth rates. Confirm that is the intended metric.
r2_value = expansion_female_growth.corr(expansion_male_growth) ** 2

axs[1, 1].text(0.5, 0.5,
               f'T-statistic: {t_stat:.4f}\nP-value: {p_value:.4e}\nR² Value: {r2_value:.4f}',
               horizontalalignment='center', verticalalignment='center',
               fontsize=12, color='#FF00FF',
               bbox=dict(facecolor='none', edgecolor='#FF00FF',
                         boxstyle='round,pad=1'))
axs[1, 1].set_axis_off()

# Adjust layout
plt.tight_layout()
plt.show()