#!/usr/bin/env python
# coding: utf-8
# In[72]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats
import scipy as sp
from mpl_toolkits.mplot3d import Axes3D
import statsmodels.formula.api as smf
import linregfunc as lr
import pandas as pd
import seaborn as sns
plt.style.use("fivethirtyeight")
# # Common Distributions in Hypothesis Testing
# Here we will have a quick refresher on the distributions that are commonly used in hypothesis testing.
# ## The Normal Distribution
# The normal distribution is the most important of all. Here we provide a graphic reminder of the bivariate normal distribution; please check out my linear algebra notebooks, where a whole chapter is devoted to the normal distribution.
#
# For your reference, the pdf of multivariate normal distribution is
# $$
# p(\boldsymbol{x} ; \boldsymbol{\mu}, \Sigma)=\frac{1}{(2 \pi)^{n / 2}|\Sigma|^{1 / 2}} \exp \left(-\frac{1}{2}(\boldsymbol{x}-\boldsymbol{\mu})^{T} \Sigma^{-1}(\boldsymbol{x}-\boldsymbol{\mu})\right) \tag{1}\label{1}
# $$
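# As a quick sanity check (a minimal sketch with an arbitrary test point and covariance matrix), formula $\eqref{1}$ can be evaluated directly with NumPy and compared against `scipy.stats.multivariate_normal`.
# In[ ]:
mu_check = np.array([0.0, 0.0])
Sigma_check = np.array([[3.0, 1.0], [1.0, 2.0]])
x_check = np.array([1.0, -0.5])
n_dim = len(mu_check)
diff = x_check - mu_check
# evaluate the multivariate normal pdf by hand
pdf_manual = np.exp(-0.5 * diff @ np.linalg.inv(Sigma_check) @ diff) / (
    (2 * np.pi) ** (n_dim / 2) * np.linalg.det(Sigma_check) ** 0.5
)
pdf_scipy = sp.stats.multivariate_normal(mu_check, Sigma_check).pdf(x_check)
print(pdf_manual, pdf_scipy)  # the two values should agree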
# In[2]:
x = np.linspace(-10, 10, 500)
y = np.linspace(-10, 10, 500)
X, Y = np.meshgrid(x, y)
pos = np.array([X.flatten(), Y.flatten()]).T # two columns matrix
fig = plt.figure(figsize=(14, 14))
#########################
ax = fig.add_subplot(221, projection="3d")
mu_x = 0
mu_y = 0
sigma_x = 3  # variance of x
sigma_y = 15  # variance of y
rho = 3  # covariance between x and y
rv = sp.stats.multivariate_normal(
    [mu_x, mu_y], [[sigma_x, rho], [rho, sigma_y]]
)  # frozen bivariate normal distribution
ax.plot_surface(X, Y, rv.pdf(pos).reshape(500, 500), cmap="coolwarm")
ax = fig.add_subplot(223)
ax.contour(rv.pdf(pos).reshape(500, 500))
string1 = r"$\sigma_x^2 = %.1f$, $\sigma_y^2 = %.1f$, $\sigma_{xy} = %.1f$" % (
    sigma_x,
    sigma_y,
    rho,
)
ax.annotate(text=string1, xy=(0.2, 0.05), xycoords="axes fraction", color="r")
####
mu_x = 0
mu_y = 0
sigma_x = 10  # variance of x
sigma_y = 4  # variance of y
rho = 0  # covariance between x and y
rv = sp.stats.multivariate_normal(
    [mu_x, mu_y], [[sigma_x, rho], [rho, sigma_y]]
)  # frozen bivariate normal distribution
ax = fig.add_subplot(222, projection="3d")
ax.plot_surface(X, Y, rv.pdf(pos).reshape(500, 500), cmap="coolwarm")
ax = fig.add_subplot(224)
ax.contour(rv.pdf(pos).reshape(500, 500))
string2 = r"$\sigma_x^2 = %.1f$, $\sigma_y^2 = %.1f$, $\sigma_{xy} = %.1f$" % (
    sigma_x,
    sigma_y,
    rho,
)
ax.annotate(text=string2, xy=(0.2, 0.05), xycoords="axes fraction", color="r")
#########################
plt.show()
# Keep in mind: _any linear combination of normally distributed variables is again normally distributed_.
# ## The Chi-Squared Distribution
# If the elements of an $n$-random vector are $iid$ standard normal, $\boldsymbol{z}\sim N(\boldsymbol{0}, \mathbf{I})$, then the random variable
#
# $$
# y = \boldsymbol{z}^T\boldsymbol{z} = \sum_{i=1}^n z_i^2
# $$
#
# is said to follow the **chi-squared distribution** with $n$ degrees of freedom. Denoted as
#
# $$
# y\sim\chi^2(n)
# $$
# The mean is
#
# $$\mathrm{E}(y)=\sum_{i=1}^{n} \mathrm{E}\left(z_{i}^{2}\right)=\sum_{i=1}^{n} 1=n$$
#
# And the variance is
#
# $$\begin{aligned}
# \operatorname{Var}(y) &=\sum_{i=1}^{n} \operatorname{Var}\left(z_{i}^{2}\right)=n \mathrm{E}\left(\left(z_{i}^{2}-1\right)^{2}\right) \\
# &=n \mathrm{E}\left(z_{i}^{4}-2 z_{i}^{2}+1\right)=n(3-2+1)=2 n
# \end{aligned}$$
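# A quick Monte Carlo check of these moments (a minimal sketch with an arbitrary sample size and d.o.f.): the sample mean should be close to $n$ and the sample variance close to $2n$.
# In[ ]:
rng = np.random.default_rng(123)
n_dof = 5
z_draws = rng.standard_normal((100000, n_dof))
y_draws = (z_draws**2).sum(axis=1)  # sum of n_dof squared standard normals
print(y_draws.mean(), y_draws.var())  # approximately n_dof and 2 * n_dof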
# As $n$ increases, the probability density function of $\chi^2(n)$ approaches that of $N(n, 2n)$. Here is a graphical demonstration of how the $\chi^2$ distribution changes as the d.o.f. rises.
# In[3]:
fig, ax = plt.subplots(figsize=(9, 9))
x = np.linspace(0, 50, 1000)
for i in range(1, 14):
    chi_pdf = sp.stats.chi2.pdf(x, i)
    ax.plot(x, chi_pdf, lw=3, label=r"$\chi^2 (%d)$" % i)
ax.legend(fontsize=12)
ax.axis([0, 20, 0, 0.6])
plt.show()
# ### Quadratic Form of $\chi^2$ Distribution
# If an $n$-random vector $\boldsymbol{y} \sim N(\boldsymbol{\mu}, \Sigma)$ then
#
# $$
# (\boldsymbol{y} - \boldsymbol{\mu})^T\Sigma^{-1}(\boldsymbol{y}-\boldsymbol{\mu})\sim \chi^2(n)
# $$
#
# If $\boldsymbol{y} \sim N(\boldsymbol{0}, \Sigma)$, the expression simplifies to
#
# $$
# \boldsymbol{y}^T\Sigma^{-1}\boldsymbol{y}\sim \chi^2(n)
# $$
# We will show why this holds by using diagonal decomposition. Since $\Sigma$ is symmetric (and positive definite), it is orthogonally diagonalizable,
#
#
# $$
# \Sigma = QDQ^T
# $$
#
# where
#
# $$
# D=\left[\begin{array}{ccccc}
# \lambda_{1} & 0 & 0 & \ldots & 0 \\
# 0 & \lambda_{2} & 0 & \ldots & 0 \\
# \vdots & \vdots & \vdots & & \vdots \\
# 0 & 0 & 0 & \ldots & \lambda_{n}
# \end{array}\right]
# $$
#
# The $\lambda_i$ are the eigenvalues of $\Sigma$, and $Q$ is orthogonal ($Q^{-1} = Q^T$): its columns are the mutually orthogonal eigenvectors of $\Sigma$.
# Denote $D^*$ as
#
# $$
# D^* =
# \left[\begin{array}{ccccc}
# \frac{1}{\sqrt{\lambda_{1}}} & 0 & 0 & \ldots & 0 \\
# 0 & \frac{1}{\sqrt{\lambda_{2}}} & 0 & \ldots & 0 \\
# \vdots & \vdots & \vdots & & \vdots \\
# 0 & 0 & 0 & \ldots & \frac{1}{\sqrt{\lambda_{n}}}
# \end{array}\right]
# $$
# Let the matrix $H = QD^*Q^T$; since $H$ is also symmetric, we have
#
# $$
# HH^T= QD^*Q^TQD^*Q^T= QD^*D^*Q^T = QD^{-1}Q^T =\Sigma^{-1}
# $$
# Furthermore
#
# $$
# H\Sigma H^T = QD^*Q^T\Sigma QD^*Q^T = QD^*Q^TQDQ^T QD^*Q^T = QD^*DD^*Q^T = QQ^T = I
# $$
# Back to the result above: set $\boldsymbol{z} = H^T (\boldsymbol{y}-\boldsymbol{\mu})$ (recall $H$ is symmetric, so $H^T = H$), which follows a standard normal distribution since
#
# $$
# E(\boldsymbol{z})= H^TE(\boldsymbol{y}-\boldsymbol{\mu})=\boldsymbol{0}\\
# \text{Var}(\boldsymbol{z}) = H\text{Var}(\boldsymbol{y}-\boldsymbol{\mu})H^T =H\Sigma H^T = I
# $$
#
# Back to where we started
#
# $$
# (\boldsymbol{y}-\boldsymbol{\mu})^T\Sigma^{-1}(\boldsymbol{y}-\boldsymbol{\mu}) = (\boldsymbol{y}-\boldsymbol{\mu})^THH^T (\boldsymbol{y}-\boldsymbol{\mu}) = (H^T (\boldsymbol{y}-\boldsymbol{\mu}))^T(H^T (\boldsymbol{y}-\boldsymbol{\mu})) = \boldsymbol{z}^T\boldsymbol{z}
# $$
#
# Since $\boldsymbol{z}^T\boldsymbol{z}\sim\chi^2(n)$, we have shown that $(\boldsymbol{y}-\boldsymbol{\mu})^T\Sigma^{-1}(\boldsymbol{y}-\boldsymbol{\mu}) \sim \chi^2(n)$.
# More details of the proof can be found on this page.
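# The result can also be verified by simulation (a minimal sketch with an arbitrary mean vector and covariance matrix): the quadratic form evaluated on draws from $N(\boldsymbol{\mu}, \Sigma)$ should follow the $\chi^2(3)$ density.
# In[ ]:
rng = np.random.default_rng(42)
mu_sim = np.array([1.0, -2.0, 0.5])
Sigma_sim = np.array([[2.0, 0.5, 0.3], [0.5, 1.5, 0.2], [0.3, 0.2, 1.0]])
draws = rng.multivariate_normal(mu_sim, Sigma_sim, size=100000)
diff = draws - mu_sim
# quadratic form (y - mu)^T Sigma^{-1} (y - mu) for every draw
quad_form = np.einsum("ij,jk,ik->i", diff, np.linalg.inv(Sigma_sim), diff)
fig, ax = plt.subplots(figsize=(9, 6))
ax.hist(quad_form, bins=100, density=True, alpha=0.5, label="simulated quadratic form")
x_grid = np.linspace(0.01, 20, 500)
ax.plot(x_grid, sp.stats.chi2.pdf(x_grid, df=3), lw=3, label=r"$\chi^2(3)$ pdf")
ax.legend()
plt.show()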
#
# ## The Student’s $t$ Distribution
# If $z\sim N(0, 1)$ and $y\sim \chi^2(m)$, and $z$ and $y$ are independent, then
#
# $$
# t = \frac{z}{\sqrt{y/m}}
# $$
#
# follows the **Student's t distribution** with $m$ d.o.f.
# Here is a plot of the $t$-distribution; note that $t(1)$ is called the **Cauchy distribution**, which has no moments at all because the defining integrals do not converge due to its fat tails.
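# The definition can also be checked by simulation (a minimal sketch with an arbitrarily chosen d.o.f.): build $t$ draws from independent standard normal and $\chi^2(m)$ draws and compare them with `scipy.stats.t`.
# In[ ]:
rng = np.random.default_rng(7)
m = 5
z_draws = rng.standard_normal(100000)
y_draws = rng.chisquare(m, size=100000)
t_draws = z_draws / np.sqrt(y_draws / m)  # the ratio in the definition
fig, ax = plt.subplots(figsize=(9, 6))
ax.hist(t_draws, bins=200, range=(-6, 6), density=True, alpha=0.5, label="simulated $t$")
x_grid = np.linspace(-6, 6, 500)
ax.plot(x_grid, sp.stats.t.pdf(x_grid, m), lw=3, label="$t(%d)$ pdf" % m)
ax.legend()
plt.show()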
# In[4]:
fig, ax = plt.subplots(figsize=(12, 7))
x = np.linspace(-5, 5, 1000)
for i in range(1, 6):
    t_pdf = sp.stats.t.pdf(x, i)
    if i == 1:
        ax.plot(x, t_pdf, lw=3, label="Cauchy", ls="--")  # t(1) is the Cauchy distribution
    else:
        ax.plot(x, t_pdf, lw=3, label=r"$t (%d)$" % i)
ax.legend(fontsize=12)
ax.axis([-5, 5, 0, 0.4])
ax.set_title("Student's $t$ Distribution", size=18)
plt.show()
# As $m \rightarrow \infty$, $t(m)\rightarrow N(0, 1)$: in the limit the tails of the $t$ distribution diminish and become thinner.
# ## The $F$ Distribution
# If $y_1$ and $y_2$ are independent random variables distributed as $\chi^2(m_1)$ and $\chi^2(m_2)$, then the random variable
#
# $$
# F = \frac{y_1/m_1}{y_2/m_2}
# $$
# follows the **$F$ distribution**, denoted $ F(m_1, m_2)$.
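# The ratio construction can be simulated directly (a minimal sketch with arbitrary degrees of freedom) before plotting the $F$ density for several d.o.f. pairs below.
# In[ ]:
rng = np.random.default_rng(11)
m1, m2 = 8, 15
f_draws = (rng.chisquare(m1, 100000) / m1) / (rng.chisquare(m2, 100000) / m2)
fig, ax = plt.subplots(figsize=(9, 6))
ax.hist(f_draws, bins=200, range=(0, 5), density=True, alpha=0.5, label="simulated $F$")
x_grid = np.linspace(0.01, 5, 500)
ax.plot(x_grid, sp.stats.f.pdf(x_grid, dfn=m1, dfd=m2), lw=3, label="$F(%d, %d)$ pdf" % (m1, m2))
ax.legend()
plt.show()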
# In[5]:
x = np.linspace(0.001, 5, 100)
fig, ax = plt.subplots(figsize=(12, 7))
df1 = 10
df2 = 2
f_pdf = sp.stats.f.pdf(x, dfn=df1, dfd=df2)
ax.plot(x, f_pdf, lw=3, label="$df_1 = %.d, df_2 = %.d$" % (df1, df2))
df1 = 2
df2 = 10
f_pdf = sp.stats.f.pdf(x, dfn=df1, dfd=df2)
ax.plot(x, f_pdf, lw=3, label="$df_1 = %.d, df_2 = %.d$ " % (df1, df2))
df1 = 8
df2 = 15
f_pdf = sp.stats.f.pdf(x, dfn=df1, dfd=df2)
ax.plot(x, f_pdf, lw=3, color="red", label="$df_1 = %.d, df_2 = %.d$" % (df1, df2))
df1 = 15
df2 = 8
f_pdf = sp.stats.f.pdf(x, dfn=df1, dfd=df2)
ax.plot(x, f_pdf, lw=3, label="$df_1 = %.d, df_2 = %.d$ " % (df1, df2))
ax.legend(fontsize=15)
ax.axis([0, 4, 0, 0.8])
ax.set_title("$F$ Distribution", size=18)
plt.show()
# # Single Restriction
# Any linear restriction, such as $\beta_1 = 5$ or $\beta_1 = 2\beta_2$, can be tested; however, there is no loss of generality in demonstrating the single linear restriction $\beta_2 = 0$.
#
# We first take a look at a single restriction and see how the FWL (Frisch–Waugh–Lovell) regression helps to construct test statistics.
#
# The regression model is
#
# $$
# \boldsymbol{y} = \boldsymbol{X}_1\boldsymbol{\beta}_1+\beta_2\boldsymbol{x}_2+\boldsymbol{u}
# $$
#
# where $\boldsymbol{x}_2$ is an $n$-vector, whereas $\boldsymbol{X}_1$ is an $n\times (m-1)$ matrix.
# Projecting off $\boldsymbol{X}_1$, i.e. premultiplying by the annihilator $\boldsymbol{M}_1 = \boldsymbol{I}-\boldsymbol{X}_1(\boldsymbol{X}_1^T\boldsymbol{X}_1)^{-1}\boldsymbol{X}_1^T$, the FWL regression is
#
# $$
# \boldsymbol{M}_1\boldsymbol{y} = \beta_2\boldsymbol{M}_1\boldsymbol{x}_2 + \boldsymbol{M}_1\boldsymbol{u}
# $$
# Applying OLS estimate and variance formula,
#
# $$
# \hat{\beta}_2 = [(\boldsymbol{M}_1\boldsymbol{x}_2)^T\boldsymbol{M}_1\boldsymbol{x}_2]^{-1}(\boldsymbol{M}_1\boldsymbol{x}_2)^T\boldsymbol{M}_1\boldsymbol{y}=(\boldsymbol{x}_2^T\boldsymbol{M}_1\boldsymbol{x}_2)^{-1}\boldsymbol{x}_2^T\boldsymbol{M}_1\boldsymbol{y}=\frac{\boldsymbol{x}_2^T\boldsymbol{M}_1\boldsymbol{y}}{\boldsymbol{x}_2^T\boldsymbol{M}_1\boldsymbol{x}_2}\\
# \text{Var}(\hat{\beta}_2) = \sigma^2 [(\boldsymbol{M}_1\boldsymbol{x}_2)^T(\boldsymbol{M}_1\boldsymbol{x}_2)]^{-1} = \sigma^2(\boldsymbol{x}_2^T\boldsymbol{M}_1\boldsymbol{x}_2)^{-1}
# $$
# If the null hypothesis is $\beta_2 = \beta_2^0 = 0$, and assuming for now that $\sigma^2$ is known, we can construct a $z$-statistic
#
# $$
# z_{\beta_2} = \frac{\hat{\beta}_2}
# {\sqrt{\text{Var}(\hat{\beta}_2)}}=\frac{\frac{\boldsymbol{x}_2^T\boldsymbol{M}_1\boldsymbol{y}}{\boldsymbol{x}_2^T\boldsymbol{M}_1\boldsymbol{x}_2}}{ \sigma(\boldsymbol{x}_2^T\boldsymbol{M}_1\boldsymbol{x}_2)^{-\frac{1}{2}}}
# = \frac{\boldsymbol{x}_2^T\boldsymbol{M}_1\boldsymbol{y}}{\sigma(\boldsymbol{x}_2^T\boldsymbol{M}_1\boldsymbol{x}_2)^{\frac{1}{2}}}
# $$
# However, $\sigma$ is unlikely to be known, hence we replace it with $s$, the least squares estimate of the standard error. Recall that
#
# $$
# s^2 =\frac{1}{n-k} \sum_{t=1}^n \hat{u}_t^2 = \frac{\hat{\boldsymbol{u}}^T\hat{\boldsymbol{u}}}{n-k}= \frac{(\boldsymbol{M_X y})^T\boldsymbol{M_X y}}{n-k}=\frac{\boldsymbol{y}^T\boldsymbol{M_X y}}{n-k}
# $$
# Replace the $\sigma$ in $z_{\beta_2}$, we obtain $t$-statistic
#
# $$
# t_{\beta_2} = \frac{\boldsymbol{x}_2^T\boldsymbol{M}_1\boldsymbol{y}}{s(\boldsymbol{x}_2^T\boldsymbol{M}_1\boldsymbol{x}_2)^{\frac{1}{2}}} = \left(\frac{\boldsymbol{y}^T\boldsymbol{M_X y}}{n-k}\right)^{-\frac{1}{2}}\frac{\boldsymbol{x}_2^T\boldsymbol{M}_1\boldsymbol{y}}{(\boldsymbol{x}_2^T\boldsymbol{M}_1\boldsymbol{x}_2)^{\frac{1}{2}}}
# $$
# Of course, we could also show that it indeed follows the $t$ distribution by using the definition, i.e. the ratio of a standard normal variable to the square root of an independent $\chi^2$ variable divided by its degrees of freedom. However, this is unnecessary for our purposes.
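# The FWL-based formula can be checked numerically (a minimal sketch on simulated data; `M1` and `MX` below are the annihilator matrices of $\boldsymbol{X}_1$ and of the full regressor matrix): the manual $t$ statistic should match the one reported by statsmodels.
# In[ ]:
rng = np.random.default_rng(0)
n = 100
X1 = np.column_stack([np.ones(n), rng.standard_normal(n)])  # constant plus one regressor
x2 = rng.standard_normal(n)
y_sim = X1 @ np.array([1.0, 2.0]) + 0.5 * x2 + rng.standard_normal(n)
X_full = np.column_stack([X1, x2])
M1 = np.eye(n) - X1 @ np.linalg.inv(X1.T @ X1) @ X1.T  # annihilates X1
MX = np.eye(n) - X_full @ np.linalg.inv(X_full.T @ X_full) @ X_full.T  # annihilates [X1, x2]
k = X_full.shape[1]
s = np.sqrt(y_sim @ MX @ y_sim / (n - k))  # least squares standard error estimate
t_fwl = (x2 @ M1 @ y_sim) / (s * np.sqrt(x2 @ M1 @ x2))
df_sim = pd.DataFrame({"y": y_sim, "x1": X1[:, 1], "x2": x2})
t_sm = smf.ols("y ~ x1 + x2", data=df_sim).fit().tvalues["x2"]
print(t_fwl, t_sm)  # the two t statistics should coincide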
# # Multiple Restrictions
# Multiple restrictions test can be formulated as following
#
# $$
# H_0:\quad \boldsymbol{y} = \boldsymbol{X}_1\boldsymbol{\beta_1} + \boldsymbol{u}\\
# H_1:\quad \boldsymbol{y} = \boldsymbol{X}_1\boldsymbol{\beta_1} +\boldsymbol{X}_2\boldsymbol{\beta_2}+ \boldsymbol{u}
# $$
# $H_0$ is the restricted model and $H_1$ the unrestricted one. Denoting the restricted residual sum of squares by $\text{RRSS}$ and the unrestricted residual sum of squares by $\text{URSS}$, the test statistic is
#
# $$
# F_{\beta_2}= \frac{(\text{RRSS}-\text{URSS})/r}{\text{URSS}/(n-k)}
# $$
# where $r=k_2$, the number of restrictions on $\beta_2$.
#
# Using the FWL regression with $\boldsymbol{M}_1$ and the decomposition $\text{TSS} = \text{ESS} + \text{RSS}$,
#
# $$
# \boldsymbol{M}_1\boldsymbol{y}=\boldsymbol{M}_1\boldsymbol{X}_2\boldsymbol{\beta}_2+\boldsymbol{M}_1\boldsymbol{u} \tag{FWL regression}
# $$
#
# \begin{align}
# \text{URSS}& =(\boldsymbol{M_1y})^T\boldsymbol{M_1y}- \underbrace{\left[\boldsymbol{M}_1\boldsymbol{X}_2[(\boldsymbol{M}_1\boldsymbol{X}_2)^T\boldsymbol{M}_1\boldsymbol{X}_2]^{-1}(\boldsymbol{M}_1\boldsymbol{X}_2)^T\boldsymbol{y}\right]^T}_{\text{projection matrix}\ P}\boldsymbol{y}\\
# &= \boldsymbol{y}^T\boldsymbol{M}_1\boldsymbol{y} -\boldsymbol{y}^T\boldsymbol{M}_1\boldsymbol{X}_2(\boldsymbol{X}_2^T\boldsymbol{M}_1\boldsymbol{X}_2)^{-1}\boldsymbol{X}^T_2\boldsymbol{M}_1 \boldsymbol{y} = \boldsymbol{y}^T\boldsymbol{M_X}\boldsymbol{y}
# \end{align}
# $$
# \text{RRSS} = (\boldsymbol{M}_1\boldsymbol{y})^T\boldsymbol{M}_1\boldsymbol{y} = \boldsymbol{y}^T\boldsymbol{M}_1\boldsymbol{y}
# $$
# Therefore
# $$
# \text{RRSS}-\text{URSS}=\boldsymbol{y}^T\boldsymbol{M}_1\boldsymbol{X}_2(\boldsymbol{X}_2^T\boldsymbol{M}_1\boldsymbol{X}_2)^{-1}\boldsymbol{X}^T_2\boldsymbol{M}_1 \boldsymbol{y}
# $$
# We now have all the parts of the $F$ statistic; combining them,
#
# $$
# F_{\boldsymbol{\beta}_{2}}=\frac{\boldsymbol{y}^{T} \boldsymbol{M}_{1} \boldsymbol{X}_{2}\left(\boldsymbol{X}_{2}^{T} \boldsymbol{M}_{1} \boldsymbol{X}_{2}\right)^{-1} \boldsymbol{X}_{2}^{T} \boldsymbol{M}_{1} \boldsymbol{y} / r}{\boldsymbol{y}^{T} \boldsymbol{M}_{\boldsymbol{X}} \boldsymbol{y} /(n-k)}
# $$
# In contrast, the $t$ statistic for a single restriction is
# $$
# t_{\beta_{2}}=\sqrt{\frac{\boldsymbol{y}^{\top} \boldsymbol{M}_{1} \boldsymbol{x}_{2}\left(\boldsymbol{x}_{2}^{\top} \boldsymbol{M}_{1} \boldsymbol{x}_{2}\right)^{-1} \boldsymbol{x}_{2}^{\top} \boldsymbol{M}_{1} \boldsymbol{y}}{\boldsymbol{y}^{\top} \boldsymbol{M}_{\boldsymbol{X}} \boldsymbol{y} /(n-k)}}
# $$
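# As a numerical illustration (a minimal sketch on freshly simulated data, not the dataset used later): compute the $F$ statistic from $\text{RRSS}$ and $\text{URSS}$ directly and compare it with statsmodels' `compare_f_test`.
# In[ ]:
rng = np.random.default_rng(1)
n = 100
df_f = pd.DataFrame(
    {
        "x1": rng.standard_normal(n),
        "x2": rng.standard_normal(n),
        "x3": rng.standard_normal(n),
    }
)
df_f["y"] = 1.0 + 2.0 * df_f["x1"] + 0.5 * df_f["x2"] - 0.3 * df_f["x3"] + rng.standard_normal(n)
fit_unrestricted = smf.ols("y ~ x1 + x2 + x3", data=df_f).fit()
fit_restricted = smf.ols("y ~ x1", data=df_f).fit()  # restriction: coefficients on x2 and x3 are zero
URSS = fit_unrestricted.ssr
RRSS = fit_restricted.ssr
r = 2  # number of restrictions
k = len(fit_unrestricted.params)  # parameters in the unrestricted model
F_manual = ((RRSS - URSS) / r) / (URSS / (n - k))
F_sm, p_value, df_diff = fit_unrestricted.compare_f_test(fit_restricted)
print(F_manual, F_sm)  # the two F statistics should coincide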
# To test the equality of two parameter vectors, we modify the $F$ test as
# $$
# F_{\gamma}=\frac{\left(\mathrm{RRSS}-\mathrm{RSS}_{1}-\mathrm{RSS}_{2}\right) / k}{\left(\mathrm{RSS}_{1}+\mathrm{RSS}_{2}\right) /(n-2 k)}
# $$
# For example, when the sample is divided into two subsamples, this statistic tests whether the parameters are stable across them; this so-called **Chow test** is common practice.
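# A minimal Chow-test sketch (assumptions: an artificial sample with a parameter shift halfway through, homoskedastic errors): pool the sample for $\text{RRSS}$, fit each subsample for $\text{RSS}_1$ and $\text{RSS}_2$, then form $F_\gamma$.
# In[ ]:
rng = np.random.default_rng(2)
n_half = 60
x_chow = rng.standard_normal(2 * n_half)
y_chow = np.where(
    np.arange(2 * n_half) < n_half,
    1.0 + 2.0 * x_chow,  # first subsample
    1.5 + 2.5 * x_chow,  # second subsample with shifted parameters
) + rng.standard_normal(2 * n_half)
df_chow = pd.DataFrame({"y": y_chow, "x": x_chow})
k = 2  # parameters per regression (intercept and slope)
RRSS = smf.ols("y ~ x", data=df_chow).fit().ssr  # pooled (restricted) fit
RSS_1 = smf.ols("y ~ x", data=df_chow.iloc[:n_half]).fit().ssr  # first subsample
RSS_2 = smf.ols("y ~ x", data=df_chow.iloc[n_half:]).fit().ssr  # second subsample
n_total = 2 * n_half
F_chow = ((RRSS - RSS_1 - RSS_2) / k) / ((RSS_1 + RSS_2) / (n_total - 2 * k))
p_value = 1 - sp.stats.f.cdf(F_chow, dfn=k, dfd=n_total - 2 * k)
print(F_chow, p_value)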
# # Asymptotic Theory
# **Asymptotic Theory** is concerned with the distribution of estimators and test statistics as the sample size $n$ tends to infinity.
# ## Law of Large Numbers
# The widely-known **Law of Large Numbers** ($\text{LLN}$) takes the form
# $$
# \bar{x} = \frac{1}{n}\sum_{t=1}^nx_t
# $$
# where the $x_t$ are independent random variables, each with bounded variance $\sigma_t^2$ and a common mean $\mu$. The $\text{LLN}$ tells us that $\bar{x} \rightarrow \mu$ as $n\rightarrow \infty$.
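# A quick simulation of the $\text{LLN}$ (a minimal sketch with i.i.d. exponential draws whose mean is $2$): the running sample mean settles down around $\mu$ as $n$ grows.
# In[ ]:
rng = np.random.default_rng(3)
mu_true = 2.0
draws = rng.exponential(scale=mu_true, size=10000)
running_mean = np.cumsum(draws) / np.arange(1, len(draws) + 1)  # sample mean for every n
fig, ax = plt.subplots(figsize=(9, 6))
ax.plot(running_mean, lw=2, label=r"$\bar{x}$ as $n$ grows")
ax.axhline(mu_true, color="red", ls="--", lw=2, label=r"$\mu$")
ax.legend()
plt.show()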
#
# The **Fundamental Theorem of Statistics** can be proved with $\text{LLN}$.
#
# An **empirical distribution function** ($\text{EDF}$) can be expressed as
# $$
# \hat{F}(x) \equiv \frac{1}{n} \sum_{t=1}^{n} I\left(x_{t} \leq x\right)
# $$
# where $I(\cdot)$ is an **indicator function**, which takes the value $1$ when its argument is true and $0$ otherwise. To prove the Fundamental Theorem of Statistics, we invoke the $\text{LLN}$ and expand the expectation
# $$\begin{aligned}
# \mathrm{E}\left(I\left(x_{t} \leq x\right)\right) &=0 \cdot \operatorname{Pr}\left(I\left(x_{t} \leq x\right)=0\right)+1 \cdot \operatorname{Pr}\left(I\left(x_{t} \leq x\right)=1\right) \\
# &=\operatorname{Pr}\left(I\left(x_{t} \leq x\right)=1\right)=\operatorname{Pr}\left(x_{t} \leq x\right)=F(x)
# \end{aligned}$$
# It turns out that $\hat{F}(x)$ is a consistent estimator of $F(x)$.
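# The theorem is easy to visualize (a minimal sketch with standard normal data): the $\text{EDF}$ tracks the true CDF more and more closely as $n$ grows.
# In[ ]:
rng = np.random.default_rng(4)
fig, ax = plt.subplots(figsize=(9, 6))
x_grid = np.linspace(-4, 4, 500)
for n_obs in [20, 200, 2000]:
    sample = rng.standard_normal(n_obs)
    edf = np.array([(sample <= x).mean() for x in x_grid])  # \hat{F}(x)
    ax.plot(x_grid, edf, lw=2, label="EDF, $n = %d$" % n_obs)
ax.plot(x_grid, sp.stats.norm.cdf(x_grid), color="k", ls="--", lw=2, label="true CDF")
ax.legend()
plt.show()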
# ## Central Limit Theorems
# The **Central Limit Theorems** ($\text{CLT}$) tell us that $\frac{1}{\sqrt{n}}$ times the sum of $n$ centered random variables approximately follows a normal distribution when $n$ is sufficiently large. The best-known $\text{CLT}$ is the **Lindeberg–Lévy** $\text{CLT}$: the quantity
# $$
# z \equiv \frac{1}{\sqrt{n}} \sum_{t=1}^{n} \frac{x_{t}-\mu}{\sigma}
# $$
# is **asymptotically distributed** as $N(0,1)$.
# We won't bother to prove them, but they are the implicit theoretical foundation whenever we discuss **simulation-based tests**.
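# Here is a quick simulation of the Lindeberg–Lévy $\text{CLT}$ (a minimal sketch with skewed exponential draws): the standardized sums are approximately $N(0,1)$ even though the underlying variable is far from normal.
# In[ ]:
rng = np.random.default_rng(5)
n_obs, n_rep = 200, 10000
mu_exp, sigma_exp = 1.0, 1.0  # mean and standard deviation of the exponential(1) distribution
samples = rng.exponential(scale=1.0, size=(n_rep, n_obs))
z_stats = np.sqrt(n_obs) * (samples.mean(axis=1) - mu_exp) / sigma_exp  # standardized sums
fig, ax = plt.subplots(figsize=(9, 6))
ax.hist(z_stats, bins=80, density=True, alpha=0.5, label="standardized sums")
x_grid = np.linspace(-4, 4, 500)
ax.plot(x_grid, sp.stats.norm.pdf(x_grid), lw=3, label="$N(0,1)$ pdf")
ax.legend()
plt.show()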
# # Bootstrapping
# Bootstrapping is a computational technique for handling small samples or samples that do not follow any known distribution.
#
# It is rarely needed in the classical linear regression model, since the sampling distribution of the coefficients is normal whenever the disturbance terms are normally distributed.
#
# However, for demonstration purposes, we will show how a bootstrap test is performed. The model is a simple linear regression
# $$
# Y_t = 5 + 6 X_t + 10u_t
# $$
# where $u_t\sim N(0, 1)$.
# Generate $Y$ and $X$ then pass them to a data frame.
# In[6]:
ols_obj = lr.OLS_Simu(100, 2, [5, 6], 10)
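# `linregfunc` is a local helper module. In case it is unavailable, here is a minimal stand-in, an assumption about its interface inferred from how it is used below: `OLS_Simu(n, k, params, sigma)` exposes `X` (with a leading constant column) and `y`.
# In[ ]:
class OLS_Simu_Sketch:
    """Hypothetical stand-in for lr.OLS_Simu: simulate y = X @ params + sigma * u."""

    def __init__(self, n, k, params, sigma):
        rng = np.random.default_rng()
        # constant column plus k - 1 standard normal regressors
        self.X = np.column_stack([np.ones(n), rng.standard_normal((n, k - 1))])
        self.y = self.X @ np.array(params) + sigma * rng.standard_normal(n)


# ols_obj = OLS_Simu_Sketch(100, 2, [5, 6], 10)  # drop-in replacement if linregfunc is missing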
# In[7]:
bootstr_params_const, bootstr_params_1 = [], []
nr_bootstrp = 100
# In[8]:
df = pd.DataFrame(ols_obj.X)
df["y"] = ols_obj.y
df.columns = ["const", "x1", "y"]
# In[9]:
resample_size = 100
# Repetitively sample from the generated dataframe and estimate parameters.
# In[10]:
fig, ax = plt.subplots(figsize=(18, 8), nrows=1, ncols=3)
for i in range(nr_bootstrp):
    # resample rows with replacement and re-estimate the model
    df_resample = df.sample(resample_size, replace=True)
    ols_fit = smf.ols(formula="y ~ x1", data=df_resample).fit()
    bootstrap_params = ols_fit.params
    bootstr_params_const.append(bootstrap_params.iloc[0])
    bootstr_params_1.append(bootstrap_params.iloc[1])
    y_pred_temp = ols_fit.predict(df_resample[["x1"]])
    ax[0].plot(df_resample["x1"], y_pred_temp, alpha=0.2)
ax[0].set_title("Bootstrap Regression Plot")
ax[1].hist(bootstr_params_const, bins=30)
ax[1].set_title(r"$\beta_1$")
ax[2].hist(bootstr_params_1, bins=30)
ax[2].set_title(r"$\beta_2$")
plt.show()
# # Confidence Interval
# A **confidence interval** for some parameter $\theta$ consists of all values $\theta_0$ that cannot be rejected by the corresponding hypothesis test. If the finite-sample distribution is known, we have an **exact confidence interval** $[\theta_l, \theta_u]$, where $\theta_l$ and $\theta_u$ are the lower and upper limits.
# For instance, we have a test statistic
# $$
# \tau\left(\boldsymbol{y}, \theta_{0}\right) \equiv\frac{\hat{\theta}-\theta_{0}}{s_{\theta}}
# $$
# Here $\boldsymbol{y}$ denotes the sample used to compute the particular realization of the statistic, $\hat{\theta}$ is the estimate of $\theta$, and $s_\theta$ is the corresponding standard error. The limits of the confidence interval can be found by solving the equation
# $$
# \tau(\boldsymbol{y}, \theta)=c_{\alpha}
# $$
# where $c_\alpha$ is the critical value at significance level $\alpha$. Solving
# $$
# \frac{\hat{\theta}-\theta}{s_{\theta}}=\pm c_{\alpha}
# $$
# gives the two solutions
# $$
# \theta_{l}=\hat{\theta}-s_{\theta} c_{\alpha} \quad \text { and } \quad \theta_{u}=\hat{\theta}+s_{\theta} c_{\alpha}
# $$
# The confidence interval is
# $$
# [\hat{\theta}-s_{\theta} c_{\alpha},\quad \hat{\theta}+s_{\theta} c_{\alpha}]
# $$
# To obtain critical values, it is convenient to use the quantile function (the inverse of the cumulative distribution function); e.g. the familiar two-sided $5\%$ critical value of the $t$-statistic is $\pm 1.96$ when the degrees of freedom are large.
# In[11]:
sp.stats.t.ppf([0.025, 0.975], df=1000)
# If you wish to have the same mass on each side of the distribution, the quantiles are $\frac{\alpha}{2}$ and $1-\frac{\alpha}{2}$.
# For a linear regression model
# $$
# \operatorname{Pr}\left(t_{\alpha / 2} \leq \frac{\hat{\beta}_{2}-\beta_{2}}{s_{2}} \leq t_{1-(\alpha / 2)}\right)=1-\alpha
# $$
# Solving it, we get
# $$
# \operatorname{Pr}\left(\hat{\beta}_{2}-s_{2} t_{\alpha / 2} \geq \beta_{2} \geq \hat{\beta}_{2}-s_{2} t_{1-(\alpha / 2)}\right)=1-\alpha
# $$
# Or, since $t_{\alpha / 2}=-t_{1-(\alpha / 2)}$, more intuitively written as
# $$
# [\hat{\beta}_{2}-s_{2} t_{1-(\alpha / 2)}, \quad \hat{\beta}_{2}+s_{2} t_{1-(\alpha / 2)}]
# $$
# In[33]:
df = pd.read_excel("Basic_Econometrics_practice_data.xlsx", sheet_name="US_CobbDauglas")
df.columns = ("Area", "Output", "Labour", "Capital")
ols_obj_fit = smf.ols(
    formula="np.log(Output) ~ np.log(Labour) + np.log(Capital)", data=df
).fit()
# In[34]:
print(ols_obj_fit.summary())
# In[46]:
ols_obj_fit.conf_int()[0]
# In[41]:
ols_obj_fit.conf_int()
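# The confidence limits reported above can be reproduced by hand (a minimal sketch using the fitted model's `params`, `bse`, and `df_resid` attributes): combine the point estimates, the standard errors, and the $t$ critical value for the residual degrees of freedom.
# In[ ]:
alpha = 0.05
t_crit = sp.stats.t.ppf(1 - alpha / 2, df=ols_obj_fit.df_resid)
ci_manual = pd.DataFrame(
    {
        0: ols_obj_fit.params - t_crit * ols_obj_fit.bse,  # lower limits
        1: ols_obj_fit.params + t_crit * ols_obj_fit.bse,  # upper limits
    }
)
print(ci_manual)  # should match ols_obj_fit.conf_int()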
# # Full Session of Bootstrapping
# In[33]:
df = pd.read_excel("Basic_Econometrics_practice_data.xlsx", sheet_name="US_CobbDauglas")
df.columns = ("Area", "Output", "Labour", "Capital")
ols_obj_fit = smf.ols(
    formula="np.log(Output) ~ np.log(Labour) + np.log(Capital)", data=df
).fit()
# In[34]:
print(ols_obj_fit.summary())
# In[53]:
len(ols_obj_fit.params)
# In[50]:
ols_obj_fit.params["Intercept"]
# In[46]:
ols_obj_fit.conf_int()[0]
# In[41]:
ols_obj_fit.conf_int()
# In[60]:
formula = "np.log(Output) ~ np.log(Labour) + np.log(Capital)"
# number of bootstrap rounds
B = 2000
# placeholder matrix B rows and k columns
results_bootrp = np.zeros((B, len(ols_obj_fit.params)))
# row id range
row_id = range(0, df.shape[0])
for i in range(B):
    # sample row numbers with replacement
    this_sample = np.random.choice(
        row_id, size=df.shape[0], replace=True
    )
    # define the data for this replicate
    df_bootstrp = df.iloc[this_sample]
    # estimate the model on the bootstrap sample
    params_bootstrp = smf.ols(formula, data=df_bootstrp).fit().params
    # store the estimates in row i of results_bootrp
    results_bootrp[i, :] = params_bootstrp
# In[62]:
results_bootrp = pd.DataFrame(
    results_bootrp, columns=["Intercept", "Labour", "Capital"]
)
# In[63]:
results_bootrp
# In[71]:
x_range = np.arange(B)
fig, ax = plt.subplots(figsize=(18, 12), nrows=3, ncols=2)
ax[0, 0].plot(x_range, results_bootrp["Intercept"], lw=1)
ax[0, 1].hist(results_bootrp["Intercept"], bins=50, orientation="horizontal")
ax[1, 0].plot(x_range, results_bootrp["Labour"], lw=1)
ax[1, 1].hist(results_bootrp["Labour"], bins=50, orientation="horizontal")
ax[2, 0].plot(x_range, results_bootrp["Capital"], lw=1)
ax[2, 1].hist(results_bootrp["Capital"], bins=50, orientation="horizontal")
plt.show()
# In[79]:
g = sns.PairGrid(results_bootrp)
g.fig.set_size_inches(12, 12)
g.map_diag(sns.kdeplot)
g.map_lower(sns.kdeplot, cmap="Blues_d")
g.map_upper(sns.scatterplot)
plt.show()
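# Finally, percentile bootstrap confidence intervals (a minimal sketch): take the 2.5% and 97.5% quantiles of the bootstrap distribution for each parameter and compare them with the OLS confidence intervals above.
# In[ ]:
ci_bootstrap = results_bootrp.quantile([0.025, 0.975])
print(ci_bootstrap)
print(ols_obj_fit.conf_int())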
# In[ ]: