#!/usr/bin/env python
# coding: utf-8

# In[1]:

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd  # NOTE(review): required by the "Shanghai Covid" cells below, was never imported
import statsmodels.api as sm  # NOTE(review): required by the log-linear OLS cells below, was never imported
from scipy.optimize import curve_fit

# # Linear VS Nonlinear Model

# Take a look at these models
# $$
# \begin{aligned}
# &Y_{i}=\beta_{1}+\beta_{2}\left(\frac{1}{X_{i}}\right)+u_{i}\\
# &Y_{i}=\beta_{1}+\beta_{2} \ln X_{i}+u_{i}\\
# &\ln Y_{i}=\beta_{1}+\beta_{2} X_{i}+u_{i}\\
# &\ln Y_{i}=\ln \beta_{1}+\beta_{2} \ln X_{i}+u_{i}\\
# &\ln Y_{i}=\beta_{1}-\beta_{2}\left(\frac{1}{X_{i}}\right)+u_{i}
# \end{aligned}
# $$
# The variables might have some nonlinear form, but the parameters are all linear
# (in the $4$th model we can denote $\alpha_1=\ln{\beta_1}$); as long as we can
# convert them into linear form with some mathematical manipulation, we call them
# **intrinsically linear models**.

# How about these two models?
# \begin{aligned}
# &Y_{i}=e^{\beta_{1}+\beta_{2} X_{i}+u_{i}} \\
# &Y_{i}=\frac{1}{1+e^{\beta_{1}+\beta_{2} X_{i}+u_{i}}} \\
# \end{aligned}
# The first one can easily be converted into a linear one by taking the natural log
# $$
# \ln{Y_i}=\beta_{1}+\beta_{2} X_{i}+u_{i}
# $$
# The second one is a bit tricky; we will deal with it in more detail in the
# chapter on binary choice models. But you can be assured that with a little
# manipulation the model becomes
# $$
# \ln \left(\frac{1-Y_{i}}{Y_{i}}\right)=\beta_{1}+\beta_{2} X_{i}+u_{i}
# $$
# which is also intrinsically linear.

# These two models are **intrinsically nonlinear models**; there is no way to
# turn them into linear form.
# \begin{aligned}
# &Y_{i}=\beta_{1}+\left(0.75-\beta_{1}\right) e^{-\beta_{2}\left(X_{i}-2\right)}+u_{i} \\
# &Y_{i}=\beta_{1}+\beta_{2}^{3} X_{i}+u_{i}\\
# \end{aligned}

# Can we transform the Cobb-Douglas model into linear form? The first version
# below can, by taking the natural log. But the second one has an additive
# disturbance term, which makes it intrinsically nonlinear.
# \begin{aligned}
# &Y_{i}=\beta_{1} X_{2 i}^{\beta_{2}} X_{3 i}^{\beta_{3}} u_{i}\\
# &Y_{i}=\beta_{1} X_{2 i}^{\beta_{2}} X_{3 i}^{\beta_{3}}+ u_{i}\\
# \end{aligned}

# Here is another famous economic model, the _constant elasticity of
# substitution_ (CES) production function.
# $$
# Y_{i}=A\left[\delta K_{i}^{-\beta}+(1-\delta) L_{i}^{-\beta}\right]^{-1 / \beta}u_i
# $$
# No matter what you do with it, it can't be transformed into linear form, thus
# it is intrinsically nonlinear.

# # OLS On A Nonlinear Model

# Consider an intrinsically nonlinear model
# $$
# Y_{i}=\beta_{1} e^{\beta_{2}X_{i}}+u_{i}
# $$
# Use the OLS algorithm that minimizes the $RSS$
# $$
# \begin{gathered}
# \sum_{i=0}^n u_{i}^{2}=\sum_{i=0}^n\left(Y_{i}-\beta_{1} e^{\beta_{2} X_{i}}\right)^{2}
# \end{gathered}
# $$
# Take the partial derivative with respect to both $\beta_1$ and $\beta_2$; the
# first order conditions are
# $$
# \begin{gathered}
# \frac{\partial \sum_{i=0}^n u_{i}^{2}}{\partial \beta_{1}}=2 \sum_{i=0}^n\left(Y_{i}-\beta_{1} e^{\beta_{2} X_{i}}\right)\left(-1 e^{\beta_{2} X_{i}}\right) =0\\
# \frac{\partial \sum_{i=0}^n u_{i}^{2}}{\partial \beta_{2}}=2 \sum_{i=0}^n\left(Y_{i}-\beta_{1} e^{\beta_{2} X_{i}}\right)\left(-\beta_{1} e^{\beta_{2} X_{i}} X_{i}\right)=0
# \end{gathered}
# $$
# Collecting terms and denoting the estimated coefficients as $b_1$ and $b_2$
# $$
# \begin{aligned}
# \sum_{i=0}^n Y_{i} e^{b_{2} X_{i}} &=b_{1} \sum_{i=0}^n e^{2 {b}_{2} X_{i}} \\
# \sum_{i=0}^n Y_{i} X_{i} e^{b_{2} X_{i}} &={b}_{1} \sum_{i=0}^n X_{i} e^{2 {b}_{2} X_{i}}
# \end{aligned}
# $$
# These equations characterize the solution, but they are not **closed-form
# solutions** that we could evaluate by simply plugging in the data: the unknowns
# are expressed in terms of the unknowns themselves, so even with these formulas
# we cannot type them directly into Python.

# # Gauss-Newton Iterative Method

# We will not discuss the details of this algorithm, as they would confuse more
# than clarify.
# But this **Gauss-Newton Iterative Method** is a kind of trial-and-error method
# that gradually approaches the optimal coefficients: it evaluates the $RSS$ for
# a trial set of parameters, records the result, then tries another set; while
# the $RSS$ keeps getting smaller the algorithm keeps updating the parameters,
# until the $RSS$ shows no significant improvement.

# Define the function
# $$
# Y_{i}=\beta_{1} e^{\beta_{2}X_{i}}
# $$

# In[2]:


def exp_func(x, beta1, beta2):
    """Exponential growth curve ``Y = beta1 * exp(beta2 * x)``.

    Parameters
    ----------
    x : array_like
        Independent variable.
    beta1 : float
        Scale parameter (value of Y at x = 0).
    beta2 : float
        Exponential growth-rate parameter.

    Returns
    -------
    ndarray or float
        ``beta1 * exp(beta2 * x)`` evaluated element-wise.
    """
    return beta1 * np.exp(beta2 * x)


# Simulate data $Y$ then estimate the parameters with ```curve_fit``` function

# In[3]:

xdata = np.linspace(0, 1, 50)
y = exp_func(xdata, 2, 3)
y_noise = 5 * np.random.randn(len(y))  # additive Gaussian disturbance
ydata = y + y_noise

fig, ax = plt.subplots(figsize=(12, 8))
ax.scatter(xdata, ydata, label="data", color="purple")
# Nonlinear least squares (Levenberg-Marquardt by default in SciPy)
popt, pcov = curve_fit(exp_func, xdata, ydata)
ax.plot(xdata, exp_func(xdata, popt[0], popt[1]), lw=3, color="tomato")
ax.set_title(r"$Y_{i}=\beta_{1} e^{\beta_{2}X_{i}}$")
plt.show()

# Given the fact that this is an elementary course on econometrics, we will not
# go any deeper into this topic. In Advanced Econometrics, we will have a very
# extensive discussion of nonlinear regression.

# # Shanghai Covid

# In[62]:

# BUG FIX: pandas was used here but never imported anywhere in the file, which
# raised a NameError on `pd`.
import pandas as pd

df_shcovid = pd.read_excel("Shanghai Covid.xlsx")

# In[63]:

df_shcovid.columns = ["Date", "Cases"]
df_shcovid = df_shcovid.dropna()

# Define the function
# $$
# Y_{i}= \beta_1 e^{\beta_{2}X_{i}}
# $$
# Take log on both sides.
# $$
# \ln{Y_i}=\ln{\beta_1}+\beta_2 X_i
# $$

# In[80]:

# BUG FIX: statsmodels was used below but never imported anywhere in the file,
# which raised a NameError on `sm`.
import statsmodels.api as sm

# OLS on the log-linear form: regress ln(Cases) on a constant and a simple
# time trend 0, 1, 2, ...
logY = np.log(df_shcovid["Cases"])
X = np.arange(len(logY))  # BUG FIX: was len(Y) — Y is undefined at this point
X = sm.add_constant(X)
model = sm.OLS(logY, X).fit()  # BUG FIX: was sm.OLS(Y, X) with Y undefined
print_model = model.summary()
print(print_model)

# In[90]:

# Back out the parameters of Y = beta1 * exp(beta2 * X):
# beta1 = exp(intercept), beta2 = slope of the time trend.
beta_1 = np.exp(model.params[0])
beta_2 = model.params[1]
Y = beta_1 * np.exp(beta_2 * X[:, 1])  # fitted cases on the original scale

# In[94]:

fig, ax = plt.subplots(figsize=(12, 5), nrows=1, ncols=2)
fig.autofmt_xdate(rotation=45)
ax[0].scatter(df_shcovid["Date"], df_shcovid["Cases"])
ax[0].set_ylabel("No-Symptom Case")
ax[0].plot(df_shcovid["Date"], Y)
ax[1].plot(df_shcovid["Date"], model.fittedvalues)
ax[1].set_ylabel("Log No-Symptom Case")
ax[1].scatter(df_shcovid["Date"], logY)  # BUG FIX: was ln_case, undefined name
plt.show()

# $$
# Y_{i}= 5.4 e^{0.21X_{i}}
# $$