#!/usr/bin/env python
# coding: utf-8

# ## CSCI-UA 9473 Introduction to Machine Learning
# ### Assignment 1: Gradient descent
#
#
# __Given date:__ May 29
#
# __Due date:__ June 7
#
# __Total:__ 10pts

# ### Question 1. (5pts) Local vs. global minima and gradient descent
#
# We consider the following function.
#
# \begin{align}
# F(x_1, x_2) &= 3(1-x_1)^2\exp(-(x_1^2) - (x_2+1)^2)\\
# &\quad - 10(x_1/5 - x_1^3 - x_2^5)\exp(-x_1^2-x_2^2)\\
# &\quad - (1/3)\exp(-(x_1+1)^2 - x_2^2)
# \end{align}
#
# The surface plot of this function is given below, together with its contour plot. The function has a single global minimum, located near $(0.23, -1.62)$ and shown in red in the contour plot.
#
# We want to implement gradient descent iterations on that function. Starting from a random initial point $(x_1, x_2)$, code the following updates
#
# \begin{align}
# x_1^{(k+1)} &= x_1^{(k)} - \eta\,\text{grad}_{x_1} F(x_1, x_2)\\
# x_2^{(k+1)} &= x_2^{(k)} - \eta\,\text{grad}_{x_2} F(x_1, x_2)
# \end{align}
#
# where $\text{grad}_{x_i}$ denotes the partial derivative of $F(x_1, x_2)$ with respect to $x_i$. Choose a sufficiently small learning rate $\eta$ and plot the iterates (in white) on the contour plot. Repeat your experiments for various initial iterates.

# In[15]:


from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
import numpy as np

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')  # fig.gca(projection='3d') no longer works in recent matplotlib releases

# Make data.
x = np.linspace(-3, 3, 100)
y = np.linspace(-3, 3, 100)
x1, x2 = np.meshgrid(x, y)

F = (3*(1-x1)**2 * np.exp(-(x1**2) - (x2+1)**2)
     - 10*(np.true_divide(x1, 5) - x1**3 - x2**5)*np.exp(-x1**2 - x2**2)
     - np.true_divide(1, 3)*np.exp(-(x1+1)**2 - x2**2))

# Plot the surface.
surf = ax.plot_surface(x1, x2, F, linewidth=0, alpha=1, cmap='viridis')
plt.show()


# In[27]:


fig1, ax = plt.subplots(constrained_layout=True)
contour = ax.contourf(x1, x2, F, cmap='viridis')
plt.scatter(0.23, -1.62, c='r', marker='X')
plt.show()


# In[ ]:


# put your solution here (a possible sketch is given at the end of this notebook)


# ### Question 2. (5pts) Regression through the normal equations
#
# We consider the simple regression problem below, similar to the one discussed in class.
# Find the model that minimizes the sum-of-squares loss
#
# \begin{align}
# \ell(\boldsymbol \beta) = \frac{1}{2N}\sum_{i=1}^N \left(t_{\text{noisy}}^{(i)} - (\beta_0 + \beta_1 x^{(i)})\right)^2
# \end{align}
#
# __using the Normal Equations__. To do this:
#
# - Start by building the matrix $\tilde{\boldsymbol X}$ with
# \begin{align}
# \tilde{\boldsymbol X} = \left[\begin{array}{cc}
# 1 & x^{(1)} \\
# \vdots & \vdots \\
# 1 & x^{(N)} \end{array}\right]
# \end{align}
#
# - Then compute the matrix $\tilde{\boldsymbol X}^T\tilde{\boldsymbol X}$ and the vector $\tilde{\boldsymbol X}^T\boldsymbol t$, where $\boldsymbol t = [t_{\text{noisy}}^{(1)}, \ldots, t_{\text{noisy}}^{(N)}]^T$.
#
# - Finally, solve the equations through
#
# \begin{align}
# \boldsymbol \beta_{\text{OLS}} = \left(\tilde{\boldsymbol X}^T\tilde{\boldsymbol X}\right)^{-1}\left(\tilde{\boldsymbol X}^T\boldsymbol t\right)
# \end{align}
#
# using the function __np.linalg.inv__ from the linear algebra package. Plot the result in green on top of the plot below and compare it with the true (blue, unknown) model.

# In[29]:


import numpy as np
import matplotlib.pyplot as plt

x = np.linspace(0, 5, 10)
noise = np.random.normal(0, .3, len(x))

beta_true = np.random.normal(0, 1, 2)

t = beta_true[0] + beta_true[1]*x
tnoisy = t + noise

plt.scatter(x, tnoisy, c='r')
plt.plot(x, t)
plt.show()


# In[ ]:


# put your code here (a possible sketch is given at the end of this notebook)
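# ### A possible sketch for Question 1 (gradient descent)
#
# The cell below is one possible way to fill in the empty Question 1 cell above; it is a minimal sketch, not the reference solution. The gradient of $F$ is approximated with central finite differences (you may prefer to differentiate $F$ analytically), and the learning rate `eta = 0.01`, the number of iterations `num_iters = 200`, the helper names `F_fun` / `grad_F`, and the range of the random initial point are all illustrative choices. The cell reuses the grid `x1`, `x2` and the values `F` computed in the plotting cells of Question 1.

# In[ ]:


import numpy as np
import matplotlib.pyplot as plt


def F_fun(a, b):
    """Evaluate the function F of Question 1 at the point (a, b)."""
    return (3*(1 - a)**2 * np.exp(-(a**2) - (b + 1)**2)
            - 10*(a/5 - a**3 - b**5)*np.exp(-a**2 - b**2)
            - (1/3)*np.exp(-(a + 1)**2 - b**2))


def grad_F(a, b, h=1e-6):
    """Approximate the gradient of F at (a, b) with central finite differences."""
    g1 = (F_fun(a + h, b) - F_fun(a - h, b)) / (2*h)
    g2 = (F_fun(a, b + h) - F_fun(a, b - h)) / (2*h)
    return g1, g2


eta = 0.01                            # learning rate (kept small for stable steps)
num_iters = 200                       # number of gradient descent updates
point = np.random.uniform(-2, 2, 2)   # random initial iterate (x_1, x_2)

iterates = [point.copy()]
for k in range(num_iters):
    g1, g2 = grad_F(point[0], point[1])
    point = point - eta*np.array([g1, g2])  # the two updates of the assignment
    iterates.append(point.copy())
iterates = np.array(iterates)

# Overlay the iterates (in white) on the contour plot of Question 1.
fig, ax = plt.subplots(constrained_layout=True)
ax.contourf(x1, x2, F, cmap='viridis')
ax.scatter(0.23, -1.62, c='r', marker='X')   # global minimum
ax.plot(iterates[:, 0], iterates[:, 1], 'w.-')
plt.show()

# Depending on the initial point, the iterates may settle in a local minimum rather
# than the global one, which is the behaviour the question asks you to explore.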
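# ### A possible sketch for Question 2 (normal equations)
#
# The cell below is one possible way to fill in the empty Question 2 cell above, following the steps listed in the question: build $\tilde{\boldsymbol X}$, form $\tilde{\boldsymbol X}^T\tilde{\boldsymbol X}$ and $\tilde{\boldsymbol X}^T\boldsymbol t$, invert with __np.linalg.inv__, and plot the fit in green. The variable names `Xtilde`, `XtX`, `Xtt` and `beta_OLS` are illustrative choices, and the cell reuses `x`, `t` and `tnoisy` from the data-generating cell of Question 2.

# In[ ]:


import numpy as np
import matplotlib.pyplot as plt

# Design matrix: a column of ones followed by the column of inputs x.
Xtilde = np.column_stack((np.ones(len(x)), x))

# The two ingredients of the normal equations.
XtX = Xtilde.T @ Xtilde     # 2 x 2 matrix
Xtt = Xtilde.T @ tnoisy     # length-2 vector

# Solve the normal equations with an explicit inverse, as requested.
# (np.linalg.solve(XtX, Xtt) would be the numerically preferred alternative.)
beta_OLS = np.linalg.inv(XtX) @ Xtt

# Plot the OLS fit (green) on top of the noisy data (red) and the true model (blue).
plt.scatter(x, tnoisy, c='r')
plt.plot(x, t, label='true model')
plt.plot(x, Xtilde @ beta_OLS, 'g', label='OLS fit')
plt.legend()
plt.show()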