import matplotlib.pyplot as plt
import numpy as np
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
def plot_function(f,xmin,xmax,**kwargs):
    ts = np.linspace(xmin,xmax,1000)
    plt.plot(ts,[f(t) for t in ts],**kwargs)
def scalar_field_heatmap(f,xmin,xmax,ymin,ymax,xsteps=100,ysteps=100):
    fig, ax = plt.subplots()
    fig.set_size_inches(7,7)
    fv = np.vectorize(f)
    X = np.linspace(xmin, xmax, xsteps)
    Y = np.linspace(ymin, ymax, ysteps)
    X, Y = np.meshgrid(X, Y)
    z = fv(X,Y)
    c = ax.pcolormesh(X,Y,z, cmap='plasma')
    ax.axis([X.min(), X.max(), Y.min(), Y.max()])
    fig.colorbar(c, ax=ax)
def plot_scalar_field(f,xmin,xmax,ymin,ymax,xsteps=100,ysteps=100,c=None,cmap=cm.coolwarm,alpha=1,antialiased=False,zorder=0):
    fig = plt.gcf()
    fig.set_size_inches(7,7)
    # fig.gca(projection='3d') was removed in newer Matplotlib; add_subplot works in both
    ax = fig.add_subplot(projection='3d')
    fv = np.vectorize(f)
    # Make data.
    X = np.linspace(xmin, xmax, xsteps)
    Y = np.linspace(ymin, ymax, ysteps)
    X, Y = np.meshgrid(X, Y)
    Z = fv(X,Y)
    # Plot the surface.
    surf = ax.plot_surface(X, Y, Z, cmap=cmap, color=c, alpha=alpha,
                           linewidth=0, antialiased=antialiased, zorder=zorder)
def bmw_finder(mileage,price):
    if price > 25000:
        return 1
    else:
        return 0
from car_data import bmws, priuses
all_car_data = []
for bmw in bmws:
    all_car_data.append((bmw.mileage,bmw.price,1))
for prius in priuses:
    all_car_data.append((prius.mileage,prius.price,0))
all_car_data
[(93404.0, 13999.0, 1), (110890.0, 13995.0, 1), (94133.0, 13982.0, 1), (46778.0, 14599.0, 1), (53106.0, 22500.0, 1), (58761.0, 24998.0, 1), (108816.0, 24947.0, 1), (81100.0, 13995.0, 1), (90000.0, 8400.0, 1), (68613.0, 14995.0, 1), (94000.0, 11995.0, 1), (92500.0, 10995.0, 1), (112081.0, 11995.0, 1), (105121.0, 11500.0, 1), (92000.0, 2013.0, 1), (107953.0, 12999.0, 1), (56000.0, 18995.0, 1), (101191.0, 18900.0, 1), (64365.0, 16998.0, 1), (66000.0, 10000.0, 1), (76675.0, 12500.0, 1), (93015.0, 15995.0, 1), (80917.0, 14970.0, 1), (96000.0, 16795.0, 1), (70000.0, 12999.0, 1), (107000.0, 11950.0, 1), (78000.0, 18995.0, 1), (78000.0, 15000.0, 1), (92000.0, 2013.0, 1), (57624.0, 21963.0, 1), (77854.0, 16995.0, 1), (48310.0, 22998.0, 1), (51656.0, 20998.0, 1), (62410.0, 19991.0, 1), (39332.0, 29995.0, 1), (31420.0, 21000.0, 1), (41267.0, 22450.0, 1), (73000.0, 19999.0, 1), (94608.0, 11995.0, 1), (67000.0, 24964.0, 1), (50000.0, 18985.0, 1), (73601.0, 16999.0, 1), (50000.0, 18985.0, 1), (95229.0, 15980.0, 1), (69000.0, 27247.0, 1), (73309.0, 13790.0, 1), (41573.0, 21970.0, 1), (42172.0, 25842.0, 1), (41085.0, 18750.0, 1), (81625.0, 13990.0, 1), (38000.0, 24900.0, 1), (65000.0, 12000.0, 1), (103000.0, 16495.0, 1), (76000.0, 18500.0, 1), (34000.0, 19950.0, 1), (66000.0, 29500.0, 1), (36756.0, 18999.0, 1), (42681.0, 26697.0, 1), (29000.0, 29997.0, 1), (51000.0, 26968.0, 1), (26000.0, 25980.0, 1), (88262.0, 18338.0, 1), (48321.0, 21322.0, 1), (93762.0, 16991.0, 1), (79972.0, 16499.0, 1), (54177.0, 20774.0, 1), (43294.0, 21800.0, 1), (55736.0, 24985.0, 1), (74896.0, 22599.0, 1), (72282.0, 20770.0, 1), (58328.0, 22000.0, 1), (49856.0, 20998.0, 1), (42229.0, 21998.0, 1), (38126.0, 25998.0, 1), (46000.0, 21900.0, 1), (95000.0, 18500.0, 1), (30000.0, 19899.0, 1), (63000.0, 21500.0, 1), (39835.0, 27629.0, 1), (51043.0, 25964.0, 1), (27000.0, 30998.0, 1), (39591.0, 25000.0, 1), (37703.0, 35990.0, 1), (41467.0, 30254.0, 1), (83000.0, 15900.0, 1), (58623.0, 21998.0, 1), (57276.0, 21998.0, 1), (27620.0, 23998.0, 1), (40237.0, 21998.0, 1), (52961.0, 22998.0, 1), (36707.0, 34888.0, 1), (42363.0, 29891.0, 1), (44943.0, 23697.0, 1), (90000.0, 11000.0, 1), (90000.0, 13000.0, 1), (42470.0, 30977.0, 1), (27000.0, 20999.0, 1), (37000.0, 31990.0, 1), (63000.0, 2016.0, 1), (51000.0, 2016.0, 1), (59014.0, 13995.0, 0), (70000.0, 9900.0, 0), (84507.0, 14998.0, 0), (76000.0, 8300.0, 0), (41868.0, 15998.0, 0), (40631.0, 13991.0, 0), (74000.0, 10532.0, 0), (69703.0, 12900.0, 0), (37000.0, 15850.0, 0), (33000.0, 13995.0, 0), (75000.0, 9500.0, 0), (35000.0, 11100.0, 0), (97109.0, 11991.0, 0), (128882.0, 8359.0, 0), (39000.0, 13759.0, 0), (65000.0, 13990.0, 0), (37000.0, 12950.0, 0), (130000.0, 9500.0, 0), (63430.0, 12995.0, 0), (120000.0, 7600.0, 0), (74000.0, 9000.0, 0), (46000.0, 13982.0, 0), (73428.0, 14991.0, 0), (200000.0, 7950.0, 0), (88180.0, 12998.0, 0), (89466.0, 14599.0, 0), (76985.0, 11990.0, 0), (65000.0, 13990.0, 0), (63621.0, 12395.0, 0), (96000.0, 7900.0, 0), (74000.0, 9000.0, 0), (75000.0, 11000.0, 0), (143000.0, 8999.0, 0), (112000.0, 8900.0, 0), (74000.0, 9000.0, 0), (185000.0, 5950.0, 0), (80424.0, 13000.0, 0), (48655.0, 17998.0, 0), (102000.0, 9997.0, 0), (67508.0, 14991.0, 0), (39395.0, 18998.0, 0), (44531.0, 16577.0, 0), (68000.0, 12495.0, 0), (35000.0, 11999.0, 0), (68000.0, 12495.0, 0), (115000.0, 10900.0, 0), (66293.0, 14472.0, 0), (141000.0, 10000.0, 0), (109496.0, 9677.0, 0), (82000.0, 7500.0, 0), (106361.0, 10953.0, 0), (187000.0, 8800.0, 0), (76912.0, 11993.0, 0), (117000.0, 9450.0, 0), (113963.0, 
11494.0, 0), (72000.0, 12490.0, 0), (71218.0, 11994.0, 0), (48924.0, 13786.0, 0), (100000.0, 7800.0, 0), (98531.0, 10979.0, 0), (43015.0, 16977.0, 0), (29695.0, 16599.0, 0), (44640.0, 16900.0, 0), (56478.0, 13990.0, 0), (114959.0, 11060.0, 0), (83861.0, 13000.0, 0), (91314.0, 11911.0, 0), (37000.0, 16375.0, 0), (46000.0, 14500.0, 0), (45358.0, 16991.0, 0), (68432.0, 16998.0, 0), (37739.0, 19998.0, 0), (89352.0, 12995.0, 0), (65000.0, 13500.0, 0), (61000.0, 13000.0, 0), (35385.0, 13988.0, 0), (65000.0, 13500.0, 0), (57000.0, 16791.0, 0), (90019.0, 14790.0, 0), (136095.0, 11595.0, 0), (27000.0, 16235.0, 0), (54480.0, 17910.0, 0), (100000.0, 16000.0, 0), (65899.0, 16482.0, 0), (27000.0, 17734.0, 0), (144000.0, 9800.0, 0), (26989.0, 17925.0, 0), (67662.0, 13400.0, 0), (38401.0, 16998.0, 0), (80982.0, 13999.0, 0), (43000.0, 17492.0, 0), (87428.0, 11794.0, 0), (34028.0, 17491.0, 0), (37000.0, 15800.0, 0), (32000.0, 20995.0, 0), (89000.0, 12999.0, 0), (64000.0, 15000.0, 0), (45000.0, 16900.0, 0), (38000.0, 13500.0, 0), (71000.0, 12500.0, 0)]
def test_classifier(classifier, data):
    trues = 0
    falses = 0
    for mileage, price, is_bmw in data:
        if classifier(mileage, price) == is_bmw: #1
            trues += 1
        else:
            falses += 1 #1
    return trues / (trues + falses)
test_classifier(bmw_finder, all_car_data)
0.59
Exercise: Update the test_classifier function to print the number of true positives, true negatives, false positives, and false negatives. Printing these for the bmw_finder classifier, what can you tell about the performance of the classifier?
def test_classifier(classifier, data, verbose=False): #1
    true_positives = 0 #2
    true_negatives = 0
    false_positives = 0
    false_negatives = 0
    for mileage, price, is_bmw in data:
        predicted = classifier(mileage,price)
        if predicted and is_bmw: #3
            true_positives += 1
        elif predicted:
            false_positives += 1
        elif is_bmw:
            false_negatives += 1
        else:
            true_negatives += 1
    if verbose:
        print("true positives %f" % true_positives) #4
        print("true negatives %f" % true_negatives)
        print("false positives %f" % false_positives)
        print("false negatives %f" % false_negatives)
    return (true_positives + true_negatives) / len(data) #5
test_classifier(bmw_finder,all_car_data,verbose=True)
true positives 18.000000
true negatives 100.000000
false positives 0.000000
false negatives 82.000000
0.59
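With no false positives but 82 false negatives, bmw_finder almost never mislabels a Prius, but it misses most of the BMWs: the $25,000 cutoff is too high, since many BMWs in the data set sell for less than that.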
Exercise: Find a way to update the bmw_finder function slightly to improve its performance, and use the test_classifier function to confirm that your improved function has better than 59% accuracy.
def bmw_finder2(mileage,price):
    if price > 20000:
        return 1
    else:
        return 0
test_classifier(bmw_finder2, all_car_data)
0.735
def plot_data(ds):
    plt.scatter([d[0] for d in ds if d[2]==0],[d[1] for d in ds if d[2]==0],c='C1')
    plt.scatter([d[0] for d in ds if d[2]==1],[d[1] for d in ds if d[2]==1],c='C0',marker='x')
    plt.ylabel("Price ($)",fontsize=16)
    plt.xlabel("Odometer (mi)",fontsize=16)
plot_data(all_car_data)
plot_data(all_car_data)
plot_function(lambda x: 25000, 0, 200000, c='k')
plot_data(all_car_data)
plot_function(lambda x: 21000 - 0.07 * x, 0, 200000, c='k')
def decision_boundary_classify(mileage,price):
    if price > 21000 - 0.07 * mileage:
        return 1
    else:
        return 0
test_classifier(decision_boundary_classify, all_car_data)
0.805
Mini-project: What is the decision boundary of the form $p = constant$ giving the best classification accuracy on the test data set?
def constant_price_classifier(cutoff_price):
    def c(x,p):
        if p > cutoff_price:
            return 1
        else:
            return 0
    return c

def cutoff_accuracy(cutoff_price):
    c = constant_price_classifier(cutoff_price)
    return test_classifier(c,all_car_data)
all_prices = [price for (mileage,price,is_bmw) in all_car_data]
max(all_prices,key=cutoff_accuracy)
17998.0
test_classifier(constant_price_classifier(17998.0), all_car_data)
0.795
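The best constant cutoff, $17,998, gives 79.5% accuracy, slightly worse than the 80.5% achieved by the sloped decision boundary above.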
def make_scale(data):
    min_val = min(data) #1
    max_val = max(data)
    def scale(x): #2
        return (x-min_val) / (max_val - min_val)
    def unscale(y): #3
        return y * (max_val - min_val) + min_val
    return scale, unscale #4

price_scale, price_unscale = make_scale([x[1] for x in all_car_data]) #5
mileage_scale, mileage_unscale = make_scale([x[0] for x in all_car_data])
scaled_car_data = [(mileage_scale(mileage), price_scale(price), is_bmw)
                   for mileage,price,is_bmw in all_car_data]
plot_data(scaled_car_data)
plot_data(scaled_car_data)
plot_function(lambda x: 0.56 - 0.35*x,0,1,c='k')
scalar_field_heatmap(lambda x,p: p + 0.35*x - 0.56, 0, 1,0,1)
plt.ylabel('Price', fontsize=16)
plt.xlabel('Mileage', fontsize=16)
plot_function(lambda x: 0.56-0.35*x,0,1,c='k')
from math import exp
def sigmoid(x):
    return 1 / (1+exp(-x))
plot_function(sigmoid,-5,5)
def f(x,p):
    return p + 0.35 * x - 0.56

def l(x,p):
    return sigmoid(f(x,p))
scalar_field_heatmap(l, 0, 1,0,1)
# plot_data(scaled_car_data,white=True)
plt.ylabel('Price', fontsize=16)
plt.xlabel('Mileage', fontsize=16)
plot_function(lambda x: 0.56-0.35*x,0,1,c='k')
plot_scalar_field(l, -5, 5, -5, 5)
plot_scalar_field(f,-5,5,-5,5)
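A quick sanity check (mine, not from the chapter): on the decision boundary $p = 0.56 - 0.35x$ the value of $f$ is zero, so the logistic function $l$ returns exactly 0.5 there; above the line $l$ is greater than 0.5 and below it less.
f(0, 0.56)   # 0.0, since (0, 0.56) lies on the line
l(0, 0.56)   # sigmoid(0) = 0.5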
Exercise: Find a function $h(x)$ such that large positive values of $x$ cause $h(x)$ to be very close to $0$, large negative values of $x$ cause $h(x)$ to be very close to $1$, and $h(3) = 0.5$.
def h(x):
    return sigmoid(3-x)
plot_function(h,-5,10)
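Since $h(3) = \sigma(3-3) = \sigma(0) = 0.5$, and $\sigma(3-x)$ approaches 0 as $x$ grows large and 1 as $x$ becomes very negative, this function meets all three requirements.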
def make_logistic(a,b,c):
    def l(x,p):
        return sigmoid(a*x + b*p - c)
    return l

def simple_logistic_cost(a,b,c):
    l = make_logistic(a,b,c)
    errors = [abs(is_bmw-l(x,p))
              for x,p,is_bmw in scaled_car_data]
    return sum(errors)
from math import log
plot_function(lambda x: -log(x),0.01,1)
-log(0.01)
4.605170185988091
-log(0.001)
6.907755278982137
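Because $-\log(y)$ grows without bound as $y$ approaches 0, using it as a cost heavily penalizes a logistic function that assigns a value near zero to the correct class; this is the idea behind point_cost below.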
def point_cost(l,x,p,is_bmw): #1
    wrong = 1 - is_bmw
    return -log(abs(wrong - l(x,p)))

def logistic_cost(a,b,c):
    l = make_logistic(a,b,c)
    errors = [point_cost(l,x,p,is_bmw) #2
              for x,p,is_bmw in scaled_car_data]
    return sum(errors)
def plot_line(acoeff,bcoeff,ccoeff,**kwargs):
    a,b,c = acoeff, bcoeff, ccoeff
    # black by default
    if 'c' not in kwargs:
        kwargs['c'] = 'k'
    if b == 0:
        # vertical line x = c/a
        plt.plot([c/a,c/a],[0,1],**kwargs)
    else:
        def y(x):
            return (c-a*x)/b
        plt.plot([0,1],[y(0),y(1)],**kwargs)
plot_data(scaled_car_data)
plot_line(0.35,1,0.56)
plot_line(1,1,1)
logistic_cost(0.35,1,0.56)
130.92490748700456
logistic_cost(1,1,1)
135.56446830870456
Exercise: Implement the function plot_line(a,b,c) referenced in the chapter that plots the line $ax + by = c$ where $0 \leq x \leq 1$ and $0 \leq y \leq 1$.
Solution: (see the implementation of plot_line above)
Mini-project: What does the graph of $k(x,y) = \sigma(x^2 + y^2 - 1)$ look like? What does the decision boundary look like, meaning the set of points where $k(x,y) = 0.5$?
def k(x,y):
    return sigmoid(x**2 + y**2 - 1)
plot_scalar_field(k,-3,3,-3,3)
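On the circle $x^2 + y^2 = 1$ the argument of the sigmoid is zero, so $k(x,y) = 0.5$ there: the decision boundary is the unit circle. The surface takes its minimum value $\sigma(-1) \approx 0.27$ at the origin and levels off toward 1 far from it. A quick check of my own (not from the chapter):
k(1, 0)    # sigmoid(0) = 0.5
k(0, -1)   # sigmoid(0) = 0.5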
Mini-project: Two equations $2x + y = 1$ and $4x + 2y = 2$ define the same line, and therefore the same decision boundary. Are the logistic functions $\sigma(2x + y - 1)$ and $\sigma(4x + 2y - 2)$ the same?
plot_scalar_field(lambda x,y:sigmoid(2*x+y-1),-3,3,-3,3)
plot_scalar_field(lambda x,y:sigmoid(4*x+2*y-2),-3,3,-3,3)
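The two surfaces share the same decision boundary, since both equal 0.5 exactly on the line $2x + y = 1$, but they are not the same function: doubling the coefficients makes the sigmoid twice as steep near the boundary. A quick check of my own at the point $(1,1)$:
sigmoid(2*1 + 1*1 - 1)    # sigmoid(2), about 0.88
sigmoid(4*1 + 2*1 - 2)    # sigmoid(4), about 0.98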
from math import sqrt
def length(v):
    return sqrt(sum([vi*vi for vi in v]))

def secant_slope(f,xmin,xmax):
    return (f(xmax) - f(xmin)) / (xmax - xmin)

def approx_derivative(f,x,dx=1e-6):
    return secant_slope(f,x-dx,x+dx)

def approx_gradient(f,x0,y0,dx=1e-6):
    partial_x = approx_derivative(lambda x:f(x,y0),x0,dx=dx)
    partial_y = approx_derivative(lambda y:f(x0,y),y0,dx=dx)
    return (partial_x,partial_y)
def approx_gradient3(f,x0,y0,z0,dx=1e-6):
    partial_x = approx_derivative(lambda x:f(x,y0,z0),x0,dx=dx)
    partial_y = approx_derivative(lambda y:f(x0,y,z0),y0,dx=dx)
    partial_z = approx_derivative(lambda z:f(x0,y0,z),z0,dx=dx)
    return (partial_x,partial_y,partial_z)
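A quick sanity check of my own (not part of the chapter): for functions with known gradients, the numerical approximations should agree with the exact values to several decimal places.
approx_gradient(lambda x,y: x**2 + y**2, 1, 2)      # exact gradient is (2, 4)
approx_gradient3(lambda x,y,z: x*y*z, 1, 2, 3)      # exact gradient is (6, 3, 2)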
def gradient_descent3(f,xstart,ystart,zstart,
                      tolerance=1e-6,max_steps=1000):
    x = xstart
    y = ystart
    z = zstart
    grad = approx_gradient3(f,x,y,z)
    steps = 0
    while length(grad) > tolerance and steps < max_steps:
        x -= 0.01 * grad[0]
        y -= 0.01 * grad[1]
        z -= 0.01 * grad[2]
        grad = approx_gradient3(f,x,y,z)
        steps += 1
    return x,y,z
gradient_descent3(logistic_cost,1,1,1,max_steps=100)
(0.21114493148496014, 5.045439728207488, 2.1260122566471376)
gradient_descent3(logistic_cost,1,1,1,max_steps=200)
(0.8845715265377516, 6.65754318727634, 2.9550572849988455)
To make a cool graph of the decision boundary moving as gradient descent runs:
plot_data(scaled_car_data)
for i in range(0,1000,100):
    a,b,c = gradient_descent3(logistic_cost,1,1,1,max_steps=i)
    plot_line(a,b,c,alpha=i/1000,c='k')
gradient_descent3(logistic_cost,1,1,1,max_steps=8000)
(3.7167003088210118, 11.422062406779267, 5.596878365032182)
plot_data(scaled_car_data)
plot_line(0.35,1,0.56)
plot_line(3.7167003153580045, 11.422062409195114, 5.596878367305919)
def best_logistic_classifier(x,p):
    l = make_logistic(3.7167003153580045, 11.422062409195114, 5.596878367305919)
    if l(x,p) > 0.5:
        return 1
    else:
        return 0
test_classifier(best_logistic_classifier,scaled_car_data)
0.8
plot_scalar_field(make_logistic(0.35,1,0.56), -2, 2, -2, 2)
plot_scalar_field(make_logistic(3.7167003153580045, 11.422062409195114, 5.596878367305919), -2, 2, -2, 2)
Exercise: Modify the gradient_descent3 function to print out the total number of steps taken before it returns its result. How many steps does the gradient descent take to converge for logistic_cost?
def gradient_descent3(f,xstart,ystart,zstart,tolerance=1e-6,max_steps=1000):
    x = xstart
    y = ystart
    z = zstart
    grad = approx_gradient3(f,x,y,z)
    steps = 0
    while length(grad) > tolerance and steps < max_steps:
        x -= 0.01 * grad[0]
        y -= 0.01 * grad[1]
        z -= 0.01 * grad[2]
        grad = approx_gradient3(f,x,y,z)
        steps += 1
    print(steps)
    return x,y,z
gradient_descent3(logistic_cost,1,1,1,max_steps=8000)
7243
(3.7167003088210118, 11.422062406779267, 5.596878365032182)
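The descent converges after 7,243 steps, well under the 8,000-step cap.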
Mini-project: Write an approx_gradient function that can calculate the gradient of a function in any number of dimensions. Then write a gradient_descent function that works in any number of dimensions. To test your gradient_descent on an $n$-dimensional function, you can try a function like $f(x_1, x_2, \ldots, x_n) = (x_1 - 1)^2 + (x_2 - 1)^2 + \ldots + (x_n - 1)^2$ where $x_1, x_2, \ldots, x_n$ are the $n$ input variables to the function $f$. The minimum of this function should be $(1, 1, \ldots, 1)$, an $n$-dimensional vector with 1 in every entry.
def partial_derivative(f,i,v,**kwargs):
    def cross_section(x):
        arg = [(vj if j != i else x) for j,vj in enumerate(v)]
        return f(*arg)
    return approx_derivative(cross_section, v[i], **kwargs)
def approx_gradient(f,v,dx=1e-6):
    return [partial_derivative(f,i,v,dx=dx) for i in range(0,len(v))]
def gradient_descent(f,vstart,tolerance=1e-6,max_steps=1000):
    v = vstart
    grad = approx_gradient(f,v)
    steps = 0
    while length(grad) > tolerance and steps < max_steps:
        v = [(vi - 0.01 * dvi) for vi,dvi in zip(v,grad)]
        grad = approx_gradient(f,v)
        steps += 1
    return v

def sum_squares(*v):
    return sum([(x-1)**2 for x in v])
v = [2,2,2,2,2]
gradient_descent(sum_squares,v)
[1.0000002235452137, 1.0000002235452137, 1.0000002235452137, 1.0000002235452137, 1.0000002235452137]
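As an extra check (my own, not from the chapter), the general-purpose gradient_descent should also be able to reproduce the earlier three-dimensional result by minimizing logistic_cost directly, provided it is allowed enough steps, as with gradient_descent3 above:
gradient_descent(logistic_cost, [1, 1, 1], max_steps=8000)
# expect a result close to the gradient_descent3 output above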