In [20]:

%run ../../common_functions/import_all.py

from scipy import optimize
from scipy.integrate import quad, odeint
from scipy.interpolate import interp1d
from scipy.signal import detrend
from scipy.spatial import distance
from matplotlib.legend_handler import HandlerLine2D

from functools import partial 
from ipywidgets import interact

from common_functions.setup_notebook import set_css_style, setup_matplotlib, config_ipython
config_ipython()
setup_matplotlib()
set_css_style()

dataset = '../../datasets/oldfaithful.txt'

Out[20]:

Scientific Python: Numpy and Scipy, and some more¶

Some high-level notes to these two libraries, not intended to be comprehensive, plus some function divertissements.

Numpy

Arrays and Matrices
Linear Algebra
Playing with Polynomials
Various

Scipy

Using optimize: fit and solve equations
The stats module: statistical computation
The interpolate module:
Integrate
Signal and noise
Mathematical distances

Note: some of the stuff presented here comes from the Scipy lectures http://www.scipy-lectures.org/index.html

Divertissements with functions

Using the interact widget to visualise how a function can change with parameters.

Numpy¶

"Num" stands for "numerical", this is a library devoted to numerical computation and it's a building block of many other projects.

Arrays and Matrices¶

These objects constitute the basic blocks for manipulation

Attributes, slicing reshaping/resizing/flattening¶

In [3]:

# NOTE: np.array is function to create an ndarray (n-dimensional array), ndarray not to be used to create

# Vector: an array

print('* A vector')
vec = np.array([1, 2, 3, 4, 5])   
print('    Dimension %d and shape %s: %s' %(vec.ndim, vec.shape, vec))
print('    Slice of vector: ', vec[2:4])
print('    Slice of vector with step: ', vec[1:4:2])
print('    Slice with newaxis on column: ', vec[:, np.newaxis])
print('    Slice with newaxis on row: ', vec[np.newaxis, :])

* A vector
    Dimension 1 and shape (5,): [1 2 3 4 5]
    Slice of vector:  [3 4]
    Slice of vector with step:  [2 4]
    Slice with newaxis on column:  [[1]
 [2]
 [3]
 [4]
 [5]]
    Slice with newaxis on row:  [[1 2 3 4 5]]

In [2]:

# Matrix, a 2D array

print('* A matrix')
mat = np.array([[1,2,3, 0], [2,3,4, 1], [4, 0, 1, 0]])
print('    This is an ndarray of dimension %d and shape %s:\n %s' %(mat.ndim, mat.shape, mat))
print('    It contains %d elements (size)' %mat.size)
print('    Its element on row 2, col 3 is %d' %mat[1,2])
print('    A slice of it is (second column)', mat[:,1])

* A matrix
    This is an ndarray of dimension 2 and shape (3, 4):
 [[1 2 3 0]
 [2 3 4 1]
 [4 0 1 0]]
    It contains 12 elements (size)
    Its element on row 2, col 3 is 4
    A slice of it is (second column) [2 3 0]

In [5]:

# Reshaping, resizing and flattening

print('* Reshaping and resizing', ['t_red'])
a = np.arange(6)
print('    Original vector: ', a)
print('    Reshaping vector to shape (2, 3) (change shape, return reshaped ndarray): ')
a.reshape((2, 3))
print('    Resizing vector to shape (2, 2) (change shape and size, do in place and return None): ')
a
print('    Original matrix: ', mat)
print('    Flattened matrix: ', mat.ravel())

* Reshaping and resizing ['t_red']
    Original vector:  [0 1 2 3 4 5]
    Reshaping vector to shape (2, 3) (change shape, return reshaped ndarray):

Out[5]:

array([[0, 1, 2],
       [3, 4, 5]])

    Resizing vector to shape (2, 2) (change shape and size, do in place and return None):

Out[5]:

array([0, 1, 2, 3, 4, 5])

    Original matrix:  [[1 2 3 0]
 [2 3 4 1]
 [4 0 1 0]]
    Flattened matrix:  [1 2 3 0 2 3 4 1 4 0 1 0]

In [6]:

# Matrices of zeros and ones

print('* Matrices of zeros and ones', ['t_red'])
np.zeros((3, 4))
np.ones((1, 5))

* Matrices of zeros and ones ['t_red']

Out[6]:

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

Out[6]:

array([[1., 1., 1., 1., 1.]])

Operations on arrays/matrices¶

In [7]:

# Setting the vars to use

print('* Vectors and matrices to use')
v = np.array([1, 2, 3])
A = np.array([[1,2, 0], [2,3,4]])
B = np.array([[0, 1], [2, 0], [1, 1]])
C = np.array([[3, 5, 1], [0, 0, 2]])
print('    vector v is', v)
print('    Matrix A is')
print(A)
print('    Matrix B is')
print(B)
print('    Matrix C is')
print(C)

* Vectors and matrices to use
    vector v is [1 2 3]
    Matrix A is
[[1 2 0]
 [2 3 4]]
    Matrix B is
[[0 1]
 [2 0]
 [1 1]]
    Matrix C is
[[3 5 1]
 [0 0 2]]

In [8]:

# matrix transpose

print('* Matrix and transpose')
print('    Transpose of A is\n', A.T)

* Matrix and transpose
    Transpose of A is
 [[1 2]
 [2 3]
 [0 4]]

In [9]:

# Some Matrix properties of elements

print('* Properties of matrix elements')
print('    Sum, mean, std of elements: ', A.sum(), A.mean(), A.std())
print('    Sum of A on axis 0: ', A.sum(axis=0))
print('    Sum of A on axis 1: ', A.sum(axis=1))

* Properties of matrix elements
    Sum, mean, std of elements:  12 2.0 1.2909944487358056
    Sum of A on axis 0:  [3 5 4]
    Sum of A on axis 1:  [3 9]

In [10]:

# Product of matrices

print('* Matrix product')
print('    Product A * B is')
print(np.dot(A, B))

* Matrix product
    Product A * B is
[[ 4  1]
 [10  6]]

In [11]:

# scalar * vector; scalar * matrix

print('* Multiplication and sum with scalar (v and A)')
print('    2 * v: ', 2 * v)
print('    2 * A: ', 2 * A)
print('    v + 1: ', v + 1)
print('    A + 1: ', A + 1)

* Multiplication and sum with scalar (v and A)
    2 * v:  [2 4 6]
    2 * A:  [[2 4 0]
 [4 6 8]]
    v + 1:  [2 3 4]
    A + 1:  [[2 3 1]
 [3 4 5]]

In [12]:

# Matrix arithmetics

print('* Arithmetics with matrices')
print('    Summing A and C')
print(A + C)
print('    Subtracting C from A')
print(A - C)

* Arithmetics with matrices
    Summing A and C
[[4 7 1]
 [2 3 6]]
    Subtracting C from A
[[-2 -3 -1]
 [ 2  3  2]]

In [13]:

# sorting a matrix

print('* Sorting matrix')
A_copy = A.copy()
A.sort()
print('    Sorting A: ', A)
A = A_copy.copy()
A.sort(axis=0)
print('    Sorting A on axis 0', A)
A = A_copy.copy()
A.sort(axis=1)
print('    Sorting A on axis 1', A)

* Sorting matrix
    Sorting A:  [[0 1 2]
 [2 3 4]]
    Sorting A on axis 0 [[1 2 0]
 [2 3 4]]
    Sorting A on axis 1 [[0 1 2]
 [2 3 4]]

In [14]:

# array shuffling at random

print('* Randomly shuffling array:')
np.random.shuffle(v)                 # shuffles and returns None
print(v)

* Randomly shuffling array:
[3 2 1]

Some linear Algebra¶

In [15]:

# set the vars to use

v = np.array([1, 2, 1, 1])
A = np.array([[1, 2, 3], [0, 1, 0], [0, 2, 1]])

In [16]:

# determinant & co

print('* Determinant, trace, inverse, norm')
print('    Determinant of matrix A: ', np.linalg.det(A))
print('    Trace of matrix A:', np.trace(A))
print('    Inverse of matrix A: ', np.linalg.inv(A))
print('    Norm of vector v and of matrix A:', np.linalg.norm(v), np.linalg.norm(A))

* Determinant, trace, inverse, norm
    Determinant of matrix A:  1.0
    Trace of matrix A: 3
    Inverse of matrix A:  [[ 1.  4. -3.]
 [ 0.  1.  0.]
 [-0. -2.  1.]]
    Norm of vector v and of matrix A: 2.6457513110645907 4.47213595499958

In [17]:

# eigenthings

print('* Eigenvalues/Eigenvectors')
print('    Eigenvalues and eigenvectors of matrix A:', np.linalg.eig(A))

* Eigenvalues/Eigenvectors
    Eigenvalues and eigenvectors of matrix A: (array([1., 1., 1.]), array([[ 1.00000000e+00, -1.00000000e+00,  1.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  8.21730110e-33],
       [ 0.00000000e+00,  7.40148683e-17, -7.40148683e-17]]))

In [18]:

# SVD

print('* Single Value Decomposition')
print('    SVD of matrix A:', np.linalg.svd(A))

* Single Value Decomposition
    SVD of matrix A: (array([[ 0.86349489,  0.47752005, -0.16233045],
       [ 0.15573818, -0.55857386, -0.81470293],
       [ 0.47971053, -0.67821077,  0.55669378]]), array([4.27194722, 1.3109472 , 0.17856195]), array([[ 0.20213145,  0.66530528,  0.71868753],
       [ 0.36425575, -0.73226084,  0.57542317],
       [-0.9090988 , -0.14547494,  0.39035422]]))

In [19]:

# System of linear equations

A = np.array([[1, 2], [3, 1]])
b = np.array([0, 1])
print('* Linear systems')
print('    Solving linear system Ax = b:', np.linalg.solve(A, b))
print('    Least Square solution for Ax = b: ', np.linalg.lstsq(A, b))

* Linear systems
    Solving linear system Ax = b: [ 0.4 -0.2]
    Least Square solution for Ax = b:  (array([ 0.4, -0.2]), array([], dtype=float64), 2, array([3.61803399, 1.38196601]))

/Users/martina/Desktop/Mallzee/repos/plantation/venv/lib/python3.7/site-packages/ipykernel_launcher.py:7: FutureWarning: `rcond` parameter will change to the default of machine precision times ``max(M, N)`` where M and N are the input matrix dimensions.
To use the future default and silence this warning we advise to pass `rcond=None`, to keep using the old, explicitly pass `rcond=-1`.
  import sys

Playing with polynomials¶

Basics¶

In [20]:

print('y = x^2 -2x + 1')
y = np.poly1d([1, -2, 1])
print('* Order of polynomial is ', y.order)
print('* y(2) = ', y(2))
print('* Roots of polynomial are ', y.roots)

y = x^2 -2x + 1
* Order of polynomial is  2
* y(2) =  1
* Roots of polynomial are  [1. 1.]

A polynomial fit to points¶

In [21]:

# Setting x's to be 50 linearly spaced points in [0, 1]
x = np.linspace(0, 1, num=20)

# Setting y to be a noised cosine 
y = np.cos(x) + 0.3*np.random.rand(20)

# Fitting points (x, y) to a polynomial of deg 3
fit_coeff = np.polyfit(x, y, 3)

# Build the polynomial with the fitting coefficients
p = np.poly1d(fit_coeff)

# Considering another interval of x's and plotting original points and fitting curve
x2 = np.linspace(0, 1, 200)
plt.plot(x, y, 'o', x2, p(x2), '-') 
plt.xlabel('$x$')
plt.ylabel('$y$')
plt.title('A noisy cosine')
plt.show();

Numpy various¶

Loading datasets from file¶

In [2]:

# Load data from text file (including csv) into Numpy matrix
data = np.loadtxt(dataset, 
                  delimiter=' ', 
                  skiprows=1,                                                 # skip the first 1 lines (header)
                  usecols=(1,2)                                             # select cols 1 (x) and 2 (y)
                  )

# If there are missing values, run this instead
# data = np.genfromtxt(data_folder + filename, 
#                      delimiter=delimiter, 
#                      skip_header=1,                                       # skip the first 1 lines (header)
#                      usecols=features,                                    # select which features to use
#                      missing='?'                                          # what missing values are mapped to
#                     )

Playing with NaNs¶

In [9]:

print(np.log(0), np.log(-1))
print(np.isnan([1.,np.log(-1), 2]))

-inf nan
[False  True False]

/usr/local/lib/python3.6/site-packages/ipykernel/__main__.py:1: RuntimeWarning: divide by zero encountered in log
  if __name__ == '__main__':
/usr/local/lib/python3.6/site-packages/ipykernel/__main__.py:1: RuntimeWarning: invalid value encountered in log
  if __name__ == '__main__':
/usr/local/lib/python3.6/site-packages/ipykernel/__main__.py:2: RuntimeWarning: invalid value encountered in log
  from ipykernel import kernelapp as app

IO Numpy data format¶

In [10]:

data = np.ones((3, 3))

# Saving and loading numpy array
np.save('mat.npy', data)
data_loaded = np.load('mat.npy')

SciPy¶

"Sci" stands, quite obviously, for "scientific": this is the library devolted to calculus, statistics and all the blocks of scientific manipulation.

Using `optimize`¶

Finding the minimum of a function¶

In [23]:

# function definition and plot of it
def f(x):
    return x**2 + 10*np.sin(x)

x = np.arange(-10, 10, 0.1)
plt.plot(x, f(x))
plt.show();

# Gradient Descent through BFGS (finds local min)
print('BFGS')
optimize.fmin_bfgs(f, 0)

# Global optimizer though bashinopping (local optimizer + random sampling of starting points)
print('BASINHOPPING')
optimize.basinhopping(f, 0)

# NOTE: there are several other optimizers available

Out[23]:

[<matplotlib.lines.Line2D at 0x10af69048>]

BFGS
Optimization terminated successfully.
         Current function value: -7.945823
         Iterations: 5
         Function evaluations: 18
         Gradient evaluations: 6

Out[23]:

array([-1.30644012])

BASINHOPPING

Out[23]:

                        fun: -7.9458233756152845
 lowest_optimization_result:       fun: -7.9458233756152845
 hess_inv: array([[ 0.08581276]])
      jac: array([  1.19209290e-07])
  message: 'Optimization terminated successfully.'
     nfev: 15
      nit: 3
     njev: 5
   status: 0
  success: True
        x: array([-1.30644001])
                    message: ['requested number of basinhopping iterations completed successfully']
      minimization_failures: 0
                       nfev: 1524
                        nit: 100
                       njev: 508
                          x: array([-1.30644001])

Finding roots of functions¶

In [24]:

roots = []

#f = x**2 + 10*np.sin(x) is the function defined above
root = optimize.fsolve(f, 1)      # 1 is the initial guess
print(root)
roots.append(root)

# from plot, function has another root, so let us start from -2.5
root = optimize.fsolve(f, -2.5)      # 1 is the initial guess
print(root)
roots.append(root)

[ 0.]
[-2.47948183]

Fit curve knowing the functional form, fitting the params¶

In [28]:

x = np.linspace(-10, 10, num=20)
y = f(x) + np.random.randn(x.size)

def f_to_fit(x, a, b):
    return a*x**2 + b*np.sin(x)

guess = [2, 2]
params, params_covariance = optimize.curve_fit(f_to_fit, x, y, guess)

# Plotting all stuff together
f_line, = plt.plot(x, f(x), label='original function')
fitted_points, = plt.plot(x, f_to_fit(x, params[0], params[1]), 'o', label='points of fitted curve')
original, = plt.plot(roots, f(np.array(roots)), 'x', label='roots')
legend = plt.legend(handler_map={f_line: HandlerLine2D(numpoints=2)}, loc=2)
plt.show();

The `stats` module¶

Histograms and PDFs¶

In [29]:

# Build the bins separators, width 1, from -10 to 10
bins_sep = np.arange(-9, 10)

# Extract 1000 normally distributed points
normal_points = np.random.normal(size=1000)

# Histogram the points and compute the normal PDF over bins
histogram = np.histogram(normal_points, bins=bins_sep, normed=True)[0]
bins = 0.5*(bins_sep[1:] + bins_sep[:-1])    # to get the bins centers

# Normal PDF over specified bins
normal_pdf = stats.norm.pdf(bins)

# Plot both things
hist_line, = plt.plot(bins, histogram, 'o', label='hist norm points') 
pdf_line, = plt.plot(bins, normal_pdf, label='Normal PDF')
legend = plt.legend(handler_map={hist_line: HandlerLine2D(numpoints=2)}, loc=2)
plt.show();

Quickly compute percentiles¶

In [30]:

# 50 percentile of previously defined normal points

print(stats.scoreatpercentile(normal_points, 50))

-0.0508135291078

The `interpolate` module¶

Linear and cubic interpolation between points¶

In [35]:

# Defining noisy sine wave
t = np.linspace(0, 1, 10)
middle_t = np.linspace(0, 1, 20)
noise = (np.random.random(10)*2 - 1) * 1e-1
noisy_sin = np.sin(2 * np.pi * t) + noise

# Defining linear interpolation between points
linear_int = interp1d(t, noisy_sin)(middle_t)

# Defining cubic interpolation between points
cubic_int = interp1d(t, noisy_sin, kind='cubic')(middle_t)

# Plot all
noisy_sin_line, = plt.plot(t, noisy_sin, label='noisy sine wave')
noisy_sin_points, = plt.plot(t, noisy_sin, 'o', color='blue')
linear_int_points, = plt.plot(middle_t, linear_int, 'x', label='lin int')
cubic_int_points, = plt.plot(middle_t, cubic_int, 'x', label='cub int')
legend = plt.legend(handler_map={hist_line: HandlerLine2D(numpoints=2)}, loc=3)
plt.show();

The `integrate` module¶

Compute definite integral of function¶

In [38]:

res, err = quad(np.sin, 0, np.pi/2)
print(res, err)

# NOTE: there are other integration methods

0.9999999999999999 1.1102230246251564e-14

Solve ODEs¶

In [41]:

# Defining dy/dt, iteration_count is the counter for the iteration till convergence
def y_dot(y, t, iteration_count):
    iteration_count += 1
    return -2 * y

# Defining t, the vector of iteration count, then solve dy/dt = -2y
t = np.linspace(0, 10, 100)
iteration_count = np.zeros((1,), dtype=np.uint16)
y_sol, infodict = odeint(y_dot, 1, t, args=(iteration_count,), full_output=True)

# Plot solution y(t)
plt.plot(t, y_sol)
plt.plot()
plt.show();

# NOTE: odeint solves a system of ODEs as well
# NOTE: there is no partial diff eq solver in Scipy

The `signal` module¶

Remove a linear trend from noisy signal¶

In [45]:

x = t + np.random.normal(size=100)
plt.plot(t, x, label='original')
plt.plot(t, detrend(x), label='detrended')
legend = plt.legend(handler_map={hist_line: HandlerLine2D(numpoints=2)}, loc=2)
plt.show();

Mathematical distances¶

Quick runthrough of many possible ways to compute the distance between points, as supported by SciPy.

In [47]:

# Define two 1D arrays and two matrices

u = np.array([1, 1, 2])
v = np.array([1, 3, 1])
A = np.array([[1, 2, 0], [2,3,4]])
B = np.array([[0, 1, 1], [2, 0, 1]])

In [50]:

print('* Distances between the two vectors')
print('    Euclidean:', distance.euclidean(u, v))
print('    Cosine:', distance.cosine(u, v))
print('    Manhattan:', distance.cityblock(u, v))
print('    Minkowski with p=3:', distance.minkowski(u, v, 3))
print('    Chebyshev:', distance.chebyshev(u, v))
print('    Hamming:', distance.hamming(u, v))

* Distances between the two vectors
    Euclidean: 2.2360679775
    Cosine: 0.261451054124
    Manhattan: 3
    Minkowski with p=3: 2.08008382305
    Chebyshev: 2
    Hamming: 0.666666666667

In [51]:

print('* Distances between each pair of rows in matrices')
print(distance.cdist(A, B, metric='euclidean'))
# cdist is built as (numbers are row indices)
# A0B0 A0B1 A0B2
# A1B0 A1B1 A1B2

* Distances between each pair of rows in matrices
[[ 1.73205081  2.44948974]
 [ 4.12310563  4.24264069]]

In [53]:

print('* Pairwise distances between rows in matrix')
print(distance.pdist(np.array([[0, 0], [1, 1], [1, 2]]), metric='euclidean'))
# pdist is built as (numbers are row indices)
# A0A1 A0A2 A1A2           # len will be N*(N-1) where N is the number of rows

* Pairwise distances between rows in matrix
[ 1.41421356  2.23606798  1.        ]

Divertissements with functions¶

... and interaction.

Jupyter has several wonderful widgets, like interact, which creates UI controls for you to manipulate graphics.

In [66]:

# Define a line and a parabola parametrically

def f_linear(x, a, b): 
    """A parametric linear function."""
    return a*x + b

def quad(x, a, b, c): 
    """A parametric quadratic function (parabola)."""
    return a*x**2 + b*x + c

In [67]:

# define a range for x

x = np.linspace(-10, 10, num=50)

In [88]:

# plot the function with some chosen params and some noisy points (value given by the function with added random noise)

f = partial(f_linear, a=1,b=10)   # this will behave like the called function but with chosen parameters

y = [f_linear(x_, 1, 10) + np.random.randint(-10, 10) for x_ in x]
plt.scatter(x, y, label='noisy points')
plt.plot(x, f(x), c='b', label='f');
plt.title('$x +10$');
plt.legend();

In [90]:

# now do the same but use the widget to have choseable params
# the noisy points will be the same as earlier 

@interact(a_=1, b_=10)

def plot_with_widget(a_,b_):

    f = partial(f_linear, a=a_,b=b_)

    plt.scatter(x, y, label='noisy points')
    plt.plot(x, f(x), c='b', label='f');
    plt.title('${a}x + {b}$'.format(a=a_, b=b_));
    plt.legend();

interactive(children=(IntSlider(value=1, description='a_', max=3, min=-1), IntSlider(value=10, description='b_…

In [ ]:

Scientific Python: Numpy and Scipy, and some more¶

Numpy¶

Arrays and Matrices¶

Attributes, slicing reshaping/resizing/flattening¶

Operations on arrays/matrices¶

Some linear Algebra¶

Playing with polynomials¶

Basics¶

A polynomial fit to points¶

Numpy various¶

Loading datasets from file¶

Playing with NaNs¶

IO Numpy data format¶

SciPy¶

Using optimize¶

Finding the minimum of a function¶

Finding roots of functions¶

Fit curve knowing the functional form, fitting the params¶

The stats module¶

Histograms and PDFs¶

Quickly compute percentiles¶

The interpolate module¶

Linear and cubic interpolation between points¶

The integrate module¶

Compute definite integral of function¶

Solve ODEs¶

The signal module¶

Remove a linear trend from noisy signal¶

Mathematical distances¶

Divertissements with functions¶

Using `optimize`¶

The `stats` module¶

The `interpolate` module¶

The `integrate` module¶

The `signal` module¶