#!/usr/bin/env python # coding: utf-8 # # Numpy tutorials # # ## Content # # - Basic # - Matrix # - Vector Math # # ## Basic # # In[1]: # import libraries import numpy as np import matplotlib.pyplot as plt # In[2]: # name it a1 = np.array([9, 5, 1, 5, 67, 7, 8, 6, 2, 3, 4, 5]) # In[3]: a1 # ### Indexing / Slicing # # In[4]: a1[2] # In[5]: a1[2:] # In[6]: a1[:-4] # In[7]: a1[1:-4] # In[8]: a1[1:] # In[9]: a1[:-1] # In[10]: a1 > 3 # bool # In[11]: a1[a1 > 3] # In[12]: a1[a1 % 4 == 0] # In[13]: name = np.array(['Jim', 'Luke', 'Josh', 'Pete']) # In[14]: # fist_value = lambda s:s[0] first_letter = np.vectorize(lambda s: s[0])(name) == 'J' # In[15]: first_letter # In[16]: name[first_letter] # ### `np.zeros` # # In[17]: # a2 = np.zeros(10) no name for coviences np.zeros(10) # ### `random` and `randn` # # In[18]: np.random.random(10) # 0-1 # In[19]: np.random.randn(10) # -1 ~ 1 normal # ### `linspace` and `arange` # # In[20]: np.linspace(0, 10, 21) # 21: number of elements # In[21]: np.linspace(0, 10, 20) # In[22]: np.arange(0, 2, 0.02) # 0.02: number of spacing, step length. # ### array operations # # In[23]: a1 # In[24]: 2 * a1 # In[25]: 1 / a1 # In[ ]: # In[26]: 1 / a1 + 7 # In[27]: 1 / a1 + a1 + 7 # In[28]: a1 ** 2 # In[29]: x = np.linspace(0, 1, 100) y = np.sin(x) # In[30]: plt.plot(x, y) # In[31]: # More math function check the np doc "Mathematical functions" # logistc funtion def logistic_function(a, b, t): return np.exp(-a + b * t)/(1 + np.exp(-a + b*t)) # In[32]: # let a = 5, b = 0.02 a = 1 b = 1 t = np.arange(0, 100) y_1 = logistic_function(a, b, t) # In[33]: plt.plot(t, y_1) # ### Calculus and Statistics in np # # In[34]: a1 = 3 * np.random.randn(100000) + 13 # mean 13 and std =3 # In[35]: np.mean(a1) # In[36]: np.std(a1) # In[37]: np.percentile(a1, 80) # the number of percentiles # ### Derivative and integral # # In[38]: x = np.linspace(0, 4, 100) y = x ** 2 dydx = np.gradient(y, x) # x is the spcae\steps dy2dx2 = np.gradient(np.gradient(y, x), x) # In[39]: plt.plot(x, y) plt.plot(x, dydx) plt.plot(x, dy2dx2) # In[40]: y_2 = np.cumsum(dydx) # integral plt.plot(x, y_2) plt.plot(x, dydx) # In[41]: y_t1 = np.exp(-x/10) * np.cos(3*x) plt.plot(x, y_t1) # In[42]: np.mean(y_t1), np.std(y_t1) # In[43]: # for x domain [0.5,2] here * NOT & y_t1[(x >= 0.5) * (x <= 2)] # In[44]: y_t1_range = y_t1[(x >= 0.5) * (x <= 2)] # In[45]: np.mean(y_t1_range), np.std(y_t1_range) # In[46]: np.percentile(y_t1_range, 80) # In[47]: dy_t1dx = np.gradient(y_t1) plt.plot(x, dy_t1dx) # In[48]: x[1:][dy_t1dx[1:]*dy_t1dx[:-1] < 0] # dydx=0 # In[49]: # sum up 0 to 10000 except for those can be divided by 4 or 7 in one line # nums_ls = [i for i in range(10000) if i % 4 != 0 & i % 7 != 0] nums = np.arange(0, 10001, 1) sum(nums[(nums % 4 != 0)*(nums % 7 != 0)]) # In[50]: # Flower petal theta = np.linspace(0, 2 * np.pi, 1000) r_theta = [1 + 3/4 * np.sin(3 * _) for _ in theta] plt.plot(theta, r_theta) # In[51]: # polar cordinates x = r_theta * np.cos(theta) y = r_theta * np.sin(theta) plt.plot(x, y) # ## Matrix # # ### Multi-Dimensional Arrays # # In[52]: A_1 = np.array([[1, 2, 3], [6, 7, 8], [4, 5, 9]]) A_1 # In[53]: A_1*2 # In[54]: 2 / A_1 # In[55]: # Booling A_1 > 5 # In[56]: A_2 = np.random.randn(3, 3) A_2 # In[57]: # nesting 很高级 A_2[A_1 > 5] # In[58]: # Element Indexing A_1 # In[59]: A_1[:, 0] # first col # In[60]: A_1[:, 1] # In[61]: # just want 7,5 above A_1[1:, 1] # In[62]: # 2-d Function x = np.linspace(0, 10, 1000) y = np.linspace(0, 10, 1000) xv, yv = np.meshgrid(x, y) zv = xv**2 + yv ** 2 # In[63]: plt.contour(xv, yv, zv, levels=30) plt.colorbar() # ### `ravel()` method # # - Turn any N-Dimensional array to 1-D array # # In[64]: A_1.ravel() # ### Basic Linear Algebra # # In[65]: # Equations A for coefficients matrix and c For constants: Qramer rules A = np.array([[3, 2, 1], [5, -5, 4], [6, 0, 1]]) c = np.array([4, 3, 0]) # In[66]: np.linalg.solve(A, c) # In[67]: # Eigen value4s matrix_B = np.array([[4, 2, 2], [2, 4, 2], [2, 2, 4]]) np.linalg.eig(matrix_B) # - It returns eigen values and eigen vectors # # In[ ]: # ## Vector Math # # - vectorization,vectorized operation or array programming: # - Convert for-loop to one-time computation # # In[68]: # Difference between vectorization and Non-vectorization # sourcery skip: move-assign-in-block, sum-comprehension import time # vectorization num = 10000 a = np.random.random(num) b = np.random.random(num) start = time.time() c = np.dot(a, b) end = time.time() print(c) print(f"Verctorize time:{round((end -start)*1000,4)} ms") # Loop version:Non-vectorization c = 0 # iconic start = time.time() for i in range(num): c += a[i]*b[i] end = time.time() print(c) print(f"Loop version time:{round((end -start)*1000,4)} ms") # ### Generate continuous uniform random variables, with a=20, b =20 # # In[69]: N = 1000 a = 20 b = 30 rand_nums = np.random.uniform(a, b, N) uniform_rvs = (rand_nums - a) / (b - a) plt.plot(np.arange(N), uniform_rvs) # ### Generate exponential random variables with lambda = 2 by inverse transform method. # # In[70]: from numpy import log as ln # In[71]: lam = 2 mean = 1 / lam exp_rvs = -mean * ln(uniform_rvs) plt.plot(np.arange(N), exp_rvs) # # ### [Conditional vectorizaion](https://stackoverflow.com/questions/57896289/vectorizing-conditional-logic-in-python) # # ### Generate bernoulli random variables with probability p = 0.3 # # In[72]: p = 0.3 bernoulli_rvs = [1 if _ < p else 0 for _ in uniform_rvs] plt.hist(bernoulli_rvs) # In[73]: # np.where bernoulli_rvs_where = np.where(uniform_rvs < p, 1, 0) plt.hist(bernoulli_rvs_where) # ### Generate random variables from the discrete distribution **with the support set ${1,2,3,4}$ and pmf $P[W_i = j] = \frac{j}{10} ,j = 1,2,3, i = 1, ⋯, N.$** # # that is: # # $$ # \begin{aligned} # # \mathrm{P[W_i = 1]} &= \frac{1}{10} \\ # \mathrm{P[W_i = 2]} &= \frac{2}{10} \\ # \mathrm{P[W_i = 3]} &= \frac{3}{10} \\ # \mathrm{P[W_i = 4]} &= \frac{4}{10} ,where:\ i = 1 \cdots N # # \end{aligned} # $$ # # - Nesting conditional vectorization # # In[74]: discrete_rvs_where = np.where(uniform_rvs < 0.1, 1, np.where( uniform_rvs <= 0.3, 2, np.where(uniform_rvs <= 0.6, 3, 4))) plt.hist(discrete_rvs_where) # In[75]: # # Define the boundaries of the intervals # bins = [0, 0.1, 0.3, 0.6, 1] # # Use np.digitize to find the indices of the intervals # indices = np.digitize(uniform_rvs, bins) # # The discrete RV values are just the indices + 1 # discrete_rvs = indices + 1 # ### Generate random variables from **Poisson, Binomial and Gamma** (with the integer shape parameter) distributions # # #### Generate poisson random variables # # In[76]: def poisson_rvs_gen(lam=2): time = 0 event_count = -1 mean = 1 / lam while True: event_t = -mean * ln(np.random.uniform()) time += event_t event_count += 1 if time > 1: break return event_count # In[77]: poisson_rvs = [poisson_rvs_gen(lam=2) for _ in range(N)] plt.hist(poisson_rvs) # #### Generate binomial rvs from bernoulli distribution with p = 0.3 # # In[78]: p = 0.3 # N = 1000 # In[79]: binom_rvs = [np.sum([1 if _ < p else 0 for _ in np.random.uniform(0, 1, N)]) for _ in range(N)] # In[80]: plt.hist(binom_rvs) # In[81]: np.mean(binom_rvs) # #### Generate gamma rvs with alpha = 4, beta = 2 # # In[82]: # # Original code inverse function # a = 4 # b = 2 # gamma_rvs = [] # for _ in range(N): # r = 0 # for _ in range(a): # E = -b * ln(np.random.uniform()) # r = r + E # gamma_rvs.append(r) # In[83]: a = 4 b = 2 gamma_rvs = [np.sum([-b * ln(np.random.uniform()) for _ in range(a)]) for _ in range(N)] # In[84]: np.mean(gamma_rvs) # In[85]: plt.hist(gamma_rvs) # In[ ]: # In[ ]: