#!/usr/bin/env python
# coding: utf-8

# # Numpy tutorials
# 

# ## Content
# 
# - Basic
# - Matrix
# - Vector Math
# 

# ## Basic
# 

# In[1]:


# import libraries
import numpy as np
import matplotlib.pyplot as plt


# In[2]:


# name it
a1 = np.array([9, 5, 1, 5, 67, 7, 8, 6, 2, 3, 4, 5])


# In[3]:


a1


# ### Indexing / Slicing
# 

# In[4]:


a1[2]


# In[5]:


a1[2:]


# In[6]:


a1[:-4]


# In[7]:


a1[1:-4]


# In[8]:


a1[1:]


# In[9]:


a1[:-1]


# In[10]:


a1 > 3  # bool


# In[11]:


a1[a1 > 3]


# In[12]:


a1[a1 % 4 == 0]


# In[13]:


name = np.array(['Jim', 'Luke', 'Josh', 'Pete'])


# In[14]:


# fist_value = lambda s:s[0]
first_letter = np.vectorize(lambda s: s[0])(name) == 'J'


# In[15]:


first_letter


# In[16]:


name[first_letter]


# ### `np.zeros`
# 

# In[17]:


# a2 = np.zeros(10)  no name for coviences
np.zeros(10)


# ### `random` and `randn`
# 

# In[18]:


np.random.random(10)  # 0-1


# In[19]:


np.random.randn(10)  # -1 ~ 1 normal


# ### `linspace` and `arange`
# 

# In[20]:


np.linspace(0, 10, 21)  # 21: number of elements


# In[21]:


np.linspace(0, 10, 20)


# In[22]:


np.arange(0, 2, 0.02)  # 0.02: number of spacing, step length.


# ### array operations
# 

# In[23]:


a1


# In[24]:


2 * a1


# In[25]:


1 / a1


# In[ ]:


# In[26]:


1 / a1 + 7


# In[27]:


1 / a1 + a1 + 7


# In[28]:


a1 ** 2


# In[29]:


x = np.linspace(0, 1, 100)
y = np.sin(x)


# In[30]:


plt.plot(x, y)


# In[31]:


# More math function check the np doc "Mathematical functions"
# logistc funtion
def logistic_function(a, b, t):
    return np.exp(-a + b * t)/(1 + np.exp(-a + b*t))


# In[32]:


# let a = 5, b = 0.02
a = 1
b = 1
t = np.arange(0, 100)
y_1 = logistic_function(a, b, t)


# In[33]:


plt.plot(t, y_1)


# ### Calculus and Statistics in np
# 

# In[34]:


a1 = 3 * np.random.randn(100000) + 13  # mean 13 and std  =3


# In[35]:


np.mean(a1)


# In[36]:


np.std(a1)


# In[37]:


np.percentile(a1, 80)  # the number of percentiles


# ### Derivative and integral
# 

# In[38]:


x = np.linspace(0, 4, 100)
y = x ** 2

dydx = np.gradient(y, x)  # x is the spcae\steps
dy2dx2 = np.gradient(np.gradient(y, x), x)


# In[39]:


plt.plot(x, y)
plt.plot(x, dydx)
plt.plot(x, dy2dx2)


# In[40]:


y_2 = np.cumsum(dydx)  # integral
plt.plot(x, y_2)
plt.plot(x, dydx)


# In[41]:


y_t1 = np.exp(-x/10) * np.cos(3*x)
plt.plot(x, y_t1)


# In[42]:


np.mean(y_t1), np.std(y_t1)


# In[43]:


# for x domain [0.5,2] here * NOT &
y_t1[(x >= 0.5) * (x <= 2)]


# In[44]:


y_t1_range = y_t1[(x >= 0.5) * (x <= 2)]


# In[45]:


np.mean(y_t1_range), np.std(y_t1_range)


# In[46]:


np.percentile(y_t1_range, 80)


# In[47]:


dy_t1dx = np.gradient(y_t1)
plt.plot(x, dy_t1dx)


# In[48]:


x[1:][dy_t1dx[1:]*dy_t1dx[:-1] < 0]  # dydx=0


# In[49]:


# sum up 0 to 10000 except for those can be divided by 4 or 7 in one line
# nums_ls = [i for i in range(10000) if i % 4 != 0 & i % 7 != 0]
nums = np.arange(0, 10001, 1)
sum(nums[(nums % 4 != 0)*(nums % 7 != 0)])


# In[50]:


# Flower petal
theta = np.linspace(0, 2 * np.pi, 1000)
r_theta = [1 + 3/4 * np.sin(3 * _) for _ in theta]
plt.plot(theta, r_theta)


# In[51]:


# polar cordinates
x = r_theta * np.cos(theta)
y = r_theta * np.sin(theta)
plt.plot(x, y)


# ## Matrix
# 

# ### Multi-Dimensional Arrays
# 

# In[52]:


A_1 = np.array([[1, 2, 3], [6, 7, 8], [4, 5, 9]])
A_1


# In[53]:


A_1*2


# In[54]:


2 / A_1


# In[55]:


# Booling
A_1 > 5


# In[56]:


A_2 = np.random.randn(3, 3)
A_2


# In[57]:


# nesting 很高级
A_2[A_1 > 5]


# In[58]:


# Element Indexing
A_1


# In[59]:


A_1[:, 0]  # first col


# In[60]:


A_1[:, 1]


# In[61]:


# just want 7,5 above
A_1[1:, 1]


# In[62]:


# 2-d Function
x = np.linspace(0, 10, 1000)
y = np.linspace(0, 10, 1000)
xv, yv = np.meshgrid(x, y)
zv = xv**2 + yv ** 2


# In[63]:


plt.contour(xv, yv, zv, levels=30)
plt.colorbar()


# ### `ravel()` method
# 
# - Turn any N-Dimensional array to 1-D array
# 

# In[64]:


A_1.ravel()


# ### Basic Linear Algebra
# 

# In[65]:


# Equations A for coefficients matrix and c For constants: Qramer rules
A = np.array([[3, 2, 1], [5, -5, 4], [6, 0, 1]])
c = np.array([4, 3, 0])


# In[66]:


np.linalg.solve(A, c)


# In[67]:


# Eigen value4s
matrix_B = np.array([[4, 2, 2], [2, 4, 2], [2, 2, 4]])
np.linalg.eig(matrix_B)


# - It returns eigen values and eigen vectors
# 

# In[ ]:


# ## Vector Math
# 
# - vectorization,vectorized operation or array programming:
#   - Convert for-loop to one-time computation
# 

# In[68]:


# Difference between vectorization and Non-vectorization
# sourcery skip: move-assign-in-block, sum-comprehension
import time
# vectorization
num = 10000
a = np.random.random(num)
b = np.random.random(num)

start = time.time()
c = np.dot(a, b)
end = time.time()
print(c)
print(f"Verctorize time:{round((end -start)*1000,4)} ms")

# Loop version:Non-vectorization
c = 0  # iconic
start = time.time()
for i in range(num):
    c += a[i]*b[i]
end = time.time()
print(c)
print(f"Loop version time:{round((end -start)*1000,4)} ms")


# ### Generate continuous uniform random variables, with a=20, b =20
# 

# In[69]:


N = 1000
a = 20
b = 30

rand_nums = np.random.uniform(a, b, N)
uniform_rvs = (rand_nums - a) / (b - a)
plt.plot(np.arange(N), uniform_rvs)


# ### Generate exponential random variables with lambda = 2 by inverse transform method.
# 

# In[70]:


from numpy import log as ln


# In[71]:


lam = 2
mean = 1 / lam

exp_rvs = -mean * ln(uniform_rvs)
plt.plot(np.arange(N), exp_rvs)


# 

# ### [Conditional vectorizaion](https://stackoverflow.com/questions/57896289/vectorizing-conditional-logic-in-python)
# 
# ### Generate bernoulli random variables with probability p = 0.3
# 

# In[72]:


p = 0.3
bernoulli_rvs = [1 if _ < p else 0 for _ in uniform_rvs]
plt.hist(bernoulli_rvs)


# In[73]:


# np.where
bernoulli_rvs_where = np.where(uniform_rvs < p, 1, 0)
plt.hist(bernoulli_rvs_where)


# ### Generate random variables from the discrete distribution **with the support set ${1,2,3,4}$ and pmf $P[W_i = j] = \frac{j}{10} ,j = 1,2,3, i = 1, ⋯, N.$**
# 
# that is:
# 
# $$
# \begin{aligned}
# 
# \mathrm{P[W_i = 1]} &= \frac{1}{10} \\
# \mathrm{P[W_i = 2]} &= \frac{2}{10} \\
# \mathrm{P[W_i = 3]} &= \frac{3}{10} \\
# \mathrm{P[W_i = 4]} &= \frac{4}{10} ,where:\ i = 1 \cdots N
# 
# \end{aligned}
# $$
# 
# - Nesting conditional vectorization
# 

# In[74]:


discrete_rvs_where = np.where(uniform_rvs < 0.1, 1, np.where(
    uniform_rvs <= 0.3, 2, np.where(uniform_rvs <= 0.6, 3, 4)))
plt.hist(discrete_rvs_where)


# In[75]:


# # Define the boundaries of the intervals
# bins = [0, 0.1, 0.3, 0.6, 1]

# # Use np.digitize to find the indices of the intervals
# indices = np.digitize(uniform_rvs, bins)

# # The discrete RV values are just the indices + 1
# discrete_rvs = indices + 1


# ### Generate random variables from **Poisson, Binomial and Gamma** (with the integer shape parameter) distributions
# 
# #### Generate poisson random variables
# 

# In[76]:


def poisson_rvs_gen(lam=2):
    time = 0
    event_count = -1
    mean = 1 / lam

    while True:
        event_t = -mean * ln(np.random.uniform())
        time += event_t
        event_count += 1
        if time > 1:
            break
    return event_count


# In[77]:


poisson_rvs = [poisson_rvs_gen(lam=2) for _ in range(N)]
plt.hist(poisson_rvs)


# #### Generate binomial rvs from bernoulli distribution with p = 0.3
# 

# In[78]:


p = 0.3
# N = 1000


# In[79]:


binom_rvs = [np.sum([1 if _ < p else 0 for _ in np.random.uniform(0, 1, N)])
             for _ in range(N)]


# In[80]:


plt.hist(binom_rvs)


# In[81]:


np.mean(binom_rvs)


# #### Generate gamma rvs with alpha = 4, beta = 2
# 

# In[82]:


# # Original code inverse function
# a = 4
# b = 2
# gamma_rvs = []

# for _ in range(N):
#     r = 0
#     for _ in range(a):
#         E = -b * ln(np.random.uniform())
#         r = r + E
#     gamma_rvs.append(r)


# In[83]:


a = 4
b = 2

gamma_rvs = [np.sum([-b * ln(np.random.uniform())
                    for _ in range(a)]) for _ in range(N)]


# In[84]:


np.mean(gamma_rvs)


# In[85]:


plt.hist(gamma_rvs)


# In[ ]:


# In[ ]: