#!/usr/bin/env python
# coding: utf-8

# # Homework code
# 
# 
# 
#  #### [Back to main page](https://petrosyan.page/fall2020math3215)
# 

# In[1]:


# nbi:hide_in
# library
import matplotlib.pyplot as plt
import numpy as np
# Default size (in inches) for figures created from here on.
plt.rcParams["figure.figsize"] = (8, 5)
# plt.gca() returns the current axes; hide its top, right and bottom borders.
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)
plt.gca().spines['bottom'].set_visible(False)

# Observed sample of single-digit integers (0..9).
data=np.array([1,6,9,9,3,8,5,0,6,7,5,7,5,9,4,6,5,6,4,4,4,8,0,9,3,2,1,5,4,5,7,3,2,
      1,4,6,7,1,3,4,4,8,8,6,1,6,1,2,8,8,1,7,8,2,2,0,9,7,5,2,5,7,1,7,0,1,
      8,5,2,9,2,4,7,6,6,6,3,3,6,9,6,0,2,3,6,0,1,7,8,9,1,3,7,0,9,8,5,3,4,
      8,2,6,6,4,2,7,5,0,8,2,7,6,8,9,9,7,9,0,0,0,9,3,3,4,5,1,9,4,5,4,6,4,
      8,7,6,8,6,6,2,3,6,6,1,7,4,1,8,9,8,8])

# Support points 0..9 and a pmf that puts equal mass on each of them.
range_x=np.arange(0,10)
pmf_values=np.ones(range_x.size)/range_x.size 

# compute empirical pmf
def epmf(data):
    """Empirical pmf of a sample.

    data : numpy array of observations

    Returns ``(epmf_values, erange_x)``: the relative frequency of every
    distinct value in ``data`` and the sorted distinct values themselves.
    """
    support, freq = np.unique(data, return_counts=True)
    return freq/data.size, support

# Relative frequencies of the observed sample and the values they belong to.
epmf_values, erange_x = epmf(data)

# plot
plt.ylim(0,0.2)
plt.axhline(y=0, color='k')
plt.xticks(range_x)

# Red dots mark the support points on the x axis.
plt.scatter(range_x,np.zeros(range_x.shape), color ="red", s=20)
# Filled bars: the pmf; dashed transparent bars: the relative frequencies.
# (Legend label spelling fixed: "histogran" -> "histogram".)
plt.bar(range_x, pmf_values, width=1, color='#039be5', edgecolor="w", linewidth=1.3,  label="True histogram")
plt.bar(erange_x, epmf_values, width=0.9, color=(1, 1, 1, 0),
        edgecolor='green', linewidth=1.5,linestyle="--", label="Relative frequency histogram")
plt.legend()
plt.show();


# In[2]:


# Support 0..9 with equal probability mass on every point.
range_x=np.arange(0,10)
pmf_values=np.ones(range_x.size)/range_x.size

# Reuse figure number 1, clearing whatever was drawn on it before.
fig, ax2 = plt.subplots(num=1, clear=True)


ax2.set_ylim(-0.01, 0.2)
ax2.set_xlim(-0.7, 10)
ax2.axhline(y=0, color='k')
ax2.set_xticks(range_x)
# One tick per distinct probability value (avoids stacking identical ticks).
ax2.set_yticks(np.unique(pmf_values))
ax2.spines["top"].set_visible(False)
ax2.spines["right"].set_visible(False)
ax2.spines["bottom"].set_visible(False)


# Plotting with plt.bar instead of plt.hist works better when f(x) are known
ax2.scatter(range_x,np.zeros(range_x.shape), color ="red", s=20)
ax2.bar(range_x, pmf_values, width=1, color='#039be5', edgecolor="w", linewidth=1.3, label="Histogram")
ax2.set_title("Histogram")


plt.show();


# In[2]:


import re

# Groups of three digits separated by blanks.
text="169 938 506 757 594 656 444 809 321 545 732 146 713 448 861 612 881 782 209 752 571 701 852 924 766 633 696 023 601 789 137 098 534 826 642 750 827 689 979 000 933 451 945 464 876 866 236 617 418 988"

# Drop every whitespace character, leaving one long run of digits.
newtext=re.sub(r"\s","",text)

# Put a comma after every digit. The result used to be computed and thrown
# away; keep it in a variable and print it so the cell actually shows it.
digits_csv=re.sub(r"(\d)",r"\g<1>,",newtext)
print(digits_csv)

text = "0.1312, 0.0747, 0.2818, 0.7537, 0.9015, 0.7973, 0.6686, 0.0377,0.3207, 0.0497, 0.3036, 0.7613, 0.1278, 0.3596, 0.4977, 0.0802,0.5065, 0.6308, 0.1961, 0.921 , 0.2606, 0.6621, 0.5593, 0.1525,0.0694, 0.6032, 0.2863, 0.2178, 0.7832, 0.5217, 0.7545, 0.3325,0.5476, 0.7367, 0.0873, 0.8538, 0.3113, 0.5907, 0.7813, 0.0143"

# Append "&" to every comma.
newtext=re.sub(r",",",&",text)
print(newtext)


# In[4]:


# Support 1..8 with equal probability mass on every point.
range_x=np.arange(1,9)
pmf_values=np.ones(range_x.size)/range_x.size

# Reuse figure number 1, clearing whatever was drawn on it before.
fig, ax2 = plt.subplots(num=1, clear=True)


ax2.set_ylim(-0.01, 0.2)
ax2.set_xlim(-0.7, 10)
ax2.axhline(y=0, color='k')
ax2.set_xticks(range_x)
# One tick per distinct probability value (avoids stacking identical ticks).
ax2.set_yticks(np.unique(pmf_values))
ax2.spines["top"].set_visible(False)
ax2.spines["right"].set_visible(False)
ax2.spines["bottom"].set_visible(False)


# Plotting with plt.bar instead of plt.hist works better when f(x) are known
ax2.scatter(range_x,np.zeros(range_x.shape), color ="red", s=20)
ax2.bar(range_x, pmf_values, width=1, color='#039be5', edgecolor="w", linewidth=1.3, label="Histogram")
ax2.set_title("Histogram")


plt.show();


# In[6]:


# Support points and the probability mass attached to each of them.
range_x=np.array([-1,1,2,3,4])
pmf_values=np.array([1/4,1/12,1/6,2/6,1/6])

# Reuse figure number 1, clearing whatever was drawn on it before.
fig, ax2 = plt.subplots(num=1, clear=True)


ax2.set_ylim(-0.01, 0.4)
ax2.set_xlim(-2, 5)
ax2.axhline(y=0, color='k')
ax2.set_xticks(range_x)
# One tick per distinct probability value (1/6 occurs twice in pmf_values).
ax2.set_yticks(np.unique(pmf_values))
ax2.spines["top"].set_visible(False)
ax2.spines["right"].set_visible(False)
ax2.spines["bottom"].set_visible(False)


# Plotting with plt.bar instead of plt.hist works better when f(x) are known
# zorder keeps the red support dots on top of the bars.
ax2.scatter(range_x,np.zeros(range_x.shape), color ="red", s=20, zorder=2)
ax2.bar(range_x, pmf_values, width=1, color='#039be5', edgecolor="w", linewidth=1.3, label="Histogram", zorder=1)
ax2.set_title("Histogram")


plt.show();


# In[13]:


# nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np
from ipywidgets import interact, FloatSlider
# NOTE(review): the rcParams.update(...) call two lines below restores all
# rcParams to matplotlib's defaults, which presumably discards this figsize
# setting -- confirm which of the two is intended.
plt.rcParams['figure.figsize'] = (12, 8)
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault)
# lmbd is assigned here but not read anywhere in the visible file.
lmbd = 0.8
# Threshold used further down to slice the x grid.
x=1

def cdf_func(xdata):
    """Evaluate a step function on ``xdata`` (a float numpy array).

    The value is 1/2 on the open intervals (0, 1) and (2, 3), 0 for x < 0,
    on (1, 2) and for x > 3, and NaN exactly at the breakpoints 0, 1, 2, 3.
    """
    regions = [
        xdata<0,
        xdata==0,
        (0<xdata) & (xdata<1),
        xdata==1,
        (1<xdata) & (xdata<2),
        xdata==2,
        (2<xdata) & (xdata<3),
        xdata==3,
        xdata>3,
    ]
    # One constant per region, in the same order as `regions`.
    levels = [0, np.nan, 1/2, np.nan, 0, np.nan, 1/2, np.nan, 0]
    return np.piecewise(xdata, regions, levels)
# Evaluate the step function on a fine grid and draw it.
xdata = np.linspace(-0.5, 3.5, 1000)
plt.plot(xdata,  cdf_func(xdata), linewidth=3)
# Grid points up to the threshold x; not used again in this cell.
xshade = xdata[xdata<=x]
plt.ylim(0, 0.6)
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)
plt.show();


# In[14]:


# nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np
from ipywidgets import interact, FloatSlider
# NOTE(review): the rcParams.update(...) call two lines below restores all
# rcParams to matplotlib's defaults, which presumably discards this figsize
# setting -- confirm which of the two is intended.
plt.rcParams['figure.figsize'] = (12, 8)
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault)
# lmbd is assigned here but not read anywhere in the visible file.
lmbd = 0.8
# Threshold used further down to slice the x grid.
x=1

def func(xdata):
    """Return ``xdata`` divided by 2 (element-wise, via ``np.divide``)."""
    return np.divide(xdata, 2)

def cdf_func(xdata):
    """Evaluate a piecewise-linear cdf on ``xdata`` (a float numpy array).

    0 for x <= 0, x/2 on (0, 1), 1/2 on [1, 2], x/2 - 1/2 on (2, 3)
    and 1 for x >= 3.
    """
    # (region mask, constant or rule applied on that region)
    pieces = [
        (xdata<=0, 0),
        ((0<xdata) & (xdata<1), lambda x: x/2),
        ((1<=xdata) & (xdata<=2), 1/2),
        ((2<xdata) & (xdata<3), lambda x: x/2-1/2),
        (xdata>=3, 1),
    ]
    masks = [mask for mask, _ in pieces]
    rules = [rule for _, rule in pieces]
    return np.piecewise(xdata, masks, rules)

# Evaluate the cdf on a fine grid and draw it.
xdata = np.linspace(-0.5, 3.5, 1000)
plt.plot(xdata,  cdf_func(xdata), linewidth=3)
# Grid points up to the threshold x; not used again in this cell.
xshade = xdata[xdata<=x]
plt.ylim(0, 1.1)
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)
plt.show();


# In[15]:


# Inspection: shape of func applied to the grid from the previous cell.
func(xdata).shape


# In[29]:


import numpy as np
import matplotlib.pyplot as plt
# Default size (in inches) for figures created from here on.
plt.rcParams["figure.figsize"] = (8, 4)


# Sample of 20 real-valued observations used for the density histogram below.
data = np.array([ -1.42, -0.31, -0.73, -0.51, -2.23, -0.32, 1.38, 0.32, -0.66, 0.01,
        -0.7, -1.86, -1.07, 0.1, -0.59, 0.58, -0.63, -0.87, -4.65, -1.14 ])

def epmf(data, inter, N):
    """Heights of a density histogram.

    data  : numpy array of observations
    inter : class boundaries; at least N+1 entries are read
    N     : number of class intervals

    For each half-open interval [inter[i], inter[i+1]) the height is the
    number of observations inside it divided by (sample size * interval
    length). Returns an array of N heights.
    """
    heights = np.zeros(N)
    for k in range(N):
        lo, hi = inter[k], inter[k+1]
        inside = (lo<=data) & (data<hi)
        heights[k] = np.sum(inside)/(data.size*(hi-lo))
    return heights
    
def dens_hist_std(data, N=4):
    """Draw a density histogram of ``data`` with N equal-width classes.

    data : numpy array of observations
    N    : number of class intervals

    The class boundaries span the sample range rounded outward to one
    decimal place. Bar heights come from ``epmf(data, inter, N)``; the bars
    are drawn on the current pyplot axes and nothing is returned.
    """
    # Lower end rounded down from the sample MINIMUM (this used np.max, which
    # made zmin == zmax and produced zero-width classes), upper end rounded up
    # so the largest observation is not cut off.
    zmin = np.floor(np.min(data)*10**1)/10**1
    zmax = np.ceil(np.max(data)*10**1)/10**1
    if zmax <= zmin:
        # Degenerate sample range: still give the classes a positive width.
        zmax = zmin + 0.1
    inter = np.linspace(zmin,zmax,N+1)
    length = inter[1]-inter[0]
    epmf_values = epmf(data, inter, N)

    # Bars are anchored at the left boundary of each class.
    plt.bar(inter[:N], epmf_values, width=length,
            color='#039be5', edgecolor='black', linewidth=1,
            align="edge", label="True histogram")
    # Small box in the figure corner showing the number of classes.
    plt.figtext(0.8,0.8, "N = {}".format(N), ha="left", va="top",
        backgroundcolor=(0.1, 0.1, 1, 0.15), fontsize="large")
    plt.xlim(zmin-0.5, zmax+0.5)
    plt.ylim(0, np.max(epmf_values)+0.1)
    plt.title("Density histogram for uniform data")
    plt.gca().spines['top'].set_visible(False)
    plt.gca().spines['right'].set_visible(False)
 
# Density histogram of the sample with 4 classes.
dens_hist_std(data, 4)

plt.show();


# In[30]:


# Inspection: sign (-1, 0 or 1) of every observation.
np.sign(data)


# ### Problem 4.3-7

# In[94]:


###### nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np


plt.figure(figsize=(4,6))

# Support points (x, y) with x = 1..4 and y = x+1..x+4, each annotated 1/16.
for x in range(1,5):
    for y in range(x+1,x+5):
        plt.scatter(x,y, color="red", edgecolor="black",alpha=1)
        plt.text(x+0.1,y+0.2, r"$\frac{1}{16}$")
        
# Annotation 1/4 under every x value (placed below the plotted y range).
for x in range(1,5):
    plt.text(x-0.1,-0.5, r"$\frac{1}{4}$", color="red", size="large")
    
# Accumulates sum of val*y, i.e. 16 times the mean of Y.
meany=0    
    
# For y = 2..8, val = 4 - |5 - y| is the numerator (over 16) written next to y.
for y in range(2,9):
    val =  4- np.abs(5-y)
    plt.text(-1.5,y-0.13, r"$\frac{{{}}}{{16}}$".format(val), color="red", size="large")
    meany+=val*y
    
# Prints the mean of Y as a LaTeX fraction over 16.
print(r"$\mu_Y = \frac{{{}}}{{16}}$".format(meany))

plt.xticks(np.arange(1,5,1))
plt.yticks(np.arange(2,9,1))
plt.ylim(1,9)
plt.xlim(0,5)
plt.xlabel("x", color="blue")
plt.ylabel("y", color="blue")
plt.grid(True)

plt.show();


# In[95]:


###### nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np


plt.figure(figsize=(4,6))

# Same support as the previous figure; every point is now annotated 1/4.
for x in range(1,5):
    for y in range(x+1,x+5):
        plt.scatter(x,y, color="red", edgecolor="black",alpha=1)
        plt.text(x+0.1,y+0.2, r"$\frac{1}{4}$", color="red")
        

plt.xticks(np.arange(1,5,1))
plt.yticks(np.arange(2,9,1))
plt.ylim(1,9)
plt.xlim(0,5)
plt.xlabel("x", color="blue")
plt.ylabel("y", color="blue")
plt.grid(True)

plt.show();


# In[111]:


###### nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np


plt.figure(figsize=(6,10))

# Support points (x, y) with x = 1..4 and y = x+1..x+4.
for x in range(1,5):
    for y in range(x+1,x+5):
        plt.scatter(x,y, color="red", edgecolor="black",alpha=1)
        
xval=np.array([1, 2, 3, 4])
# Line y = x + 2.5 drawn as the least squares line.
yval = xval+2.5
# Values drawn as the conditional means; they equal yval at every x.
ycond = np.array([3.5, 4.5, 5.5, 6.5])

plt.plot(xval,yval,   linewidth=2, label="least squares", color="orange")
plt.scatter(xval,ycond,   linewidth=2, label="conditional mean")


plt.xticks(np.arange(1,5,1))
plt.yticks(np.arange(2,9,1))
plt.ylim(1,9)
plt.xlim(0,5)
plt.xlabel("x", color="blue")
plt.ylabel("y", color="blue")
plt.legend()
plt.show();


# ### Problem 4.2-10
# 
# 

# In[125]:


###### nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np


plt.figure(figsize=(10,10))

# Support points (x, y) with 0 <= x <= y <= 9; each is annotated with
# 1 / (10 * (10 - x)).
for x in range(0,10):
    for y in range(x,10):
        plt.scatter(x,y, color="red", edgecolor="black",alpha=1)
        plt.text(x+0.1,y+0.2, r"$\frac{{1}}{{{}}}$".format(10*(10-x)))
        
# Annotation 1/10 under every x value.
for x in range(0,10):
    plt.text(x-0.1,-1.4, r"$\frac{1}{10}$", color="red", size="large")
    
# meany is assigned but not used in this cell.
meany=0    

# Running sum of 1/(10*(10-k)) for k = 0..y, written to the left of row y.
val = 0
for y in range(0,10):
    val += 1/(10*(10-y))    
    plt.text(-2,y-0.13, "{:.4f}".format(val), color="red", size="large")
    
plt.xticks(np.arange(0,10,1))
plt.yticks(np.arange(0,10,1))
plt.ylim(-0.5,10)
plt.xlim(-0.5,10)
plt.xlabel("x", color="blue")
plt.ylabel("y", color="blue")
plt.grid(True)

plt.show();


# ### 4.4-4

# In[37]:


###### nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np

plt.figure(figsize=(5,5))

# Shade the region between the curve y = x^2 and the line y = 1 for 0 <= x <= 1.
x = np.linspace(0, 1, 100)
plt.fill_between(x, x**2, 1, alpha=0.5)
plt.plot(x,x**2, color="red", label=r"$y=x^2$")
plt.text(0.7,0.4, r"$y=x^2$")
plt.xticks([0,1])
plt.yticks([1])

plt.xlim(0,1)
plt.ylim(0,1)
 

plt.show();


# In[32]:


###### nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np

plt.figure(figsize=(5,5))

x = np.linspace(0, 1, 100)
# Shade, for 1/2 <= x <= 1, the part between max(x^2, 1/2) and 1.
xval = np.linspace(1/2,1, 100)
plt.fill_between(xval, np.maximum(xval**2, 1/2), 1, alpha=0.5)
plt.plot(x,x**2, color="red", label=r"$y=x^2$")
plt.text(0.7,0.4, r"$y=x^2$")
plt.xticks([1/2, 1], [r"$\frac{1}{2}$", 1])
plt.yticks([0,1/2, 1], [0, r"$\frac{1}{2}$", 1])
plt.xlim(0,1)
plt.ylim(0,1)

 
# Dashed guide lines at x = 1/2 and y = 1/2.
plt.vlines(1/2,0,1, linestyles='dashed', linewidth=0.7)    
plt.hlines(1/2,0,1, linestyles='dashed', linewidth=0.7)    

plt.xlabel("x", color="blue")
plt.ylabel("y", color="blue")


plt.show();


# ### 4.4-14

# In[30]:


###### nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np

plt.figure(figsize=(5,5))

# Shade the region above the line y = x and below y = 1 for 0 <= x <= 1.
x = np.linspace(0, 1, 100)
plt.fill_between(x, x, 1, alpha=0.5)
# The plotted curve is the line y = x; the label used to read "$y=x^2$",
# carried over from the previous problem.
plt.plot(x,x, color="red", label=r"$y=x$")
plt.text(0.6,0.5, r"$x=y$")
plt.xticks([0,1])
plt.yticks([1])

plt.xlim(0,1)
plt.ylim(0,1)


plt.show();


# In[34]:


###### nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np

plt.figure(figsize=(5,5))
# Shade the region above the line y = x and below y = 1 for 0 <= x <= 1.
x = np.linspace(0, 1, 100)
plt.fill_between(x, x, 1, alpha=0.5)
plt.text(0.6,0.5, r"$x=y$")
plt.xticks([0,1])
plt.yticks([1])

plt.xlim(0,1)
plt.ylim(0,1)

# Moments entered by hand (presumably from the written solution -- verify).
meanx=8/15
meany=4/5
sigma2x=11/225
cov=4/225

# Line through (meanx, meany) with slope cov / sigma2x.
xval = np.linspace(-0.1, 1.1, 100)
yval = (xval-meanx)*cov/sigma2x + meany
plt.plot(xval,yval,   linewidth=2, label="least squares line", color="g")


plt.show();


# ### 4.4-18

# In[33]:


###### nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np

plt.figure(figsize=(5,5))

# Shade, for 0 <= y <= 4, the horizontal band y <= x <= y + 2.
y = np.linspace(0, 4, 100)
plt.fill_betweenx(y, y, y+2, alpha=0.5)

plt.xlim(0,6)
plt.ylim(0,4)

 
plt.show();


# In[26]:


###### nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np

plt.figure(figsize=(6,4))

# Shade, for 0 <= y <= 4, the horizontal band y <= x <= y + 2.
y = np.linspace(0, 4, 100)
plt.fill_betweenx(y, y, y+2, alpha=0.5)

# Overlay the line y = x - 1.
x=np.linspace(0,6,100)
plt.plot(x, x-1, color="green")

plt.xlim(0,6)
plt.ylim(0,4)

 
plt.show();


# In[1]:


###### nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np

plt.figure(figsize=(6,4))

# Shade, for 0 <= y <= 4, the horizontal band y <= x <= y + 2.
y = np.linspace(0, 4, 100)
plt.fill_betweenx(y, y, y+2, alpha=0.5)

# x grid on which the curve is evaluated below.
x=np.linspace(0,6,100)

def condmean(x):
    """Piecewise-linear function of ``x`` (a float numpy array).

    x/2 for x < 2, x - 1 for 2 <= x <= 4 and (x + 2)/2 for x >= 4.
    The last two pieces overlap at x = 4, where both evaluate to 3.
    """
    # (region mask, rule applied on that region); later entries win on overlap.
    branches = [
        (x<2, lambda t: t/2),
        ((x>=2)&(x<=4), lambda t: t-1),
        ((x>=4), lambda t: (t+2)/2),
    ]
    masks = [mask for mask, _ in branches]
    rules = [rule for _, rule in branches]
    return np.piecewise(x, masks, rules)

# Overlay the piecewise-linear curve returned by condmean on the shaded band.
plt.plot(x, condmean(x), color="green")

plt.xlim(0,6)
plt.ylim(0,4)

 
plt.show();


# ### 6.1-4.

# In[116]:


# nbi:hide_in
# library
import matplotlib.pyplot as plt
import numpy as np
# Default size (in inches) for figures created from here on.
plt.rcParams["figure.figsize"] = (15, 6)


# Sample of 50 integer observations entered by hand (not randomly generated).
data = np.array([320, 326, 325, 318, 322, 320, 329, 317, 316, 331,
                 320, 320, 317, 329, 316, 308, 321, 319, 322, 335,
                 318, 313, 327, 314, 329, 323, 327, 323, 324, 314,
                 308, 305, 328, 330, 322, 310, 324, 314, 312, 318,
                 313, 320, 324, 311, 317, 325, 328, 319, 310, 324])

# compute empirical pmf
def epmf(data):
    """Frequency table of a sample.

    data : numpy array of observations

    Returns ``(epmf_values, erange_x)``: the absolute count of every distinct
    value in ``data`` (counts are NOT divided by the sample size) and the
    sorted distinct values themselves.
    """
    support, freq = np.unique(data, return_counts=True)
    return freq, support

# Counts of every distinct observation and the values they belong to.
epmf_values, erange_x = epmf(data)

# plot 
plt.axhline(y=0, color='k')
plt.xticks(erange_x)

plt.bar(erange_x, epmf_values, width=1, color='#039be5', edgecolor='black', linewidth=1)

# Sample mean and sample standard deviation (ddof=1 divides by n - 1).
mean = np.mean(data)
std = np.sqrt(np.var(data, ddof=1))

# Red dot at the mean; solid lines at mean +/- 1 std, dashed at mean +/- 2 std.
plt.scatter(mean, 0, color="red", label="mean", zorder=3, s=50)
plt.vlines([mean-std, mean+std], -0.3,3.5,  label="1 std from mean", zorder=3)
plt.vlines([mean-2*std, mean+2*std], -0.3,3.5,  linestyle="dashed", label="2 std from mean", zorder=3)

plt.legend(loc='upper left')
plt.show();


# In[18]:


# Inspection: the sample in ascending order.
np.sort(data)


# In[19]:


# Inspection: sample mean.
np.mean(data)


# In[78]:


# Inspection: sample standard deviation (ddof=1 divides by n - 1).
np.sqrt(np.var(data, ddof=1))


# In[22]:


# Inspection: number of observations.
data.shape


# In[140]:


# nbi:hide_in
import numpy as np
import matplotlib.pyplot as plt
# Default size (in inches) for figures created from here on.
plt.rcParams["figure.figsize"] = (10, 6)

# NOTE(review): N is not used in this cell, and the array below holds 50
# values rather than 100 -- presumably left over from a template.
N=100 # total number of samples
# intsize the number of class intervals
 
# Sample entered by hand (not randomly generated).
data = np.array([320, 326, 325, 318, 322, 320, 329, 317, 316, 331,
                 320, 320, 317, 329, 316, 308, 321, 319, 322, 335,
                 318, 313, 327, 314, 329, 323, 327, 323, 324, 314,
                 308, 305, 328, 330, 322, 310, 324, 314, 312, 318,
                 313, 320, 324, 311, 317, 325, 328, 319, 310, 324])


# Add the histogram
plt.hist(data, bins='auto', color='#039be5', edgecolor='black', linewidth=1)

# Add the mean and the 1- and 2-standard-deviation marks
# (sample standard deviation: ddof=1 divides by n - 1).
mean = np.mean(data)
std = np.sqrt(np.var(data, ddof=1))
plt.scatter(mean, 0, color="red", label="mean", zorder=3, s=50)
plt.vlines([mean-std, mean+std], -1,10,   label="1 std from mean", linewidth=1.2, zorder=3)
plt.vlines([mean-2*std, mean+2*std], -1,10,  linestyle="dashed", linewidth=1.2, label="2 std from mean", zorder=3)

plt.legend(loc='upper left')
plt.show();


# In[ ]:


# In[135]:


# Inspection: end points of the 1-std and 2-std intervals around the mean.
[mean-std, mean+std, mean-2*std, mean+2*std]


# In[138]:


# Number of observations inside [mean - std, mean + std] and inside
# [mean - 2 std, mean + 2 std] (end points included).
print(np.sum([(mean-std<=data) & (data<=mean+std)]))
print(np.sum([(mean-2*std<=data) & (data<=mean+2*std)]))


# In[136]:


# Inspection: boolean mask of the observations within 1 std of the mean.
[(mean-std<=data) & (data<=mean+std)]


# ### 6.2-2.

# In[137]:


# Inspection: the sample currently bound to `data`.
data


# In[141]:


# nbi:hide_in
import numpy as np
import matplotlib.pyplot as plt
# Default size (in inches) for figures created from here on.
plt.rcParams["figure.figsize"] = (10, 6)

N=100 # total number of samples
# intsize the number of class intervals

# Two samples entered by hand (not randomly generated).
data1 = np.array([1.03, 1.03, 1.06, 1.02, 1.03, 1.03, 1.03, 1.02, 1.03, 1.03,
                  1.06, 1.04, 1.05, 1.03, 1.04, 1.03, 1.05, 1.06, 1.04, 1.04,
                  1.03, 1.04, 1.04, 1.06, 1.03, 1.04, 1.05, 1.04, 1.04, 1.02,
                  1.03, 1.05, 1.05, 1.03, 1.04, 1.03, 1.04, 1.04, 1.03, 1.04,
                  1.03, 1.04, 1.04, 1.04, 1.05, 1.04, 1.04, 1.03, 1.03, 1.05,
                  1.04, 1.04, 1.05, 1.04, 1.03, 1.03, 1.05, 1.03, 1.04, 1.05,
                  1.04, 1.04, 1.04, 1.05, 1.03, 1.04, 1.04, 1.04, 1.04, 1.03,
                  1.05, 1.05, 1.05, 1.03, 1.04])


# The 40th entry used to end in "1.27." (a period instead of a comma), which
# made the whole cell a syntax error.
data2 = np.array([1.29, 1.10, 1.28, 1.29, 1.23, 1.20, 1.31, 1.25, 1.13, 1.26,
                  1.19, 1.33, 1.24, 1.20, 1.26, 1.24, 1.11, 1.14, 1.15, 1.15,
                  1.19, 1.26, 1.14, 1.20, 1.20, 1.20, 1.24, 1.25, 1.28, 1.24,
                  1.26, 1.20, 1.30, 1.23, 1.26, 1.16, 1.34, 1.10, 1.22, 1.27,
                  1.21, 1.09, 1.23, 1.03, 1.32, 1.21, 1.23, 1.34, 1.19, 1.18,
                  1.20, 1.20, 1.13, 1.43, 1.19, 1.05, 1.16, 1.19, 1.07, 1.21,
                  1.36, 1.21, 1.00, 1.23, 1.22, 1.13, 1.24, 1.10, 1.18, 1.26,
                  1.12, 1.10, 1.19, 1.10, 1.24])


# NOTE(review): everything below works on `data`, i.e. whatever an earlier
# cell left behind, not on data1/data2 defined above -- confirm which array
# this histogram is meant to show.
# Add the histogram
plt.hist(data, bins='auto', color='#039be5', edgecolor='black', linewidth=1)

# Add the mean and the 1- and 2-standard-deviation marks
# (sample standard deviation: ddof=1 divides by n - 1).
mean = np.mean(data)
std = np.sqrt(np.var(data, ddof=1))
plt.scatter(mean, 0, color="red", label="mean", zorder=3, s=50)
plt.vlines([mean-std, mean+std], -1,10,   label="1 std from mean", linewidth=1.2, zorder=3)
plt.vlines([mean-2*std, mean+2*std], -1,10,  linestyle="dashed", linewidth=1.2, label="2 std from mean", zorder=3)

plt.legend(loc='upper left')
plt.show();


# In[228]:


# nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np

# set up the figure
# Single axes that both box plots below are drawn on.
fig, ax = plt.subplots(1,1, figsize=(10,5))

# Two samples entered by hand (not randomly generated), stored as floats.
data1 = np.array([1.03, 1.03, 1.06, 1.02, 1.03, 1.03, 1.03, 1.02, 1.03, 1.03,
                  1.06, 1.04, 1.05, 1.03, 1.04, 1.03, 1.05, 1.06, 1.04, 1.04,
                  1.03, 1.04, 1.04, 1.06, 1.03, 1.04, 1.05, 1.04, 1.04, 1.02,
                  1.03, 1.05, 1.05, 1.03, 1.04, 1.03, 1.04, 1.04, 1.03, 1.04,
                  1.03, 1.04, 1.04, 1.04, 1.05, 1.04, 1.04, 1.03, 1.03, 1.05,
                  1.04, 1.04, 1.05, 1.04, 1.03, 1.03, 1.05, 1.03, 1.04, 1.05,
                  1.04, 1.04, 1.04, 1.05, 1.03, 1.04, 1.04, 1.04, 1.04, 1.03,
                  1.05, 1.05, 1.05, 1.03, 1.04]).astype(float)


data2 = np.array([1.29, 1.10, 1.28, 1.29, 1.23, 1.20, 1.31, 1.25, 1.13, 1.26,
                  1.19, 1.33, 1.24, 1.20, 1.26, 1.24, 1.11, 1.14, 1.15, 1.15,
                  1.19, 1.26, 1.14, 1.20, 1.20, 1.20, 1.24, 1.25, 1.28, 1.24,
                  1.26, 1.20, 1.30, 1.23, 1.26, 1.16, 1.34, 1.10, 1.22, 1.27,
                  1.21, 1.09, 1.23, 1.03, 1.32, 1.21, 1.23, 1.34, 1.19, 1.18,
                  1.20, 1.20, 1.13, 1.43, 1.19, 1.05, 1.16, 1.19, 1.07, 1.21,
                  1.36, 1.21, 1.00, 1.23, 1.22, 1.13, 1.24, 1.10, 1.18, 1.26,
                  1.12, 1.10, 1.19, 1.10, 1.24]).astype(float)


def percentile(data, p):
    """
    Compute the percentiles the way we defined in class 
    data : array of size N 
    p : percentile, given as a fraction (e.g. 0.25 for the first quartile)

    The percentile sits at 1-based position p*(N+1) of the sorted sample; a
    fractional position is linearly interpolated between its two neighbours.

    Raises ValueError when that position falls outside the sample, i.e. when
    p*(N+1) < 1 or p*(N+1) > N (this includes an empty sample).
    """
    data = np.sort(data, axis=0)
    n = data.shape[0]
    pos = p * (n + 1) - 1  # 0-based, possibly fractional, position
    if pos < 0 or pos > n - 1:
        raise ValueError("the rank does not exist")
    rank = int(pos)        # the rank (index of the lower neighbour)
    alpha = pos - rank     # the fractional part
    # When pos is exactly the last index alpha is 0, so clamping the upper
    # neighbour only avoids reading past the end of the array.
    upper = min(rank + 1, n - 1)
    return data[rank] + alpha * (data[upper] - data[rank])

def box_plot(ax, data, width=0.4, showout = True, position = np.array([0.4]), 
             textloc=np.array([0.8]), label = ""):
    """
    Draw a horizontal box plot of ``data`` on ``ax`` and write the five
    number summary into a text box on the current figure.

    ax : matplotlib ax
    data : the data 
    width : box width
    showout : show the outliers   
    position: the y axis of the box plot
    textloc : y position (figure coordinates) of the summary text box
    label : accepted but currently not used by this function
    """
    # compute the five number summary
    minim = np.min(data)
    maxim = np.max(data)
    q1 = percentile(data, 0.25)
    q2 = np.median(data)
    q3 = percentile(data, 0.75)

    # interquartile range
    iqr = q3 - q1

    # inner fences
    left_innerfence = q1 - 1.5 * iqr
    right_innerfence = q3 + 1.5 * iqr

    if showout:
        # Outliers lie strictly beyond the inner fences. The right side used
        # ">=", so a point exactly on the fence was both a whisker end and a
        # flier; both sides are now strict, matching the whisker rule below.
        outliers = data[np.logical_or(data < left_innerfence, data > right_innerfence)]
        # whiskers reach to the most extreme points still inside the fences
        low_whisker = np.min(data[data >= left_innerfence])
        high_whisker = np.max(data[data <= right_innerfence])
    else:
        # no outlier handling: whiskers span the whole sample
        outliers = []
        low_whisker = minim
        high_whisker = maxim

    stats = [{'iqr': iqr,
              'whishi': high_whisker,
              'whislo': low_whisker,
              'fliers': outliers,
              'q1': q1,
              'med': q2,
              'q3': q3}]

    # add the box plot
    flierprops = dict(markerfacecolor='black', markersize=5)
    ax.bxp(stats, vert = False, widths=width, positions = position, 
           flierprops=flierprops, showfliers=showout)

    # add Tukey's fences: inner at 1.5*iqr, outer at 3*iqr from the quartiles
    if showout:
        for fence in (left_innerfence, right_innerfence, q1-3*iqr, q3+3*iqr):
            ax.vlines(fence, position-0.2,position+0.2, linestyle="dashed", linewidth=1)

    # five number summary, placed at the right edge of the figure
    plt.figtext(1,textloc,
                r"$\min={:.4}$".format(minim)+"\n"+
                r"$q_1={:.4}$".format(q1)+"\n"+
                r"med$={:.4}$".format(q2)+"\n"+
                r"$q_3={:.4}$".format(q3)+"\n"+
                r"max$={:.4}$".format(maxim),
                ha="left", va="top",
                backgroundcolor=(0.1, 0.1, 1, 0.15),
                fontsize="large")
    
def disp_data(ax, data):
    """Show ``data`` as dots on a horizontal line and mark its mean in red.

    ax : matplotlib ax
    data : numpy array of observations

    All four axes borders and the y ticks are hidden; the y range ends up
    as (-0.1, 1.5).
    """
    baseline = np.zeros(data.shape)
    ax.scatter(data, baseline, zorder=2, s=10)
    ax.set_yticks([])
#     ax.set_xticks([])
    centre = np.mean(data)
    ax.scatter(centre, 0, zorder=2, s=20, color="red")
    # This range is replaced by the final set_ylim call below.
    ax.set_ylim(-0.01,0.1)
    ax.axhline(y=0, color='k', zorder=1, linewidth=0.5)

    for side in ('top', 'right', 'left', 'bottom'):
        ax.spines[side].set_visible(False)

    ax.set_ylim(-0.1,1.5)
            
# data2 is drawn at y = 1 and data1 at y = 2, both without outlier handling
# (showout=False); textloc sets the height of each summary text box.
box_plot(ax, data2, width=0.7, showout=False, position = np.array([1]),
         textloc = np.array([0.4]), label = "Regular carrots")

box_plot(ax, data1, width=0.7, showout=False, position = np.array([2]),
         textloc = np.array([0.8]), label = "Baby carrots" )

# Tick labels for the y axis, listed bottom to top.
ax.set_yticklabels(["Regular-sized carrots", "Baby-sized carrots" ])
plt.show();


# In[227]:


# Print floats with two decimals and wrap lines at 90 characters; this changes
# numpy's global print options for every later cell until they are reset.
np.set_printoptions(formatter={'float': '{: 0.2f}'.format}, linewidth=90)
print(repr(np.sort(data2)))


# ### 6.2-6

# In[231]:


# Raw numbers as pasted; some neighbouring values are fused together
# (e.g. "4555"), so the comma-separated result still needs fixing by hand.
s= "25 9 5 5 5 9 6 5 15 4555 6 5 6 24 21 16 5 8 77 5 5 35 13 9 5 18 6 1019 16 21 8 13 5 9 10 10 623 8 5 10 15 7 5 5 24 911 34 12 11 17 11 16 5 15 512 6 5 5 7 6 17 20 7 88 6 10 11 6 7 5 12 11 186 21 6 5 24 7 16 21 23 1511 8 6 8 14 11 6 9 6 10"

# Replace every run of whitespace by a comma. The pattern is a raw string:
# "\s" in a normal string literal is an invalid escape sequence.
re.sub(r"\s+", ",", s.strip())


# In[345]:


# nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np
import stemgraphic 
# Large, high-resolution default figure for the stem-and-leaf display.
plt.rcParams["figure.figsize"] = (20, 20)
plt.rcParams['figure.dpi'] = 300

# Sample of integer observations entered by hand (not randomly generated).
data = np.array([25,9,5,5,5,9,6,5,15,45, 55,6,5,6,
                  24,21,16,5,8,7,7,5,5,35,13,9,5,18,
                  6,10, 19,16,21,8,13,5,9,10,10,6, 23,8,
                  5,10,15,7,5,5,24,9, 11,34,12,11,17,11,
                  16,5,15,5, 12,6,5,5,7,6,17,20,7,8, 8,6,
                  10,11,6,7,5,12,11,18, 6,21,6,5,24,7,16,
                  21,23,15, 11,8,6,8,14,11,6,9,6,10])

# Stem-and-leaf display from the third-party stemgraphic package
# (presumably scale=10 puts the tens digit on the stem -- verify).
stemgraphic.stem_graphic(data, scale = 10, legend_pos=None, 
                         alpha=0,outliers=False) 
plt.show();


# In[325]:


# Inspection: number of observations.
data.shape


# In[362]:


# Inspection: the value at index 90 of the sorted sample.
np.sort(data)[90]


# In[337]:


# Called without arguments; per the numpy docs this resets the custom float
# formatter installed by an earlier cell.
np.set_printoptions()  


# In[328]:


# Inspection: the sample itself.
data


# In[359]:


# nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np

# set up the figure
# Single axes that the box plot below is drawn on.
fig, ax = plt.subplots(1,1, figsize=(10,5))

# Sample entered by hand (not randomly generated), stored as floats.
data = np.array([25,9,5,5,5,9,6,5,15,45, 55,6,5,6,
                  24,21,16,5,8,7,7,5,5,35,13,9,5,18,
                  6,10, 19,16,21,8,13,5,9,10,10,6, 23,8,
                  5,10,15,7,5,5,24,9, 11,34,12,11,17,11,
                  16,5,15,5, 12,6,5,5,7,6,17,20,7,8, 8,6,
                  10,11,6,7,5,12,11,18, 6,21,6,5,24,7,16,
                  21,23,15, 11,8,6,8,14,11,6,9,6,10]).astype(float)


def percentile(data, p):
    """
    Compute the percentiles the way we defined in class 
    data : array of size N 
    p : percentile, given as a fraction (e.g. 0.25 for the first quartile)

    The percentile sits at 1-based position p*(N+1) of the sorted sample; a
    fractional position is linearly interpolated between its two neighbours.

    Raises ValueError when that position falls outside the sample, i.e. when
    p*(N+1) < 1 or p*(N+1) > N (this includes an empty sample).
    """
    data = np.sort(data, axis=0)
    n = data.shape[0]
    pos = p * (n + 1) - 1  # 0-based, possibly fractional, position
    if pos < 0 or pos > n - 1:
        raise ValueError("the rank does not exist")
    rank = int(pos)        # the rank (index of the lower neighbour)
    alpha = pos - rank     # the fractional part
    # When pos is exactly the last index alpha is 0, so clamping the upper
    # neighbour only avoids reading past the end of the array.
    upper = min(rank + 1, n - 1)
    return data[rank] + alpha * (data[upper] - data[rank])

def box_plot(ax, data, width=0.4, showout = True, position = np.array([0.4]), 
             textloc=np.array([0.8]), label = ""):
    """
    Draw a horizontal box plot of ``data`` on ``ax``, print the fences and
    write the five number summary into a text box on the current figure.

    ax : matplotlib ax
    data : the data 
    width : box width
    showout : show the outliers   
    position: the y axis of the box plot
    textloc : y position (figure coordinates) of the summary text box
    label : accepted but currently not used by this function
    """
    # compute the five number summary
    minim = np.min(data)
    maxim = np.max(data)
    q1 = percentile(data, 0.25)
    q2 = np.median(data)
    q3 = percentile(data, 0.75)

    # interquartile range
    iqr = q3 - q1

    # inner fences
    left_innerfence = q1 - 1.5 * iqr
    right_innerfence = q3 + 1.5 * iqr

    if showout:
        # Outliers lie strictly beyond the inner fences. The right side used
        # ">=", so a point exactly on the fence was both a whisker end and a
        # flier; both sides are now strict, matching the whisker rule below.
        outliers = data[np.logical_or(data < left_innerfence, data > right_innerfence)]
        # whiskers reach to the most extreme points still inside the fences
        low_whisker = np.min(data[data >= left_innerfence])
        high_whisker = np.max(data[data <= right_innerfence])
    else:
        # no outlier handling: whiskers span the whole sample
        outliers = []
        low_whisker = minim
        high_whisker = maxim

    stats = [{'iqr': iqr,
              'whishi': high_whisker,
              'whislo': low_whisker,
              'fliers': outliers,
              'q1': q1,
              'med': q2,
              'q3': q3}]

    # add the box plot
    flierprops = dict(markerfacecolor='black', markersize=5)
    ax.bxp(stats, vert = False, widths=width, positions = position, 
           flierprops=flierprops, showfliers=showout)

    # add Tukey's fences: inner at 1.5*iqr, outer at 3*iqr from the quartiles
    if showout:
        for fence in (left_innerfence, right_innerfence, q1-3*iqr, q3+3*iqr):
            ax.vlines(fence, position-0.2,position+0.2, linestyle="dashed", linewidth=1)
    # report the fences on stdout ("ouer" typo in the last label fixed)
    print ("iqr={}\n".format(iqr)+"\n"+
           "left inner fence={}".format(q1-1.5*iqr)+"\n"+
           "right inner fence={}".format(q3+1.5*iqr)+"\n"+
           "left outer fence={}".format(q1-3*iqr)+"\n"+
           "right outer fence={}".format(q3+3*iqr)
          )
    # five number summary, placed at the right edge of the figure
    ax.set_yticks([])
    plt.figtext(1,textloc,
                r"$\min={:.4}$".format(minim)+"\n"+
                r"$q_1={:.4}$".format(q1)+"\n"+
                r"med$={:.4}$".format(q2)+"\n"+
                r"$q_3={:.4}$".format(q3)+"\n"+
                r"max$={:.4}$".format(maxim),
                ha="left", va="top",
                backgroundcolor=(0.1, 0.1, 1, 0.15),
                fontsize="large")
    
def disp_data(ax, data):
    """Show ``data`` as dots on a horizontal line and mark its mean in red.

    ax : matplotlib ax
    data : numpy array of observations

    All four axes borders and the y ticks are hidden; the y range ends up
    as (-0.1, 1.5).
    """
    baseline = np.zeros(data.shape)
    ax.scatter(data, baseline, zorder=2, s=10)
    ax.set_yticks([])
#     ax.set_xticks([])
    centre = np.mean(data)
    ax.scatter(centre, 0, zorder=2, s=20, color="red")
    # This range is replaced by the final set_ylim call below.
    ax.set_ylim(-0.01,0.1)
    ax.axhline(y=0, color='k', zorder=1, linewidth=0.5)

    for side in ('top', 'right', 'left', 'bottom'):
        ax.spines[side].set_visible(False)

    ax.set_ylim(-0.1,1.5)
            

# Box plot of the sample at y = 1 with outliers and fences shown.
box_plot(ax, data, width=0.7, showout=True, position = np.array([1]),
         textloc = np.array([0.8])  )

plt.show();


# In[363]:


# Hand interpolation between 21 and 23 with weight 0.9 (presumably the 90th
# percentile worked out manually, to compare with the next cell).
21+0.9* (23-21)


# In[364]:


# 90th percentile computed by the helper defined above.
percentile(data, 0.9)


# ### 6.4-6

# In[369]:


# Raw numbers as pasted, separated by blanks.
s="31.5 36.9 33.8 30.1 33.9 35.2 29.6 34.4 30.5 34.2 31.6 36.7 35.8 34.5 32.7"

# Comma-separated and "+"-separated versions of the same list. The patterns
# are raw strings: "\s" in a normal string literal is an invalid escape.
re.sub(r"\s+", ",", s.strip())

re.sub(r"\s+", "+", s.strip())


# In[368]:


import numpy

# Sample entered by hand.
data = np.array([31.5,36.9,33.8,30.1,33.9,35.2,29.6,34.4,30.5,34.2,31.6,36.7,35.8,34.5,32.7])

# np.var with its default ddof=0 divides by n (not n - 1).
print("mean = {}".format(np.mean(data))+"\n"+"var = {}".format(np.var(data)))


# ### 7.1-7

# In[371]:


# Raw numbers as pasted, separated by blanks.
s="21.50 18.95 18.55 19.40 19.15 22.35 22.90 22.20 23.10"

# Comma-separated version of the list. The pattern is a raw string: "\s" in
# a normal string literal is an invalid escape sequence.
re.sub(r"\s+", ",", s.strip())

# re.sub(r"\s+", "+", s.strip())


# In[375]:


import numpy

# Sample entered by hand.
data = np.array([21.50,18.95,18.55,19.40,19.15,22.35,22.90,22.20,23.10])

# Sample mean and sample standard deviation S (ddof=1 divides by n - 1).
print("mean = {}".format(np.mean(data))+"\n"+"S = {}".format(np.sqrt(np.var(data, ddof=1))))


# ### 7.1-5

# In[2]:


import re

# Raw numbers as pasted, separated by blanks.
s="93 140 8 120 3 120 33 70 91 61 7 100 19 98 110 23 14 94 57 9 66 53 28 76 58 9 73 49 37 92"

# Comma-separated version of the list. The pattern is a raw string: "\s" in
# a normal string literal is an invalid escape sequence.
re.sub(r"\s+", ",", s.strip())

# re.sub(r"\s+", "+", s.strip())


# In[6]:


import numpy as np
# Sample entered by hand.
data=np.array([37.4, 48.8, 46.9, 55.0, 44.0])

# Sample mean and sample standard deviation (ddof=1 divides by n - 1).
print(np.mean(data), np.sqrt(np.var(data, ddof=1)))


# ### 7.1-8

# In[1]:


import numpy as np
# Sample entered by hand.
data=np.array([93,140,8,120,3,120,33,70,91,61,7,100,19,98,110,23,14,94,57,9,66,53,28,76,58,9,73,49,37,92])

# Sample mean and sample standard deviation (ddof=1 divides by n - 1).
print(np.mean(data), np.sqrt(np.var(data, ddof=1)))


# ### 8.1-8 

# In[2]:


import re

# Raw numbers as pasted, separated by blanks.
s="3.4 3.6 3.8 3.3 3.4 3.5 3.7 3.6 3.7"

# Comma-separated version of the list. The pattern is a raw string: "\s" in
# a normal string literal is an invalid escape sequence.
re.sub(r"\s+", ",", s.strip())

# re.sub(r"\s+", "+", s.strip())


# In[5]:


import numpy as np
# Sample entered by hand.
data=np.array([3.4,3.6,3.8,3.3,3.4,3.5,3.7,3.6,3.7])

# Sample mean and sample standard deviation (ddof=1 divides by n - 1).
print(np.mean(data), np.sqrt(np.var(data, ddof=1)))


# ### 8.1-12

# In[6]:


import re

# Two rows of raw numbers as pasted, separated by blanks.
s1="265 272 246 260 274 263 255 258 276 274 274 269 244 212 235 254 224"
s2="252 276 243 246 275 246 244 245 259 260 267 267 251 222 235 255 231"

# Comma-separated versions of both rows. The patterns are raw strings: "\s"
# in a normal string literal is an invalid escape sequence.
print(re.sub(r"\s+", ",", s1.strip()))
print(re.sub(r"\s+", ",", s2.strip()))

# re.sub(r"\s+", "+", s.strip())


# In[10]:


# Import first: the arrays below need np, and the import used to come after
# them, so the cell only ran when an earlier cell had already imported numpy.
import numpy as np

# Two samples of equal length, paired position by position, and their
# element-wise differences.
D1=np.array([265,272,246,260,274,263,255,258,276,274,274,269,244,212,235,254,224])
D2=np.array([252,276,243,246,275,246,244,245,259,260,267,267,251,222,235,255,231])
data=D1-D2

# Mean and sample standard deviation (ddof=1 divides by n - 1) of the differences.
print(np.mean(data), np.sqrt(np.var(data, ddof=1)))


# # Final
# 

# In[18]:


# nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np

# set up the figure
# Single axes that the box plot below is drawn on.
fig, ax = plt.subplots(1,1, figsize=(12,5))

# Sample of ten observations, stored as floats.
data = np.array([0.1, 0.5, 1, 1.2, 3, 0.8, 1.6, -3, 2, 10]).astype(float)
# Alternative sample, kept for reference but disabled.
# data = np.array([-30,-1, -5, -0.5, 0.5, 0.6, 0, 2, 3, 4.6, 4, 7, 18, 35]).astype(float)


def percentile(data, p):
    """
    Compute the percentiles the way we defined in class 
    data : array of size N 
    p : percentile, given as a fraction (e.g. 0.25 for the first quartile)

    The percentile sits at 1-based position p*(N+1) of the sorted sample; a
    fractional position is linearly interpolated between its two neighbours.

    Raises ValueError when that position falls outside the sample, i.e. when
    p*(N+1) < 1 or p*(N+1) > N (this includes an empty sample).
    """
    data = np.sort(data, axis=0)
    n = data.shape[0]
    pos = p * (n + 1) - 1  # 0-based, possibly fractional, position
    if pos < 0 or pos > n - 1:
        raise ValueError("the rank does not exist")
    rank = int(pos)        # the rank (index of the lower neighbour)
    alpha = pos - rank     # the fractional part
    # When pos is exactly the last index alpha is 0, so clamping the upper
    # neighbour only avoids reading past the end of the array.
    upper = min(rank + 1, n - 1)
    return data[rank] + alpha * (data[upper] - data[rank])

def box_plot(ax, data, width=0.4, showout = True, position = np.array([0.4])):
    """
    Draw a horizontal box plot of ``data`` on ``ax`` and write the five
    number summary into a text box on the current figure.

    ax : matplotlib ax
    data : the data 
    width : box width
    showout : show the outliers   
    position: the y axis of the box plot
    """
    # compute the five number summary
    minim = np.min(data)
    maxim = np.max(data)
    q1 = percentile(data, 0.25)
    q2 = np.median(data)
    q3 = percentile(data, 0.75)

    # interquartile range
    iqr = q3 - q1

    # inner fences
    left_innerfence = q1 - 1.5 * iqr
    right_innerfence = q3 + 1.5 * iqr

    # Outliers lie strictly beyond the inner fences. The right side used
    # ">=", so a point exactly on the fence was both a whisker end and a
    # flier; both sides are now strict, matching the whisker rule below.
    # They are always computed here; ax.bxp only draws them when showout is set.
    outliers = data[np.logical_or(data < left_innerfence, data > right_innerfence)]

    # whiskers
    if showout:
        # reach to the most extreme points still inside the fences
        low_whisker = np.min(data[data >= left_innerfence])
        high_whisker = np.max(data[data <= right_innerfence])
    else:
        # span the whole sample
        low_whisker = minim
        high_whisker = maxim

    stats = [{'iqr': iqr,
              'whishi': high_whisker,
              'whislo': low_whisker,
              'fliers': outliers,
              'q1': q1,
              'med': q2,
              'q3': q3}]

    # add the box plot
    flierprops = dict(markerfacecolor='black', markersize=5)
    ax.bxp(stats, vert = False, widths=width, positions = position, 
           flierprops=flierprops, showfliers=showout)

    # add Tukey's fences: inner at 1.5*iqr, outer at 3*iqr from the quartiles
    if showout:
        for fence in (left_innerfence, right_innerfence, q1-3*iqr, q3+3*iqr):
            ax.vlines(fence, position-0.2,position+0.2, linestyle="dashed", linewidth=1)

    # tidy the axes and add the five number summary at the right figure edge
    for side in ('top', 'right', 'left'):
        ax.spines[side].set_visible(False)
    ax.set_ylim(-0.1,position+0.3)
    ax.set_yticks([])
    plt.figtext(1,0.8,
                r"$\min={:.4}$".format(minim)+"\n"+
                r"$q_1={:.4}$".format(q1)+"\n"+
                r"med$={:.4}$".format(q2)+"\n"+
                r"$q_3={:.4}$".format(q3)+"\n"+
                r"max$={:.4}$".format(maxim),
                ha="left", va="top",
                backgroundcolor=(0.1, 0.1, 1, 0.15),
                fontsize="large")
    
def disp_data(ax, data):
    """
    Show the sample as a strip of points on a bare horizontal axis.

    ax : matplotlib ax
    data : the data (array or any sequence of numbers)

    Every observation is drawn at height 0, the sample mean is marked on the
    same line with a larger red dot, and all spines and y ticks are hidden.
    """
    # accept plain lists / tuples as well as numpy arrays
    data = np.asarray(data)

    # the observations and, in red, their mean
    ax.scatter(data, np.zeros(data.shape), zorder=2, s=10)
    ax.scatter(np.mean(data), 0, zorder=2, s=20, color="red")

    # thin baseline drawn underneath the points (lower zorder)
    ax.axhline(y=0, color='k', zorder=1, linewidth=0.5)

    # strip the frame and the y ticks
    ax.set_yticks([])
    for side in ('top', 'right', 'left', 'bottom'):
        ax.spines[side].set_visible(False)

    # single, final vertical window (an earlier, immediately overridden
    # set_ylim call was removed)
    ax.set_ylim(-0.1,1.5)
            
# draw the box plot (with outliers and fences) on the prepared axes and render it
box_plot(ax, data, showout=True, width=0.2)

plt.show()


# ## Problem 4
# 

# In[36]:


# nbi:hide_in
import numpy as np

import matplotlib.pyplot as plt
import numpy as np
plt.figure(figsize=(15, 10))

# moments of the joint distribution; they define the least squares line
# drawn further down in this cell
meanx, meany = 1, 2
varx = 1 / 3
cov = 6 / 5

def pdf(X,Y):
    """
    Joint density f(x, y) evaluated pointwise.

    X, Y : arrays of x and y coordinates (broadcast against each other)

    The density is constant on the band 0 <= x <= 2, |y - x^3| <= 1/2 and
    zero elsewhere.  The band has height 1 in y and length 2 in x, so the
    constant is 1/2.  This is the distribution whose moments are used in
    this cell (E[X] = 1, Var[X] = 1/3, E[Y] = 2, Cov(X, Y) = 6/5).

    Returns an array of density values with the broadcast shape of X and Y.
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)
    offset = Y - X**3  # vertical distance from the curve y = x^3
    inside = (0 <= X) & (X <= 2) & (-1/2 <= offset) & (offset <= 1/2)
    return np.where(inside, 1/2, 0.0)

# evaluate the density on a 1000 x 1000 grid covering the plotting window
x = np.linspace(0, 2, 1000)
y = np.linspace(-2, 8.5, 1000)
X, Y = np.meshgrid(x, y)
Z = pdf(X, Y)

# abscissas for the two curves and the least squares line through the mean
xval = np.linspace(0, 2, 100)
yval = (xval - meanx) * cov / varx + meany

# filled contours of the density; a colorbar is created and removed right away
plt.contourf(X, Y, Z, 20, cmap="Blues", zorder=1)
cb = plt.colorbar()
cb.remove()

# conditional mean y = x^3, the least squares line and the mean point on top
plt.plot(xval, xval**3, linewidth=4, zorder=2,
         color=(0.2, 0.7, 0.3), label="conditional mean function")
plt.plot(xval, yval, linewidth=4, zorder=3,
         color="y", label="least squares line")
plt.scatter(meanx, meany, s=200, zorder=4, color="red", label="mean")

plt.xlabel("x")
plt.ylabel("y")
plt.grid(True)
plt.legend()
plt.draw()


# ## Problem 5
# 

# In[2]:


# nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np

# set up the figure
fig, ax = plt.subplots(figsize=(10, 5))

# the sample for this problem, stored as floats
data = np.array([0.1, 0.5, 1, 1.2, 3, 0.8, 1.6, -3, 2, 10], dtype=float)


def percentile(data, p):
    """
    Compute the percentiles the way we defined in class 
    data : array of size N 
    p : percentile, given as a fraction (e.g. 0.25 for the first quartile)

    The (1-based) rank of the percentile is p * (N + 1).  A fractional rank
    is resolved by linear interpolation between the two neighbouring order
    statistics.

    Raises AssertionError if the rank falls outside 1, ..., N, i.e. if the
    percentile does not exist for a sample of this size.
    """
    data = np.sort(data, axis=0)
    n = data.shape[0]

    # 0-based, possibly fractional, position in the sorted sample
    pos = p * (n + 1) - 1
    if not (0 <= pos <= n - 1):
        raise AssertionError("the rank does not exist")

    rank = int(pos)      # lower neighbour (pos >= 0, so int() is a floor)
    alpha = pos - rank   # the fractional part

    # when pos is exactly the last index, alpha is 0 and there is no upper
    # neighbour; clamp the index so the interpolation term vanishes instead
    # of indexing past the end
    upper = data[min(rank + 1, n - 1)]
    return data[rank] + alpha * (upper - data[rank])

def box_plot(ax, data, width=0.4, showout = True, position = np.array([0.4]), 
             textloc=np.array([0.8]), label = ""):
    """
    Draw a horizontal box plot of the data, optionally with its outliers and
    Tukey's fences, print the fences, and add a text box with the five
    number summary to the figure that owns ax.

    ax : matplotlib ax
    data : the data 
    width : box width
    showout : show the outliers   
    position: the y axis of the box plot
    textloc : y coordinate (figure coordinates) of the top of the text box
    label : unused; kept so that existing calls remain valid
    """
    data = np.asarray(data)
    # allow a scalar or a list as well as the 1-element arrays used as defaults
    position = np.atleast_1d(np.asarray(position, dtype=float))
    text_y = float(np.ravel(textloc)[0])

    # compute the five number summary 
    minim = np.min(data)
    maxim = np.max(data)
    q1 = percentile(data, 0.25)
    q2 = np.median(data)
    q3 = percentile(data, 0.75)

    # interquartile range
    iqr = q3 - q1

    # inner and outer fences
    left_innerfence = q1 - 1.5 * iqr
    right_innerfence = q3 + 1.5 * iqr
    left_outerfence = q1 - 3 * iqr
    right_outerfence = q3 + 3 * iqr

    # whiskers and outliers
    if showout:
        # a point lying exactly on a fence belongs to the whisker, so the
        # outlier test is strict on both sides
        outliers = data[np.logical_or(data < left_innerfence, data > right_innerfence)]
        low_whisker = np.min(data[data >= left_innerfence])
        high_whisker = np.max(data[data <= right_innerfence])
    else:
        outliers = []
        low_whisker = minim
        high_whisker = maxim

    stats = [{'iqr': iqr,
              'whishi': high_whisker,
              'whislo': low_whisker,
              'fliers': outliers,
              'q1': q1,
              'med': q2,
              'q3': q3}]

    # add the box plot
    flierprops = dict(markerfacecolor='black', markersize=5)
    ax.bxp(stats, vert = False, widths=width, positions = position, 
           flierprops=flierprops, showfliers=showout)

    # add Tukey's fences
    if showout:
        for fence in (left_innerfence, right_innerfence,
                      left_outerfence, right_outerfence):
            ax.vlines(fence, position-0.2, position+0.2, linestyle="dashed", linewidth=1)

    print ("iqr={}\n".format(iqr)+"\n"+
           "left inner fence={}".format(left_innerfence)+"\n"+
           "right inner fence={}".format(right_innerfence)+"\n"+
           "left outer fence={}".format(left_outerfence)+"\n"+
           "right outer fence={}".format(right_outerfence)
          )

    ax.set_yticks([])

    # five number summary; float() keeps the '.4' format valid for integer data.
    # The text goes on the figure that owns ax rather than on whichever
    # figure happens to be current.
    ax.figure.text(1, text_y,
                   r"$\min={:.4}$".format(float(minim))+"\n"+
                   r"$q_1={:.4}$".format(float(q1))+"\n"+
                   r"med$={:.4}$".format(float(q2))+"\n"+
                   r"$q_3={:.4}$".format(float(q3))+"\n"+
                   r"max$={:.4}$".format(float(maxim)),
                   ha="left", va="top",
                   backgroundcolor=(0.1, 0.1, 1, 0.15),
                   fontsize="large")
    
def disp_data(ax, data):
    """
    Show the sample as a strip of points on a bare horizontal axis.

    ax : matplotlib ax
    data : the data (array or any sequence of numbers)

    Every observation is drawn at height 0, the sample mean is marked on the
    same line with a larger red dot, and all spines and y ticks are hidden.
    """
    # accept plain lists / tuples as well as numpy arrays
    data = np.asarray(data)

    # the observations and, in red, their mean
    ax.scatter(data, np.zeros(data.shape), zorder=2, s=10)
    ax.scatter(np.mean(data), 0, zorder=2, s=20, color="red")

    # thin baseline drawn underneath the points (lower zorder)
    ax.axhline(y=0, color='k', zorder=1, linewidth=0.5)

    # strip the frame and the y ticks
    ax.set_yticks([])
    for side in ('top', 'right', 'left', 'bottom'):
        ax.spines[side].set_visible(False)

    # single, final vertical window (an earlier, immediately overridden
    # set_ylim call was removed)
    ax.set_ylim(-0.1,1.5)
            

# box plot of the sample at height 1, summary text box anchored at y = 0.8
box_plot(
    ax,
    data,
    width=0.7,
    showout=True,
    position=np.array([1]),
    textloc=np.array([0.8]),
)

plt.show()


# In[3]:


# display the observations in increasing order; np.sort returns a sorted
# copy, so `data` itself is left unchanged
np.sort(data)


# ## Problem 6

# In[53]:


# the twelve observations used in the cells below
data = np.array([
    423.90, 420.24, 431.00, 418.76,
    428.68, 423.64, 430.65, 432.92,
    421.93, 433.97, 426.10, 430.20,
])


# In[55]:


# sample mean
data.mean()


# In[56]:


# sample standard deviation (ddof=1 divides by N - 1)
np.std(data, ddof=1)


# In[ ]: