#!/usr/bin/env python
# coding: utf-8

# # Homework code
#
#
#

# #### [Back to main page](https://petrosyan.page/fall2020math3215)

# In[1]:


# nbi:hide_in
# library
import matplotlib.pyplot as plt
import numpy as np

plt.rcParams["figure.figsize"] = (8, 5)
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)
plt.gca().spines['bottom'].set_visible(False)

# observed digits
data = np.array([
    1,6,9,9,3,8,5,0,6,7,5,7,5,9,4,6,5,6,4,4,4,8,0,9,3,2,1,5,4,5,7,3,2,
    1,4,6,7,1,3,4,4,8,8,6,1,6,1,2,8,8,1,7,8,2,2,0,9,7,5,2,5,7,1,7,0,1,
    8,5,2,9,2,4,7,6,6,6,3,3,6,9,6,0,2,3,6,0,1,7,8,9,1,3,7,0,9,8,5,3,4,
    8,2,6,6,4,2,7,5,0,8,2,7,6,8,9,9,7,9,0,0,0,9,3,3,4,5,1,9,4,5,4,6,4,
    8,7,6,8,6,6,2,3,6,6,1,7,4,1,8,9,8,8
])

# uniform pmf on 0..9 to compare the data against
range_x = np.arange(0, 10)
pmf_values = np.ones(range_x.size) / range_x.size


# compute empirical pmf
def epmf(data):
    """
    Relative frequency of every distinct value in the data.

    data : numpy array of observations

    Returns (epmf_values, erange_x): the relative frequencies and the
    sorted distinct values they belong to.
    """
    erange_x, counts = np.unique(data, return_counts=True)
    epmf_values = counts / data.size
    return epmf_values, erange_x


epmf_values, erange_x = epmf(data)

# plot
plt.ylim(0, 0.2)
plt.axhline(y=0, color='k')
plt.xticks(range_x)

plt.scatter(range_x, np.zeros(range_x.shape), color="red", s=20)
plt.bar(range_x, pmf_values, width=1, color='#039be5', edgecolor="w",
        linewidth=1.3, label="True histogram")
# transparent face so the true pmf stays visible underneath
plt.bar(erange_x, epmf_values, width=0.9, color=(1, 1, 1, 0),
        edgecolor='green', linewidth=1.5, linestyle="--",
        label="Relative frequency histogram")
plt.legend()
plt.show()


# In[2]:


range_x = np.arange(0, 10)
pmf_values = np.ones(range_x.size) / range_x.size

fig, ax2 = plt.subplots(num=1, clear=True)
ax2.set_ylim(-0.01, 0.2)
ax2.set_xlim(-0.7, 10)
ax2.axhline(y=0, color='k')
ax2.set_xticks(range_x)
ax2.set_yticks(pmf_values)
ax2.spines["top"].set_visible(False)
ax2.spines["right"].set_visible(False)
ax2.spines["bottom"].set_visible(False)

# Plotting with plt.bar instead of plt.hist works better when f(x) are known
ax2.scatter(range_x, np.zeros(range_x.shape), color="red", s=20)
ax2.bar(range_x, pmf_values, width=1, color='#039be5', edgecolor="w",
        linewidth=1.3, label="Histogram")
ax2.set_title("Histogram")
plt.show()


# In[2]:


import re

# turn a block of three-digit groups into a comma-separated digit list
text="169 938 506 757 594 656 444 809 321 545 732 146 713 448 861 612 881 782 209 752 571 701 852 924 766 633 696 023 601 789 137 098 534 826 642 750 827 689 979 000 933 451 945 464 876 866 236 617 418 988"
newtext = re.sub(r"\s", "", text)
re.sub(r"(\d)", r"\g<1>,", newtext)

# put "&" after every comma of the list
text = "0.1312, 0.0747, 0.2818, 0.7537, 0.9015, 0.7973, 0.6686, 0.0377,0.3207, 0.0497, 0.3036, 0.7613, 0.1278, 0.3596, 0.4977, 0.0802,0.5065, 0.6308, 0.1961, 0.921 , 0.2606, 0.6621, 0.5593, 0.1525,0.0694, 0.6032, 0.2863, 0.2178, 0.7832, 0.5217, 0.7545, 0.3325,0.5476, 0.7367, 0.0873, 0.8538, 0.3113, 0.5907, 0.7813, 0.0143"
newtext = re.sub(r",", ",&", text)
print(newtext)


# In[4]:


range_x = np.arange(1, 9)
pmf_values = np.ones(range_x.size) / range_x.size

fig, ax2 = plt.subplots(num=1, clear=True)
ax2.set_ylim(-0.01, 0.2)
ax2.set_xlim(-0.7, 10)
ax2.axhline(y=0, color='k')
ax2.set_xticks(range_x)
ax2.set_yticks(pmf_values)
ax2.spines["top"].set_visible(False)
ax2.spines["right"].set_visible(False)
ax2.spines["bottom"].set_visible(False)

# Plotting with plt.bar instead of plt.hist works better when f(x) are known
ax2.scatter(range_x, np.zeros(range_x.shape), color="red", s=20)
ax2.bar(range_x, pmf_values, width=1, color='#039be5', edgecolor="w",
        linewidth=1.3, label="Histogram")
ax2.set_title("Histogram")
plt.show()


# In[6]:


range_x = np.array([-1, 1, 2, 3, 4])
pmf_values = np.array([1/4, 1/12, 1/6, 2/6, 1/6])

fig, ax2 = plt.subplots(num=1, clear=True)
ax2.set_ylim(-0.01, 0.4)
ax2.set_xlim(-2, 5)
ax2.axhline(y=0, color='k')
ax2.set_xticks(range_x)
ax2.set_yticks(pmf_values)
ax2.spines["top"].set_visible(False)
ax2.spines["right"].set_visible(False)
ax2.spines["bottom"].set_visible(False)

# Plotting with plt.bar instead of plt.hist works better when f(x) are known
# (zorder keeps the red support points on top of the bars)
ax2.scatter(range_x, np.zeros(range_x.shape), color="red", s=20, zorder=2)
ax2.bar(range_x, pmf_values, width=1, color='#039be5', edgecolor="w",
        linewidth=1.3, label="Histogram", zorder=1)
ax2.set_title("Histogram")
plt.show()


# In[13]:


# nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np
from ipywidgets import interact, FloatSlider
plt.rcParams['figure.figsize'] = (12, 8)
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault)

lmbd = 0.8
x = 1


def cdf_func(xdata):
    """
    Piecewise-constant function: 1/2 on (0, 1) and (2, 3), 0 elsewhere.

    The break points 0, 1, 2, 3 are mapped to NaN so that the plotted line
    is interrupted at the jumps instead of drawing vertical segments.

    xdata : numpy array of evaluation points
    """
    val = np.piecewise(
        xdata,
        [xdata<0, xdata==0,
         (xdata>0) & (xdata<1), xdata==1,
         (xdata>1) & (xdata<2), xdata==2,
         (xdata>2) & (xdata<3), xdata==3,
         xdata>3],
        [0, np.nan, 1/2, np.nan, 0, np.nan, 1/2, np.nan, 0])
    return val


xdata = np.linspace(-0.5, 3.5, 1000)
plt.plot(xdata, cdf_func(xdata), linewidth=3)
xshade = xdata[xdata<=x]
plt.ylim(0, 0.6)
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)
plt.show()


# In[14]:


# nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np
from ipywidgets import interact, FloatSlider
plt.rcParams['figure.figsize'] = (12, 8)
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault)

lmbd = 0.8
x = 1


def func(xdata):
    """Return xdata / 2 element-wise."""
    f = lambda y: np.divide(y, 2)
    return f(xdata)


def cdf_func(xdata):
    """
    Piecewise-linear function: 0 up to 0, x/2 on (0, 1), 1/2 on [1, 2],
    x/2 - 1/2 on (2, 3) and 1 from 3 on.

    xdata : numpy array of evaluation points
    """
    val = np.piecewise(
        xdata,
        [xdata<=0,
         (xdata>0) & (xdata<1),
         (xdata>=1) & (xdata<=2),
         (xdata>2) & (xdata<3),
         xdata>=3],
        [0, lambda x: x/2, 1/2, lambda x: x/2-1/2, 1])
    return val


xdata = np.linspace(-0.5, 3.5, 1000)
plt.plot(xdata, cdf_func(xdata), linewidth=3)
xshade = xdata[xdata<=x]
plt.ylim(0, 1.1)
plt.gca().spines['top'].set_visible(False)
plt.gca().spines['right'].set_visible(False)
plt.show()


# In[15]:


func(xdata).shape


# In[29]:


import numpy as np
import matplotlib.pyplot as plt

plt.rcParams["figure.figsize"] = (8, 4)

data = np.array([
    -1.42, -0.31, -0.73, -0.51, -2.23, -0.32, 1.38, 0.32, -0.66, 0.01,
    -0.7, -1.86, -1.07, 0.1, -0.59, 0.58, -0.63, -0.87, -4.65, -1.14
])

# NOTE(review): the export is truncated from here on. The text between
# "(data" and "=2)&(x<=4)" is missing, which removes the end of
# epmf(data, inter, N) and the beginning of the cell that defines condmean
# (together with x, f1, f2 and f3). The surviving fragments are kept
# verbatim below, commented out because they can neither be parsed nor run;
# restore them from the original notebook.
#
# def epmf(data, inter, N):
#     epmf_values = np.zeros(N)
#     for i in range(N):
#         length = inter[i+1]-inter[i]
#         epmf_values[i] = np.sum((inter[i]<=data) & (data
#     ... missing text ...
#     =2)&(x<=4), (x>=4)], [f1, f2, f3])
#     return z
#
# plt.plot(x, condmean(x), color="green")
# (tail of the plotting cell that starts before this point)
plt.xlim(0,6)
plt.ylim(0,4)
plt.show()


# ### 6.1-4.

# In[116]:


# nbi:hide_in
# library
import matplotlib.pyplot as plt
import numpy as np

plt.rcParams["figure.figsize"] = (15, 6)

# generate data
data = np.array([
    320, 326, 325, 318, 322, 320, 329, 317, 316, 331,
    320, 320, 317, 329, 316, 308, 321, 319, 322, 335,
    318, 313, 327, 314, 329, 323, 327, 323, 324, 314,
    308, 305, 328, 330, 322, 310, 324, 314, 312, 318,
    313, 320, 324, 311, 317, 325, 328, 319, 310, 324
])


# compute empirical pmf
def epmf(data):
    """
    Count how often every distinct value occurs in the data.

    Unlike a relative-frequency pmf the counts are NOT divided by the
    sample size.

    data : numpy array of observations

    Returns (epmf_values, erange_x): the counts and the sorted distinct
    values they belong to.
    """
    erange_x, counts = np.unique(data, return_counts=True)
    epmf_values = counts
    return epmf_values, erange_x


epmf_values, erange_x = epmf(data)

# plot
plt.axhline(y=0, color='k')
plt.xticks(erange_x)
plt.bar(erange_x, epmf_values, width=1, color='#039be5', edgecolor='black', linewidth=1)

# sample mean and sample standard deviation (ddof=1)
mean = np.mean(data)
std = np.sqrt(np.var(data, ddof=1))
plt.scatter(mean, 0, color="red", label="mean", zorder=3, s=50)
plt.vlines([mean-std, mean+std], -0.3,3.5, label="1 std from mean", zorder=3)
plt.vlines([mean-2*std, mean+2*std], -0.3,3.5, linestyle="dashed",
           label="2 std from mean", zorder=3)
plt.legend(loc='upper left')
plt.show()


# In[18]:


np.sort(data)


# In[19]:


np.mean(data)


# In[78]:


np.sqrt(np.var(data, ddof=1))


# In[22]:


data.shape


# In[140]:


# nbi:hide_in
import numpy as np
import matplotlib.pyplot as plt

plt.rcParams["figure.figsize"] = (10, 6)

N=100 # total number of samples
# intsize the number of class intervals

# generate data
data = np.array([
    320, 326, 325, 318, 322, 320, 329, 317, 316, 331,
    320, 320, 317, 329, 316, 308, 321, 319, 322, 335,
    318, 313, 327, 314, 329, 323, 327, 323, 324, 314,
    308, 305, 328, 330, 322, 310, 324, 314, 312, 318,
    313, 320, 324, 311, 317, 325, 328, 319, 310, 324
])

# Add the histogram
plt.hist(data, bins='auto', color='#039be5', edgecolor='black', linewidth=1)

# Add the mean and variances
mean = np.mean(data)
std = np.sqrt(np.var(data, ddof=1))
plt.scatter(mean, 0, color="red", label="mean", zorder=3, s=50)
plt.vlines([mean-std, mean+std], -1,10, label="1 std from mean",
           linewidth=1.2, zorder=3)
plt.vlines([mean-2*std, mean+2*std], -1,10, linestyle="dashed",
           linewidth=1.2, label="2 std from mean", zorder=3)
plt.legend(loc='upper left')
plt.show()


# In[ ]:





# In[135]:


[mean-std, mean+std, mean-2*std, mean+2*std]


# In[138]:


# number of observations within one / two standard deviations of the mean
print(np.sum([(mean-std<=data) & (data<=mean+std)]))
print(np.sum([(mean-2*std<=data) & (data<=mean+2*std)]))


# In[136]:


[(mean-std<=data) & (data<=mean+std)]


# ### 6.2-2.

# In[137]:


data


# In[141]:


# nbi:hide_in
import numpy as np
import matplotlib.pyplot as plt

plt.rcParams["figure.figsize"] = (10, 6)

N=100 # total number of samples
# intsize the number of class intervals

# generate data
data1 = np.array([
    1.03, 1.03, 1.06, 1.02, 1.03, 1.03, 1.03, 1.02, 1.03, 1.03,
    1.06, 1.04, 1.05, 1.03, 1.04, 1.03, 1.05, 1.06, 1.04, 1.04,
    1.03, 1.04, 1.04, 1.06, 1.03, 1.04, 1.05, 1.04, 1.04, 1.02,
    1.03, 1.05, 1.05, 1.03, 1.04, 1.03, 1.04, 1.04, 1.03, 1.04,
    1.03, 1.04, 1.04, 1.04, 1.05, 1.04, 1.04, 1.03, 1.03, 1.05,
    1.04, 1.04, 1.05, 1.04, 1.03, 1.03, 1.05, 1.03, 1.04, 1.05,
    1.04, 1.04, 1.04, 1.05, 1.03, 1.04, 1.04, 1.04, 1.04, 1.03,
    1.05, 1.05, 1.05, 1.03, 1.04
])
# (the source had "1.27." instead of "1.27," here, which does not parse)
data2 = np.array([
    1.29, 1.10, 1.28, 1.29, 1.23, 1.20, 1.31, 1.25, 1.13, 1.26,
    1.19, 1.33, 1.24, 1.20, 1.26, 1.24, 1.11, 1.14, 1.15, 1.15,
    1.19, 1.26, 1.14, 1.20, 1.20, 1.20, 1.24, 1.25, 1.28, 1.24,
    1.26, 1.20, 1.30, 1.23, 1.26, 1.16, 1.34, 1.10, 1.22, 1.27,
    1.21, 1.09, 1.23, 1.03, 1.32, 1.21, 1.23, 1.34, 1.19, 1.18,
    1.20, 1.20, 1.13, 1.43, 1.19, 1.05, 1.16, 1.19, 1.07, 1.21,
    1.36, 1.21, 1.00, 1.23, 1.22, 1.13, 1.24, 1.10, 1.18, 1.26,
    1.12, 1.10, 1.19, 1.10, 1.24
])

# NOTE(review): everything below still works on `data` from the 6.1-4 cells,
# not on data1/data2 defined above -- looks like a leftover; confirm which
# sample this cell is meant to plot.
# Add the histogram
plt.hist(data, bins='auto', color='#039be5', edgecolor='black', linewidth=1)

# Add the mean and variances
mean = np.mean(data)
std = np.sqrt(np.var(data, ddof=1))
plt.scatter(mean, 0, color="red", label="mean", zorder=3, s=50)
plt.vlines([mean-std, mean+std], -1,10, label="1 std from mean",
           linewidth=1.2, zorder=3)
plt.vlines([mean-2*std, mean+2*std], -1,10, linestyle="dashed",
           linewidth=1.2, label="2 std from mean", zorder=3)
plt.legend(loc='upper left')
plt.show()


# In[228]:


# nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np

# set up the figure
fig, ax = plt.subplots(1,1, figsize=(10,5))

# generate data
data1 = np.array([
    1.03, 1.03, 1.06, 1.02, 1.03, 1.03, 1.03, 1.02, 1.03, 1.03,
    1.06, 1.04, 1.05, 1.03, 1.04, 1.03, 1.05, 1.06, 1.04, 1.04,
    1.03, 1.04, 1.04, 1.06, 1.03, 1.04, 1.05, 1.04, 1.04, 1.02,
    1.03, 1.05, 1.05, 1.03, 1.04, 1.03, 1.04, 1.04, 1.03, 1.04,
    1.03, 1.04, 1.04, 1.04, 1.05, 1.04, 1.04, 1.03, 1.03, 1.05,
    1.04, 1.04, 1.05, 1.04, 1.03, 1.03, 1.05, 1.03, 1.04, 1.05,
    1.04, 1.04, 1.04, 1.05, 1.03, 1.04, 1.04, 1.04, 1.04, 1.03,
    1.05, 1.05, 1.05, 1.03, 1.04
]).astype(float)
data2 = np.array([
    1.29, 1.10, 1.28, 1.29, 1.23, 1.20, 1.31, 1.25, 1.13, 1.26,
    1.19, 1.33, 1.24, 1.20, 1.26, 1.24, 1.11, 1.14, 1.15, 1.15,
    1.19, 1.26, 1.14, 1.20, 1.20, 1.20, 1.24, 1.25, 1.28, 1.24,
    1.26, 1.20, 1.30, 1.23, 1.26, 1.16, 1.34, 1.10, 1.22, 1.27,
    1.21, 1.09, 1.23, 1.03, 1.32, 1.21, 1.23, 1.34, 1.19, 1.18,
    1.20, 1.20, 1.13, 1.43, 1.19, 1.05, 1.16, 1.19, 1.07, 1.21,
    1.36, 1.21, 1.00, 1.23, 1.22, 1.13, 1.24, 1.10, 1.18, 1.26,
    1.12, 1.10, 1.19, 1.10, 1.24
]).astype(float)


def percentile(data, p):
    """
    Compute the percentiles the way we defined in class

    The sorted sample is read at the 0-based position p * (N + 1) - 1; a
    fractional position is resolved by linear interpolation between the two
    neighbouring order statistics.

    data : array of size N
    p : percentile, as a fraction (e.g. 0.25 for the first quartile)

    Raises AssertionError when the integer rank is not positive.
    """
    data = np.sort(data, axis=0)
    location = p * (data.shape[0] + 1) - 1
    rank = int(location) # the rank
    # NOTE(review): rank 0 is rejected too, and rank + 1 runs past the end of
    # the array for p close to 1 (IndexError) -- confirm that both match the
    # definition used in class.
    assert rank > 0, "the rank does not exist"
    alpha = location - rank # the fractional part
    return data[rank] + alpha * (data[rank + 1] - data[rank])


def box_plot(ax, data, width=0.4, showout = True, position = np.array([0.4]),
             textloc=np.array([0.8]), label = ""):
    """
    Draw a horizontal box plot of the data and print its five number
    summary next to the figure.

    ax : matplotlib ax
    data : the data
    width : box width
    showout : show the outliers
    position: the y axis of the box plot
    textloc : y coordinate handed to plt.figtext for the summary text
    label : not used by the function body
    """
    # compute the five number summary
    minim = np.min(data)
    maxim = np.max(data)
    q1 = percentile(data, 0.25)
    q2 = np.median(data)
    q3 = percentile(data, 0.75)

    # interquartile range
    iqr = q3 - q1

    # inner fences
    left_innerfence = q1 - 1.5 * iqr
    right_innerfence = q3 + 1.5 * iqr

    # compute outliers
    outliers = []

    # whiskers
    if showout:
        # NOTE(review): the mangled source read
        # "np.logical_or(data = right_innerfence)"; the two comparisons
        # below are the reconstruction of the lost text.
        outliers = data[np.logical_or(data <= left_innerfence,
                                      data >= right_innerfence)]
        # whiskers stop at the most extreme points inside the inner fences
        low_whisker = np.min(data[data >= left_innerfence])
        high_whisker = np.max(data[data <= right_innerfence])
    else:
        low_whisker = np.min(data)
        high_whisker = np.max(data)

    stats = [{'iqr': iqr, 'whishi': high_whisker, 'whislo': low_whisker,
              'fliers': outliers, 'q1': q1, 'med': q2, 'q3': q3}]

    # add the box plot
    flierprops = dict(markerfacecolor='black', markersize=5)
    ax.bxp(stats, vert = False, widths=width, positions = position,
           flierprops=flierprops, showfliers=showout)

    # add Tukey's fences (inner at 1.5 iqr, outer at 3 iqr)
    if showout:
        ax.vlines(q1-1.5*iqr, position-0.2,position+0.2, linestyle="dashed", linewidth=1)
        ax.vlines(q3+1.5*iqr, position-0.2,position+0.2, linestyle="dashed", linewidth=1)
        ax.vlines(q1-3*iqr, position-0.2,position+0.2, linestyle="dashed", linewidth=1)
        ax.vlines(q3+3*iqr, position-0.2,position+0.2, linestyle="dashed", linewidth=1)

    # NOTE(review): indentation was lost in the export; the summary text is
    # assumed to be drawn at function level, independent of showout.
    plt.figtext(1,textloc, r"$\min={:.4}$".format(minim)+"\n"+
                r"$q_1={:.4}$".format(q1)+"\n"+
                r"med$={:.4}$".format(q2)+"\n"+
                r"$q_3={:.4}$".format(q3)+"\n"+
                r"max$={:.4}$".format(maxim),
                ha="left", va="top", backgroundcolor=(0.1, 0.1, 1, 0.15),
                fontsize="large")


def disp_data(ax, data):
    """
    Show the data as dots on a horizontal axis, with the mean in red.

    ax : matplotlib ax
    data : the data
    """
    ax.scatter(data, np.zeros(data.shape), zorder=2, s=10)
    ax.set_yticks([])
#     ax.set_xticks([])
    mean = np.mean(data)
    ax.scatter(mean, 0, zorder=2, s=20, color="red")
    ax.set_ylim(-0.01,0.1)
    ax.axhline(y=0, color='k', zorder=1, linewidth=0.5)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    # NOTE(review): indentation was lost in the export; this call is assumed
    # to belong to disp_data (at cell level it would cut off the box drawn
    # at position 2) -- confirm against the original notebook.
    ax.set_ylim(-0.1,1.5)


box_plot(ax, data2, width=0.7, showout=False, position = np.array([1]),
         textloc = np.array([0.4]), label = "Regular carrots")
box_plot(ax, data1, width=0.7, showout=False, position = np.array([2]),
         textloc = np.array([0.8]), label = "Baby carrots" )
ax.set_yticklabels(["Regular-sized carrots", "Baby-sized carrots" ])
plt.show()


# In[227]:


np.set_printoptions(formatter={'float': '{: 0.2f}'.format}, linewidth=90)
print(repr(np.sort(data2)))


# ### 6.2-6

# In[231]:


import re

s= "25 9 5 5 5 9 6 5 15 4555 6 5 6 24 21 16 5 8 77 5 5 35 13 9 5 18 6 1019 16 21 8 13 5 9 10 10 623 8 5 10 15 7 5 5 24 911 34 12 11 17 11 16 5 15 512 6 5 5 7 6 17 20 7 88 6 10 11 6 7 5 12 11 186 21 6 5 24 7 16 21 23 1511 8 6 8 14 11 6 9 6 10"
re.sub(r"\s+", ",", s.strip())


# In[345]:


# nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np
import stemgraphic

plt.rcParams["figure.figsize"] = (20, 20)
plt.rcParams['figure.dpi'] = 300

# generate data
data = np.array([
    25,9,5,5,5,9,6,5,15,45, 55,6,5,6,
    24,21,16,5,8,7,7,5,5,35,13,9,5,18, 6,10,
    19,16,21,8,13,5,9,10,10,6, 23,8,
    5,10,15,7,5,5,24,9, 11,34,12,11,17,11,
    16,5,15,5, 12,6,5,5,7,6,17,20,7,8, 8,6,
    10,11,6,7,5,12,11,18, 6,21,6,5,24,7,16,
    21,23,15, 11,8,6,8,14,11,6,9,6,10
])

stemgraphic.stem_graphic(data, scale = 10, legend_pos=None, alpha=0,outliers=False)
plt.show()


# In[325]:


data.shape


# In[362]:


np.sort(data)[90]


# In[337]:


np.set_printoptions()


# In[328]:


data


# In[359]:


# nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np

# set up the figure
fig, ax = plt.subplots(1,1, figsize=(10,5))

# generate data
data = np.array([
    25,9,5,5,5,9,6,5,15,45, 55,6,5,6,
    24,21,16,5,8,7,7,5,5,35,13,9,5,18, 6,10,
    19,16,21,8,13,5,9,10,10,6, 23,8,
    5,10,15,7,5,5,24,9, 11,34,12,11,17,11,
    16,5,15,5, 12,6,5,5,7,6,17,20,7,8, 8,6,
    10,11,6,7,5,12,11,18, 6,21,6,5,24,7,16,
    21,23,15, 11,8,6,8,14,11,6,9,6,10
]).astype(float)


def percentile(data, p):
    """
    Compute the percentiles the way we defined in class

    The sorted sample is read at the 0-based position p * (N + 1) - 1; a
    fractional position is resolved by linear interpolation between the two
    neighbouring order statistics.

    data : array of size N
    p : percentile, as a fraction (e.g. 0.25 for the first quartile)

    Raises AssertionError when the integer rank is not positive.
    """
    data = np.sort(data, axis=0)
    location = p * (data.shape[0] + 1) - 1
    rank = int(location) # the rank
    # NOTE(review): rank 0 is rejected too, and rank + 1 runs past the end of
    # the array for p close to 1 (IndexError) -- confirm that both match the
    # definition used in class.
    assert rank > 0, "the rank does not exist"
    alpha = location - rank # the fractional part
    return data[rank] + alpha * (data[rank + 1] - data[rank])


def box_plot(ax, data, width=0.4, showout = True, position = np.array([0.4]),
             textloc=np.array([0.8]), label = ""):
    """
    Draw a horizontal box plot of the data, print the fences and put the
    five number summary next to the figure.

    ax : matplotlib ax
    data : the data
    width : box width
    showout : show the outliers
    position: the y axis of the box plot
    textloc : y coordinate handed to plt.figtext for the summary text
    label : not used by the function body
    """
    # compute the five number summary
    minim = np.min(data)
    maxim = np.max(data)
    q1 = percentile(data, 0.25)
    q2 = np.median(data)
    q3 = percentile(data, 0.75)

    # interquartile range
    iqr = q3 - q1

    # inner fences
    left_innerfence = q1 - 1.5 * iqr
    right_innerfence = q3 + 1.5 * iqr

    # compute outliers
    outliers = []

    # whiskers
    if showout:
        # NOTE(review): the mangled source read
        # "np.logical_or(data = right_innerfence)"; the two comparisons
        # below are the reconstruction of the lost text.
        outliers = data[np.logical_or(data <= left_innerfence,
                                      data >= right_innerfence)]
        # whiskers stop at the most extreme points inside the inner fences
        low_whisker = np.min(data[data >= left_innerfence])
        high_whisker = np.max(data[data <= right_innerfence])
    else:
        low_whisker = np.min(data)
        high_whisker = np.max(data)

    stats = [{'iqr': iqr, 'whishi': high_whisker, 'whislo': low_whisker,
              'fliers': outliers, 'q1': q1, 'med': q2, 'q3': q3}]

    # add the box plot
    flierprops = dict(markerfacecolor='black', markersize=5)
    ax.bxp(stats, vert = False, widths=width, positions = position,
           flierprops=flierprops, showfliers=showout)

    # add Tukey's fences (inner at 1.5 iqr, outer at 3 iqr)
    if showout:
        ax.vlines(q1-1.5*iqr, position-0.2,position+0.2, linestyle="dashed", linewidth=1)
        ax.vlines(q3+1.5*iqr, position-0.2,position+0.2, linestyle="dashed", linewidth=1)
        ax.vlines(q1-3*iqr, position-0.2,position+0.2, linestyle="dashed", linewidth=1)
        ax.vlines(q3+3*iqr, position-0.2,position+0.2, linestyle="dashed", linewidth=1)

    # NOTE(review): indentation was lost in the export; the report and the
    # summary text are assumed to sit at function level, independent of
    # showout.
    print ("iqr={}\n".format(iqr)+"\n"+
           "left inner fence={}".format(q1-1.5*iqr)+"\n"+
           "right inner fence={}".format(q3+1.5*iqr)+"\n"+
           "left outer fence={}".format(q1-3*iqr)+"\n"+
           "right outer fence={}".format(q3+3*iqr)
          )

    ax.set_yticks([])
    plt.figtext(1,textloc, r"$\min={:.4}$".format(minim)+"\n"+
                r"$q_1={:.4}$".format(q1)+"\n"+
                r"med$={:.4}$".format(q2)+"\n"+
                r"$q_3={:.4}$".format(q3)+"\n"+
                r"max$={:.4}$".format(maxim),
                ha="left", va="top", backgroundcolor=(0.1, 0.1, 1, 0.15),
                fontsize="large")


def disp_data(ax, data):
    """
    Show the data as dots on a horizontal axis, with the mean in red.

    ax : matplotlib ax
    data : the data
    """
    ax.scatter(data, np.zeros(data.shape), zorder=2, s=10)
    ax.set_yticks([])
#     ax.set_xticks([])
    mean = np.mean(data)
    ax.scatter(mean, 0, zorder=2, s=20, color="red")
    ax.set_ylim(-0.01,0.1)
    ax.axhline(y=0, color='k', zorder=1, linewidth=0.5)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    # NOTE(review): indentation was lost in the export; this call is assumed
    # to belong to disp_data -- confirm against the original notebook.
    ax.set_ylim(-0.1,1.5)


box_plot(ax, data, width=0.7, showout=True, position = np.array([1]),
         textloc = np.array([0.8]) )
plt.show()


# In[363]:


21+0.9* (23-21)


# In[364]:


percentile(data, 0.9)


# ### 6.4-6

# In[369]:


s="31.5 36.9 33.8 30.1 33.9 35.2 29.6 34.4 30.5 34.2 31.6 36.7 35.8 34.5 32.7"
re.sub(r"\s+", ",", s.strip())
re.sub(r"\s+", "+", s.strip())


# In[368]:


import numpy
import numpy as np  # the cell uses the np alias

data = np.array([31.5,36.9,33.8,30.1,33.9,35.2,29.6,34.4,30.5,34.2,31.6,36.7,35.8,34.5,32.7])
# np.var without ddof divides by N here
print("mean = {}".format(np.mean(data))+"\n"+"var = {}".format(np.var(data)))


# ### 7.1-7

# In[371]:


s="21.50 18.95 18.55 19.40 19.15 22.35 22.90 22.20 23.10"
re.sub(r"\s+", ",", s.strip())
# re.sub("\s+", "+", s.strip())


# In[375]:


import numpy
import numpy as np  # the cell uses the np alias

data = np.array([21.50,18.95,18.55,19.40,19.15,22.35,22.90,22.20,23.10])
print("mean = {}".format(np.mean(data))+"\n"+"S = {}".format(np.sqrt(np.var(data, ddof=1))))


# ### 7.1-5

# In[2]:


import re
s="93 140 8 120 3 120 33 70 91 61 7 100 19 98 110 23 14 94 57 9 66 53 28 76 58 9 73 49 37 92"
re.sub(r"\s+", ",", s.strip())
# re.sub("\s+", "+", s.strip())


# In[6]:


import numpy as np

data=np.array([37.4, 48.8, 46.9, 55.0, 44.0])
# sample mean and sample standard deviation (ddof=1)
print(np.mean(data), np.sqrt(np.var(data, ddof=1)))


# ### 7.1-8

# In[1]:


import numpy as np

data=np.array([93,140,8,120,3,120,33,70,91,61,7,100,19,98,110,23,14,94,57,9,66,53,28,76,58,9,73,49,37,92])
print(np.mean(data), np.sqrt(np.var(data, ddof=1)))


# ### 8.1-8

# In[2]:


import re

s="3.4 3.6 3.8 3.3 3.4 3.5 3.7 3.6 3.7"
re.sub(r"\s+", ",", s.strip())
# re.sub("\s+", "+", s.strip())


# In[5]:


import numpy as np

data=np.array([3.4,3.6,3.8,3.3,3.4,3.5,3.7,3.6,3.7])
print(np.mean(data), np.sqrt(np.var(data, ddof=1)))


# ### 8.1-12

# In[6]:


import re

s1="265 272 246 260 274 263 255 258 276 274 274 269 244 212 235 254 224"
s2="252 276 243 246 275 246 244 245 259 260 267 267 251 222 235 255 231"
print(re.sub(r"\s+", ",", s1.strip()))
print(re.sub(r"\s+", ",", s2.strip()))
# re.sub("\s+", "+", s.strip())


# In[10]:


import numpy as np  # imported before its first use in this cell

D1=np.array([265,272,246,260,274,263,255,258,276,274,274,269,244,212,235,254,224])
D2=np.array([252,276,243,246,275,246,244,245,259,260,267,267,251,222,235,255,231])
# paired differences
data=D1-D2
print(np.mean(data), np.sqrt(np.var(data, ddof=1)))


# # Final
#

# In[18]:


# nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np

# set up the figure
fig, ax = plt.subplots(1,1, figsize=(12,5))

# data
data = np.array([0.1, 0.5, 1, 1.2, 3, 0.8, 1.6, -3, 2, 10]).astype(float)
# data = np.array([-30,-1, -5, -0.5, 0.5, 0.6, 0, 2, 3, 4.6, 4, 7, 18, 35]).astype(float)


def percentile(data, p):
    """
    Compute the percentiles the way we defined in class

    The sorted sample is read at the 0-based position p * (N + 1) - 1; a
    fractional position is resolved by linear interpolation between the two
    neighbouring order statistics.

    data : array of size N
    p : percentile, as a fraction (e.g. 0.25 for the first quartile)

    Raises AssertionError when the integer rank is not positive.
    """
    data = np.sort(data, axis=0)
    location = p * (data.shape[0] + 1) - 1
    rank = int(location) # the rank
    # NOTE(review): rank 0 is rejected too, and rank + 1 runs past the end of
    # the array for p close to 1 (IndexError) -- confirm that both match the
    # definition used in class.
    assert rank > 0, "the rank does not exist"
    alpha = location - rank # the fractional part
    return data[rank] + alpha * (data[rank + 1] - data[rank])


def box_plot(ax, data, width=0.4, showout = True, position = np.array([0.4])):
    """
    Draw a horizontal box plot of the data, style the axes and put the
    five number summary next to the figure.

    ax : matplotlib ax
    data : the data
    width : box width
    showout : show the outliers
    position: the y axis of the box plot
    """
    # compute the five number summary
    minim = np.min(data)
    maxim = np.max(data)
    q1 = percentile(data, 0.25)
    q2 = np.median(data)
    q3 = percentile(data, 0.75)

    # interquartile range
    iqr = q3 - q1

    # inner fences
    left_innerfence = q1 - 1.5 * iqr
    right_innerfence = q3 + 1.5 * iqr

    # compute outliers
    # NOTE(review): the mangled source read
    # "np.logical_or(data = right_innerfence)"; the two comparisons below
    # are the reconstruction of the lost text.
    outliers = data[np.logical_or(data <= left_innerfence,
                                  data >= right_innerfence)]

    # whiskers
    if showout:
        # whiskers stop at the most extreme points inside the inner fences
        low_whisker = np.min(data[data >= left_innerfence])
        high_whisker = np.max(data[data <= right_innerfence])
    else:
        low_whisker = np.min(data)
        high_whisker = np.max(data)

    stats = [{'iqr': iqr, 'whishi': high_whisker, 'whislo': low_whisker,
              'fliers': outliers, 'q1': q1, 'med': q2, 'q3': q3}]

    # add the box plot
    flierprops = dict(markerfacecolor='black', markersize=5)
    ax.bxp(stats, vert = False, widths=width, positions = position,
           flierprops=flierprops, showfliers=showout)

    # add Tukey's fences (inner at 1.5 iqr, outer at 3 iqr)
    if showout:
        ax.vlines(q1-1.5*iqr, position-0.2,position+0.2, linestyle="dashed", linewidth=1)
        ax.vlines(q3+1.5*iqr, position-0.2,position+0.2, linestyle="dashed", linewidth=1)
        ax.vlines(q1-3*iqr, position-0.2,position+0.2, linestyle="dashed", linewidth=1)
        ax.vlines(q3+3*iqr, position-0.2,position+0.2, linestyle="dashed", linewidth=1)

    # NOTE(review): indentation was lost in the export; the axes styling and
    # the summary text are assumed to sit at function level, independent of
    # showout.
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_visible(False)
    ax.set_ylim(-0.1,position+0.3)
    ax.set_yticks([])
    plt.figtext(1,0.8, r"$\min={:.4}$".format(minim)+"\n"+
                r"$q_1={:.4}$".format(q1)+"\n"+
                r"med$={:.4}$".format(q2)+"\n"+
                r"$q_3={:.4}$".format(q3)+"\n"+
                r"max$={:.4}$".format(maxim),
                ha="left", va="top", backgroundcolor=(0.1, 0.1, 1, 0.15),
                fontsize="large")


def disp_data(ax, data):
    """
    Show the data as dots on a horizontal axis, with the mean in red.

    ax : matplotlib ax
    data : the data
    """
    ax.scatter(data, np.zeros(data.shape), zorder=2, s=10)
    ax.set_yticks([])
#     ax.set_xticks([])
    mean = np.mean(data)
    ax.scatter(mean, 0, zorder=2, s=20, color="red")
    ax.set_ylim(-0.01,0.1)
    ax.axhline(y=0, color='k', zorder=1, linewidth=0.5)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    # NOTE(review): indentation was lost in the export; this call is assumed
    # to belong to disp_data -- confirm against the original notebook.
    ax.set_ylim(-0.1,1.5)


box_plot(ax, data, width=0.2, showout=True)
plt.show()


# ## problem 4
#

# In[36]:


# nbi:hide_in
import numpy as np
import matplotlib.pyplot as plt
import numpy as np

plt.figure(figsize=(15,10))

# moments plugged into the least squares line below
meanx=1
meany=2
varx=1/3
cov = 6/5


def pdf(X,Y):
    """
    Joint density: 1/4 where 0 <= X <= 4 and |Y - X**3| <= 1/2, 0 elsewhere.

    X, Y : arrays of the same shape (e.g. from np.meshgrid)
    """
    # NOTE(review): meanx=1 and varx=1/3 above are the moments of a uniform
    # X on [0, 2], while the support here runs to 4 with height 1/4 --
    # confirm which one is intended (the plot only covers 0 <= x <= 2).
    Z = np.zeros(X.shape)
    cond = (0<=X) & (X<=4) & (-1/2<=Y-X**3) & (Y-X**3<=1/2)
    Z[cond] = 1/4
    return Z


x = np.linspace(0, 2, 1000)
y = np.linspace(-2, 8.5 , 1000)
X, Y = np.meshgrid(x, y)
Z = pdf(X, Y)

plt.contourf(X, Y, Z, 20, cmap = "Blues", zorder=1)
cb=plt.colorbar()
cb.remove()

# conditional mean E[Y | X = x] = x**3
xval = np.linspace(0, 2, 100)
plt.plot(xval,xval**3, linewidth=4, label="conditional mean function",
         color=(0.2,0.7,0.3), zorder=2)

# least squares line through (meanx, meany) with slope cov / varx
yval = (xval-meanx)*cov/varx + meany
plt.plot(xval,yval, linewidth=4, label="least squares line", color="y", zorder=3)

plt.scatter(meanx, meany, color = "red", s=200 , label ="mean", zorder=4)
plt.grid(True)
plt.legend()
# plt.title("Fitting least squares line and conditional mean to a joint distribution")
plt.xlabel("x")
plt.ylabel("y")
plt.draw()


# ### problem 5
#

# In[2]:


# nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np

# set up the figure
fig, ax = plt.subplots(1,1, figsize=(10,5))

# generate data
data = np.array([0.1, 0.5, 1, 1.2, 3, 0.8, 1.6, -3, 2, 10]).astype(float)


def percentile(data, p):
    """
    Compute the percentiles the way we defined in class

    The sorted sample is read at the 0-based position p * (N + 1) - 1; a
    fractional position is resolved by linear interpolation between the two
    neighbouring order statistics.

    data : array of size N
    p : percentile, as a fraction (e.g. 0.25 for the first quartile)

    Raises AssertionError when the integer rank is not positive.
    """
    data = np.sort(data, axis=0)
    location = p * (data.shape[0] + 1) - 1
    rank = int(location) # the rank
    # NOTE(review): rank 0 is rejected too, and rank + 1 runs past the end of
    # the array for p close to 1 (IndexError) -- confirm that both match the
    # definition used in class.
    assert rank > 0, "the rank does not exist"
    alpha = location - rank # the fractional part
    return data[rank] + alpha * (data[rank + 1] - data[rank])


def box_plot(ax, data, width=0.4, showout = True, position = np.array([0.4]),
             textloc=np.array([0.8]), label = ""):
    """
    Draw a horizontal box plot of the data, print the fences and put the
    five number summary next to the figure.

    ax : matplotlib ax
    data : the data
    width : box width
    showout : show the outliers
    position: the y axis of the box plot
    textloc : y coordinate handed to plt.figtext for the summary text
    label : not used by the function body
    """
    # compute the five number summary
    minim = np.min(data)
    maxim = np.max(data)
    q1 = percentile(data, 0.25)
    q2 = np.median(data)
    q3 = percentile(data, 0.75)

    # interquartile range
    iqr = q3 - q1

    # inner fences
    left_innerfence = q1 - 1.5 * iqr
    right_innerfence = q3 + 1.5 * iqr

    # compute outliers
    outliers = []

    # whiskers
    if showout:
        # NOTE(review): the mangled source read
        # "np.logical_or(data = right_innerfence)"; the two comparisons
        # below are the reconstruction of the lost text.
        outliers = data[np.logical_or(data <= left_innerfence,
                                      data >= right_innerfence)]
        # whiskers stop at the most extreme points inside the inner fences
        low_whisker = np.min(data[data >= left_innerfence])
        high_whisker = np.max(data[data <= right_innerfence])
    else:
        low_whisker = np.min(data)
        high_whisker = np.max(data)

    stats = [{'iqr': iqr, 'whishi': high_whisker, 'whislo': low_whisker,
              'fliers': outliers, 'q1': q1, 'med': q2, 'q3': q3}]

    # add the box plot
    flierprops = dict(markerfacecolor='black', markersize=5)
    ax.bxp(stats, vert = False, widths=width, positions = position,
           flierprops=flierprops, showfliers=showout)

    # add Tukey's fences (inner at 1.5 iqr, outer at 3 iqr)
    if showout:
        ax.vlines(q1-1.5*iqr, position-0.2,position+0.2, linestyle="dashed", linewidth=1)
        ax.vlines(q3+1.5*iqr, position-0.2,position+0.2, linestyle="dashed", linewidth=1)
        ax.vlines(q1-3*iqr, position-0.2,position+0.2, linestyle="dashed", linewidth=1)
        ax.vlines(q3+3*iqr, position-0.2,position+0.2, linestyle="dashed", linewidth=1)

    # NOTE(review): indentation was lost in the export; the report and the
    # summary text are assumed to sit at function level, independent of
    # showout.
    print ("iqr={}\n".format(iqr)+"\n"+
           "left inner fence={}".format(q1-1.5*iqr)+"\n"+
           "right inner fence={}".format(q3+1.5*iqr)+"\n"+
           "left outer fence={}".format(q1-3*iqr)+"\n"+
           "right outer fence={}".format(q3+3*iqr)
          )

    ax.set_yticks([])
    plt.figtext(1,textloc, r"$\min={:.4}$".format(minim)+"\n"+
                r"$q_1={:.4}$".format(q1)+"\n"+
                r"med$={:.4}$".format(q2)+"\n"+
                r"$q_3={:.4}$".format(q3)+"\n"+
                r"max$={:.4}$".format(maxim),
                ha="left", va="top", backgroundcolor=(0.1, 0.1, 1, 0.15),
                fontsize="large")


def disp_data(ax, data):
    """
    Show the data as dots on a horizontal axis, with the mean in red.

    ax : matplotlib ax
    data : the data
    """
    ax.scatter(data, np.zeros(data.shape), zorder=2, s=10)
    ax.set_yticks([])
#     ax.set_xticks([])
    mean = np.mean(data)
    ax.scatter(mean, 0, zorder=2, s=20, color="red")
    ax.set_ylim(-0.01,0.1)
    ax.axhline(y=0, color='k', zorder=1, linewidth=0.5)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.spines['left'].set_visible(False)
    ax.spines['bottom'].set_visible(False)
    # NOTE(review): indentation was lost in the export; this call is assumed
    # to belong to disp_data -- confirm against the original notebook.
    ax.set_ylim(-0.1,1.5)


box_plot(ax, data, width=0.7, showout=True, position = np.array([1]),
         textloc = np.array([0.8]) )
plt.show()


# In[3]:


np.sort(data)


# ## 6

# In[53]:


data=np.array([423.90, 420.24, 431.00, 418.76, 428.68, 423.64, 430.65, 432.92, 421.93, 433.97, 426.10, 430.20])


# In[55]:


np.mean(data)


# In[56]:


np.sqrt(np.var(data, ddof=1))


# In[ ]: