###### nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np
# Scatter the points (x, y), x = 1..4, y = x+1..x+4, each tagged 1/16,
# with 1/4 labels under the columns and val/16 labels beside the rows.
plt.figure(figsize=(4, 6))
point_style = dict(color="red", edgecolor="black", alpha=1)
margin_style = dict(color="red", size="large")

for x in range(1, 5):
    for y in range(x + 1, x + 5):
        plt.scatter(x, y, **point_style)
        plt.text(x + 0.1, y + 0.2, r"$\frac{1}{16}$")

# Column labels, written below the visible y-range.
for x in range(1, 5):
    plt.text(x - 0.1, -0.5, r"$\frac{1}{4}$", **margin_style)

# Row labels; meany accumulates sum(val * y) for the printed numerator.
meany = 0
for y in range(2, 9):
    val = 4 - np.abs(5 - y)
    plt.text(-1.5, y - 0.13, r"$\frac{{{}}}{{16}}$".format(val), **margin_style)
    meany += val * y
print(r"$\mu_Y = \frac{{{}}}{{16}}$".format(meany))

plt.xticks(np.arange(1, 5, 1))
plt.yticks(np.arange(2, 9, 1))
plt.ylim(1, 9)
plt.xlim(0, 5)
plt.xlabel("x", color="blue")
plt.ylabel("y", color="blue")
plt.grid(True)
plt.show()
$\mu_Y = \frac{80}{16}$
###### nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np
# Same lattice of points (x = 1..4, y = x+1..x+4), each labelled 1/4 in red.
plt.figure(figsize=(4, 6))
lattice = [(col, row) for col in range(1, 5) for row in range(col + 1, col + 5)]
for x, y in lattice:
    plt.scatter(x, y, color="red", edgecolor="black", alpha=1)
    plt.text(x + 0.1, y + 0.2, r"$\frac{1}{4}$", color="red")

plt.xticks(np.arange(1, 5, 1))
plt.yticks(np.arange(2, 9, 1))
plt.ylim(1, 9)
plt.xlim(0, 5)
plt.xlabel("x", color="blue")
plt.ylabel("y", color="blue")
plt.grid(True)
plt.show()
###### nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np
# Lattice points plus the line y = x + 2.5 and the per-x values in ycond.
plt.figure(figsize=(6, 10))
lattice = [(col, row) for col in range(1, 5) for row in range(col + 1, col + 5)]
for x, y in lattice:
    plt.scatter(x, y, color="red", edgecolor="black", alpha=1)

xval = np.array([1, 2, 3, 4])
yval = xval + 2.5
ycond = np.array([3.5, 4.5, 5.5, 6.5])
plt.plot(xval, yval, linewidth=2, label="least squares", color="orange")
plt.scatter(xval, ycond, linewidth=2, label="conditional mean")

plt.xticks(np.arange(1, 5, 1))
plt.yticks(np.arange(2, 9, 1))
plt.ylim(1, 9)
plt.xlim(0, 5)
plt.xlabel("x", color="blue")
plt.ylabel("y", color="blue")
plt.legend()
plt.show()
###### nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np
# Points (x, y) with 0 <= x <= y <= 9; each is labelled 1/(10*(10-x)).
plt.figure(figsize=(10, 10))
margin_style = dict(color="red", size="large")

for x in range(10):
    cell_denominator = 10 * (10 - x)
    for y in range(x, 10):
        plt.scatter(x, y, color="red", edgecolor="black", alpha=1)
        plt.text(x + 0.1, y + 0.2, r"$\frac{{1}}{{{}}}$".format(cell_denominator))

# 1/10 label under every column.
for x in range(10):
    plt.text(x - 0.1, -1.4, r"$\frac{1}{10}$", **margin_style)

# Row labels: running sum of 1/(10*(10-y)) up to and including row y.
meany = 0
val = 0
for y in range(10):
    val += 1 / (10 * (10 - y))
    plt.text(-2, y - 0.13, "{:.4f}".format(val), **margin_style)

plt.xticks(np.arange(0, 10, 1))
plt.yticks(np.arange(0, 10, 1))
plt.ylim(-0.5, 10)
plt.xlim(-0.5, 10)
plt.xlabel("x", color="blue")
plt.ylabel("y", color="blue")
plt.grid(True)
plt.show()
###### nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np
# Shade the part of the unit square lying between y = x^2 and y = 1.
plt.figure(figsize=(5, 5))
x = np.linspace(0, 1, 100)
parabola = x**2
plt.fill_between(x, parabola, 1, alpha=0.5)
plt.plot(x, parabola, color="red", label=r"$y=x^2$")
plt.text(0.7, 0.4, r"$y=x^2$")
plt.xticks([0, 1])
plt.yticks([1])
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.show()
###### nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np
# Shade, for x in [1/2, 1], the area between max(x^2, 1/2) and 1, and mark
# the lines x = 1/2 and y = 1/2 with dashes.
plt.figure(figsize=(5, 5))
half = 1 / 2
half_label = r"$\frac{1}{2}$"
dash_style = dict(linestyles='dashed', linewidth=0.7)

x = np.linspace(0, 1, 100)
xval = np.linspace(half, 1, 100)
lower_edge = np.maximum(xval**2, half)
plt.fill_between(xval, lower_edge, 1, alpha=0.5)
plt.plot(x, x**2, color="red", label=r"$y=x^2$")
plt.text(0.7, 0.4, r"$y=x^2$")

plt.xticks([half, 1], [half_label, 1])
plt.yticks([0, half, 1], [0, half_label, 1])
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.vlines(half, 0, 1, **dash_style)
plt.hlines(half, 0, 1, **dash_style)
plt.xlabel("x", color="blue")
plt.ylabel("y", color="blue")
plt.show()
###### nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np
# Shade the part of the unit square above the diagonal y = x.
plt.figure(figsize=(5, 5))
x = np.linspace(0, 1, 100)
plt.fill_between(x, x, 1, alpha=0.5)
# The curve drawn here is the diagonal, so its label must say y = x
# (the previous label, y = x^2, was left over from the parabola figure).
plt.plot(x, x, color="red", label=r"$y=x$")
plt.text(0.6, 0.5, r"$x=y$")
plt.xticks([0, 1])
plt.yticks([1])
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.show()
###### nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np
# Region above the diagonal of the unit square, with the straight line
# y = meany + (cov / sigma2x) * (x - meanx) drawn over it.
plt.figure(figsize=(5, 5))
x = np.linspace(0, 1, 100)
plt.fill_between(x, x, 1, alpha=0.5)
plt.text(0.6, 0.5, r"$x=y$")
plt.xticks([0, 1])
plt.yticks([1])
plt.xlim(0, 1)
plt.ylim(0, 1)

# Moments entered as constants.
meanx, meany = 8 / 15, 4 / 5
sigma2x = 11 / 225
cov = 4 / 225

# The line is evaluated slightly beyond [0, 1] so it spans the whole frame.
xval = np.linspace(-0.1, 1.1, 100)
yval = (xval - meanx) * cov / sigma2x + meany
plt.plot(xval, yval, linewidth=2, label="least squares line", color="g")
plt.show()
###### nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np
# Shade the band y <= x <= y + 2 for y between 0 and 4.
plt.figure(figsize=(5, 5))
y = np.linspace(0, 4, 100)
left_edge, right_edge = y, y + 2
plt.fill_betweenx(y, left_edge, right_edge, alpha=0.5)
plt.xlim(0, 6)
plt.ylim(0, 4)
plt.show()
###### nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np
# Band y <= x <= y + 2 (0 <= y <= 4) with the line y = x - 1 on top.
plt.figure(figsize=(6, 4))
y = np.linspace(0, 4, 100)
left_edge, right_edge = y, y + 2
plt.fill_betweenx(y, left_edge, right_edge, alpha=0.5)

x = np.linspace(0, 6, 100)
plt.plot(x, x - 1, color="green")
plt.xlim(0, 6)
plt.ylim(0, 4)
plt.show()
###### nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np
# Shade the band y <= x <= y + 2 (0 <= y <= 4) and build the x grid on [0, 6].
plt.figure(figsize=(6, 4))
y = np.linspace(0, 4, 100)
left_edge, right_edge = y, y + 2
plt.fill_betweenx(y, left_edge, right_edge, alpha=0.5)
x = np.linspace(0, 6, 100)
def condmean(x):
    """Evaluate the piecewise-linear conditional-mean curve.

    The curve is x/2 for x < 2, x - 1 for 2 <= x <= 4 and (x + 2)/2 for
    x > 4.

    x : scalar or array-like of abscissas (integers are accepted)
    Returns an ndarray of floats with the same shape as ``x``.
    """
    # np.piecewise allocates its output with the dtype of its input, so an
    # integer ``x`` would silently truncate x/2; evaluate in floating point.
    x = np.asarray(x, dtype=float)
    branches = [lambda t: t / 2, lambda t: t - 1, lambda t: (t + 2) / 2]
    # Disjoint conditions: x == 4 belongs to the middle piece only.
    regions = [x < 2, (x >= 2) & (x <= 4), x > 4]
    return np.piecewise(x, regions, branches)
# Overlay the conditional-mean curve on the shaded band and fix the frame.
cond_curve = condmean(x)
plt.plot(x, cond_curve, color="green")
plt.xlim(0, 6)
plt.ylim(0, 4)
plt.show()
# nbi:hide_in
# library
import matplotlib.pyplot as plt
import numpy as np
# Default figure size (width, height) for the figures that follow.
plt.rcParams["figure.figsize"] = (15, 6)
# The sample: 50 integer observations, entered by hand.
data = np.array([320, 326, 325, 318, 322, 320, 329, 317, 316, 331,
320, 320, 317, 329, 316, 308, 321, 319, 322, 335,
318, 313, 327, 314, 329, 323, 327, 323, 324, 314,
308, 305, 328, 330, 322, 310, 324, 314, 312, 318,
313, 320, 324, 311, 317, 325, 328, 319, 310, 324])
# compute empirical frequencies (optionally normalised to a pmf)
def epmf(data, *, normalize=False):
    """Tabulate how often each distinct value occurs in ``data``.

    data      : array-like of observations
    normalize : if True, divide the counts by the number of observations so
                that the values sum to 1; the default (False) returns the
                raw counts
    Returns ``(epmf_values, erange_x)``: the counts (or relative
    frequencies) and the sorted distinct values they belong to.
    """
    erange_x, counts = np.unique(data, return_counts=True)
    epmf_values = counts / counts.sum() if normalize else counts
    return epmf_values, erange_x
epmf_values, erange_x = epmf(data)

# Bar chart: one unit-width bar per distinct value, on a black baseline.
plt.axhline(y=0, color='k')
plt.xticks(erange_x)
plt.bar(erange_x, epmf_values, width=1, color='#039be5', edgecolor='black', linewidth=1)

# Sample mean and sample standard deviation (ddof=1).
mean = np.mean(data)
std = np.std(data, ddof=1)

# Red dot at the mean; solid lines one std away, dashed lines two std away.
plt.scatter(mean, 0, color="red", label="mean", zorder=3, s=50)
one_std = [mean - std, mean + std]
two_std = [mean - 2 * std, mean + 2 * std]
plt.vlines(one_std, -0.3, 3.5, label="1 std from mean", zorder=3)
plt.vlines(two_std, -0.3, 3.5, linestyle="dashed", label="2 std from mean", zorder=3)
plt.legend(loc='upper left')
plt.show()
np.sort(data)
array([305, 308, 308, 310, 310, 311, 312, 313, 313, 314, 314, 314, 316, 316, 317, 317, 317, 318, 318, 318, 319, 319, 320, 320, 320, 320, 320, 321, 322, 322, 322, 323, 323, 324, 324, 324, 324, 325, 325, 326, 327, 327, 328, 328, 329, 329, 329, 330, 331, 335])
np.mean(data)
320.1
np.sqrt(np.var(data, ddof=1))
6.7499055171014
data.shape
(50,)
# nbi:hide_in
import numpy as np
import matplotlib.pyplot as plt
# Default figure size (width, height) for the figures that follow.
plt.rcParams["figure.figsize"] = (10, 6)
N=100 # NOTE(review): described as "total number of samples", but the array below holds 50 values and N is not referenced in this cell
# NOTE(review): an earlier comment mentioned "intsize" (number of class
# intervals); no such variable exists in this cell.
# The sample: 50 integer observations, entered by hand.
data = np.array([320, 326, 325, 318, 322, 320, 329, 317, 316, 331,
320, 320, 317, 329, 316, 308, 321, 319, 322, 335,
318, 313, 327, 314, 329, 323, 327, 323, 324, 314,
308, 305, 328, 330, 322, 310, 324, 314, 312, 318,
313, 320, 324, 311, 317, 325, 328, 319, 310, 324])
# Histogram with automatically chosen bins.
plt.hist(data, bins='auto', color='#039be5', edgecolor='black', linewidth=1)

# Sample mean and sample standard deviation (ddof=1).
mean = np.mean(data)
std = np.std(data, ddof=1)

# Red dot at the mean; solid lines one std away, dashed lines two std away.
plt.scatter(mean, 0, color="red", label="mean", zorder=3, s=50)
band_style = dict(linewidth=1.2, zorder=3)
plt.vlines([mean - std, mean + std], -1, 10, label="1 std from mean", **band_style)
plt.vlines([mean - 2 * std, mean + 2 * std], -1, 10, linestyle="dashed",
           label="2 std from mean", **band_style)
plt.legend(loc='upper left')
plt.show()
[mean-std, mean+std, mean-2*std, mean+2*std]
[313.3500944828986, 326.84990551710143, 306.6001889657972, 333.59981103420284]
# Count the observations within one and within two std of the mean.
within_one_std = (mean - std <= data) & (data <= mean + std)
within_two_std = (mean - 2 * std <= data) & (data <= mean + 2 * std)
print(np.sum(within_one_std))
print(np.sum(within_two_std))
31 48
[(mean-std<=data) & (data<=mean+std)]
[array([ True, True, True, True, True, True, False, True, True, False, True, True, True, False, True, False, True, True, True, False, True, False, False, True, False, True, False, True, True, True, False, False, False, False, True, False, True, True, False, True, False, True, True, False, True, True, False, True, False, True])]
data
array([320, 326, 325, 318, 322, 320, 329, 317, 316, 331, 320, 320, 317, 329, 316, 308, 321, 319, 322, 335, 318, 313, 327, 314, 329, 323, 327, 323, 324, 314, 308, 305, 328, 330, 322, 310, 324, 314, 312, 318, 313, 320, 324, 311, 317, 325, 328, 319, 310, 324])
# nbi:hide_in
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (10, 6)
N = 100  # NOTE(review): not referenced in this cell; each array below holds 75 values
# generate data: two samples of 75 measurements each, entered by hand
data1 = np.array([1.03, 1.03, 1.06, 1.02, 1.03, 1.03, 1.03, 1.02, 1.03, 1.03,
                  1.06, 1.04, 1.05, 1.03, 1.04, 1.03, 1.05, 1.06, 1.04, 1.04,
                  1.03, 1.04, 1.04, 1.06, 1.03, 1.04, 1.05, 1.04, 1.04, 1.02,
                  1.03, 1.05, 1.05, 1.03, 1.04, 1.03, 1.04, 1.04, 1.03, 1.04,
                  1.03, 1.04, 1.04, 1.04, 1.05, 1.04, 1.04, 1.03, 1.03, 1.05,
                  1.04, 1.04, 1.05, 1.04, 1.03, 1.03, 1.05, 1.03, 1.04, 1.05,
                  1.04, 1.04, 1.04, 1.05, 1.03, 1.04, 1.04, 1.04, 1.04, 1.03,
                  1.05, 1.05, 1.05, 1.03, 1.04])
# The fourth row previously ended in "1.27." (a period instead of a comma),
# which made the whole cell a SyntaxError.
data2 = np.array([1.29, 1.10, 1.28, 1.29, 1.23, 1.20, 1.31, 1.25, 1.13, 1.26,
                  1.19, 1.33, 1.24, 1.20, 1.26, 1.24, 1.11, 1.14, 1.15, 1.15,
                  1.19, 1.26, 1.14, 1.20, 1.20, 1.20, 1.24, 1.25, 1.28, 1.24,
                  1.26, 1.20, 1.30, 1.23, 1.26, 1.16, 1.34, 1.10, 1.22, 1.27,
                  1.21, 1.09, 1.23, 1.03, 1.32, 1.21, 1.23, 1.34, 1.19, 1.18,
                  1.20, 1.20, 1.13, 1.43, 1.19, 1.05, 1.16, 1.19, 1.07, 1.21,
                  1.36, 1.21, 1.00, 1.23, 1.22, 1.13, 1.24, 1.10, 1.18, 1.26,
                  1.12, 1.10, 1.19, 1.10, 1.24])
# Histogram with automatically chosen bins.
# NOTE(review): this plots `data`, which this cell never assigns (it defines
# data1 and data2) — confirm which sample was meant.
plt.hist(data, bins='auto', color='#039be5', edgecolor='black', linewidth=1)

# Sample mean and sample standard deviation (ddof=1).
mean = np.mean(data)
std = np.std(data, ddof=1)

# Red dot at the mean; solid lines one std away, dashed lines two std away.
plt.scatter(mean, 0, color="red", label="mean", zorder=3, s=50)
band_style = dict(linewidth=1.2, zorder=3)
plt.vlines([mean - std, mean + std], -1, 10, label="1 std from mean", **band_style)
plt.vlines([mean - 2 * std, mean + 2 * std], -1, 10, linestyle="dashed",
           label="2 std from mean", **band_style)
plt.legend(loc='upper left')
plt.show()
# nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np
# set up the figure: a single axes, 10 x 5
fig, ax = plt.subplots(1,1, figsize=(10,5))
# generate data: two samples of 75 measurements each, entered by hand
# (labelled "Baby carrots" / data1 and "Regular carrots" / data2 where the
# box plots are drawn further down in this cell)
data1 = np.array([1.03, 1.03, 1.06, 1.02, 1.03, 1.03, 1.03, 1.02, 1.03, 1.03,
1.06, 1.04, 1.05, 1.03, 1.04, 1.03, 1.05, 1.06, 1.04, 1.04,
1.03, 1.04, 1.04, 1.06, 1.03, 1.04, 1.05, 1.04, 1.04, 1.02,
1.03, 1.05, 1.05, 1.03, 1.04, 1.03, 1.04, 1.04, 1.03, 1.04,
1.03, 1.04, 1.04, 1.04, 1.05, 1.04, 1.04, 1.03, 1.03, 1.05,
1.04, 1.04, 1.05, 1.04, 1.03, 1.03, 1.05, 1.03, 1.04, 1.05,
1.04, 1.04, 1.04, 1.05, 1.03, 1.04, 1.04, 1.04, 1.04, 1.03,
1.05, 1.05, 1.05, 1.03, 1.04]).astype(float)
data2 = np.array([1.29, 1.10, 1.28, 1.29, 1.23, 1.20, 1.31, 1.25, 1.13, 1.26,
1.19, 1.33, 1.24, 1.20, 1.26, 1.24, 1.11, 1.14, 1.15, 1.15,
1.19, 1.26, 1.14, 1.20, 1.20, 1.20, 1.24, 1.25, 1.28, 1.24,
1.26, 1.20, 1.30, 1.23, 1.26, 1.16, 1.34, 1.10, 1.22, 1.27,
1.21, 1.09, 1.23, 1.03, 1.32, 1.21, 1.23, 1.34, 1.19, 1.18,
1.20, 1.20, 1.13, 1.43, 1.19, 1.05, 1.16, 1.19, 1.07, 1.21,
1.36, 1.21, 1.00, 1.23, 1.22, 1.13, 1.24, 1.10, 1.18, 1.26,
1.12, 1.10, 1.19, 1.10, 1.24]).astype(float)
def percentile(data, p):
    """
    Compute the percentiles the way we defined in class: the value at
    (1-based) position p * (N + 1) of the sorted data, linearly interpolated
    between the two neighbouring order statistics.

    data : array of size N
    p    : percentile, as a fraction (e.g. 0.25 for the first quartile)

    Returns the interpolated value.
    Raises ValueError when p * (N + 1) falls outside [1, N], i.e. when the
    requested rank does not exist in the sample.
    """
    data = np.sort(data, axis=0)
    n = data.shape[0]
    pos = p * (n + 1) - 1  # zero-based fractional position
    # Validate the position itself: int() truncates towards zero, so checking
    # the truncated rank would let positions in (-1, 0) through and would
    # reject the valid positions in [0, 1).
    if pos < 0 or pos > n - 1:
        raise ValueError("the rank does not exist")
    rank = int(pos)  # the rank
    alpha = pos - rank  # the fractional part
    if rank == n - 1:
        # Exactly on the last element: there is no right neighbour to
        # interpolate with (and alpha is 0 here).
        return data[rank]
    return data[rank] + alpha * (data[rank + 1] - data[rank])
def box_plot(ax, data, width=0.4, showout = True, position = np.array([0.4]),
             textloc=np.array([0.8]), label = ""):
    """
    Draw a horizontal box plot of ``data`` on ``ax`` and write the
    five-number summary as a text box next to the figure.

    ax      : matplotlib ax
    data    : the data (array-like)
    width   : box width
    showout : show the outliers; when true the whiskers stop at the most
              extreme points inside the inner fences and the inner and
              outer fences are drawn as dashed lines, otherwise the
              whiskers run to the minimum and maximum
    position: the y axis of the box plot
    textloc : y position handed to ``plt.figtext`` for the summary box
    label   : accepted but currently unused
    """
    data = np.asarray(data)

    # compute the five number summary
    minim = np.min(data)
    maxim = np.max(data)
    q1 = percentile(data, 0.25)
    q2 = np.median(data)
    q3 = percentile(data, 0.75)

    # interquartile range and Tukey's fences
    iqr = q3 - q1
    left_innerfence = q1 - 1.5 * iqr
    right_innerfence = q3 + 1.5 * iqr
    left_outerfence = q1 - 3 * iqr
    right_outerfence = q3 + 3 * iqr

    if showout:
        # A point lying exactly on a fence counts as inside it, so it may be
        # a whisker end but is never reported as an outlier as well.
        outliers = data[(data < left_innerfence) | (data > right_innerfence)]
        low_whisker = np.min(data[data >= left_innerfence])
        high_whisker = np.max(data[data <= right_innerfence])
    else:
        outliers = []
        low_whisker = minim
        high_whisker = maxim

    stats = [{'iqr': iqr,
              'whishi': high_whisker,
              'whislo': low_whisker,
              'fliers': outliers,
              'q1': q1,
              'med': q2,
              'q3': q3}]

    # add the box plot
    flierprops = dict(markerfacecolor='black', markersize=5)
    ax.bxp(stats, vert = False, widths=width, positions = position,
           flierprops=flierprops, showfliers=showout)

    # add Tukey's fences
    if showout:
        for fence in (left_innerfence, right_innerfence,
                      left_outerfence, right_outerfence):
            ax.vlines(fence, position - 0.2, position + 0.2,
                      linestyle="dashed", linewidth=1)

    # five-number summary, one entry per line
    summary = "\n".join([
        r"$\min={:.4}$".format(minim),
        r"$q_1={:.4}$".format(q1),
        r"med$={:.4}$".format(q2),
        r"$q_3={:.4}$".format(q3),
        r"max$={:.4}$".format(maxim),
    ])
    plt.figtext(1, textloc, summary,
                ha="left", va="top",
                backgroundcolor=(0.1, 0.1, 1, 0.15),
                fontsize="large")
def disp_data(ax, data):
    """
    Scatter ``data`` along a bare horizontal line and mark its mean in red.

    ax   : matplotlib ax to draw on
    data : array of observations (its ``shape`` sizes the row of zeros)
    """
    baseline = np.zeros(data.shape)
    ax.scatter(data, baseline, zorder=2, s=10)
    ax.set_yticks([])
    center = np.mean(data)
    ax.scatter(center, 0, zorder=2, s=20, color="red")
    ax.set_ylim(-0.01, 0.1)
    ax.axhline(y=0, color='k', zorder=1, linewidth=0.5)
    # hide the whole frame
    for side in ('top', 'right', 'left', 'bottom'):
        ax.spines[side].set_visible(False)
ax.set_ylim(-0.1, 1.5)

# One box per sample: data2 at y = 1, data1 at y = 2, fences/outliers off.
shared_options = dict(width=0.7, showout=False)
box_plot(ax, data2, position=np.array([1]), textloc=np.array([0.4]),
         label="Regular carrots", **shared_options)
box_plot(ax, data1, position=np.array([2]), textloc=np.array([0.8]),
         label="Baby carrots", **shared_options)
ax.set_yticklabels(["Regular-sized carrots", "Baby-sized carrots"])
plt.show()
np.set_printoptions(formatter={'float': '{: 0.2f}'.format}, linewidth=90)
print(repr(np.sort(data2)))
array([ 1.00, 1.03, 1.05, 1.07, 1.09, 1.10, 1.10, 1.10, 1.10, 1.10, 1.11, 1.12, 1.13, 1.13, 1.13, 1.14, 1.14, 1.15, 1.15, 1.16, 1.16, 1.18, 1.18, 1.19, 1.19, 1.19, 1.19, 1.19, 1.19, 1.20, 1.20, 1.20, 1.20, 1.20, 1.20, 1.20, 1.20, 1.21, 1.21, 1.21, 1.21, 1.22, 1.22, 1.23, 1.23, 1.23, 1.23, 1.23, 1.24, 1.24, 1.24, 1.24, 1.24, 1.24, 1.25, 1.25, 1.26, 1.26, 1.26, 1.26, 1.26, 1.26, 1.27, 1.28, 1.28, 1.29, 1.29, 1.30, 1.31, 1.32, 1.33, 1.34, 1.34, 1.36, 1.43])
# `re` is not imported anywhere above this cell, so import it here.
import re

s = "25 9 5 5 5 9 6 5 15 4555 6 5 6 24 21 16 5 8 77 5 5 35 13 9 5 18 6 1019 16 21 8 13 5 9 10 10 623 8 5 10 15 7 5 5 24 911 34 12 11 17 11 16 5 15 512 6 5 5 7 6 17 20 7 88 6 10 11 6 7 5 12 11 186 21 6 5 24 7 16 21 23 1511 8 6 8 14 11 6 9 6 10"
# Turn every run of whitespace into a comma. The pattern is a raw string:
# "\s" in a normal string literal is an invalid escape sequence.
re.sub(r"\s+", ",", s.strip())
'25,9,5,5,5,9,6,5,15,4555,6,5,6,24,21,16,5,8,77,5,5,35,13,9,5,18,6,1019,16,21,8,13,5,9,10,10,623,8,5,10,15,7,5,5,24,911,34,12,11,17,11,16,5,15,512,6,5,5,7,6,17,20,7,88,6,10,11,6,7,5,12,11,186,21,6,5,24,7,16,21,23,1511,8,6,8,14,11,6,9,6,10'
# nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np
import stemgraphic
# Large, high-resolution canvas for the figure drawn below.
plt.rcParams["figure.figsize"] = (20, 20)
plt.rcParams['figure.dpi'] = 300
# The sample: 100 integer observations, entered by hand.
data = np.array([25,9,5,5,5,9,6,5,15,45, 55,6,5,6,
24,21,16,5,8,7,7,5,5,35,13,9,5,18,
6,10, 19,16,21,8,13,5,9,10,10,6, 23,8,
5,10,15,7,5,5,24,9, 11,34,12,11,17,11,
16,5,15,5, 12,6,5,5,7,6,17,20,7,8, 8,6,
10,11,6,7,5,12,11,18, 6,21,6,5,24,7,16,
21,23,15, 11,8,6,8,14,11,6,9,6,10])
# Draw the stemgraphic figure for the sample (legend and outlier display
# switched off via legend_pos=None / outliers=False).
stemgraphic.stem_graphic(data, scale = 10, legend_pos=None,
alpha=0,outliers=False)
plt.show();
data.shape
(100,)
np.sort(data)[90]
23.0
np.set_printoptions()
data
array([25, 9, 5, 5, 5, 9, 6, 5, 15, 45, 55, 6, 5, 6, 24, 21, 16, 5, 8, 7, 7, 5, 5, 35, 13, 9, 5, 18, 6, 10, 19, 16, 21, 8, 13, 5, 9, 10, 10, 6, 23, 8, 5, 10, 15, 7, 5, 5, 24, 9, 11, 34, 12, 11, 17, 11, 16, 5, 15, 5, 12, 6, 5, 5, 7, 6, 17, 20, 7, 8, 8, 6, 10, 11, 6, 7, 5, 12, 11, 18, 6, 21, 6, 5, 24, 7, 16, 21, 23, 15, 11, 8, 6, 8, 14, 11, 6, 9, 6, 10])
# nbi:hide_in
import matplotlib.pyplot as plt
import numpy as np
# set up the figure: a single axes, 10 x 5
fig, ax = plt.subplots(1,1, figsize=(10,5))
# The sample: 100 observations entered by hand, stored as floats.
data = np.array([25,9,5,5,5,9,6,5,15,45, 55,6,5,6,
24,21,16,5,8,7,7,5,5,35,13,9,5,18,
6,10, 19,16,21,8,13,5,9,10,10,6, 23,8,
5,10,15,7,5,5,24,9, 11,34,12,11,17,11,
16,5,15,5, 12,6,5,5,7,6,17,20,7,8, 8,6,
10,11,6,7,5,12,11,18, 6,21,6,5,24,7,16,
21,23,15, 11,8,6,8,14,11,6,9,6,10]).astype(float)
def percentile(data, p):
    """
    Compute the percentiles the way we defined in class: the value at
    (1-based) position p * (N + 1) of the sorted data, linearly interpolated
    between the two neighbouring order statistics.

    data : array of size N
    p    : percentile, as a fraction (e.g. 0.25 for the first quartile)

    Returns the interpolated value.
    Raises ValueError when p * (N + 1) falls outside [1, N], i.e. when the
    requested rank does not exist in the sample.
    """
    data = np.sort(data, axis=0)
    n = data.shape[0]
    pos = p * (n + 1) - 1  # zero-based fractional position
    # Validate the position itself: int() truncates towards zero, so checking
    # the truncated rank would let positions in (-1, 0) through and would
    # reject the valid positions in [0, 1).
    if pos < 0 or pos > n - 1:
        raise ValueError("the rank does not exist")
    rank = int(pos)  # the rank
    alpha = pos - rank  # the fractional part
    if rank == n - 1:
        # Exactly on the last element: there is no right neighbour to
        # interpolate with (and alpha is 0 here).
        return data[rank]
    return data[rank] + alpha * (data[rank + 1] - data[rank])
def box_plot(ax, data, width=0.4, showout = True, position = np.array([0.4]),
             textloc=np.array([0.8]), label = ""):
    """
    Draw a horizontal box plot of ``data`` on ``ax`` and write the
    five-number summary as a text box next to the figure.

    ax      : matplotlib ax
    data    : the data (array-like)
    width   : box width
    showout : show the outliers; when true the whiskers stop at the most
              extreme points inside the inner fences, the inner and outer
              fences are drawn as dashed lines and their values are
              printed, otherwise the whiskers run to the minimum and maximum
    position: the y axis of the box plot
    textloc : y position handed to ``plt.figtext`` for the summary box
    label   : accepted but currently unused
    """
    data = np.asarray(data)

    # compute the five number summary
    minim = np.min(data)
    maxim = np.max(data)
    q1 = percentile(data, 0.25)
    q2 = np.median(data)
    q3 = percentile(data, 0.75)

    # interquartile range and Tukey's fences
    iqr = q3 - q1
    left_innerfence = q1 - 1.5 * iqr
    right_innerfence = q3 + 1.5 * iqr
    left_outerfence = q1 - 3 * iqr
    right_outerfence = q3 + 3 * iqr

    if showout:
        # A point lying exactly on a fence counts as inside it, so it may be
        # a whisker end but is never reported as an outlier as well.
        outliers = data[(data < left_innerfence) | (data > right_innerfence)]
        low_whisker = np.min(data[data >= left_innerfence])
        high_whisker = np.max(data[data <= right_innerfence])
    else:
        outliers = []
        low_whisker = minim
        high_whisker = maxim

    stats = [{'iqr': iqr,
              'whishi': high_whisker,
              'whislo': low_whisker,
              'fliers': outliers,
              'q1': q1,
              'med': q2,
              'q3': q3}]

    # add the box plot
    flierprops = dict(markerfacecolor='black', markersize=5)
    ax.bxp(stats, vert = False, widths=width, positions = position,
           flierprops=flierprops, showfliers=showout)

    # add Tukey's fences and report their values
    if showout:
        for fence in (left_innerfence, right_innerfence,
                      left_outerfence, right_outerfence):
            ax.vlines(fence, position - 0.2, position + 0.2,
                      linestyle="dashed", linewidth=1)
        print ("iqr={}\n".format(iqr)+"\n"+
               "left inner fence={}".format(left_innerfence)+"\n"+
               "right inner fence={}".format(right_innerfence)+"\n"+
               "left outer fence={}".format(left_outerfence)+"\n"+
               "right ouer fence={}".format(right_outerfence)
               )

    ax.set_yticks([])
    # five-number summary, one entry per line
    summary = "\n".join([
        r"$\min={:.4}$".format(minim),
        r"$q_1={:.4}$".format(q1),
        r"med$={:.4}$".format(q2),
        r"$q_3={:.4}$".format(q3),
        r"max$={:.4}$".format(maxim),
    ])
    plt.figtext(1, textloc, summary,
                ha="left", va="top",
                backgroundcolor=(0.1, 0.1, 1, 0.15),
                fontsize="large")
def disp_data(ax, data):
    """
    Scatter ``data`` along a bare horizontal line and mark its mean in red.

    ax   : matplotlib ax to draw on
    data : array of observations (its ``shape`` sizes the row of zeros)
    """
    baseline = np.zeros(data.shape)
    ax.scatter(data, baseline, zorder=2, s=10)
    ax.set_yticks([])
    center = np.mean(data)
    ax.scatter(center, 0, zorder=2, s=20, color="red")
    ax.set_ylim(-0.01, 0.1)
    ax.axhline(y=0, color='k', zorder=1, linewidth=0.5)
    # hide the whole frame
    for side in ('top', 'right', 'left', 'bottom'):
        ax.spines[side].set_visible(False)
ax.set_ylim(-0.1, 1.5)
# Single box at y = 1 with fences and outliers shown.
box_position = np.array([1])
summary_y = np.array([0.8])
box_plot(ax, data, width=0.7, showout=True, position=box_position,
         textloc=summary_y)
plt.show()
iqr=9.0 left inner fence=-7.5 right inner fence=28.5 left outer fence=-21.0 right ouer fence=42.0
21+0.9* (23-21)
22.8
percentile(data, 0.9)
22.80000000000001
# `re` is not imported anywhere above this cell, so import it here.
import re

s = "31.5 36.9 33.8 30.1 33.9 35.2 29.6 34.4 30.5 34.2 31.6 36.7 35.8 34.5 32.7"
# Comma- and plus-separated versions of the list. The pattern is a raw
# string: "\s" in a normal string literal is an invalid escape sequence.
re.sub(r"\s+", ",", s.strip())
re.sub(r"\s+", "+", s.strip())
'31.5+36.9+33.8+30.1+33.9+35.2+29.6+34.4+30.5+34.2+31.6+36.7+35.8+34.5+32.7'
# The code below uses the `np` alias, so import numpy under that name
# (a plain `import numpy` only binds `numpy`).
import numpy as np

data = np.array([31.5,36.9,33.8,30.1,33.9,35.2,29.6,34.4,30.5,34.2,31.6,36.7,35.8,34.5,32.7])
# np.var with its default ddof=0, i.e. the sum of squares divided by N.
print("mean = {}".format(np.mean(data))+"\n"+"var = {}".format(np.var(data)))
mean = 33.42666666666666 var = 5.097955555555554
# `re` is not imported anywhere above this cell, so import it here.
import re

s = "21.50 18.95 18.55 19.40 19.15 22.35 22.90 22.20 23.10"
# Raw-string pattern: "\s" in a normal string literal is an invalid escape.
re.sub(r"\s+", ",", s.strip())
# re.sub("\s+", "+", s.strip())
'21.50,18.95,18.55,19.40,19.15,22.35,22.90,22.20,23.10'
# The code below uses the `np` alias, so import numpy under that name
# (a plain `import numpy` only binds `numpy`).
import numpy as np

data = np.array([21.50,18.95,18.55,19.40,19.15,22.35,22.90,22.20,23.10])
# S is the sample standard deviation (ddof=1).
print("mean = {}".format(np.mean(data))+"\n"+"S = {}".format(np.sqrt(np.var(data, ddof=1))))
mean = 20.9 S = 1.8584267540045805
import re
s = "93 140 8 120 3 120 33 70 91 61 7 100 19 98 110 23 14 94 57 9 66 53 28 76 58 9 73 49 37 92"
# Raw-string pattern: "\s" in a normal string literal is an invalid escape.
re.sub(r"\s+", ",", s.strip())
# re.sub("\s+", "+", s.strip())
'93,140,8,120,3,120,33,70,91,61,7,100,19,98,110,23,14,94,57,9,66,53,28,76,58,9,73,49,37,92'
import numpy as np
# Sample mean and sample standard deviation (ddof=1) of five observations.
data = np.array([37.4, 48.8, 46.9, 55.0, 44.0])
sample_mean = np.mean(data)
sample_std = np.std(data, ddof=1)
print(sample_mean, sample_std)
46.42 6.456159849322196
import numpy as np
# Sample mean and sample standard deviation (ddof=1) of thirty observations.
data = np.array([93, 140, 8, 120, 3, 120, 33, 70, 91, 61,
                 7, 100, 19, 98, 110, 23, 14, 94, 57, 9,
                 66, 53, 28, 76, 58, 9, 73, 49, 37, 92])
sample_mean = np.mean(data)
sample_std = np.std(data, ddof=1)
print(sample_mean, sample_std)
60.36666666666667 39.621905586415835
import re
s = "3.4 3.6 3.8 3.3 3.4 3.5 3.7 3.6 3.7"
# Raw-string pattern: "\s" in a normal string literal is an invalid escape.
re.sub(r"\s+", ",", s.strip())
# re.sub("\s+", "+", s.strip())
'3.4,3.6,3.8,3.3,3.4,3.5,3.7,3.6,3.7'
import numpy as np
# Sample mean and sample standard deviation (ddof=1) of nine observations.
data = np.array([3.4, 3.6, 3.8, 3.3, 3.4, 3.5, 3.7, 3.6, 3.7])
sample_mean = np.mean(data)
sample_std = np.std(data, ddof=1)
print(sample_mean, sample_std)
3.5555555555555554 0.1666666666666667
import re
s1 = "265 272 246 260 274 263 255 258 276 274 274 269 244 212 235 254 224"
s2 = "252 276 243 246 275 246 244 245 259 260 267 267 251 222 235 255 231"
# Print both lists comma-separated. The pattern is a raw string: "\s" in a
# normal string literal is an invalid escape sequence.
print(re.sub(r"\s+", ",", s1.strip()))
print(re.sub(r"\s+", ",", s2.strip()))
# re.sub("\s+", "+", s.strip())
265,272,246,260,274,263,255,258,276,274,274,269,244,212,235,254,224 252,276,243,246,275,246,244,245,259,260,267,267,251,222,235,255,231
# Import before first use: the arrays below already need `np`.
import numpy as np

D1 = np.array([265,272,246,260,274,263,255,258,276,274,274,269,244,212,235,254,224])
D2 = np.array([252,276,243,246,275,246,244,245,259,260,267,267,251,222,235,255,231])
# Element-wise differences of the two paired samples.
data = D1 - D2
# Mean and sample standard deviation (ddof=1) of the differences.
print(np.mean(data), np.sqrt(np.var(data, ddof=1)))
4.764705882352941 9.086593226869367