# Draw a reproducible 21-point sample from the standard normal distribution.
np.random.seed(0)  # fix the global NumPy RNG so the draw below is repeatable
x = np.random.normal(loc=0.0, scale=1.0, size=21)
x  # bare expression: echoes the array when run as a notebook cell
array([ 1.76405235, 0.40015721, 0.97873798, 2.2408932 , 1.86755799, -0.97727788, 0.95008842, -0.15135721, -0.10321885, 0.4105985 , 0.14404357, 1.45427351, 0.76103773, 0.12167502, 0.44386323, 0.33367433, 1.49407907, -0.20515826, 0.3130677 , -0.85409574, -2.55298982])
# Histogram of x with a rug mark per observation and no density curve.
bins = np.linspace(start=-4, stop=4, num=17)  # 17 edges -> 16 bins of width 0.5
# NOTE(review): distplot is deprecated in seaborn >= 0.11 (histplot/rugplot are
# the suggested replacements) — confirm which seaborn version this runs on.
sns.distplot(x, bins=bins, kde=False, rug=True)
plt.xlabel("x")
plt.title("히스토그램으로 나타낸 데이터 분포")
plt.show()
# Report the sample mean and sample median of x on a single line.
location_report = "표본평균 = {}, 표본중앙값 = {}".format(x.mean(), np.median(x))
print(location_report)
표본평균 = 0.4206524782418389, 표본중앙값 = 0.4001572083672233
# Locate the fullest histogram bin and print its [left, right) edges.
ns, _ = np.histogram(x, bins=bins)  # ns[i] counts samples in bins[i]..bins[i + 1]
m_bin = ns.argmax()  # on ties argmax keeps the first (leftmost) bin
print("최빈구간 = {}~{}".format(*bins[m_bin:m_bin + 2]))
최빈구간 = 0.0~0.5
# Build a skewed sample (1000 standard-normal draws plus 50 copies of 5.0)
# and compute its mean, median and histogram-based mode.
np.random.seed(1)
x = np.concatenate([np.random.normal(size=1000), np.full(50, 5.0)])

sample_mean = x.mean()
sample_median = np.median(x)

# 49 edges over [-6, 6] -> 48 bins of width 0.25.
bins = np.linspace(-6, 6, 12 * 4 + 1)
ns, _ = np.histogram(x, bins=bins)
mode_index = ns.argmax()
# The mode is reported as the midpoint of the fullest bin.
sample_mode = (bins[mode_index] + bins[mode_index + 1]) / 2
# Plot the distribution of x and mark the three location estimates on it.
# NOTE(review): distplot is deprecated in seaborn >= 0.11 — confirm which
# seaborn version this runs on.
sns.distplot(x, bins=bins)
for position, line_style, legend_text in (
    (sample_mean, ":", "표본평균"),
    (sample_median, "--", "표본중앙값"),
    (sample_mode, "-", "표본최빈값"),
):
    # One black vertical line per estimate, told apart by line style.
    plt.axvline(position, c='k', ls=line_style, label=legend_text)
plt.xlabel("x")
plt.title("표본평균, 표본중앙값, 표본최빈값의 차이")
plt.legend()
plt.show()
# Draw 1000 samples from a normal distribution with mean 0 and standard
# deviation 2, then show the biased (ddof=0) variance and standard deviation.
#
# Seed through numpy directly: `sp.random` was only a deprecated re-export of
# `numpy.random` and is not available in newer SciPy releases. Both spellings
# seed the same global generator, so the sample drawn below is unchanged.
np.random.seed(0)
x = sp.stats.norm(0, 2).rvs(1000)  # generate normal data with mean=0, std=2
np.var(x), np.std(x)  # biased sample variance, sample standard deviation
(3.896937825248617, 1.9740663173380515)
# ddof=1 divides by (n - 1) instead of n.
x.var(ddof=1), x.std(ddof=1)  # unbiased sample variance, sample standard deviation
(3.900838663912529, 1.9750540913890255)
# Sample skewness and excess (Fisher) kurtosis; the keyword arguments spell
# out SciPy's defaults, so the values are the same as the bare calls.
sp.stats.skew(x, bias=True), sp.stats.kurtosis(x, fisher=True, bias=True)
(0.03385895323565712, -0.0467663244783294)
# Central sample moments of x for orders 1 through 4, as a tuple.
tuple(sp.stats.moment(x, order) for order in range(1, 5))
(0.0, 3.896937825248617, 0.2604706193725514, 44.84817401965371)