%load_ext watermark
%watermark -v -p sklearn,numpy,scipy,matplotlib
CPython 3.5.6 IPython 6.5.0 sklearn 0.20.1 numpy 1.15.2 scipy 1.1.0 matplotlib 3.0.0
%matplotlib inline
from preamble import *
plt.rcParams['image.cmap'] = "gray"
plt.rcParams['axes.xmargin'] = 0.05
plt.rcParams['axes.ymargin'] = 0.05
mglearn.plots.plot_scaling()
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
# Load the breast cancer dataset and split it into training and test sets.
# random_state is fixed so the split is the same on every run.
cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target,
                                                    random_state=1)
# Show the (n_samples, n_features) shape of each split
print(X_train.shape)
print(X_test.shape)
(426, 30) (143, 30)
from sklearn.preprocessing import MinMaxScaler
# Fit the scaler on the training data only
scaler = MinMaxScaler()
scaler.fit(X_train)
MinMaxScaler(copy=True, feature_range=(0, 1))
# Transform the data
X_train_scaled = scaler.transform(X_train)
# Print properties of the dataset before and after scaling
print("변환된 후 크기: {}".format(X_train_scaled.shape))
print("스케일 조정 전 특성별 최소값:\n {}".format(X_train.min(axis=0)))
print("스케일 조정 전 특성별 최대값:\n {}".format(X_train.max(axis=0)))
print("스케일 조정 후 특성별 최소값:\n {}".format(X_train_scaled.min(axis=0)))
print("스케일 조정 후 특성별 최대값:\n {}".format(X_train_scaled.max(axis=0)))
변환된 후 크기: (426, 30) 스케일 조정 전 특성별 최소값: [ 6.981 9.71 43.79 143.5 0.053 0.019 0. 0. 0.106 0.05 0.115 0.36 0.757 6.802 0.002 0.002 0. 0. 0.01 0.001 7.93 12.02 50.41 185.2 0.071 0.027 0. 0. 0.157 0.055] 스케일 조정 전 특성별 최대값: [ 28.11 39.28 188.5 2501. 0.163 0.287 0.427 0.201 0.304 0.096 2.873 4.885 21.98 542.2 0.031 0.135 0.396 0.053 0.061 0.03 36.04 49.54 251.2 4254. 0.223 0.938 1.17 0.291 0.577 0.149] 스케일 조정 후 특성별 최소값: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.] 스케일 조정 후 특성별 최대값: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
# Transform the test data with the scaler that was fit on the training data
X_test_scaled = scaler.transform(X_test)
# Print properties of the test data after scaling
print("스케일 조정 후 특성별 최소값:\n{}".format(X_test_scaled.min(axis=0)))
print("스케일 조정 후 특성별 최대값:\n{}".format(X_test_scaled.max(axis=0)))
스케일 조정 후 특성별 최소값: [ 0.034 0.023 0.031 0.011 0.141 0.044 0. 0. 0.154 -0.006 -0.001 0.006 0.004 0.001 0.039 0.011 0. 0. -0.032 0.007 0.027 0.058 0.02 0.009 0.109 0.026 0. 0. -0. -0.002] 스케일 조정 후 특성별 최대값: [0.958 0.815 0.956 0.894 0.811 1.22 0.88 0.933 0.932 1.037 0.427 0.498 0.441 0.284 0.487 0.739 0.767 0.629 1.337 0.391 0.896 0.793 0.849 0.745 0.915 1.132 1.07 0.924 1.205 1.631]
matplotlib 3.0 버전에서는 `scatter` 함수에 색깔을 지정할 때 하나의 RGB 포맷 문자열이나 `Colormap`의 리스트를 지정해야 합니다. 경고를 피하기 위해 `mglearn`에서 만든 `ListedColormap` 객체의 `colors` 속성의 원소를 직접 선택하여 RGB 포맷 문자열을 지정합니다.
from sklearn.datasets import make_blobs

# Generate a synthetic dataset
X, _ = make_blobs(n_samples=50, centers=5, random_state=4, cluster_std=2)
# Split it into training and test sets
X_train, X_test = train_test_split(X, random_state=5, test_size=.1)

# Marker styling reused by all three panels
train_style = dict(c=mglearn.cm2.colors[0], s=60)
test_style = dict(marker='^', c=mglearn.cm2.colors[1], s=60)

fig, axes = plt.subplots(1, 3, figsize=(13, 4))
original_ax, scaled_ax, badly_scaled_ax = axes

# Panel 1: scatter plot of the unscaled training and test sets
original_ax.scatter(X_train[:, 0], X_train[:, 1], label="훈련 세트",
                    **train_style)
original_ax.scatter(X_test[:, 0], X_test[:, 1], label="테스트 세트",
                    **test_style)
original_ax.legend(loc='upper left')
original_ax.set_title("원본 데이터")

# Panel 2: both sets rescaled with a MinMaxScaler fit on the training set
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
scaled_ax.scatter(X_train_scaled[:, 0], X_train_scaled[:, 1],
                  label="훈련 세트", **train_style)
scaled_ax.scatter(X_test_scaled[:, 0], X_test_scaled[:, 1],
                  label="테스트 세트", **test_style)
scaled_ax.set_title("스케일 조정된 데이터")

# Panel 3: the test set rescaled with its own scaler, so its minimum becomes 0
# and its maximum becomes 1.
# This is for illustration only and must never be done in practice.
test_scaler = MinMaxScaler()
test_scaler.fit(X_test)
X_test_scaled_badly = test_scaler.transform(X_test)
badly_scaled_ax.scatter(X_train_scaled[:, 0], X_train_scaled[:, 1],
                        label="training set", **train_style)
badly_scaled_ax.scatter(X_test_scaled_badly[:, 0], X_test_scaled_badly[:, 1],
                        label="test set", **test_style)
badly_scaled_ax.set_title("잘못 조정된 데이터")

# Same axis labels on every panel
for panel in axes:
    panel.set_xlabel("특성 0")
    panel.set_ylabel("특성 1")
fig.tight_layout()
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Call fit and transform back to back using method chaining
X_scaled = scaler.fit(X_train).transform(X_train)
# Same result as above, but more efficient
X_scaled_d = scaler.fit_transform(X_train)
사이킷런 0.20 버전에서 `SVC` 클래스의 `gamma` 매개변수 옵션에 `auto` 외에 `scale`이 추가되었습니다. `auto`는 `1/n_features`, 즉 특성 개수의 역수입니다. `scale`은 `1/(n_features * X.std())`로 스케일 조정이 되지 않은 특성에서 더 좋은 결과를 만듭니다(이후 릴리스에서는 `X.std()` 대신 `X.var()`를 사용하도록 수정되었습니다). 사이킷런 0.22 버전부터는 `gamma` 매개변수의 기본값이 `auto`에서 `scale`로 변경됩니다. 서포트 벡터 머신을 사용하기 전에 특성을 표준화 전처리하면 `scale`과 `auto`는 차이가 없습니다. 경고를 피하기 위해 명시적으로 `auto` 옵션을 지정합니다.
from sklearn.svm import SVC
# Re-split the cancer data and train an SVM on the unscaled features
X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target,
                                                    random_state=0)
# gamma is set explicitly to 'auto'
svm = SVC(gamma='auto', C=100)
svm.fit(X_train, y_train)
print("테스트 세트 정확도: {:.2f}".format(svm.score(X_test, y_test)))
테스트 세트 정확도: 0.63
# Rescale the features to the 0-1 range
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Train the SVM on the scaled data
svm.fit(X_train_scaled, y_train)
# Accuracy on the scaled test set
print("스케일 조정된 테스트 세트의 정확도: {:.2f}".format(svm.score(X_test_scaled, y_test)))
스케일 조정된 테스트 세트의 정확도: 0.97
# Rescale the features to zero mean and unit variance
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Train the SVM on the scaled data
svm.fit(X_train_scaled, y_train)
# Accuracy on the scaled test set
print("SVM test accuracy: {:.2f}".format(svm.score(X_test_scaled, y_test)))
SVM test accuracy: 0.96
mglearn.plots.plot_pca_illustration()
# One histogram panel per feature, laid out on a 15 x 2 grid
fig, axes = plt.subplots(15, 2, figsize=(10, 20))
# Separate the samples by class label
malignant = cancer.data[cancer.target == 0]
benign = cancer.data[cancer.target == 1]

ax = axes.ravel()
for i, panel in enumerate(ax):
    # Bin edges are computed from the whole feature column so that both
    # classes are drawn with the same bins
    _, bins = np.histogram(cancer.data[:, i], bins=50)
    for samples, shade in ((malignant, mglearn.cm3(0)),
                           (benign, mglearn.cm3(2))):
        panel.hist(samples[:, i], bins=bins, color=shade, alpha=.5)
    panel.set_title(cancer.feature_names[i])
    panel.set_yticks(())

# Axis labels and legend are attached to the first panel only
ax[0].set_xlabel("특성 크기")
ax[0].set_ylabel("빈도")
ax[0].legend(["악성", "양성"], loc="best")
fig.tight_layout()
from sklearn.datasets import load_breast_cancer
cancer = load_breast_cancer()
# Standardize the full dataset before applying PCA
scaler = StandardScaler()
scaler.fit(cancer.data)
X_scaled = scaler.transform(cancer.data)
from sklearn.decomposition import PCA
# Keep only the first two principal components of the data
pca = PCA(n_components=2)
# Fit the PCA model to the breast cancer data
pca.fit(X_scaled)
# Transform the data onto the first two principal components
X_pca = pca.transform(X_scaled)
print("원본 데이터 형태: {}".format(str(X_scaled.shape)))
print("축소된 데이터 형태: {}".format(str(X_pca.shape)))
원본 데이터 형태: (569, 30) 축소된 데이터 형태: (569, 2)
# Plot the first two principal components, colored by class.
plt.figure(figsize=(8, 8))
mglearn.discrete_scatter(X_pca[:, 0], X_pca[:, 1], cancer.target)
plt.legend(["악성", "양성"], loc="best")
plt.gca().set_aspect("equal")
plt.xlabel("첫 번째 주성분")
plt.ylabel("두 번째 주성분")
Text(0, 0.5, '두 번째 주성분')
print("PCA 주성분 형태: {}".format(pca.components_.shape))
PCA 주성분 형태: (2, 30)
print("PCA 주성분: {}".format(pca.components_))
PCA 주성분: [[ 0.219 0.104 0.228 0.221 0.143 0.239 0.258 0.261 0.138 0.064 0.206 0.017 0.211 0.203 0.015 0.17 0.154 0.183 0.042 0.103 0.228 0.104 0.237 0.225 0.128 0.21 0.229 0.251 0.123 0.132] [-0.234 -0.06 -0.215 -0.231 0.186 0.152 0.06 -0.035 0.19 0.367 -0.106 0.09 -0.089 -0.152 0.204 0.233 0.197 0.13 0.184 0.28 -0.22 -0.045 -0.2 -0.219 0.172 0.144 0.098 -0.008 0.142 0.275]]
# Show the two principal components as a heat map over the original features
plt.matshow(pca.components_, cmap='viridis')
plt.yticks([0, 1], ["첫 번째 주성분", "두 번째 주성분"])
plt.colorbar()
# One tick per feature, labelled with the (rotated) feature name
plt.xticks(range(len(cancer.feature_names)),
           cancer.feature_names, rotation=60, ha='left')
plt.xlabel("특성")
plt.ylabel("주성분")
Text(0, 0.5, '주성분')
from sklearn.datasets import fetch_lfw_people

# Fetch the LFW people data: only people with at least 20 images, using a
# resize ratio of 0.7
people = fetch_lfw_people(min_faces_per_person=20, resize=0.7)
image_shape = people.images[0].shape

# Preview the first ten faces on a 2 x 5 grid without axis ticks,
# each titled with the person's name
no_ticks = dict(xticks=(), yticks=())
fig, axes = plt.subplots(2, 5, figsize=(15, 8), subplot_kw=no_ticks)
for ax, image, target in zip(axes.ravel(), people.images, people.target):
    ax.set_title(people.target_names[target])
    ax.imshow(image)
people.target[0:10], people.target_names[people.target[0:10]]
(array([61, 25, 9, 5, 1, 10, 48, 17, 13, 54]), array(['Winona Ryder', 'Jean Chretien', 'Carlos Menem', 'Ariel Sharon', 'Alvaro Uribe', 'Colin Powell', 'Recep Tayyip Erdogan', 'Gray Davis', 'George Robertson', 'Silvio Berlusconi'], dtype='<U25'))
print("people.images.shape: {}".format(people.images.shape))
print("클래스 개수: {}".format(len(people.target_names)))
people.images.shape: (3023, 87, 65) 클래스 개수: 62
# Count how many times each target occurs
counts = np.bincount(people.target)
# Print each name with its count, three entries per output row
for position, (count, name) in enumerate(zip(counts, people.target_names),
                                         start=1):
    # Every entry is followed by a space; every third one also ends the row
    terminator = ' \n' if position % 3 == 0 else ' '
    print("{0:25} {1:3}".format(name, count), end=terminator)
Alejandro Toledo 39 Alvaro Uribe 35 Amelie Mauresmo 21 Andre Agassi 36 Angelina Jolie 20 Ariel Sharon 77 Arnold Schwarzenegger 42 Atal Bihari Vajpayee 24 Bill Clinton 29 Carlos Menem 21 Colin Powell 236 David Beckham 31 Donald Rumsfeld 121 George Robertson 22 George W Bush 530 Gerhard Schroeder 109 Gloria Macapagal Arroyo 44 Gray Davis 26 Guillermo Coria 30 Hamid Karzai 22 Hans Blix 39 Hugo Chavez 71 Igor Ivanov 20 Jack Straw 28 Jacques Chirac 52 Jean Chretien 55 Jennifer Aniston 21 Jennifer Capriati 42 Jennifer Lopez 21 Jeremy Greenstock 24 Jiang Zemin 20 John Ashcroft 53 John Negroponte 31 Jose Maria Aznar 23 Juan Carlos Ferrero 28 Junichiro Koizumi 60 Kofi Annan 32 Laura Bush 41 Lindsay Davenport 22 Lleyton Hewitt 41 Luiz Inacio Lula da Silva 48 Mahmoud Abbas 29 Megawati Sukarnoputri 33 Michael Bloomberg 20 Naomi Watts 22 Nestor Kirchner 37 Paul Bremer 20 Pete Sampras 22 Recep Tayyip Erdogan 30 Ricardo Lagos 27 Roh Moo-hyun 32 Rudolph Giuliani 26 Saddam Hussein 23 Serena Williams 52 Silvio Berlusconi 33 Tiger Woods 23 Tom Daschle 25 Tom Ridge 33 Tony Blair 144 Vicente Fox 32 Vladimir Putin 49 Winona Ryder 24
# Build a boolean mask that keeps at most the first 50 images of each person.
# The builtin `bool` is used as the dtype: the `np.bool` alias was deprecated
# in NumPy 1.20 and removed in 1.24.
mask = np.zeros(people.target.shape, dtype=bool)
for target in np.unique(people.target):
    # Indices of this person's images, truncated to the first 50
    mask[np.where(people.target == target)[0][:50]] = True
X_people = people.data[mask]
y_people = people.target[mask]
# Scale the grayscale pixel values from the 0-255 range to the 0-1 range.
# (Translator's note) This is almost the same as applying MinMaxScaler.
X_people = X_people / 255.
from sklearn.neighbors import KNeighborsClassifier
# Split the data into training and test sets (stratified by person)
X_train, X_test, y_train, y_test = train_test_split(
    X_people, y_people, stratify=y_people, random_state=0)
# Build a KNeighborsClassifier that uses a single neighbor
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X_train, y_train)
print("1-최근접 이웃의 테스트 세트 점수: {:.2f}".format(knn.score(X_test, y_test)))
1-최근접 이웃의 테스트 세트 점수: 0.23
mglearn.plots.plot_pca_whitening()
# Fit a whitened 100-component PCA on the training faces, then project
# both the training and the test set onto those components
pca = PCA(n_components=100, whiten=True, random_state=0).fit(X_train)
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)
print("X_train_pca.shape: {}".format(X_train_pca.shape))
X_train_pca.shape: (1547, 100)
# Repeat the 1-nearest-neighbor classification on the PCA-transformed data
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X_train_pca, y_train)
print("테스트 세트 정확도: {:.2f}".format(knn.score(X_test_pca, y_test)))
테스트 세트 정확도: 0.31
print("pca.components_.shape: {}".format(pca.components_.shape))
pca.components_.shape: (100, 5655)
# Draw the first 15 principal components as images on a 3 x 5 grid,
# with titles numbered from 1
fig, axes = plt.subplots(3, 5, figsize=(15, 12),
                         subplot_kw=dict(xticks=(), yticks=()))
for number, (ax, component) in enumerate(
        zip(axes.ravel(), pca.components_), start=1):
    ax.imshow(component.reshape(image_shape), cmap='viridis')
    ax.set_title("주성분 {}".format(number))
from matplotlib.offsetbox import OffsetImage, AnnotationBbox

# Schematic figure: a face image written as a weighted sum of the first
# principal components ("face = x_0 * pc0 + x_1 * pc1 + ...").
image_shape = people.images[0].shape
plt.figure(figsize=(20, 3))
ax = plt.gca()

# The first face image, placed at the far left
imagebox = OffsetImage(people.images[0], zoom=2, cmap="gray")
ab = AnnotationBbox(imagebox, (.05, 0.4), pad=0.0, xycoords='data')
ax.add_artist(ab)

# The first four principal components, each preceded by its coefficient label
for i in range(4):
    imagebox = OffsetImage(pca.components_[i].reshape(image_shape), zoom=2,
                           cmap="viridis")
    ab = AnnotationBbox(imagebox, (.285 + .2 * i, 0.4),
                        pad=0.0, xycoords='data')
    ax.add_artist(ab)
    if i == 0:
        plt.text(.155, .3, 'x_{} *'.format(i), fontdict={'fontsize': 30})
    else:
        plt.text(.145 + .2 * i, .3, '+ x_{} *'.format(i),
                 fontdict={'fontsize': 30})

# Trailing ellipsis and the "=" sign between the face and the sum.
# The former plt.rc('text') calls were dropped: without keyword arguments
# they change no rc setting.
plt.text(.95, .3, '+ ...', fontdict={'fontsize': 30})
plt.text(.12, .3, '=', fontdict={'fontsize': 30})
plt.axis("off")
plt.show()
plt.close()
mglearn.plots.plot_pca_faces(X_train, X_test, image_shape)
mglearn.discrete_scatter(X_train_pca[:, 0], X_train_pca[:, 1], y_train)
plt.xlabel("첫 번째 주성분")
plt.ylabel("두 번째 주성분")
Text(0, 0.5, '두 번째 주성분')
mglearn.plots.plot_nmf_illustration()
mglearn.plots.plot_nmf_faces(X_train, X_test, image_shape)
from sklearn.decomposition import NMF

# Fit a 15-component NMF on the training faces and transform both splits
nmf = NMF(n_components=15, random_state=0)
nmf.fit(X_train)
X_train_nmf, X_test_nmf = nmf.transform(X_train), nmf.transform(X_test)

# Draw each NMF component as an image, titled with its 0-based index
fig, axes = plt.subplots(3, 5, figsize=(15, 12),
                         subplot_kw=dict(xticks=(), yticks=()))
for idx, (ax, component) in enumerate(zip(axes.ravel(), nmf.components_)):
    ax.imshow(component.reshape(image_shape))
    ax.set_title("성분 {}".format(idx))
# For the 4th and then the 8th NMF component (indices 3 and 7), sort the
# training images by that component in descending order and show the first
# ten, one figure per component
for compn in (3, 7):
    inds = np.argsort(X_train_nmf[:, compn])[::-1]
    fig, axes = plt.subplots(2, 5, figsize=(15, 8),
                             subplot_kw=dict(xticks=(), yticks=()))
    for ax, ind in zip(axes.ravel(), inds):
        ax.imshow(X_train[ind].reshape(image_shape))
# Create the source signals with mglearn's helper and plot them
S = mglearn.datasets.make_signals()
plt.figure(figsize=(6, 1))
plt.plot(S, '-')
plt.xlabel("시간")
plt.ylabel("신호")
plt.margins(0)
# Mix the source signals into 100 measurements using a random (100, 3)
# mixing matrix drawn with a fixed seed
A = np.random.RandomState(0).uniform(size=(100, 3))
X = np.dot(S, A.T)
print("측정 데이터 형태: {}".format(X.shape))
측정 데이터 형태: (2000, 100)
# Decompose the measurements into three components with NMF
nmf = NMF(n_components=3, random_state=42)
S_ = nmf.fit_transform(X)
print("복원한 신호 데이터 형태: {}".format(S_.shape))
복원한 신호 데이터 형태: (2000, 3)
# Decompose the same measurements with a 3-component PCA for comparison
pca = PCA(n_components=3)
H = pca.fit_transform(X)

# Stack four panels: measurements, source signals, NMF result, PCA result
models = [X, S, S_, H]
names = ['측정 신호 (처음 3개)',
         '원본 신호',
         'NMF로 복원한 신호',
         'PCA로 복원한 신호']
fig, axes = plt.subplots(4, figsize=(8, 4), gridspec_kw={'hspace': .5},
                         subplot_kw={'xticks': (), 'yticks': ()})
for ax, name, model in zip(axes, names, models):
    ax.set_title(name)
    # Only the first three columns of each array are drawn
    ax.plot(model[:, :3], '-')
    ax.margins(0)
from sklearn.datasets import load_digits

digits = load_digits()
# Preview the first ten digit images on a 2 x 5 grid without axis ticks
fig, axes = plt.subplots(2, 5, figsize=(10, 5),
                         subplot_kw=dict(xticks=(), yticks=()))
for img, ax in zip(digits.images, axes.ravel()):
    ax.imshow(img)
# Create the PCA model
pca = PCA(n_components=2)
pca.fit(digits.data)
# Transform the digits data onto the first two principal components
digits_pca = pca.transform(digits.data)

# One color per digit class, indexed by the target value
colors = ["#476A2A", "#7851B8", "#BD3430", "#4A2D4E", "#875525",
          "#A83683", "#4E655E", "#853541", "#3A3120", "#535D8E"]

plt.figure(figsize=(10, 10))
first_pc, second_pc = digits_pca[:, 0], digits_pca[:, 1]
plt.xlim(first_pc.min(), first_pc.max())
plt.ylim(second_pc.min(), second_pc.max())
for (pc1, pc2), digit in zip(digits_pca, digits.target):
    # Draw the scatter plot using the digit itself as the marker text
    plt.text(pc1, pc2, str(digit),
             color=colors[digit],
             fontdict={'weight': 'bold', 'size': 9})
plt.xlabel("첫 번째 주성분")
plt.ylabel("두 번째 주성분")
Text(0, 0.5, '두 번째 주성분')
from sklearn.manifold import TSNE
tsne = TSNE(random_state=42)
# TSNE has no transform method, so fit_transform is used instead
digits_tsne = tsne.fit_transform(digits.data)
# Plot the two-dimensional t-SNE embedding of the digits
plt.figure(figsize=(10, 10))
# Axis limits span the embedding, with 1 unit of extra room on the upper side
plt.xlim(digits_tsne[:, 0].min(), digits_tsne[:, 0].max() + 1)
plt.ylim(digits_tsne[:, 1].min(), digits_tsne[:, 1].max() + 1)
for i in range(len(digits.data)):
    # Draw the scatter plot using the digit itself as the marker text
    plt.text(digits_tsne[i, 0], digits_tsne[i, 1], str(digits.target[i]),
             color=colors[digits.target[i]],
             fontdict={'weight': 'bold', 'size': 9})
plt.xlabel("t-SNE 특성 0")
# The second embedding dimension belongs on the y axis. This was a second
# xlabel call, which overwrote the x label and left the y axis unlabeled.
plt.ylabel("t-SNE 특성 1")
Text(0.5, 0, 't-SNE 특성 1')
mglearn.plots.plot_kmeans_algorithm()
mglearn.plots.plot_kmeans_boundaries()
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
# Generate synthetic two-dimensional data
X, y = make_blobs(random_state=1)
# Build the clustering model
kmeans = KMeans(n_clusters=3)
kmeans.fit(X)
KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300, n_clusters=3, n_init=10, n_jobs=None, precompute_distances='auto', random_state=None, tol=0.0001, verbose=0)
print(kmeans.labels_)
[0 2 2 2 1 1 1 2 0 0 2 2 1 0 1 1 1 0 2 2 1 2 1 0 2 1 1 0 0 1 0 0 1 0 2 1 2 2 2 1 1 2 0 2 2 1 0 0 0 0 2 1 1 1 0 1 2 2 0 0 2 1 1 2 2 1 0 1 0 2 2 2 1 0 0 2 1 1 0 2 0 2 2 1 0 0 0 0 2 0 1 0 0 2 2 1 1 0 1 0]
print(kmeans.predict(X))
[0 2 2 2 1 1 1 2 0 0 2 2 1 0 1 1 1 0 2 2 1 2 1 0 2 1 1 0 0 1 0 0 1 0 2 1 2 2 2 1 1 2 0 2 2 1 0 0 0 0 2 1 1 1 0 1 2 2 0 0 2 1 1 2 2 1 0 1 0 2 2 2 1 0 0 2 1 1 0 2 0 2 2 1 0 0 0 0 2 0 1 0 0 2 2 1 1 0 1 0]
# Plot the points by cluster label, then overlay the three cluster centers
# as triangles
mglearn.discrete_scatter(X[:, 0], X[:, 1], kmeans.labels_, markers='o')
mglearn.discrete_scatter(
    kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], [0, 1, 2],
    markers='^', markeredgewidth=2)
[<matplotlib.lines.Line2D at 0x7f69732a6c50>, <matplotlib.lines.Line2D at 0x7f69732a6f98>, <matplotlib.lines.Line2D at 0x7f69732ac320>]
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
# Use two cluster centers
kmeans = KMeans(n_clusters=2)
kmeans.fit(X)
assignments = kmeans.labels_
mglearn.discrete_scatter(X[:, 0], X[:, 1], assignments, ax=axes[0])
# Use five cluster centers
kmeans = KMeans(n_clusters=5)
kmeans.fit(X)
assignments = kmeans.labels_
mglearn.discrete_scatter(X[:, 0], X[:, 1], assignments, ax=axes[1])
[<matplotlib.lines.Line2D at 0x7f6973ae2dd8>, <matplotlib.lines.Line2D at 0x7f6973ad3160>, <matplotlib.lines.Line2D at 0x7f6973ad34a8>, <matplotlib.lines.Line2D at 0x7f6973ad37f0>, <matplotlib.lines.Line2D at 0x7f6973ad3b38>]
# Generate three blobs with different standard deviations and cluster them
X_varied, y_varied = make_blobs(n_samples=200,
                                cluster_std=[1.0, 2.5, 0.5],
                                random_state=170)
y_pred = KMeans(n_clusters=3, random_state=0).fit_predict(X_varied)
mglearn.discrete_scatter(X_varied[:, 0], X_varied[:, 1], y_pred)
plt.legend(["클러스터 0", "클러스터 1", "클러스터 2"], loc='best')
plt.xlabel("특성 0")
plt.ylabel("특성 1")
Text(0, 0.5, '특성 1')
# Generate random cluster data
X, y = make_blobs(random_state=170, n_samples=600)
rng = np.random.RandomState(74)
# Transform the data so that it is stretched out
transformation = rng.normal(size=(2, 2))
X = np.dot(X, transformation)
# Apply KMeans to the data with three clusters
kmeans = KMeans(n_clusters=3)
kmeans.fit(X)
y_pred = kmeans.predict(X)
# Plot the cluster assignments and the cluster centers
mglearn.discrete_scatter(X[:, 0], X[:, 1], kmeans.labels_, markers='o')
mglearn.discrete_scatter(
    kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], [0, 1, 2],
    markers='^', markeredgewidth=2)
plt.xlabel("특성 0")
plt.ylabel("특성 1")
Text(0, 0.5, '특성 1')
# Generate the two_moons data (with only a little noise this time)
from sklearn.datasets import make_moons
X, y = make_moons(n_samples=200, noise=0.05, random_state=0)
# Apply KMeans to the data with two clusters
kmeans = KMeans(n_clusters=2)
kmeans.fit(X)
y_pred = kmeans.predict(X)
# Show the cluster assignments and the cluster centers
plt.scatter(X[:, 0], X[:, 1], c=y_pred, cmap=mglearn.cm2, s=60, edgecolors='k')
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            marker='^', c=[mglearn.cm2(0), mglearn.cm2(1)], s=100, linewidth=2, edgecolors='k')
plt.xlabel("특성 0")
plt.ylabel("특성 1")
Text(0, 0.5, '특성 1')
# Stratified, seeded split of X_people / y_people (defined earlier in the
# notebook) into training and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    X_people, y_people, stratify=y_people, random_state=42)

# Fit three models on the training data, each with 100 components/clusters.
# fit() returns the estimator itself, so it can be chained onto the constructor.
nmf = NMF(n_components=100, random_state=0).fit(X_train)
pca = PCA(n_components=100, random_state=0).fit(X_train)
kmeans = KMeans(n_clusters=100, random_state=0).fit(X_train)

# Reconstruct the test samples from each model's representation.
# PCA: project onto the components and map back.
X_reconstructed_pca = pca.inverse_transform(pca.transform(X_test))
# k-means: replace each sample with the centre of its predicted cluster.
X_reconstructed_kmeans = kmeans.cluster_centers_[kmeans.predict(X_test)]
# NMF: multiply the transformed coefficients by the learned components.
X_reconstructed_nmf = nmf.transform(X_test).dot(nmf.components_)
/home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) 
/home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) 
/home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) 
/home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) 
/home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) 
/home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) 
/home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) 
/home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) 
/home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) 
/home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) 
/home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) 
/home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) 
/home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) 
/home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) 
/home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) 
/home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) 
/home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) 
/home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) 
/home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) /home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances) 
/home/haesun/anaconda3/envs/introduction_to_ml_with_python/lib/python3.5/site-packages/sklearn/metrics/pairwise.py:258: RuntimeWarning: invalid value encountered in sqrt return distances if squared else np.sqrt(distances, out=distances)
# Show components extracted by k-means, PCA and NMF: one method per row,
# one component per column (zip stops after the five available columns).
no_ticks = {'xticks': (), 'yticks': ()}
fig, axes = plt.subplots(3, 5, figsize=(8, 8), subplot_kw=no_ticks)
fig.suptitle("추출한 성분")
component_triples = zip(kmeans.cluster_centers_, pca.components_,
                        nmf.components_)
for column, (comp_kmeans, comp_pca, comp_nmf) in zip(axes.T, component_triples):
    column[0].imshow(comp_kmeans.reshape(image_shape))
    # the PCA row overrides the default colormap
    column[1].imshow(comp_pca.reshape(image_shape), cmap='viridis')
    column[2].imshow(comp_nmf.reshape(image_shape))
# label each row on its left-most panel
for row, method_name in enumerate(["kmeans", "pca", "nmf"]):
    axes[row, 0].set_ylabel(method_name)
# Compare test images with their reconstructions; rows are
# original / k-means / PCA / NMF and each column is one sample.
fig, axes = plt.subplots(4, 5, figsize=(8, 8),
                         subplot_kw={'xticks': (), 'yticks': ()})
fig.suptitle("재구성")
versions_per_sample = zip(X_test, X_reconstructed_kmeans, X_reconstructed_pca,
                          X_reconstructed_nmf)
for column, versions in zip(axes.T, versions_per_sample):
    # versions = (original, kmeans, pca, nmf) for one sample, top to bottom
    for panel, flat_image in zip(column, versions):
        panel.imshow(flat_image.reshape(image_shape))
# label each row on its left-most panel
axes[0, 0].set_ylabel("원본")
axes[1, 0].set_ylabel("kmeans")
axes[2, 0].set_ylabel("pca")
axes[3, 0].set_ylabel("nmf")
Text(0, 0.5, 'nmf')
# Fit k-means with 10 clusters on the make_moons data, then plot the
# assignments and the cluster centres.
X, y = make_moons(n_samples=200, noise=0.05, random_state=0)

kmeans = KMeans(n_clusters=10, random_state=0)
kmeans.fit(X)
y_pred = kmeans.predict(X)

centers = kmeans.cluster_centers_
shared_style = dict(s=60, cmap='Paired', edgecolors='black')
# data points coloured by their predicted cluster
plt.scatter(X[:, 0], X[:, 1], c=y_pred, **shared_style)
# cluster centres as triangles, coloured by cluster index
plt.scatter(centers[:, 0], centers[:, 1], marker='^',
            c=range(kmeans.n_clusters), linewidth=2, **shared_style)
plt.xlabel("특성 0")
plt.ylabel("특성 1")
print("클러스터 레이블:\n{}".format(y_pred))
클러스터 레이블: [9 2 5 4 2 7 9 6 9 6 1 0 2 6 1 9 3 0 3 1 7 6 8 6 8 5 2 7 5 8 9 8 6 5 3 7 0 9 4 5 0 1 3 5 2 8 9 1 5 6 1 0 7 4 6 3 3 6 3 8 0 4 2 9 6 4 8 2 8 4 0 4 0 5 6 4 5 9 3 0 7 8 0 7 5 8 9 8 0 7 3 9 7 1 7 2 2 0 4 5 6 7 8 9 4 5 4 1 2 3 1 8 8 4 9 2 3 7 0 9 9 1 5 8 5 1 9 5 6 7 9 1 4 0 6 2 6 4 7 9 5 5 3 8 1 9 5 6 3 5 0 2 9 3 0 8 6 0 3 3 5 6 3 2 0 2 3 0 2 6 3 4 4 1 5 6 7 1 1 3 2 4 7 2 7 3 8 6 4 1 4 3 9 9 5 1 7 5 8 2]
# Transform the data with the fitted k-means model and report the shape and
# values of the result (one column per cluster, per the printed shape).
distance_features = kmeans.transform(X)
shape_message = "클러스터 거리 데이터의 형태: {}".format(distance_features.shape)
values_message = "클러스터 거리:\n{}".format(distance_features)
print(shape_message)
print(values_message)
클러스터 거리 데이터의 형태: (200, 10) 클러스터 거리: [[0.922 1.466 1.14 ... 1.166 1.039 0.233] [1.142 2.517 0.12 ... 0.707 2.204 0.983] [0.788 0.774 1.749 ... 1.971 0.716 0.944] ... [0.446 1.106 1.49 ... 1.791 1.032 0.812] [1.39 0.798 1.981 ... 1.978 0.239 1.058] [1.149 2.454 0.045 ... 0.572 2.113 0.882]]
# Draw mglearn's figure for the agglomerative algorithm.
# NOTE(review): the plot itself is defined inside mglearn and is not visible here.
mglearn.plots.plot_agglomerative_algorithm()
from sklearn.cluster import AgglomerativeClustering
# Split the make_blobs data into three clusters with agglomerative
# clustering and plot the assignment.
X, y = make_blobs(random_state=1)

agg = AgglomerativeClustering(n_clusters=3)
assignment = agg.fit_predict(X)

first_feature, second_feature = X[:, 0], X[:, 1]
mglearn.discrete_scatter(first_feature, second_feature, assignment)
legend_labels = ["클러스터 0", "클러스터 1", "클러스터 2"]
plt.legend(legend_labels, loc="best")
plt.xlabel("특성 0")
plt.ylabel("특성 1")
Text(0, 0.5, '특성 1')
# Draw mglearn's agglomerative clustering figure.
# NOTE(review): the plot itself is defined inside mglearn and is not visible here.
mglearn.plots.plot_agglomerative()
# SciPy에서 ward 군집 함수와 덴드로그램 함수를 임포트합니다
from scipy.cluster.hierarchy import dendrogram, ward
X, y = make_blobs(random_state=0, n_samples=12)

# Run SciPy's ward clustering on X; the returned linkage array holds the
# distances bridged while merging clusters.
linkage_array = ward(X)
# Draw the dendrogram from that linkage array
dendrogram(linkage_array)

# Mark the cuts that separate two and three clusters
ax = plt.gca()
bounds = ax.get_xbound()
cut_lines = [(7.25, ' 두 개 클러스터'), (4, ' 세 개 클러스터')]
for height, caption in cut_lines:
    ax.plot(bounds, [height, height], '--', c='k')
for height, caption in cut_lines:
    # caption sits just right of the axis, level with its cut line
    ax.text(bounds[1], height, caption, va='center', fontdict={'size': 15})
plt.xlabel("샘플 번호")
plt.ylabel("클러스터 거리")
Text(0, 0.5, '클러스터 거리')
from sklearn.cluster import DBSCAN
# Run DBSCAN with default parameters on a 12-sample blobs dataset.
X, y = make_blobs(random_state=0, n_samples=12)

dbscan = DBSCAN()
clusters = dbscan.fit_predict(X)
label_report = "클러스터 레이블:\n{}".format(clusters)
print(label_report)
클러스터 레이블: [-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1]
# Run mglearn's DBSCAN demonstration plot; it also prints the cluster labels
# obtained for several min_samples / eps settings (see the recorded output).
mglearn.plots.plot_dbscan()
min_samples: 2 eps: 1.000000 클러스터: [-1 0 0 -1 0 -1 1 1 0 1 -1 -1] min_samples: 2 eps: 1.500000 클러스터: [0 1 1 1 1 0 2 2 1 2 2 0] min_samples: 2 eps: 2.000000 클러스터: [0 1 1 1 1 0 0 0 1 0 0 0] min_samples: 2 eps: 3.000000 클러스터: [0 0 0 0 0 0 0 0 0 0 0 0] min_samples: 3 eps: 1.000000 클러스터: [-1 0 0 -1 0 -1 1 1 0 1 -1 -1] min_samples: 3 eps: 1.500000 클러스터: [0 1 1 1 1 0 2 2 1 2 2 0] min_samples: 3 eps: 2.000000 클러스터: [0 1 1 1 1 0 0 0 1 0 0 0] min_samples: 3 eps: 3.000000 클러스터: [0 0 0 0 0 0 0 0 0 0 0 0] min_samples: 5 eps: 1.000000 클러스터: [-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1] min_samples: 5 eps: 1.500000 클러스터: [-1 0 0 0 0 -1 -1 -1 0 -1 -1 -1] min_samples: 5 eps: 2.000000 클러스터: [-1 0 0 0 0 -1 -1 -1 0 -1 -1 -1] min_samples: 5 eps: 3.000000 클러스터: [0 0 0 0 0 0 0 0 0 0 0 0]
X, y = make_moons(n_samples=200, noise=0.05, random_state=0)

# Rescale the data to zero mean and unit variance
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

dbscan = DBSCAN()
clusters = dbscan.fit_predict(X_scaled)

# Plot the cluster assignment
x0, x1 = X_scaled[:, 0], X_scaled[:, 1]
plt.scatter(x0, x1, c=clusters, cmap=mglearn.cm2, s=60, edgecolors='black')
plt.xlabel("특성 0")
plt.ylabel("특성 1")
Text(0, 0.5, '특성 1')
from sklearn.metrics.cluster import adjusted_rand_score
# Compare clustering algorithms on the scaled make_moons data using the
# adjusted Rand index (ARI) against the true labels y.
X, y = make_moons(n_samples=200, noise=0.05, random_state=0)

# Rescale the data to zero mean and unit variance
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Models to compare
# NOTE(review): KMeans is created without random_state, so its result may
# vary between runs -- confirm whether that is intended.
algorithms = [KMeans(n_clusters=2), AgglomerativeClustering(n_clusters=2),
              DBSCAN()]

# Random cluster assignment used as a reference
random_state = np.random.RandomState(seed=0)
random_clusters = random_state.randint(low=0, high=2, size=len(X))

fig, axes = plt.subplots(1, 4, figsize=(15, 3),
                         subplot_kw={'xticks': (), 'yticks': ()})
point_style = dict(cmap=mglearn.cm3, s=60, edgecolors='black')

# First panel: the random assignment
random_ari = adjusted_rand_score(y, random_clusters)
axes[0].scatter(X_scaled[:, 0], X_scaled[:, 1], c=random_clusters,
                **point_style)
axes[0].set_title("무작위 할당 - ARI: {:.2f}".format(random_ari))

# Remaining panels: the cluster assignment of each algorithm
for ax, algorithm in zip(axes[1:], algorithms):
    clusters = algorithm.fit_predict(X_scaled)
    ari = adjusted_rand_score(y, clusters)
    ax.scatter(X_scaled[:, 0], X_scaled[:, 1], c=clusters, **point_style)
    ax.set_title("{} - ARI: {:.2f}".format(type(algorithm).__name__, ari))
from sklearn.metrics import accuracy_score
# Two labelings that split the points into the same groups
# but with the label names swapped
clusters1 = [0, 0, 1, 1, 0]
clusters2 = [1, 1, 0, 0, 1]

# Every label differs, so accuracy is 0
accuracy = accuracy_score(clusters1, clusters2)
print("정확도: {:.2f}".format(accuracy))

# The same points are grouped together, so ARI is 1
ari = adjusted_rand_score(clusters1, clusters2)
print("ARI: {:.2f}".format(ari))
정확도: 0.00 ARI: 1.00
from sklearn.metrics.cluster import silhouette_score
# Compare clustering algorithms on the scaled make_moons data using the
# silhouette score (which needs no ground-truth labels).
X, y = make_moons(n_samples=200, noise=0.05, random_state=0)

# Rescale the data to zero mean and unit variance
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Random cluster assignment used as a reference
random_state = np.random.RandomState(seed=0)
random_clusters = random_state.randint(low=0, high=2, size=len(X))

fig, axes = plt.subplots(1, 4, figsize=(15, 3),
                         subplot_kw={'xticks': (), 'yticks': ()})
marker_style = dict(cmap=mglearn.cm3, s=60, edgecolors='black')

# First panel: the random assignment
random_silhouette = silhouette_score(X_scaled, random_clusters)
axes[0].scatter(X_scaled[:, 0], X_scaled[:, 1], c=random_clusters,
                **marker_style)
axes[0].set_title("무작위 할당: {:.2f}".format(random_silhouette))

# NOTE(review): KMeans is created without random_state, so its result may
# vary between runs -- confirm whether that is intended.
algorithms = [KMeans(n_clusters=2), AgglomerativeClustering(n_clusters=2),
              DBSCAN()]

# Remaining panels: the cluster assignment of each algorithm
for ax, algorithm in zip(axes[1:], algorithms):
    clusters = algorithm.fit_predict(X_scaled)
    silhouette = silhouette_score(X_scaled, clusters)
    ax.scatter(X_scaled[:, 0], X_scaled[:, 1], c=clusters, **marker_style)
    ax.set_title("{} : {:.2f}".format(type(algorithm).__name__, silhouette))
# LFW 데이터에서 고유얼굴을 찾은 다음 데이터를 변환합니다
from sklearn.decomposition import PCA
# Fit a whitened 100-component PCA on the face images, then project them.
# Only fitting is needed before transform(); the previous fit_transform()
# call computed a projection that was thrown away and recomputed right after.
pca = PCA(n_components=100, whiten=True, random_state=0)
pca.fit(X_people)
X_pca = pca.transform(X_people)
# Apply DBSCAN with its default parameters and list the distinct labels
dbscan = DBSCAN()
labels = dbscan.fit_predict(X_pca)
unique_labels = np.unique(labels)
print("고유한 레이블: {}".format(unique_labels))
고유한 레이블: [-1]
# Retry with a smaller min_samples and list the distinct labels again
dbscan = DBSCAN(min_samples=3)
labels = dbscan.fit_predict(X_pca)
found_labels = np.unique(labels)
print("고유한 레이블: {}".format(found_labels))
고유한 레이블: [-1]
# Retry with min_samples=3 and a much larger eps
dbscan = DBSCAN(min_samples=3, eps=15)
labels = dbscan.fit_predict(X_pca)
distinct_labels = np.unique(labels)
print("고유한 레이블: {}".format(distinct_labels))
고유한 레이블: [-1 0]
# Count the noise points and the points in each cluster.
# bincount cannot take negative values, so labels are shifted up by one;
# the first entry of the result is therefore the number of noise points.
points_per_label = np.bincount(labels + 1)
print("클러스터별 포인트 수: {}".format(points_per_label))
클러스터별 포인트 수: [ 32 2031]
# Display images labelled as noise (-1); the 3x9 grid shows at most 27.
is_noise = labels == -1
noise = X_people[is_noise]

fig, axes = plt.subplots(3, 9, figsize=(12, 4),
                         subplot_kw={'xticks': (), 'yticks': ()})
for image, ax in zip(noise, axes.ravel()):
    picture = image.reshape(image_shape)
    ax.imshow(picture, vmin=0, vmax=1)
# Try a range of eps values and report what DBSCAN finds for each.
for eps in range(1, 14, 2):
    print("\neps={}".format(eps))
    dbscan = DBSCAN(eps=eps, min_samples=3)
    labels = dbscan.fit_predict(X_pca)
    # NOTE(review): this counts distinct labels, so the noise label (-1)
    # is included in the "cluster count" whenever noise is present.
    n_distinct = len(np.unique(labels))
    print("클러스터 수: {}".format(n_distinct))
    # shift by one so the noise label lands in bin 0
    sizes = np.bincount(labels + 1)
    print("클러스터 크기: {}".format(sizes))
eps=1 클러스터 수: 1 클러스터 크기: [2063] eps=3 클러스터 수: 1 클러스터 크기: [2063] eps=5 클러스터 수: 1 클러스터 크기: [2063] eps=7 클러스터 수: 14 클러스터 크기: [2004 3 14 7 4 3 3 4 4 3 3 5 3 3] eps=9 클러스터 수: 4 클러스터 크기: [1307 750 3 3] eps=11 클러스터 수: 2 클러스터 크기: [ 413 1650] eps=13 클러스터 수: 2 클러스터 크기: [ 120 1943]
# Show the member images of every cluster DBSCAN finds with eps=7,
# one figure (a single row of panels) per cluster.
dbscan = DBSCAN(min_samples=3, eps=7)
labels = dbscan.fit_predict(X_pca)

n_columns = 14  # panels per figure; larger clusters are cut off at this many
for cluster in range(labels.max() + 1):
    mask = labels == cluster
    # number of panels actually filled with a face image
    n_shown = min(np.sum(mask), n_columns)
    fig, axes = plt.subplots(1, n_columns, figsize=(n_columns * 1.5, 4),
                             subplot_kw={'xticks': (), 'yticks': ()})
    for image, label, ax in zip(X_people[mask], y_people[mask], axes):
        ax.imshow(image.reshape(image_shape), vmin=0, vmax=1)
        # title each face with the last word of the person's name
        ax.set_title(people.target_names[label].split()[-1])
    # Fill the remaining panels with a blank image of the same shape as the
    # faces (instead of a hard-coded 87x65 array) and hide their axes.
    for ax in axes[n_shown:]:
        ax.imshow(np.ones(image_shape), vmin=0, vmax=1)
        ax.axis('off')
n_clusters = 10

# Extract clusters with k-means
km = KMeans(n_clusters=n_clusters, random_state=0)
labels_km = km.fit_predict(X_pca)
km_cluster_sizes = np.bincount(labels_km)
print("k-평균의 클러스터 크기: {}".format(km_cluster_sizes))
k-평균의 클러스터 크기: [155 175 238 75 358 257 91 219 323 172]
# Map each k-means centre back from PCA space to pixel space and show it
# as an image.
fig, axes = plt.subplots(2, 5, figsize=(12, 4),
                         subplot_kw={'xticks': (), 'yticks': ()})
for center, ax in zip(km.cluster_centers_, axes.ravel()):
    center_pixels = pca.inverse_transform(center)
    ax.imshow(center_pixels.reshape(image_shape), vmin=0, vmax=1)
# Call mglearn's k-means faces plot with the fitted k-means and PCA models,
# the PCA features, the raw images, their targets and the target names.
# NOTE(review): what exactly is drawn is defined inside mglearn -- not visible here.
mglearn.plots.plot_kmeans_faces(km, pca, X_pca, X_people,
y_people, people.target_names)
# Extract clusters with agglomerative clustering
agglomerative = AgglomerativeClustering(n_clusters=10)
labels_agg = agglomerative.fit_predict(X_pca)
agg_cluster_sizes = np.bincount(labels_agg)
print("병합 군집의 클러스터 크기: {}".format(agg_cluster_sizes))
병합 군집의 클러스터 크기: [169 660 144 329 217 85 18 261 31 149]
# Adjusted Rand index between the agglomerative and the k-means labelings
print("ARI: {:.2f}".format(adjusted_rand_score(labels_agg, labels_km)))
ARI: 0.09
# Build the ward linkage for the PCA features
linkage_array = ward(X_pca)

# Draw a truncated dendrogram from the linkage array (distances between clusters)
plt.figure(figsize=(20, 5))
dendrogram(linkage_array, p=7, truncate_mode='level', no_labels=True)
plt.xlabel("샘플 번호")
plt.ylabel("클러스터 거리")

# Dashed horizontal line across the full width of the plot
cut_height = 36
ax = plt.gca()
bounds = ax.get_xbound()
ax.plot(bounds, [cut_height, cut_height], '--', c='k')
[<matplotlib.lines.Line2D at 0x7f6973ffe710>]
# Show up to ten member images for each agglomerative cluster, one figure
# per cluster.
n_clusters = 10
for cluster in range(n_clusters):
    mask = labels_agg == cluster
    fig, axes = plt.subplots(1, 10, subplot_kw={'xticks': (), 'yticks': ()},
                             figsize=(15, 8))
    # the y-label of the first panel shows the cluster size
    axes[0].set_ylabel(np.sum(mask))
    # labels_agg[mask] was previously zipped in but never used; it has the same
    # length as the other masked arrays, so dropping it does not change the loop.
    for image, label, ax in zip(X_people[mask], y_people[mask], axes):
        ax.imshow(image.reshape(image_shape), vmin=0, vmax=1)
        # title each face with the last word of the person's name
        ax.set_title(people.target_names[label].split()[-1],
                     fontdict={'fontsize': 9})
# Extract clusters with agglomerative clustering.
# n_clusters is defined first and passed to the estimator, so the variable
# and the model cannot drift apart (it used to be assigned afterwards, unused).
n_clusters = 40
agglomerative = AgglomerativeClustering(n_clusters=n_clusters)
labels_agg = agglomerative.fit_predict(X_pca)
print("병합 군집의 클러스터 크기: {}".format(np.bincount(labels_agg)))

n_columns = 15  # panels per figure; larger clusters are cut off at this many
for cluster in [13, 16, 23, 38, 39]:  # a few hand-picked interesting clusters
    mask = labels_agg == cluster
    fig, axes = plt.subplots(1, n_columns,
                             subplot_kw={'xticks': (), 'yticks': ()},
                             figsize=(15, 8))
    cluster_size = np.sum(mask)
    # the y-label of the first panel shows the cluster id and its size
    axes[0].set_ylabel("#{}: {}".format(cluster, cluster_size))
    # labels_agg[mask] was previously zipped in but never used; it has the same
    # length as the other masked arrays, so dropping it does not change the loop.
    for image, label, ax in zip(X_people[mask], y_people[mask], axes):
        ax.imshow(image.reshape(image_shape), vmin=0, vmax=1)
        # title each face with the last word of the person's name
        ax.set_title(people.target_names[label].split()[-1],
                     fontdict={'fontsize': 9})
    # hide the panels left over when the cluster has fewer images than columns
    for ax in axes[cluster_size:]:
        ax.set_visible(False)
병합 군집의 클러스터 크기: [ 43 120 100 194 56 58 127 22 6 37 65 49 84 18 168 44 47 31 78 30 166 20 57 14 11 29 23 5 8 84 67 30 57 16 22 12 29 2 26 8]
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression()