from google.colab import drive
import os

# Mount Google Drive and set the working directory
drive.mount('/content/gdrive')
print(os.getcwd())
os.chdir("/content/gdrive/My Drive")
Mounted at /content/gdrive
/content
Factor analysis is a useful approach for finding latent variables, that is, variables that are not measured directly by any single observed variable but are instead inferred from other variables in the dataset. These latent variables are called factors, so factor analysis is a measurement model for latent variables. For example, if we find two latent variables in our model, it is called a two-factor model. The main assumption of FA is that such latent variables really exist in our data.
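Formally, the orthogonal factor model assumes that each observed (standardized) variable is a linear combination of a few common factors plus a variable-specific error term:

$$x_i = \ell_{i1} F_1 + \ell_{i2} F_2 + \cdots + \ell_{im} F_m + \varepsilon_i$$

where the $\ell_{ij}$ are the factor loadings. Under this model the variance of $x_i$ splits into the communality $h_i^2 = \sum_j \ell_{ij}^2$ (the part explained by the common factors) and the specific variance, or uniqueness, $1 - h_i^2$; these quantities reappear in the output below.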
Here we perform factor analysis using the principal component method, which is very similar to principal component analysis. We will use women's national track records from 55 countries in seven different events.
The variables X1–X7 are the record times for the seven events, ordered from the 100 m sprint (X1) up to the marathon (X7); the short sprints are recorded in seconds and the longer events in minutes.
import pandas as pd
df = pd.read_csv("women_track_records.csv")
df.head()
| | COUNTRY | X1 | X2 | X3 | X4 | X5 | X6 | X7 |
|---|---|---|---|---|---|---|---|---|
| 0 | Argentina | 11.61 | 22.94 | 54.50 | 2.15 | 4.43 | 9.79 | 178.52 |
| 1 | Australia | 11.20 | 22.35 | 51.80 | 1.98 | 4.13 | 9.08 | 152.37 |
| 2 | Austria | 11.43 | 23.09 | 50.62 | 1.99 | 4.22 | 9.34 | 159.37 |
| 3 | Belgium | 11.41 | 23.04 | 52.00 | 2.00 | 4.14 | 8.88 | 157.85 |
| 4 | Bermuda | 11.46 | 23.05 | 53.30 | 2.16 | 4.58 | 9.81 | 169.98 |
X = df.iloc[:, 1:8]
X.head()
| | X1 | X2 | X3 | X4 | X5 | X6 | X7 |
|---|---|---|---|---|---|---|---|
| 0 | 11.61 | 22.94 | 54.50 | 2.15 | 4.43 | 9.79 | 178.52 |
| 1 | 11.20 | 22.35 | 51.80 | 1.98 | 4.13 | 9.08 | 152.37 |
| 2 | 11.43 | 23.09 | 50.62 | 1.99 | 4.22 | 9.34 | 159.37 |
| 3 | 11.41 | 23.04 | 52.00 | 2.00 | 4.14 | 8.88 | 157.85 |
| 4 | 11.46 | 23.05 | 53.30 | 2.16 | 4.58 | 9.81 | 169.98 |
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_scaled = sc.fit_transform(X)
X_scaled
array([[-2.24428486e-02, -5.62237236e-01,  3.00209295e-01,  6.86677436e-01,
         8.06867242e-02,  3.93983736e-01,  1.74496205e-01],
       [-9.42599641e-01, -1.08128697e+00, -6.11096516e-01, -8.98614916e-01,
        -6.80073818e-01, -2.51633758e-01, -6.91870707e-01],
       ...,
       [ 2.51359904e+00,  1.99782164e+00,  1.72792173e+00,  2.36522228e+00,
         3.58018522e+00,  3.34927508e+00,  4.39799349e+00]])
!pip install factor_analyzer
Successfully installed factor-analyzer-0.4.0
Let's perform FA again with n_factors = 2 (previously, n_factors = 4); this is because we have decided to keep only two factors for our data.
from factor_analyzer import FactorAnalyzer
fa = FactorAnalyzer(n_factors=2, rotation="varimax", method="principal",
is_corr_matrix=False)
fa.fit(X_scaled)
FactorAnalyzer(method='principal', n_factors=2, rotation='varimax', rotation_kwargs={})
print("Eigenvalues:")
print(fa.get_eigenvalues()[0])
print()
print("Communalities:")
print(fa.get_communalities())
print()
print("Specific variances:")
print(fa.get_uniquenesses())
print()
print("Factor loadings:")
print(fa.loadings_)
Eigenvalues:
[5.06759677 0.6020256  0.44429295 0.36590389 0.26931274 0.13929091 0.11157713]

Communalities:
[0.8632044  0.86854473 0.77623794 0.84827979 0.79619776 0.73826138 0.77889637]

Specific variances:
[0.1367956  0.13145527 0.22376206 0.15172021 0.20380224 0.26173862 0.22110363]

Factor loadings:
[[0.8399412  0.3971186 ]
 [0.86109019 0.35646657]
 [0.81415209 0.33674071]
 [0.61543129 0.6852183 ]
 [0.22614824 0.86316553]
 [0.48965453 0.7060452 ]
 [0.46668107 0.74906952]]
Communalities and specific variances: for each standardized variable, the communality (the variance explained by the common factors) and the specific variance (uniqueness) sum to 1, as the output above confirms.
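As a quick sanity check on these definitions, the communalities reported by factor_analyzer should equal the row-wise sums of squared loadings, and each communality plus its uniqueness should equal 1. A minimal sketch, reusing the fitted `fa` object from above:

import numpy as np
# Communality of each variable = sum of its squared loadings over the retained factors
communalities = np.sum(fa.loadings_ ** 2, axis=1)
print(np.allclose(communalities, fa.get_communalities()))  # expected: True
# Communality + specific variance (uniqueness) = 1 for each standardized variable
print(np.allclose(fa.get_communalities() + fa.get_uniquenesses(), 1.0))  # expected: True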
import matplotlib.pyplot as plt
plt.style.use("ggplot")
plt.plot(fa.get_eigenvalues()[0], marker='o')
plt.xlabel("Factor number")
plt.ylabel("Eigenvalue")
plt.title("Scree Plot")
Text(0.5, 1.0, 'Scree Plot')
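To complement the scree plot, we can also check how much of the total variance the two retained factors capture. A short sketch using the fitted `fa` object (`get_factor_variance` returns the variance, the proportion of variance, and the cumulative proportion for each factor):

# Variance explained by each retained (rotated) factor
variance, proportion, cumulative = fa.get_factor_variance()
print("Variance per factor:", variance)
print("Proportion of variance:", proportion)
print("Cumulative proportion:", cumulative)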
It is clear that variables X1, X2, and X3 define factor 1 (high loadings on factor 1, relatively small loadings on factor 2), while variables X4, X5, X6, and X7 define factor 2 (high loadings on factor 2, relatively small loadings on factor 1). The variable X4, however, reflects attributes represented by both factors (roughly equal loadings on both).
To give the two factors names, let us draw on domain knowledge of the field.
The dataset contains women's national records from 55 countries in seven different events. Generally, in short-distance races (e.g. 100 m, 200 m, 400 m) athletes must focus mainly on speed, whereas in long-distance races (e.g. 1500 m, 3000 m, marathon) they must focus mainly on endurance. In our analysis, factor 1 represents short-distance performance (since it is defined by X1, X2, and X3) and factor 2 represents long-distance performance (since it is defined by X4, X5, X6, and X7). We can therefore give the two factors suitable names accordingly: a speed factor and an endurance factor.
transformed_df = pd.DataFrame(fa.transform(X_scaled), columns=['RF1', 'RF2'])
transformed_df
| | RF1 | RF2 |
|---|---|---|
0 | -0.213747 | 0.492749 |
1 | -0.864122 | -0.351732 |
2 | -0.645847 | -0.195162 |
3 | -0.335540 | -0.573830 |
4 | -0.628536 | 0.822508 |
5 | -0.719778 | 0.603115 |
6 | 1.014360 | -0.016134 |
7 | -1.178504 | -0.320728 |
8 | -0.129174 | -0.167418 |
9 | 1.007324 | -0.676723 |
10 | 0.200318 | -0.146080 |
11 | 3.068299 | 0.444342 |
12 | 1.177901 | 0.243695 |
13 | -1.407983 | -1.528352 |
14 | 1.721239 | -1.727718 |
15 | 0.262662 | 1.080618 |
16 | -1.123549 | -0.150988 |
17 | -0.999261 | -0.227916 |
18 | -1.856963 | -0.146468 |
19 | -1.205515 | -0.520131 |
20 | -1.213444 | -0.427182 |
21 | 0.565193 | -0.147662 |
22 | 0.309477 | 1.578275 |
23 | -0.363157 | -0.522204 |
24 | 0.609653 | -0.034210 |
25 | 0.430094 | 0.805991 |
26 | 0.173307 | -0.864563 |
27 | 0.257576 | -0.397847 |
28 | -0.263473 | -1.020930 |
29 | -0.609446 | 1.162281 |
30 | -0.777327 | 1.148980 |
31 | 1.087221 | -0.419528 |
32 | 1.304007 | -1.043131 |
33 | 1.392007 | -0.704095 |
34 | 0.712123 | 0.605590 |
35 | 0.618732 | 1.868953 |
36 | 0.509618 | -0.612468 |
37 | -1.532721 | 1.070597 |
38 | -1.285372 | 1.013866 |
39 | 0.365952 | -1.256078 |
40 | 0.986941 | 1.328728 |
41 | -0.087738 | 1.039127 |
42 | -1.278473 | -0.287339 |
43 | 0.984739 | -1.144108 |
44 | -0.088259 | -1.106852 |
45 | 1.322910 | -0.213729 |
46 | 0.699542 | -0.931063 |
47 | -0.666742 | -0.425431 |
48 | 0.109772 | -0.929756 |
49 | -1.006417 | 0.695615 |
50 | 0.500099 | 0.559547 |
51 | 1.110912 | -0.166870 |
52 | -1.406942 | -0.571562 |
53 | -1.178561 | -0.856376 |
54 | 0.564617 | 4.267786 |
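To visualize this interpretation, we can plot the two rotated factor scores against each other and label each point with its country. A minimal sketch, assuming `df` and `transformed_df` from above are still in scope:

import matplotlib.pyplot as plt
plt.figure(figsize=(8, 6))
plt.scatter(transformed_df['RF1'], transformed_df['RF2'])
# Annotate each point with its country so unusual records stand out
for country, rf1, rf2 in zip(df['COUNTRY'], transformed_df['RF1'], transformed_df['RF2']):
    plt.annotate(country, (rf1, rf2), fontsize=7)
plt.xlabel("RF1 (short-distance / speed)")
plt.ylabel("RF2 (long-distance / endurance)")
plt.title("Rotated factor scores by country")
plt.show()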
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
iris = load_iris()
X = iris.data
y = iris.target
X
array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       ...,
       [6.5, 3. , 5.2, 2. ],
       [6.2, 3.4, 5.4, 2.3],
       [5.9, 3. , 5.1, 1.8]])
y
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_scaled = sc.fit_transform(X)
X_scaled
array([[-9.00681170e-01,  1.01900435e+00, -1.34022653e+00, -1.31544430e+00],
       [-1.14301691e+00, -1.31979479e-01, -1.34022653e+00, -1.31544430e+00],
       [-1.38535265e+00,  3.28414053e-01, -1.39706395e+00, -1.31544430e+00],
       ...,
       [ 4.32165405e-01,  7.88807586e-01,  9.33270550e-01,  1.44883158e+00],
       [ 6.86617933e-02, -1.31979479e-01,  7.62758269e-01,  7.90670654e-01]])
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
lda = LinearDiscriminantAnalysis(n_components=2, solver='svd')
X_lda = lda.fit_transform(X, y)
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(13.5 ,4))
sns.scatterplot(x=X_pca[:, 0], y=X_pca[:, 1], hue=y, palette='Set1', ax=ax[0])
sns.scatterplot(x=X_lda[:, 0], y=X_lda[:, 1], hue=y, palette='Set1', ax=ax[1])
ax[0].set_title("PCA of IRIS", fontsize=15, pad=15)
ax[1].set_title("LDA of IRIS", fontsize=15, pad=15)
ax[0].set_xlabel("PC1", fontsize=12)
ax[0].set_ylabel("PC2", fontsize=12)
ax[1].set_xlabel("LD1", fontsize=12)
ax[1].set_ylabel("LD2", fontsize=12)
Text(0, 0.5, 'LD2')
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
# Create the data
X, y = make_moons(n_samples=500, random_state=42)
sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=y, palette='Set1')
<matplotlib.axes._subplots.AxesSubplot at 0x7f367c4450d0>
Let's implement both PCA and Kernel PCA and look at the differences.
from sklearn.decomposition import PCA
from sklearn.decomposition import KernelPCA
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)
kpca = KernelPCA(n_components=2, kernel='rbf', gamma=15, random_state=42)
X_kpca = kpca.fit_transform(X)
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(13.5 ,4))
sns.scatterplot(x=X_pca[:, 0], y=X_pca[:, 1], hue=y, palette='Set1', ax=ax[0])
sns.scatterplot(x=X_kpca[:, 0], y=X_kpca[:, 1], hue=y, palette='Set1', ax=ax[1])
ax[0].set_title("PCA", fontsize=15, pad=15)
ax[1].set_title("RBF Kernel PCA", fontsize=15, pad=15)
ax[0].set_xlabel("Component 1", fontsize=12)
ax[0].set_ylabel("Component 2", fontsize=12)
ax[1].set_xlabel("Component 1", fontsize=12)
ax[1].set_ylabel("Component 2", fontsize=12)
Text(0, 0.5, 'Component 2')
Plain PCA cannot transform nonlinear data into a linearly separable form. After applying Kernel PCA to the same data, the two classes are well separated linearly (the classes can now be split by drawing a vertical straight line).
Here the original data have 2 dimensions and the plotted data also have 2 dimensions, so did Kernel PCA really reduce the dimensionality of the data? The answer is 'yes': the RBF kernel temporarily projects the two-dimensional data into a new, higher-dimensional feature space in which the classes become linearly separable, and the algorithm then projects that higher-dimensional data back down to two dimensions that can be plotted in a 2D chart.
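The higher-dimensional mapping is never computed explicitly: Kernel PCA only needs the pairwise similarities between points, which for the RBF kernel used above are

$$k(\mathbf{x}_i, \mathbf{x}_j) = \exp\left(-\gamma \,\lVert \mathbf{x}_i - \mathbf{x}_j \rVert^2\right)$$

so gamma controls how quickly the similarity decays with distance and, therefore, how strongly the transformation bends the data.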
One limitation of using Kernel PCA for dimensionality reduction is that we have to specify a value for the gamma hyperparameter before running the algorithm; a hyperparameter-tuning technique such as Grid Search can be used to find an optimal value for gamma.
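Since KernelPCA by itself has no prediction score to optimize, one common approach is to tune gamma indirectly: put Kernel PCA in a pipeline with a simple classifier and let cross-validated classification accuracy guide the search. A rough sketch under that assumption (the classifier and the grid of gamma values below are illustrative, not part of the original analysis):

from sklearn.pipeline import Pipeline
from sklearn.decomposition import KernelPCA
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

# Kernel PCA followed by a simple classifier; the cross-validated accuracy of the
# classifier serves as a proxy for how well each gamma separates the classes
clf = Pipeline([
    ('kpca', KernelPCA(n_components=2, kernel='rbf')),
    ('log_reg', LogisticRegression())
])
param_grid = {'kpca__gamma': [0.1, 1, 5, 10, 15, 20, 30]}  # illustrative grid
grid_search = GridSearchCV(clf, param_grid, cv=3)
grid_search.fit(X, y)  # X, y: the make_moons data created above
print(grid_search.best_params_)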
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_scaled = sc.fit_transform(X)
pca = PCA()
X_pca = pca.fit_transform(X_scaled)
tsne = TSNE()
X_tsne = tsne.fit_transform(X_pca)
Now we create a pipeline that chains the scaler, PCA, and t-SNE.
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
pca = PCA()
tsne = TSNE()
tsne_after_pca = Pipeline([
('std_scaler', sc),
('pca', pca),
('tsne', tsne)
])
X_tsne = tsne_after_pca.fit_transform(X)
Now we apply t-SNE to the Iris dataset. It has only 4 features, so we do not need to run PCA before t-SNE.
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
sns.set_style('darkgrid')
iris = load_iris()
X = iris.data
y = iris.target
sc = StandardScaler()
X_scaled = sc.fit_transform(X)
tsne = TSNE(n_components=2, random_state=1)
X_tsne = tsne.fit_transform(X_scaled)
sns.scatterplot(x=X_tsne[:, 0], y=X_tsne[:, 1], hue=y, palette='Set1')
plt.title("t-SNE of IRIS", fontsize=15, pad=15)
Text(0.5, 1.0, 't-SNE of IRIS')
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.manifold import MDS
sns.set_style('darkgrid')
iris = load_iris()
X = iris.data
y = iris.target
mds = MDS(n_components=2, metric=True, random_state=2)
X_mds = mds.fit_transform(X)
sns.scatterplot(x=X_mds[:, 0], y=X_mds[:, 1], hue=y, palette='Set1')
plt.title("MDS of the IRIS dataset", fontsize=15, pad=15)
Text(0.5, 1.0, 'MDS of the IRIS dataset')
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.manifold import Isomap
sns.set_style('darkgrid')
iris = load_iris()
X = iris.data
y = iris.target
isomap = Isomap(n_neighbors=5, n_components=2,
eigen_solver='auto')
X_isomap = isomap.fit_transform(X)
sns.scatterplot(x=X_isomap[:, 0], y=X_isomap[:, 1], hue=y, palette='Set1')
plt.title("Isomap of IRIS", fontsize=15, pad=15)
/usr/local/lib/python3.7/dist-packages/sklearn/manifold/_isomap.py:324: UserWarning: The number of connected components of the neighbors graph is 2 > 1. Completing the graph to fit Isomap might be slow. Increase the number of neighbors to avoid this issue.
Text(0.5, 1.0, 'Isomap of IRIS')