"""Replicate https://stats.stackexchange.com/questions/190148/autoencoder-pca-tensorflow?rq=1
but using Keras with the TensorFlow backend instead of TensorFlow directly."""
print("load libraries")
import numpy as np
from sklearn.decomposition import PCA
import utils  # project-local helpers: myPlot, pca_err, buildNetwork, ae_fit_encode_plot_mse
# fix the NumPy RNG seed for reproducibility: https://stackoverflow.com/a/34306306/4126114
np.random.seed(0)
N = 100000  # number of samples to generate
# Try dataset from https://stats.stackexchange.com/questions/190148/autoencoder-pca-tensorflow?rq=1
from sklearn.preprocessing import scale
def data1(n_samples: int = 2000, n_features: int = 5):
    # n_samples, n_features = 2000, 5
    X = np.random.uniform(0, 1, (n_samples, n_features))
    X[:, 2] = X[:, 1]**3
    X[:, 3] = X[:, 1] * X[:, 2]
    X[:, 4] = X[:, 1]**2 * X[:, 0]**3
    # The autoencoder expects inputs in [0, 1]; if the PCA input were
    # standardized (mean=0, std=1) while the AE input stayed in [0, 1],
    # the two reconstruction MSEs would not be comparable, so the data is
    # left unscaled here (a range check after the call below confirms it).
    # X = scale(X)
    return X
X = data1(N, 6)
print('X', X[0:10, :])
utils.myPlot(X)
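# Sanity check: data1() only takes powers and products of values drawn from
# [0, 1], so every feature stays in [0, 1] and no extra rescaling (e.g. a
# MinMaxScaler) is needed before the autoencoder.
assert X.min() >= 0.0 and X.max() <= 1.0, "expected all features in [0, 1]"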
X_pca = []
X_pca.append(None) # so that the index starts at 1
N_epochs = 25
for n_c_i in range(1, X.shape[1] + 1):  # alternatively, PCA accepts n_components='mle'
    print("PCA")
    pca = PCA(n_components=n_c_i, random_state=888, svd_solver='full')
    X_pca.append(pca.fit_transform(X))
    print("n_components = ", n_c_i)
    utils.pca_err(X, X_pca[n_c_i])
    if n_c_i == X.shape[1]:
        print("Explained variance ratio (cumsum)", np.cumsum(pca.explained_variance_ratio_))
    # plot
    utils.myPlot(X_pca[n_c_i])
    # train an autoencoder with the same bottleneck dimension for comparison
    print("autoencoder")
    autoencoder, encoder = utils.buildNetwork(X.shape[1], encoding_dim_ae=n_c_i)
    utils.ae_fit_encode_plot_mse(X, autoencoder, encoder, N_epochs, verbose=0)
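# For reference, a minimal sketch of what utils.buildNetwork presumably returns
# (an assumption -- the real helper lives in utils.py and may differ): a Keras
# autoencoder with a single Dense bottleneck of size encoding_dim_ae and a
# sigmoid decoder trained on mean squared error. This sketch is not called
# anywhere in this script; utils.buildNetwork stays in use.
def _build_network_sketch(n_features, encoding_dim_ae):
    from keras.layers import Dense, Input
    from keras.models import Model
    inp = Input(shape=(n_features,))
    encoded = Dense(encoding_dim_ae, activation='linear')(inp)   # bottleneck
    decoded = Dense(n_features, activation='sigmoid')(encoded)   # [0, 1] reconstruction
    autoencoder = Model(inp, decoded)
    encoder = Model(inp, encoded)
    autoencoder.compile(optimizer='adam', loss='mean_squared_error')
    return autoencoder, encoder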
print("autoencoder")
N_epochs = 25
for n_c_i in [7,10,15,20,25]: # 'mle'
print("n_components = ",n_c_i)
autoencoder, encoder = utils.buildNetwork(X.shape[1], encoding_dim_ae = n_c_i)
utils.ae_fit_encode_plot_mse(X, autoencoder, encoder, N_epochs, verbose=0)
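# For reference, a minimal sketch of the fit/score step that
# utils.ae_fit_encode_plot_mse presumably performs (an assumption -- the real
# helper also encodes with `encoder` and plots): train the autoencoder to
# reproduce its input and report the reconstruction MSE, which is on the same
# scale as the PCA reconstruction error above.
def _ae_fit_mse_sketch(X, autoencoder, encoder, n_epochs, verbose=0):
    # `encoder` is kept only to mirror the real helper's signature
    autoencoder.fit(X, X, epochs=n_epochs, batch_size=256, shuffle=True,
                    verbose=verbose)
    X_rec = autoencoder.predict(X)
    mse = np.mean(np.square(X - X_rec))
    print("autoencoder reconstruction MSE", mse)
    return mse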