from sklearn.manifold import TSNE
import seaborn as sns
import matplotlib.pyplot as plt
import pickle
import plotly.express as px
import pandas as pd
import numpy as np
# Load the pickled IMDB frame; the code below reads its `embeddings` and
# `layers` columns.
# NOTE(review): unpickling can execute arbitrary code - only load this file
# from a trusted source.
df = pd.read_pickle("./imdb.pkl")
# Stack the per-row embeddings into a plain 2-D ndarray. np.matrix is
# discouraged by NumPy and rejected by recent scikit-learn input validation.
# Assumes every embedding is a 1-D vector of the same length - TODO confirm.
mat = np.asarray(list(df.embeddings))
# Project to 2 components; random_state seeds the embedding.
tsne_model = TSNE(n_components=2, n_jobs=-1, random_state=42)
low_dim_data = tsne_model.fit_transform(mat)
print('Lower dim data has shape',low_dim_data.shape)
# Output: Lower dim data has shape (6012, 2)
# Scatter plot of the 2-D projection, coloured by layer as a string label.
# TODO: the colour scheme can definitely be improved.
tsne_df = pd.DataFrame(np.column_stack((low_dim_data, df.layers)), columns=['x', 'y', 'layers'])
# Stacking with the float projection presumably turns the layer ids into
# floats, so they stringify as e.g. "3.0". Strip only a trailing ".0": the
# raw string avoids the invalid "\." escape, and the "$" anchor keeps a
# ".0" in the middle of a value intact.
tsne_df['layers'] = tsne_df['layers'].astype(str).replace(r'\.0$', '', regex=True)
fig = px.scatter(
    tsne_df, x=tsne_df['x'], y=tsne_df['y'],
    color=tsne_df.layers, labels={'color': 'layers'}
)
fig.show()
# Scatter plot of the 2-D projection, with the layer column cast to int
# before it is passed as the colour.
# TODO: the colour scheme can definitely be improved.
tsne_df = pd.DataFrame(
    np.column_stack((low_dim_data, df.layers)),
    columns=['x', 'y', 'layers'],
).astype({'layers': int})
fig = px.scatter(
    tsne_df,
    x=tsne_df['x'],
    y=tsne_df['y'],
    color=tsne_df['layers'],
    labels={'color': 'layers'},
)
fig.show()