##### Import all the necessary Libraries
from yellowbrick.text import TSNEVisualizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from yellowbrick.text import UMAPVisualizer
from yellowbrick.datasets import load_hobbies
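Uniform Manifold Approximation and Projection (UMAP) is a dimension reduction technique that can be used for visualisation similarly to t-SNE, but also for general non-linear dimension reduction. The algorithm is founded on three assumptions about the data: (1) the data is uniformly distributed on a Riemannian manifold; (2) the Riemannian metric is locally constant (or can be approximated as such); and (3) the manifold is locally connected.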
From these assumptions it is possible to model the manifold with a fuzzy topological structure. The embedding is found by searching for a low dimensional projection of the data that has the closest possible equivalent fuzzy topological structure.
# Load the hobbies text corpus through yellowbrick's dataset loader; the
# `visualize` helper reads its `.data` and `.target` attributes.
corpus = load_hobbies()
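The helper function below wraps the choice of text encoding and dimension reduction technique so that different combinations can be compared rapidly.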
def visualize(dim_reduction, encoding, corpus, labels=True, alpha=0.7, metric=None):
    """Vectorize a text corpus and plot its low-dimensional projection.

    Args:
        dim_reduction: Name of the projection technique. Matched
            case-insensitively by substring: must contain ``'t-sne'`` or
            ``'umap'``. If both appear, t-SNE is used.
        encoding: Name of the text vectorizer. Matched case-insensitively by
            substring: must contain ``'tfidf'`` or ``'count'``. If both
            appear, the count vectorizer is used.
        corpus: Object exposing ``data`` (the documents passed to the
            vectorizer) and ``target`` (the per-document labels).
        labels: When exactly ``True``, ``corpus.target`` is passed to the
            visualizer; any other value fits the visualizer without labels.
        alpha: Opacity forwarded to the visualizer.
        metric: Optional distance metric forwarded to ``UMAPVisualizer``.
            Only used for UMAP; when ``None`` the visualizer's own default
            applies.

    Raises:
        ValueError: If ``encoding`` or ``dim_reduction`` names no supported
            option.
    """
    encoding_key = encoding.lower()
    reduction_key = dim_reduction.lower()

    # Validate both selectors before doing any work, so a typo fails with a
    # clear message instead of an unbound-local error after vectorizing.
    use_count = 'count' in encoding_key
    use_tfidf = 'tfidf' in encoding_key
    if not (use_count or use_tfidf):
        raise ValueError(
            "Unknown encoding %r: expected a name containing 'tfidf' or 'count'"
            % (encoding,)
        )

    use_tsne = 't-sne' in reduction_key
    use_umap = 'umap' in reduction_key
    if not (use_tsne or use_umap):
        raise ValueError(
            "Unknown dim_reduction %r: expected a name containing 't-sne' or 'umap'"
            % (dim_reduction,)
        )

    # 'count' takes precedence when both substrings are present.
    encode = CountVectorizer() if use_count else TfidfVectorizer()
    docs = encode.fit_transform(corpus.data)

    # Only the literal True enables labels.
    targets = corpus.target if labels is True else None

    # 't-sne' takes precedence when both substrings are present.
    if use_tsne:
        viz = TSNEVisualizer(alpha=alpha)
    elif metric is None:
        viz = UMAPVisualizer(alpha=alpha)
    else:
        viz = UMAPVisualizer(alpha=alpha, metric=metric)

    viz.fit(docs, targets)
    viz.show()
# Each entry is (dimension reduction, encoding, extra keyword arguments);
# the runs execute in the listed order against the same corpus.
comparison_runs = [
    ('t-sne', 'tfidf', {}),
    ('t-sne', 'count', {'alpha': 0.5}),
    ('t-sne', 'tfidf', {'labels': False}),
    ('umap', 'tfidf', {}),
    ('umap', 'tfidf', {'labels': False}),
    ('umap', 'count', {'metric': 'cosine'}),
]
for reducer_name, encoder_name, extra_options in comparison_runs:
    visualize(reducer_name, encoder_name, corpus, **extra_options)