# Build a CooccurrenceDSM model from the 'wiki.20k' corpus (language 'en').
# NOTE(review): window_size=(2,2) presumably means two context words on each
# side of the focus word -- confirm against the pydsm documentation.
import pydsm
cooc = pydsm.build(pydsm.CooccurrenceDSM, corpus='wiki.20k', window_size=(2,2), language='en')
# Bare expression: its value is displayed in an interactive/notebook session;
# it has no effect when run as a plain script.
cooc

# Apply the `weighting.ppmi` weighting function to the co-occurrence model and
# bind the result to `ppmi`.
# NOTE(review): "ppmi" presumably stands for positive pointwise mutual
# information, and `cooc` is presumably left unmodified -- confirm both
# against the pydsm documentation.
import pydsm.weighting as weighting
ppmi = cooc.apply_weighting(weight_func=weighting.ppmi)
# Bare expression: displayed in an interactive/notebook session only.
ppmi

# Query the weighted model for the nearest neighbors of the word 'moon',
# using `similarity.cos` as the similarity function (presumably cosine
# similarity -- confirm against the pydsm documentation).
# The result is not stored; it is only displayed in an interactive session.
import pydsm.similarity as similarity
ppmi.nearest_neighbors('moon', sim_func=similarity.cos)

# Compose the entries for 'black' and 'white' with the
# `composition.multiplicative` composition function.
# NOTE(review): the variable is called `car_brand`, but the composed words are
# 'black' and 'white' -- either the name or the arguments look stale; confirm
# which one was intended.
import pydsm.composition as composition
car_brand = ppmi.compose('black', 'white', comp_func=composition.multiplicative)
# Bare expression: displayed in an interactive/notebook session only.
car_brand

# The composed result can be passed to nearest_neighbors in place of a word.
ppmi.nearest_neighbors(car_brand, sim_func=similarity.cos)

# Indexing examples. Each line is a bare expression whose value is only
# displayed in an interactive/notebook session; nothing is stored.
ppmi['rock']  # Row selection

ppmi[['correa', 'rock']]  # Multiple rows selection

ppmi['good', 'bad']  # Value selection (row 'good', column 'bad')

ppmi[100:105]  # Rows 100 to 104

ppmi[:, ['good', 'bad']]  # Column selection

ppmi[9000:9010, [0,4,5]]  # Mixing: a row slice combined with a list of column positions

# Visualization examples using pydsm.visualization.
# NOTE: `%matplotlib inline` is an IPython magic command, not Python syntax;
# this section only runs under IPython/Jupyter and is a SyntaxError in a
# plain `python` interpreter.
%matplotlib inline
import pydsm.visualization as visualization
# Plot the whole weighted model with three different plot functions.
visualization.hexbin(ppmi)

visualization.pcolormesh(ppmi)

visualization.heatmap(ppmi)

# Plot a single selected row (the entry for 'model').
visualization.plot_vector(ppmi['model'])

# Work directly on the model's underlying matrix object (`ppmi.matrix`).
# IndexMatrix is imported so that `IndexMatrix.sum` can be used as a sort key
# below.
from pydsm import IndexMatrix
mat = ppmi.matrix
mat.sum(axis=1)  # Sum the matrix row-wise

srted = mat.sum(axis=1).sort(ascending=False)  # Sum the matrix along the rows, and sort it in descending order.
# Bare expression: displayed in an interactive/notebook session only.
srted

deleted = mat.delete(srted[:100], axis=0)  # Delete from mat the first 100 rows in srted (the largest row sums).
# Sort the matrix according to the sum function, in descending order.
# Note that the hundred first rows of srted have been removed from `deleted`.
# NOTE(review): the result of this sort is not assigned; presumably it returns
# a new matrix that is only displayed -- confirm whether sort() works in place.
deleted.sort(axis=0, key=IndexMatrix.sum, ascending=False)

# Arithmetic examples on the result of a row-wise reduction. Apart from the
# `std` assignment, every line is a bare expression whose value is only
# displayed in an interactive/notebook session.
std = deleted.std(axis=1)  # Return the row-wise standard deviation
std

std + 5  # Add a scalar

std / 0.5  # Divide by a scalar

(std * std).sqrt()  # Multiply elementwise by itself, and return the square root

srted.append(ppmi[:,'word'], axis=1)  # Append the 'word' column of ppmi column-wise, matching on the row index