"""Walkthrough of pydsm, exported from a notebook.

Builds a co-occurrence DSM, applies a weighting function, queries nearest
neighbors, composes two words, demonstrates indexing, plots the model and
finally works directly on the underlying ``IndexMatrix``.

Bare expressions (e.g. ``cooc``) are kept from the notebook cells; they only
display a value when the code is run interactively.
"""

# --- Build a model ---------------------------------------------------------
import pydsm

cooc = pydsm.build(
    pydsm.CooccurrenceDSM,
    corpus='wiki.20k',
    window_size=(2, 2),
    language='en',
)
cooc

# --- Weighting -------------------------------------------------------------
import pydsm.weighting as weighting

ppmi = cooc.apply_weighting(weight_func=weighting.ppmi)
ppmi

# --- Nearest neighbors -----------------------------------------------------
import pydsm.similarity as similarity

ppmi.nearest_neighbors('moon', sim_func=similarity.cos)

# --- Composition -----------------------------------------------------------
import pydsm.composition as composition

car_brand = ppmi.compose('black', 'white', comp_func=composition.multiplicative)
car_brand

# The composed result can itself be used as a nearest-neighbor query.
ppmi.nearest_neighbors(car_brand, sim_func=similarity.cos)

# --- Indexing --------------------------------------------------------------
ppmi['rock']  # Row selection
ppmi[['correa', 'rock']]  # Multiple row selection
ppmi['good', 'bad']  # Value selection
ppmi[100:105]  # Rows 100 to 104
ppmi[:, ['good', 'bad']]  # Column selection
ppmi[9000:9010, [0, 4, 5]]  # Mixing

# --- Visualization ---------------------------------------------------------
# `%matplotlib inline` is IPython-only syntax and a SyntaxError in a plain
# Python file; issue the same magic through the IPython API when available.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    # Not running under IPython: there is no inline backend to enable.
    pass

import pydsm.visualization as visualization

visualization.hexbin(ppmi)
visualization.pcolormesh(ppmi)
visualization.heatmap(ppmi)
visualization.plot_vector(ppmi['model'])

# --- Working with the underlying matrix ------------------------------------
from pydsm import IndexMatrix

mat = ppmi.matrix
mat.sum(axis=1)  # Sum the matrix row-wise

# Sum the matrix along the rows, and sort it.
srted = mat.sum(axis=1).sort(ascending=False)
srted

# Delete the first 100 rows in srted.
deleted = mat.delete(srted[:100], axis=0)

# Sort the matrix according to the sum function.
# Note that the first hundred rows are removed.
deleted.sort(axis=0, key=IndexMatrix.sum, ascending=False)

std = deleted.std(axis=1)  # Return the row-wise standard deviation
std
std + 5
std / 0.5
(std * std).sqrt()  # Multiply elementwise by itself, and return the square root

# Append another matrix column-wise and match index row.
srted.append(ppmi[:, 'word'], axis=1)