%reload_ext autoreload
%autoreload 2
%matplotlib inline

from fastai.nlp import *
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from torchtext import vocab, data, datasets
# explicit imports for the weight-init and LSTM helpers defined at the end
from torch.autograd import Variable
from torch.nn.init import xavier_uniform, calculate_gain

sl = 1000            # sequence length passed to the bag-of-words data object
vocab_size = 200000  # cap on the number of n-gram features

# IMDB sentiment data: one text file per review, under neg/ and pos/ folders
PATH = 'data/aclImdb/'
names = ['neg', 'pos']
trn, trn_y = texts_labels_from_folders(f'{PATH}train', names)
val, val_y = texts_labels_from_folders(f'{PATH}test', names)
trn[0]
trn_y[0]

# Bag of words with uni-, bi- and trigrams, capped at vocab_size features
veczr = CountVectorizer(ngram_range=(1,3), tokenizer=tokenize, max_features=vocab_size)
trn_term_doc = veczr.fit_transform(trn)
val_term_doc = veczr.transform(val)
trn_term_doc.shape
veczr.get_params()

# here is the vocabulary
vocab = veczr.get_feature_names()
vocab[50:55]

# Here is how we get a model from a bag of words
md = TextClassifierData.from_bow(trn_term_doc, trn_y, val_term_doc, val_y, sl)
learner = md.dotprod_nb_learner()
learner.fit(0.02, 1, wds=1e-5, cycle_len=1)

learner = md.dotprod_nb_learner()
learner.fit(0.02, 1, wds=1e-6)

# Plain Naive Bayes on unigram counts
veczr = CountVectorizer(tokenizer=tokenize)
trn_term_doc = veczr.fit_transform(trn)
val_term_doc = veczr.transform(val)

x = trn_term_doc
y = trn_y

p = x[y==1].sum(0) + 1                    # smoothed feature counts in positive reviews
q = x[y==0].sum(0) + 1                    # smoothed feature counts in negative reviews
r = np.log((p/p.sum()) / (q/q.sum()))     # log-count ratio per feature
b = np.log((y==1).sum() / (y==0).sum())   # log of the class prior ratio

# Naive Bayes predictions from raw counts...
pre_preds = val_term_doc @ r.T + b
preds = pre_preds.T > 0
(preds == val_y).mean()

# ...and from binarized counts (did the word appear at all?), which works better
pre_preds = val_term_doc.sign() @ r.T + b
preds = pre_preds.T > 0
(preds == val_y).mean()

# Logistic regression on the same term-document matrix
m = LogisticRegression(C=0.1, fit_intercept=False, dual=True)
m.fit(x, y)
preds = m.predict(val_term_doc)
(preds == val_y).mean()

# Bigram features, binarized
veczr = CountVectorizer(ngram_range=(1,2), tokenizer=tokenize)
trn_term_doc = veczr.fit_transform(trn)
val_term_doc = veczr.transform(val)

y = trn_y
x = trn_term_doc.sign()
val_x = val_term_doc.sign()

p = x[y==1].sum(0) + 1
q = x[y==0].sum(0) + 1
r = np.log((p/p.sum()) / (q/q.sum()))
b = np.log((y==1).sum() / (y==0).sum())

# Regularized logistic regression on the binarized bigram features
m = LogisticRegression(C=0.1, fit_intercept=False)
m.fit(x, y)
preds = m.predict(val_x)
(preds.T == val_y).mean()

r

# NBSVM-style model: scale the features by the Naive Bayes log-count ratio,
# then fit a logistic regression on top
x_nb = x.multiply(r)
val_x_nb = val_x.multiply(r)
m = LogisticRegression(dual=True, C=1, fit_intercept=False)
m.fit(x_nb, y)
w = m.coef_.T
preds = (val_x_nb @ w + m.intercept_) > 0
(preds.T == val_y).mean()

# Interpolate the learned weights with their mean (beta, as in the NBSVM paper)
beta = 0.25
w = (1-beta)*m.coef_.mean() + beta*m.coef_.T
preds = (val_x_nb @ w + m.intercept_) > 0
(preds.T == val_y).mean()

# Equivalently, fold r into the weights and apply them to the raw binarized matrix
w2 = w.T[0] * r.A1
preds = (val_x @ w2 + m.intercept_) > 0
(preds.T == val_y).mean()

# LSTM that creates its own zeroed initial state and returns the last output step
class EzLSTM(nn.LSTM):
    def __init__(self, input_size, hidden_size, *args, **kwargs):
        super().__init__(input_size, hidden_size, *args, **kwargs)
        self.num_dirs = 2 if self.bidirectional else 1
        self.input_size = input_size
        self.hidden_size = hidden_size

    def forward(self, x):
        h0 = c0 = Variable(torch.zeros(self.num_dirs, x.size(1), self.hidden_size)).cuda()
        outp, _ = super().forward(x, (h0, c0))
        return outp[-1]

# Uniform init for embeddings, xavier init for linear/conv layers;
# the layer at index last_l is re-initialized with 'linear' gain
def init_wgts(m, last_l=-2):
    c = list(m.children())
    for l in c:
        if isinstance(l, nn.Embedding): l.weight.data.uniform_(-0.05, 0.05)
        elif isinstance(l, (nn.Linear, nn.Conv1d)):
            xavier_uniform(l.weight.data, gain=calculate_gain('relu'))
            l.bias.data.zero_()
    xavier_uniform(c[last_l].weight.data, gain=calculate_gain('linear'))

# Sequential container that prints the output size after each layer (for debugging)
class SeqSize(nn.Sequential):
    def forward(self, x):
        for l in self.children():
            x = l(x)
            print(x.size())
        return x
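# The three helpers above are defined but not exercised in this section. Below is a
# minimal sketch of how they might be wired together; the embedding width, hidden
# size, batch size and the .cuda() calls are illustrative assumptions (EzLSTM already
# hard-codes .cuda(), so a GPU is required), not values from the original notebook.

em_sz, nh, n_classes = 100, 256, 2      # assumed sizes for illustration only
model = SeqSize(
    nn.Embedding(vocab_size, em_sz),    # (seq_len, batch) -> (seq_len, batch, em_sz)
    EzLSTM(em_sz, nh),                  # returns the last time step: (batch, nh)
    nn.Linear(nh, n_classes),           # (batch, n_classes)
).cuda()

# last_l=-1 because the final Linear is the last child of this particular model
init_wgts(model, last_l=-1)

# A fake batch of token ids, shaped (sequence length, batch size) as nn.LSTM
# expects by default (batch_first=False)
xb = Variable(torch.zeros(sl, 64).long()).cuda()
out = model(xb)                         # prints the output size after each layer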