#!/usr/bin/env python
# coding: utf-8
"""IMDb sentiment classification with ktrain (script exported from a notebook).

Loads the IMDb review dataset from a folder, trains a small
embedding + average-pooling classifier, and then builds, saves, reloads and
queries a ktrain predictor on raw text.

NOTE: the ``get_ipython()`` calls below are only defined inside an
IPython/Jupyter session; the script is meant to be run there.
"""

# In[1]:

get_ipython().run_line_magic('reload_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')
get_ipython().run_line_magic('matplotlib', 'inline')

import os

# Set before ktrain is imported, matching the original cell order.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"


# In[2]:

import ktrain
from ktrain import text


# In[3]:

# load training and validation data from a folder
# download and unzip IMDb dataset:
#   https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
DATADIR = 'data/aclImdb'

# Shared by the preprocessing call and the model's Embedding layer so the two
# cannot drift apart.
MAX_FEATURES = 20000
MAXLEN = 400

(x_train, y_train), (x_test, y_test), preproc = text.texts_from_folder(
    DATADIR,
    max_features=MAX_FEATURES,
    maxlen=MAXLEN,
    ngram_range=1,
    classes=['pos', 'neg'],
)


# In[4]:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D


def get_model(max_features: int = MAX_FEATURES,
              maxlen: int = MAXLEN,
              embedding_dim: int = 50) -> Sequential:
    """Build and compile the two-class text classifier.

    The network is Embedding -> GlobalAveragePooling1D -> Dense(2, softmax),
    compiled with categorical cross-entropy, the Adam optimizer and an
    accuracy metric.

    Args:
        max_features: Vocabulary size passed to preprocessing; the embedding
            table gets one extra row for the padding token.
        maxlen: Length of each input sequence.
        embedding_dim: Size of each embedding vector.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    model = Sequential()
    # add 1 for padding token
    model.add(Embedding(max_features + 1, embedding_dim, input_length=maxlen))
    model.add(GlobalAveragePooling1D())
    model.add(Dense(2, activation='softmax'))
    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model


model = get_model()


# In[5]:

learner = ktrain.get_learner(
    model,
    train_data=(x_train, y_train),
    val_data=(x_test, y_test),
)


# In[6]:

learner.lr_find()
learner.lr_plot()


# In[7]:

# presumably (learning rate, epochs) -- confirm against the ktrain docs
learner.autofit(0.005, 2)


# In[8]:

predictor = ktrain.get_predictor(learner.model, preproc)


# In[9]:

data = [
    'This movie was horrible! The plot was boring. Acting was okay, though.',
    'The film really sucked. I want my money back.',
    'What a beautiful romantic comedy. 10/10 would see again!',
]


# In[10]:

predictor.predict(data)


# In[11]:

predictor.save('/tmp/mypred')


# In[12]:

predictor = ktrain.load_predictor('/tmp/mypred')


# In[13]:

predictor.predict(['The plot had lots of holes and did not make sense'])