import numpy as np
import pandas as pd
import keras
from keras import Model
from keras.regularizers import l2
from keras.optimizers import (
    Adam,
    Adamax,
    Adagrad,
    SGD,
    RMSprop
)
from keras.layers import (
    Embedding,
    Input,
    Flatten,
    Multiply,
    Concatenate,
    Dense
)
import sys
sys.path.append('../')
from cf_ec2 import (
    GMF,
    MLP,
    NCF,
    Data,
    evaluation,
    evaluation2
)
Using TensorFlow backend.
train = pd.read_csv('../data/ml-1m.train.rating',sep='\t',header=None,names=['user','item','rating','event_ts'])
test = pd.read_csv('../data/ml-1m.test.rating',sep='\t',header=None,names=['user','item','rating','event_ts'])
train.head(3)
| | user | item | rating | event_ts |
|---|---|---|---|---|
| 0 | 0 | 32 | 4 | 978824330 |
| 1 | 0 | 34 | 4 | 978824330 |
| 2 | 0 | 4 | 5 | 978824291 |
test.head(3)
| | user | item | rating | event_ts |
|---|---|---|---|---|
| 0 | 0 | 25 | 5 | 978824351 |
| 1 | 1 | 133 | 3 | 978300174 |
| 2 | 2 | 207 | 4 | 978298504 |
test.user.nunique(), test.shape
(6040, (6040, 4))
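The shapes confirm a leave-one-out split: each of the 6,040 users contributes exactly one held-out interaction.

# one held-out rating per user (leave-one-out protocol)
assert test.user.nunique() == test.shape[0]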
dataset = Data(
    train=train,
    test=test,
    col_user='user',
    col_item='item',
    col_rating='rating',
    col_time='event_ts',
    binary=True,        # treat any observed rating as an implicit positive (label 1)
    n_neg=4,            # negatives sampled per training positive
    n_neg_test=100      # negatives sampled per held-out test positive
)
dataset.prepTrainDNN()            # build flat user/item/label arrays for training
dataset.prepTestDNN(group=True)   # build test arrays, grouped per user
dataset.negativeSampling()        # draw the negative samples configured above
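Negative sampling turns the implicit-feedback data into a binary classification problem: observed interactions become positives, and for each one the sampler draws items the user never touched as negatives. A minimal sketch of the idea with a hypothetical `sample_negatives` helper (the repo's `Data.negativeSampling` may differ in details):

rng = np.random.default_rng(42)

def sample_negatives(pos_items, all_items, n_neg):
    # uniformly draw n_neg items the user has never interacted with
    candidates = np.array(list(all_items - pos_items))
    return rng.choice(candidates, size=n_neg, replace=False)

# e.g. 4 negatives for a user who only rated items 32, 34 and 4
sample_negatives({32, 34, 4}, set(range(3704)), n_neg=4)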
# drop test interactions whose item never appears in training:
# the item embedding has no trained vector for them
newItems = set(dataset.items_test) - set(dataset.items)
# a set makes the `idx not in idx2del` membership test O(1)
idx2del = {
    idx for idx, item in enumerate(dataset.items_test)
    if item in newItems
}
length_test_original = len(dataset.users_test)
dataset.users_test = [
    dataset.users_test[idx]
    for idx in range(length_test_original) if idx not in idx2del
]
dataset.items_test = [
    dataset.items_test[idx]
    for idx in range(length_test_original) if idx not in idx2del
]
dataset.ratings_test = [
    dataset.ratings_test[idx]
    for idx in range(length_test_original) if idx not in idx2del
]
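A quick sanity check that no cold-start items survived the filter:

# every remaining test item now has a trained embedding
assert set(dataset.items_test) <= set(dataset.items)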
n_users = 6040                   # unique users in ML-1M
n_items = 3704                   # unique items seen in training
n_factors_gmf = 32               # GMF embedding size
layers_mlp = [64,32,16,8]        # MLP tower sizes; layers_mlp[0] is the concatenated embedding width
reg_gmf = 0.                     # L2 penalty on the GMF embeddings
reg_layers_mlp = [0.,0.,0.,0.]   # L2 penalty per MLP layer
learning_rate = 0.01
flg_pretrain = ''                # placeholders, unused in this evaluation notebook
filepath = ''
filepath_gmf_pretrain = ''
filepath_mlp_pretrain = ''
num_epochs = 20
batch_size = 100
ncf = NCF(
    n_users=n_users,
    n_items=n_items,
    n_factors_gmf=n_factors_gmf,
    layers_mlp=layers_mlp,
    reg_gmf=reg_gmf,
    reg_layers_mlp=reg_layers_mlp
)
model = ncf.create_model()
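`create_model` lives in `cf_ec2`, so its body is not shown here. For orientation, a minimal sketch of the NeuMF architecture it is named after (He et al., 2017): a GMF tower (element-wise product of user and item embeddings) and an MLP tower, fused into one sigmoid output. Layer names, initializers, and regularizers in the repo's version may differ.

def neumf_sketch(n_users, n_items, n_factors_gmf, layers_mlp):
    user_in = Input(shape=(1,), dtype='int32')
    item_in = Input(shape=(1,), dtype='int32')
    # GMF tower: element-wise product of the two embeddings
    u_gmf = Flatten()(Embedding(n_users, n_factors_gmf)(user_in))
    i_gmf = Flatten()(Embedding(n_items, n_factors_gmf)(item_in))
    gmf = Multiply()([u_gmf, i_gmf])
    # MLP tower: concatenated embeddings pushed through dense layers
    u_mlp = Flatten()(Embedding(n_users, layers_mlp[0] // 2)(user_in))
    i_mlp = Flatten()(Embedding(n_items, layers_mlp[0] // 2)(item_in))
    mlp = Concatenate()([u_mlp, i_mlp])
    for units in layers_mlp[1:]:
        mlp = Dense(units, activation='relu')(mlp)
    # fuse both towers into a single interaction probability
    out = Dense(1, activation='sigmoid')(Concatenate()([gmf, mlp]))
    return Model(inputs=[user_in, item_in], outputs=out)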
model.load_weights('../metadata/ncf/ncf_model_best')
#### compile the model
model.compile(
    optimizer=Adam(lr=learning_rate),
    loss='binary_crossentropy',
    metrics=['accuracy']
)
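Because binary=True turned every rating into a 0/1 label, binary cross-entropy is the natural pointwise loss. For reference, a small numpy equivalent of what Keras computes per batch:

def bce(y_true, y_pred, eps=1e-7):
    # mean binary cross-entropy: -[y*log(p) + (1-y)*log(1-p)]
    y_pred = np.clip(y_pred, eps, 1 - eps)
    return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))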
# estimate loss and accuracy on the filtered test set using the loaded weights
scores = model.evaluate(
    x=[
        np.array(dataset.users_test),
        np.array(dataset.items_test)
    ],
    y=np.array(dataset.ratings_test),
    verbose=0
)
scores
[0.1269758473972751, 0.948145866394043]
model.metrics_names
['loss', 'accuracy']
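Pairing names with values via `dict(zip(model.metrics_names, scores))` gives a loss of ~0.127 and a pointwise accuracy of ~94.8%. Accuracy over a 1-positive-plus-100-negatives test set is an easy target, though; the `evaluation` helpers imported above are meant for ranking metrics. A hedged sketch of Hit Ratio@10 for one user, assuming the grouped test arrays put the positive item first followed by its 100 sampled negatives (this layout is an assumption, not something the `Data` class guarantees):

def hit_ratio_at_k(model, users, items, k=10):
    # score 1 positive + n_neg_test negatives for a single user;
    # the positive is assumed to sit at position 0
    preds = model.predict([np.array(users), np.array(items)], verbose=0).ravel()
    top_k = np.argsort(-preds)[:k]
    return int(0 in top_k)

Averaging this over all users gives HR@10; NDCG@10 follows the same pattern with a log-discounted position weight.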