import d2l
from mxnet import gluon, npx, init
from mxnet.gluon import nn
# Enable MXNet's NumPy-compatible interface before any data or layers are
# created.
npx.set_np()

# Load Fashion-MNIST through the d2l helper; the returned pair is unpacked
# into the training and test iterators, each using mini-batches of 256.
data_iters = d2l.load_data_fashion_mnist(batch_size=256)
train_iter, test_iter = data_iters
# The model
# Multilayer perceptron: one hidden layer of 256 ReLU units followed by a
# 10-unit output layer (presumably one output per Fashion-MNIST class).
net = nn.Sequential()
net.add(nn.Dense(256, activation='relu'))
net.add(nn.Dense(10))

# Use a Normal(sigma=0.01) initializer as the network's default initializer.
weight_init = init.Normal(sigma=0.01)
net.initialize(weight_init)
# Training
# Softmax cross-entropy loss, optimised with SGD at a learning rate of 0.5
# over every parameter of the network.
loss = gluon.loss.SoftmaxCrossEntropyLoss()
params = net.collect_params()
trainer = gluon.Trainer(params, 'sgd', {'learning_rate': 0.5})

# Hand the model, data iterators, loss and trainer to d2l's training helper.
# The fifth positional argument (10) is presumably the number of epochs;
# confirm against d2l.train_ch3's signature.
num_epochs = 10
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)