!pip install d2l==0.17.6

import torch
from torch import nn
from d2l import torch as d2l

net = nn.Sequential(
    # Here, we use a larger 11 x 11 window to capture objects. At the same
    # time, we use a stride of 4 to greatly reduce the height and width of
    # the output. Here, the number of output channels is much larger than
    # that in LeNet
    nn.Conv2d(1, 96, kernel_size=11, stride=4, padding=1), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    # Make the convolution window smaller, set padding to 2 for consistent
    # height and width across the input and output, and increase the number
    # of output channels
    nn.Conv2d(96, 256, kernel_size=5, padding=2), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    # Use three successive convolutional layers and a smaller convolution
    # window. Except for the final convolutional layer, the number of output
    # channels is further increased. Pooling layers are not used to reduce
    # the height and width of the input after the first two convolutional
    # layers
    nn.Conv2d(256, 384, kernel_size=3, padding=1), nn.ReLU(),
    nn.Conv2d(384, 384, kernel_size=3, padding=1), nn.ReLU(),
    nn.Conv2d(384, 256, kernel_size=3, padding=1), nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Flatten(),
    # Here, the number of outputs of the fully-connected layer is several
    # times larger than that in LeNet. Use dropout layers to mitigate
    # overfitting
    nn.Linear(6400, 4096), nn.ReLU(),
    nn.Dropout(p=0.5),
    nn.Linear(4096, 4096), nn.ReLU(),
    nn.Dropout(p=0.5),
    # Output layer. Since we are using Fashion-MNIST, the number of classes
    # is 10, instead of 1000 as in the paper
    nn.Linear(4096, 10))

X = torch.randn(1, 1, 224, 224)
for layer in net:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)

batch_size = 128
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)

lr, num_epochs = 0.01, 10
d2l.train_ch6(net, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())
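
As a sanity check on the nn.Linear(6400, 4096) layer, the sketch below traces the spatial size of the activations analytically with the usual output-size formula floor((n + 2p - k) / s) + 1 for a convolution or pooling layer. The helper name out_size is ours, not part of the d2l library; it simply reproduces what the shape-printing loop above reports.

def out_size(n, k, s=1, p=0):
    """Output height/width of a conv/pool layer (hypothetical helper)."""
    return (n + 2 * p - k) // s + 1

n = 224
n = out_size(n, k=11, s=4, p=1)  # conv1 -> 54
n = out_size(n, k=3, s=2)        # pool1 -> 26
n = out_size(n, k=5, p=2)        # conv2 -> 26
n = out_size(n, k=3, s=2)        # pool2 -> 12
n = out_size(n, k=3, p=1)        # conv3 -> 12 (conv4 and conv5 likewise)
n = out_size(n, k=3, s=2)        # pool3 -> 5
print(256 * n * n)               # 6400, the in_features of the first Linear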
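
The comment about the fully-connected layers being several times wider than LeNet's can be made concrete with a quick parameter count. This snippet reuses the net defined above:

# Count learnable parameters in the network defined above. The two 4096-wide
# dense layers contribute roughly 43 of the ~47 million parameters.
num_params = sum(p.numel() for p in net.parameters())
print(f'{num_params:,} parameters')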
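
Note that d2l.load_data_fashion_mnist with resize=224 upsamples the 28 x 28 Fashion-MNIST images to 224 x 224 before batching, since AlexNet expects ImageNet-sized inputs. A roughly equivalent standalone pipeline using plain torchvision is sketched below; the exact transforms inside d2l may differ in detail.

import torchvision
from torch.utils import data
from torchvision import transforms

# Resize each image to 224 x 224, then convert to a float tensor in [0, 1].
# This approximates what the d2l helper does; it is not its actual source.
trans = transforms.Compose([transforms.Resize(224), transforms.ToTensor()])
mnist_train = torchvision.datasets.FashionMNIST(
    root='../data', train=True, transform=trans, download=True)
mnist_test = torchvision.datasets.FashionMNIST(
    root='../data', train=False, transform=trans, download=True)
train_iter = data.DataLoader(mnist_train, batch_size=128, shuffle=True)
test_iter = data.DataLoader(mnist_test, batch_size=128, shuffle=False)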