softmax 回归实现
大约 2 分钟
softmax 回归实现
导入所需的包和库
import torch
import torch.nn as nn
import d2lzh_pytorch as d2l
获取和读取数据
设置 batch_size 大小为 256,利用 torch.utils.data.DataLoader 读取 Fashion-MNIST 数据集数据。
# This function is also bundled in the d2lzh package.
def load_data_fashion_mnist(batch_size, root='./Datasets/', num_workers=0):
    """Download the Fashion-MNIST dataset and load it into DataLoaders.

    Args:
        batch_size: number of samples per mini-batch.
        root: directory the dataset is downloaded to / read from.
        num_workers: subprocesses used for data loading. The original code
            referenced an undefined ``num_workers`` name (NameError at call
            time); it is now a parameter defaulting to 0, which is portable
            (including Windows, where worker processes are often problematic).

    Returns:
        (train_iter, test_iter): DataLoader pair over the train/test splits.
    """
    # Imported locally: torchvision is only needed by this helper, and the
    # file's top-level imports do not include it.
    import torchvision
    import torchvision.transforms as transforms

    # Converts HxWxC uint8 images in [0, 255] to CxHxW floats in [0.0, 1.0].
    transform = transforms.ToTensor()
    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=transform)
    # Shuffle only the training split; evaluation order does not matter but
    # should be deterministic.
    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    return train_iter, test_iter
# Mini-batch size used throughout this script.
batch_size = 256
# d2l's helper wraps the download + DataLoader construction shown above.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
定义和初始化模型
由于从 DataLoader 读出的数据尺寸为 [batch_size x channels x height x width],是一个四维张量,故我们需要把 features 的尺寸 reshape 到 [batch_size x 28*28],然后才能输入到全连接层中。但是相较于每次手动调用 reshape 或者 view() 函数,更聪明的方法是定义一个继承 nn.Module 的类,作为压平 features 尺寸的特殊层,该层的前馈函数就是改变 x 的尺寸。
class FlattenLayer(nn.Module):
    """Flatten every dimension except the batch dimension.

    Turns a [batch, C, H, W] tensor into [batch, C*H*W] so it can feed a
    fully connected (nn.Linear) layer.
    """

    def __init__(self):
        super(FlattenLayer, self).__init__()

    def forward(self, x):
        # Keep dim 0 (batch); collapse everything else into one dimension.
        return x.view(x.shape[0], -1)


# BUG FIX: `num_inputs`/`num_outputs` were used below without ever being
# defined. Fashion-MNIST images are 1x28x28 and there are 10 classes.
num_inputs, num_outputs = 28 * 28, 10
net = nn.Sequential(FlattenLayer(), nn.Linear(num_inputs, num_outputs))
# BUG FIX: the `init` module was never imported; use the `nn.init` namespace
# already in scope. Small random weights, zero bias — standard for a single
# linear layer trained with softmax/cross-entropy.
nn.init.normal_(net[1].weight, mean=0, std=0.01)
nn.init.constant_(net[1].bias, val=0)
定义交叉熵损失函数
softmax 回归中我们使用交叉熵函数作为损失函数。
loss = nn.CrossEntropyLoss()
定义优化算法
optim = optim.SGD(net.parameters(), lr=0.1)
训练模型
# Number of full passes over the training set.
num_epochs = 5
for epoch in range(num_epochs):
    for X, y in train_iter:
        output = net(X)
        # nn.CrossEntropyLoss already returns the mean over the batch, so
        # the original `.sum()` on this scalar was a no-op and is dropped.
        l = loss(output, y)
        optim.zero_grad()  # clear gradients accumulated by the last step
        l.backward()
        optim.step()
    # NOTE(review): this reports only the loss of the *last* batch of the
    # epoch; the extended loop further below tracks a proper running average.
    print(f"epoch:{epoch+1},loss:{l.item()}")
在 softmax 回归中,我们需要计算训练准确度,因此,对训练模型算法稍作调整:
def evaluate_accuracy(data_iter, net):
    """Return the classification accuracy of ``net`` over ``data_iter``.

    Args:
        data_iter: iterable yielding (X, y) batches, where y holds integer
            class labels.
        net: a callable mapping X to per-class scores (logits).

    Returns:
        Fraction of samples whose argmax prediction equals the label.
    """
    acc_sum, n = 0.0, 0
    # Evaluation needs no gradients; without no_grad() every forward pass
    # would build an autograd graph and waste memory.
    with torch.no_grad():
        for X, y in data_iter:
            acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum / n
num_epochs = 5
for epoch in range(num_epochs):
    train_loss_sum = 0.0  # running sum of per-sample losses
    train_acc_sum = 0.0   # running count of correct predictions
    n = 0                 # number of samples seen this epoch
    for X, y in train_iter:
        output = net(X)
        l = loss(output, y)  # mean cross-entropy over this batch
        optim.zero_grad()
        l.backward()
        optim.step()
        # BUG FIX: `l` is a batch *mean*, but the total below is divided by
        # the sample count `n`. The original `train_loss_sum += l.item()`
        # therefore under-reported the loss by a factor of the batch size;
        # scale by the batch size so loss_sum/n is the true per-sample mean.
        train_loss_sum += l.item() * y.shape[0]
        train_acc_sum += (output.argmax(dim=1) == y).sum().item()
        n += y.shape[0]
    test_acc = evaluate_accuracy(test_iter, net)
    print(
        f"epoch:{epoch+1},loss:{train_loss_sum/n},train acc:{train_acc_sum/n},test acc:{test_acc}"
    )
测试模型
现在通过训练,我们就得到了一个模型网络,直接将待测试数据输入网络,即可得到预测结果。
# Fetch one batch of test data by constructing an iterator over test_iter.
X, y = next(iter(test_iter))
true_label = d2l.get_fashion_mnist_labels(y.numpy())
# Inference only — no gradients needed for prediction.
with torch.no_grad():
    pred_label = d2l.get_fashion_mnist_labels(net(X).argmax(dim=1).numpy())
titles = [true + '\n' + pred for true, pred in zip(true_label, pred_label)]
# Show ten samples (indices 10..19 of the batch). NOTE: the original comment
# claimed the "first ten" items, but the slice actually selects 10:20.
d2l.show_fashion_mnist(X[10:20], titles[10:20])