动手学深度学习（PyTorch版）代码实践 - 深度学习基础 - 05 Softmax回归基础版

作者：喵喵爱编程 | 2024-06-19 08:28:57

踩

05`Softmax`回归基础版

主要内容

初始化模型参数：定义输入和输出的维度，初始化权重 W 和偏置 b。
定义Softmax函数：实现 Softmax函数，将输入的每个元素转换为概率。
定义模型：实现Softmax回归模型，将每个图像展平为向量并计算输出。
定义损失函数：实现交叉熵损失函数。
定义分类精度计算函数：计算预测正确的数量。
定义累加器类：用于对多个变量进行累加。
定义精度评估函数：计算模型在指定数据集上的精度。
定义训练一个迭代周期的函数：训练模型一个迭代周期，并计算训练损失和准确度。
定义动画绘制实用程序类：用于在动画中绘制训练过程中的损失和准确度。
定义训练函数：训练模型多个迭代周期，并在每个周期后绘制训练损失和准确度。
定义参数更新函数：使用小批量随机梯度下降优化模型的损失函数。
定义预测函数：对测试数据进行预测，并显示前 6 个样本的图像及其真实和预测标签。

import torch
import numpy as np
from IPython import display
import PIL as plt
from d2l import torch as d2l

# Set the batch size of the data iterators to 256
batch_size = 256
# Build the Fashion-MNIST training and test iterators via the d2l helper
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# Initialize the model parameters

# Every sample in the original dataset is a 28*28 image.
# Softmax regression takes a vector as input,
# so each image is flattened and treated as a vector of length 784.
num_inputs = 784

# The dataset has 10 classes, so the network's output dimension is 10
num_outputs = 10

# Weights are drawn from a normal distribution (mean 0, std 0.01) and biases
# start at zero; both record gradients so they can be trained.
W = torch.normal(0,0.01,size=(num_inputs,num_outputs),requires_grad=True)
b = torch.zeros(num_outputs,requires_grad=True)


#回顾：给定一个矩阵X，我们可以对所有元素求和
# X = torch.tensor([[1.0,2.0,3.0],[4.0,5.0,6.0]])
# #keepdim 表示是否需要保持输出的维度与输入一样
# print(X.shape)
# print(X.sum(0, keepdim = True))
# print(X.sum(1, keepdim = True))
"""
torch.Size([2, 3])
tensor([[5., 7., 9.]])
tensor([[ 6.],
        [15.]])
"""

# 定义softmax函数
def softmax(X):
    """Apply the softmax function to every row of a 2-D tensor.

    Each row of ``X`` is turned into a probability distribution: all entries
    are non-negative and every row sums to 1.

    Args:
        X: 2-D tensor of scores, one row per example.

    Returns:
        Tensor with the same shape as ``X`` holding the row-wise probabilities.
    """
    # Softmax is invariant to adding a constant to a row, so subtracting the
    # row maximum leaves the result unchanged while keeping exp() from
    # overflowing to inf (which would otherwise produce inf / inf = NaN).
    # The shift is detached so it is treated as a constant by autograd.
    row_max = X.max(dim=1, keepdim=True).values.detach()
    X_exp = torch.exp(X - row_max)  # exponentiate every (shifted) element
    partition = X_exp.sum(1, keepdim=True)  # per-row normalization constant
    return X_exp / partition  # broadcasting divides each row by its own sum

#我们将每个元素变成一个非负数。 此外，依据概率原理，每行总和为1
# X = torch.normal(0, 1, (2, 5))
# X_prob = softmax(X)
# print(X)
# print(X_prob)
# print(X_prob.sum(1,keepdim=True))
"""
tensor([[-1.8393,  1.1537, -0.3047,  0.2240, -0.9293],
        [-0.6396,  0.4152, -0.3158, -0.1546, -0.1579]])
tensor([[0.0278, 0.5550, 0.1291, 0.2190, 0.0691],
        [0.1177, 0.3379, 0.1627, 0.1912, 0.1905]])
tensor([[1.0000],
        [1.0000]])

"""

#定义模型-实现softmax回归模型
def net(X):
    """Softmax regression model: flatten the inputs, apply the affine map, then softmax."""
    # Flatten every input into a row whose length is W.shape[0]
    # (the number of input features); -1 lets reshape infer the batch size.
    flattened = X.reshape((-1, W.shape[0]))
    # Matrix product with the weights plus the bias gives the class scores
    scores = torch.matmul(flattened, W) + b
    return softmax(scores)


#定义损失函数

# Recap: cross-entropy is the negative log-likelihood of the probability predicted for the true label.
# In y_hat[[0,1],y], the list [0,1] holds the indices of the first and second rows,
# and y is equivalent to [0,2], so the expression picks y_hat[0,0] and y_hat[1,2].
y = torch.tensor([0,2])
# NOTE(review): the first row sums to 0.9 rather than 1 — confirm whether 0.2 was meant to be 0.3.
y_hat = torch.tensor([[0.1,0.2,0.6],[0.3,0.2,0.5]])
# print(y_hat[[0,1]])
# print(y_hat[[0,1],y])

#实现交叉熵损失函数
def cross_entropy(y_hat, y):
    """Return the per-example cross-entropy loss.

    Args:
        y_hat: tensor of predicted probabilities, one row per example.
        y: tensor of true class indices, one per row of ``y_hat``.

    Returns:
        Tensor holding the negative log of the probability that each row
        assigns to its true class.
    """
    # Pair row i with column y[i] to pick the probability of the true label,
    # e.g. for two rows the row indices are range(0, 2), i.e. 0 and 1.
    row_indices = range(len(y_hat))
    true_label_probs = y_hat[row_indices, y]
    return -torch.log(true_label_probs)

# print(cross_entropy(y_hat,y))

# print(range(len(y_hat)))

# print(y_hat.argmax(axis = 1, keepdim=True))
"""
tensor([[2],
        [2]])
"""


#分类精度
def accuracy(y_hat,y): #@save
    """Return the number of correct predictions as a float."""
    # When y_hat has more than one dimension and more than one column, treat
    # each row as class scores and reduce it to the index of its largest entry.
    is_score_matrix = y_hat.ndim > 1 and y_hat.shape[1] > 1
    predicted = y_hat.argmax(axis=1) if is_score_matrix else y_hat
    # Cast the predictions to the label dtype before the element-wise comparison
    matches = predicted.type(y.dtype) == y
    correct_count = matches.type(y.dtype).sum()
    return float(correct_count)

"""
我们将继续使用之前定义的变量y_hat和y分别作为预测的概率分布和标签。 
可以看到，第一个样本的预测类别是2（该行的最大元素为0.6，索引为2），这与实际标签0不一致。
第二个样本的预测类别是2（该行的最大元素为0.5，索引为2），这与实际标签2一致。 
因此，这两个样本的分类精度为0.5。
"""
# print(accuracy(y_hat, y) / len(y))

#定义一个实用程序类Accumulator，用于对多个变量进行累加。 
#在下面的evaluate_accuracy函数中，我们在Accumulator实例中创建了2个变量，
#分别用于存储正确预测的数量和预测的总数量。
class Accumulator:#@save
    """Keep running sums of n quantities."""

    def __init__(self, n) -> None:
        # One float slot per tracked quantity, all starting at zero
        self.data = [0.0 for _ in range(n)]

    def add(self, *args):
        """Add each positional argument to the slot at the same position."""
        # zip pairs slots with arguments and stops at the shorter of the two
        updated = []
        for running_total, increment in zip(self.data, args):
            updated.append(running_total + float(increment))
        self.data = updated

    def reset(self):
        """Set every slot back to zero, keeping the number of slots."""
        self.data = [0.0 for _ in self.data]

    def __getitem__(self, idx):
        """Support ``acc[idx]`` by indexing the underlying list of sums."""
        return self.data[idx]


def evaluate_accuracy(net,data_iter): #@save
    """Compute the accuracy of a model on the given dataset.

    Args:
        net: callable model; if it is a ``torch.nn.Module`` it is switched
            to evaluation mode first.
        data_iter: iterable yielding ``(X, y)`` batches.

    Returns:
        Number of correct predictions divided by the number of labels seen.
    """
    if isinstance(net, torch.nn.Module):
        # Evaluation mode: layers such as BatchNorm and Dropout use their
        # inference behaviour (the trained statistics, no random dropping).
        # Training mode (net.train()) re-enables them and belongs in the
        # training phase, not during evaluation.
        net.eval()
    # Slot 0: correct predictions, slot 1: total predictions
    metric = Accumulator(2)
    with torch.no_grad():  # no gradients are needed while evaluating
        for X, y in data_iter:
            num_correct = accuracy(net(X), y)
            num_labels = y.numel()
            metric.add(num_correct, num_labels)

    total_correct, total_seen = metric[0], metric[1]
    return total_correct / total_seen

# evaluate_accuracy(net, test_iter)


#训练模型一个迭代周期
def train_epoch_ch3(net, train_iter, loss, updater):  #@save
    """Train the model for one epoch.

    Args:
        net: callable model; a ``torch.nn.Module`` is put into training mode.
        train_iter: iterable yielding ``(X, y)`` training batches.
        loss: callable returning the per-example loss for ``(y_hat, y)``.
        updater: either a ``torch.optim.Optimizer`` or a callable that takes
            the batch size and updates the parameters.

    Returns:
        Tuple ``(average training loss, training accuracy)`` over the epoch.
    """
    if isinstance(net, torch.nn.Module):
        net.train()
    uses_torch_optimizer = isinstance(updater, torch.optim.Optimizer)
    # Slots: summed training loss, number of correct predictions, number of examples
    metric = Accumulator(3)
    for X, y in train_iter:
        y_hat = net(X)
        l = loss(y_hat, y)
        if not uses_torch_optimizer:
            # Custom updater: backpropagate the summed loss and hand the
            # batch size to the updater
            l.sum().backward()
            updater(X.shape[0])
        else:
            # Built-in optimizer: clear old gradients, backpropagate the
            # mean loss, then take an optimization step
            updater.zero_grad()
            l.mean().backward()
            updater.step()
        metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    loss_sum, correct_sum, example_count = metric[0], metric[1], metric[2]
    return loss_sum / example_count, correct_sum / example_count

#定义一个在动画中绘制数据的实用程序类Animator

class Animator:  # @save
    """Plot data incrementally, redrawing the figure after every update."""

    def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
                 ylim=None, xscale='linear', yscale='linear',
                 fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
                 figsize=(3.5, 2.5)):
        """Create the figure and remember how its first axes should be configured."""
        legend = [] if legend is None else legend
        d2l.use_svg_display()  # render figures as SVG
        self.fig, self.axes = d2l.plt.subplots(nrows, ncols, figsize=figsize)
        if nrows * ncols == 1:
            # A single subplot is wrapped in a list so self.axes[0] always works
            self.axes = [self.axes]

        def config_axes():
            # Closure over the constructor arguments; called after each redraw
            return d2l.set_axes(
                self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale, legend)

        self.config_axes = config_axes
        # Per-line x and y histories are created lazily on the first add()
        self.X, self.Y, self.fmts = None, None, fmts

    def add(self, x, y):
        """Append one point per line and redraw the figure.

        ``y`` may be a scalar or a sequence with one value per line; a scalar
        ``x`` is repeated for every line.
        """
        if not hasattr(y, "__len__"):
            y = [y]
        n = len(y)
        if not hasattr(x, "__len__"):
            x = [x] * n
        if not self.X:
            self.X = [[] for _ in range(n)]
        if not self.Y:
            self.Y = [[] for _ in range(n)]
        for line_idx, (x_val, y_val) in enumerate(zip(x, y)):
            # Skip a line's point when either coordinate is missing
            if x_val is None or y_val is None:
                continue
            self.X[line_idx].append(x_val)
            self.Y[line_idx].append(y_val)
        first_axes = self.axes[0]
        first_axes.cla()  # clear the axes before redrawing every line
        for xs, ys, fmt in zip(self.X, self.Y, self.fmts):
            first_axes.plot(xs, ys, fmt)
        self.config_axes()
        display.display(self.fig)
        display.clear_output(wait=True)  # replace the previous output on the next display

# 训练函数
def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):  # @save
    """Train the model for several epochs, plotting progress after each one.

    Args:
        net: the model to train.
        train_iter: iterable of training batches.
        test_iter: iterable of test batches.
        loss: per-example loss function.
        num_epochs: number of epochs to run.
        updater: optimizer or custom update callable, forwarded to
            ``train_epoch_ch3``.

    Raises:
        AssertionError: if the final training loss is not below 0.5, or the
            final training/test accuracy is not in the interval (0.7, 1].
            The offending value is attached as the assertion message.
    """
    # Animator draws training loss, training accuracy and test accuracy per epoch
    animator = Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0.3, 0.9],
                        legend=['train loss', 'train acc', 'test acc'])

    for epoch_number in range(1, num_epochs + 1):
        # One pass over the training data, then evaluate on the test set
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        animator.add(epoch_number, train_metrics + (test_acc,))

    # Metrics of the last epoch are used for the sanity checks below
    train_loss, train_acc = train_metrics

    # Sanity checks: e.g. with train_loss = 0.55 the first assert raises
    # AssertionError and reports 0.55
    assert train_loss < 0.5, train_loss
    assert 0.7 < train_acc <= 1, train_acc
    assert 0.7 < test_acc <= 1, test_acc

# Minibatch stochastic gradient descent is used to optimize the model's loss function
# lr is the learning rate handed to d2l.sgd by updater below
lr = 0.1
def updater(batch_size):
    """Apply one d2l.sgd update to W and b using learning rate lr and the given batch size."""
    trainable_params = [W, b]
    return d2l.sgd(trainable_params, lr, batch_size)

num_epochs = 10
# Train the model for num_epochs epochs with the custom cross-entropy loss and updater
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, updater)
# d2l.plt.show()

# 预测
def predict_ch3(net, test_iter, n=6):  # @save
    """Show the first n test images titled with their true and predicted labels.

    Args:
        net: model returning class scores for a batch of images.
        test_iter: iterable of ``(X, y)`` test batches; only the first batch is used.
        n: number of images to display.
    """
    # Pull only the first batch out of the iterator
    for X, y in test_iter:
        break
    true_labels = d2l.get_fashion_mnist_labels(y)
    # argmax over axis 1 turns each row of model outputs into a class index
    predicted_indices = net(X).argmax(axis=1)
    predicted_labels = d2l.get_fashion_mnist_labels(predicted_indices)
    # Title: true label on the first line, predicted label on the second
    titles = ['\n'.join((true, pred))
              for true, pred in zip(true_labels, predicted_labels)]
    images = X[0:n].reshape((n, 28, 28))
    d2l.show_images(images, 1, n, titles=titles[0:n])

# Call the prediction function
predict_ch3(net, test_iter)


d2l.plt.show() # display the figures
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294

训练结果：

在这里插入图片描述

预测结果：

在这里插入图片描述

声明：本文内容由网友自发贡献，不代表【wpsshop博客】立场，版权归原作者所有，本站不承担相应法律责任。如您发现有侵权的内容，请联系我们。转载请注明出处：https://www.wpsshop.cn/w/喵喵爱编程/article/detail/735897