PyTorch：二、构建卷积神经网络_predeq

作者：我家小花儿 | 2024-04-06 14:27:24

踩

predeq

一、制作自己的数据集

源代码

import torch
from torch.utils.data import Dataset

import pandas as pd
import numpy as np
# Each line of the index txt file has the form: path \t category \t length \n
txt_path = 'G:/stock/path.txt'

class SocktData(Dataset):
    """Dataset of per-sample CSV files listed in a tab-separated index file.

    Each non-blank line of the index file is expected to look like
    ``path \\t category \\t length``; only the first two fields are used.
    The category string is mapped to an integer label:
    ``'Rising'`` -> 0, ``'Falling'`` -> 1, anything else -> 2.
    """

    def __init__(self, txt_path):
        """Read the index file and store ``[csv_path, label]`` pairs.

        Args:
            txt_path: Path of the tab-separated index file.

        Raises:
            IndexError: If a non-blank line has no tab-separated category.
        """
        # Per-instance list: a class-level list would be shared by (and keep
        # growing across) every SocktData instance.
        self.dataset = []
        # The context manager closes the file even if parsing fails.
        with open(txt_path) as fh:
            for line in fh:
                # Drop the trailing newline before splitting.
                line = line.rstrip('\n')
                if not line.strip():
                    # Blank lines (e.g. at the end of the file) carry no sample.
                    continue
                # Split the record into its tab-separated fields.
                fields = line.split('\t')
                # The category is stored as text, but training needs an
                # integer class index.
                if fields[1] == 'Rising':
                    label = 0
                elif fields[1] == 'Falling':
                    label = 1
                else:
                    label = 2
                self.dataset.append([fields[0], label])

    def __len__(self):
        """Return the number of samples listed in the index file."""
        return len(self.dataset)

    def __getitem__(self, index):
        """Load one sample from disk.

        Args:
            index: Position of the sample in the index file.

        Returns:
            A ``(data_tensor, label)`` tuple where ``data_tensor`` is the CSV
            content as a float tensor with a leading dimension of size 1
            added, and ``label`` is the integer class index.
        """
        data_path, label = self.dataset[index]
        # pandas reads the CSV into a DataFrame.
        # NOTE(review): with the default ``header`` setting the first line of
        # the CSV is consumed as column names, not data - confirm that the
        # sample files really start with a header row.
        data = pd.read_csv(data_path)
        # DataFrame -> numpy array -> torch tensor.
        data_array = np.array(data)
        data_tensor = torch.Tensor(data_array)
        # Add a leading dimension: [rows, cols] -> [1, rows, cols].
        data_tensor = data_tensor.unsqueeze(0)
        print('----------------------getitem--------------------')
        print('------------------打印data_tensor类型-------------')
        print(data_tensor.size())
        return data_tensor, label
# Build the dataset from the index file.
train_set = SocktData(txt_path)
# Pull the first sample to check what the dataset yields.
first_sample = train_set[0]
data, label = first_sample
print('-----------打印第一个数据内容和标签类型-------------')
data_type = type(data)
label_type = type(label)
print('type(data) = ', data_type)
print('type(label) = ', label_type)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51

执行结果：
评价：目前数据集构建好了~ 准备构建卷积神经网络。

二、构建卷积神经网络

源代码

import torch.nn as nn
import torch.nn.functional as F

depth = [4, 8]  # output channels of the first and second convolution layers
data_row = 32  # nominal number of rows per sample
data_col = 9  # number of columns per sample
batch_size = 2


class ConvNet(nn.Module):
    """Two-layer convolutional classifier with three output classes.

    Layout: conv(5x5, pad 2) -> ReLU -> max-pool(2) -> conv(5x5, pad 2)
    -> ReLU -> max-pool(2) -> fc(112 -> 24) -> ReLU -> dropout -> fc(24 -> 3).

    The first fully connected layer takes 112 features, i.e.
    ``depth[1] * 7 * 2``: the padded convolutions keep the spatial size and
    each pooling step halves it (rounding down), so a 31 x 9 input becomes
    15 x 4 and then 7 x 2.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, depth[0], 5, padding=2)
        self.pool = nn.MaxPool2d(2, 2)  # halves rows and columns (floor)
        self.conv2 = nn.Conv2d(depth[0], depth[1], 5, padding=2)
        self.fc1 = nn.Linear(112, 24)
        self.fc2 = nn.Linear(24, 3)

    def forward(self, x):
        """Run the network on a batch.

        Args:
            x: Input tensor of shape ``(batch, 1, 31, 9)``.

        Returns:
            Tensor of shape ``(batch, 3)`` with per-sample log-probabilities.
        """
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        # Flatten per sample. Keeping the batch dimension explicit makes a
        # wrongly sized input fail at fc1 instead of silently regrouping
        # values from different samples (which ``view(-1, 112)`` could do).
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        # Normalise over the class dimension (dim=1). Using dim=0 would
        # normalise across the samples of the batch, coupling their outputs.
        x = F.log_softmax(x, dim=1)
        return x
# NOTE(review): the files are said to contain 32 rows, yet the loaded tensor is
# torch.Size([1, 31, 9]). Most likely pandas.read_csv consumed the first line
# as the header row - confirm against the data files.


"""计算预测正确率的函数，其中predictions是模型给出的一组预测结果，batch_size行num_classes列的矩阵，labels是数据之中的正确的答案"""
def accuracy(predictions,labels):
    """Count how many predictions in a batch match the labels.

    Args:
        predictions: Model output, one row per sample and one column per
            class.
        labels: Correct class index of every sample.

    Returns:
        A ``(right_num, total)`` tuple: the number of correct predictions
        (as a tensor) and the number of labels compared.
    """
    # max over dimension 1 yields (values, indices); the indices are the
    # predicted class of each row.
    _, predicted_classes = predictions.data.max(1)
    # Bring the labels into the same shape as the predictions to compare.
    targets = labels.data.view_as(predicted_classes)
    matches = predicted_classes.eq(targets)
    return matches.sum(), len(labels)

from torch.utils.data import DataLoader

net = ConvNet()

criterion = nn.CrossEntropyLoss()  # cross-entropy loss
optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

record = []  # training accuracy of every epoch
weights = []  # reserved for periodic snapshots of the convolution kernels

# The training loop iterates over batches, so the dataset has to be wrapped
# in a DataLoader; it stacks samples into (batch, 1, rows, cols) tensors and
# turns the integer labels into a tensor.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)

num_epochs = 2
for epoch in range(num_epochs):
    # Enable training behaviour (dropout) once per epoch.
    net.train()
    train_accuracy = []
    for batch_id, (data, label) in enumerate(train_loader):
        output = net(data)  # forward pass
        loss = criterion(output, label)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        accuracies = accuracy(output, label)
        train_accuracy.append(accuracies)

        # Report the epoch and the sample progress separately, and keep the
        # decimals of the loss: '{:.0f}' rounded it to a whole number.
        print('Epoch [{}/{}] Samples [{}/{}] \tLoss: {:.4f}'.format(
            epoch + 1, num_epochs,
            batch_id * batch_size, len(train_loader.dataset),
            loss.item()))

    # Summarise the epoch from the per-batch (correct, total) pairs.
    right_total = sum(int(right) for right, _ in train_accuracy)
    sample_total = sum(count for _, count in train_accuracy)
    epoch_accuracy = right_total / sample_total if sample_total else 0.0
    record.append(epoch_accuracy)
    print('Epoch [{}/{}] \tTrain accuracy: {:.2%}'.format(
        epoch + 1, num_epochs, epoch_accuracy))
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64

执行结果
损失函数打印错误（可能原因：打印语句的格式串使用了 `{:.0f}`，损失值被四舍五入成整数；并且 “Epoch [x/y]” 实际输出的是样本进度而不是轮次）
评价：
需要改进的地方：
（1）全连接层：参数是否定义错误？
（2）损失函数：损失函数记录target和output差别，并且进行纠偏；使得映射相对正确；
（3）样本数量问题：样本数量28是否太小？重复过多是否有影响？
（4）遇到问题，数据列数为9列，如何池化呢？数据都比较重要的情况下，应该不能够填0吧~
（5）简单卷积神经网络就构建好啦~ 还有待改进，明天加油！！！

声明：本文内容由网友自发贡献，不代表【wpsshop博客】立场，版权归原作者所有，本站不承担相应法律责任。如您发现有侵权的内容，请联系我们。转载请注明出处：https://www.wpsshop.cn/w/我家小花儿/article/detail/372477