赞
踩
nn.Module.train()和nn.Module.eval()的作用
.train()之后进入训练模式,例如Dropout、BatchNorm等操作将为训练模式
.eval()之后进入评估模式,例如Dropout、BatchNorm等操作将为评估模式(评估就是测试)
如果模型中没有这些层,那么使用.train()/.eval()和不使用的效果相同
# model.py
from torch import nn


class Tudui(nn.Module):
    """Small CNN that maps a batch of 3x32x32 images to 10 class scores.

    Three conv(5x5, stride 1, padding 2) + max-pool(2) stages keep the
    spatial size through each convolution and halve it in each pooling step
    (32 -> 16 -> 8 -> 4), which is why the first linear layer expects
    64 * 4 * 4 input features.
    """

    def __init__(self):
        super(Tudui, self).__init__()
        # Positional Conv2d arguments are:
        # in_channels, out_channels, kernel_size, stride, padding.
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 32, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 5, 1, 2),
            nn.MaxPool2d(2),
            nn.Flatten(),
            nn.Linear(64 * 4 * 4, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the result."""
        x = self.model(x)
        return x
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# Import the model explicitly instead of `from model import *`.
from model import Tudui

# Download the datasets
train_data = torchvision.datasets.CIFAR10(root="../data", train=True,
                                          transform=torchvision.transforms.ToTensor(),
                                          download=True)
test_data = torchvision.datasets.CIFAR10(root="../data", train=False,
                                         transform=torchvision.transforms.ToTensor(),
                                         download=True)

# Dataset sizes
train_data_size = len(train_data)
test_data_size = len(test_data)
print("训练数据集的长度为:{}".format(train_data_size))
print("测试数据集的长度为:{}".format(test_data_size))

# Wrap the datasets in DataLoaders
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

# Build the network
tudui = Tudui()

# Loss function
loss_fn = nn.CrossEntropyLoss()

# Optimizer
optimizer = torch.optim.SGD(tudui.parameters(), lr=1e-2)

# TensorBoard writer
writer = SummaryWriter("../logs_train")

total_train_step = 0  # number of optimizer steps taken so far
total_test_step = 0   # number of completed evaluation passes
epoch = 10            # number of passes over the training set

for i in range(epoch):
    print("-------第 {} 轮训练开始-------".format(i+1))

    # ---- Training phase ----
    # Put Dropout/BatchNorm-style layers (if any) into training behaviour.
    tudui.train()
    for data in train_dataloader:
        imgs, targets = data
        outputs = tudui(imgs)
        loss = loss_fn(outputs, targets)

        # Clear old gradients, back-propagate, then update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_step = total_train_step + 1
        # Log only every 100 steps to keep the console and event file small.
        if total_train_step % 100 == 0:
            print("训练次数:{}, Loss: {}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    # ---- Evaluation phase ----
    tudui.eval()
    total_test_loss = 0  # sum of per-batch losses over the whole test set
    total_accuracy = 0   # count of correctly classified test samples
    # No gradients are needed while evaluating.
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            outputs = tudui(imgs)
            loss = loss_fn(outputs, targets)
            total_test_loss = total_test_loss + loss.item()
            # argmax(1) picks the highest-scoring class for each sample.
            accuracy = (outputs.argmax(1) == targets).sum()
            total_accuracy = total_accuracy + accuracy

    print("整体测试集上的Loss: {}".format(total_test_loss))
    print("整体测试集上的正确率: {}".format(total_accuracy/test_data_size))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    writer.add_scalar("test_accuracy", total_accuracy/test_data_size, total_test_step)
    total_test_step = total_test_step + 1

    # Save only the parameters (state_dict) after every epoch.
    torch.save(tudui.state_dict(), "tudui_{}.pth".format(i))
    print("模型已保存")

writer.close()
(重要)opencv PIL SummaryWriter读写图片的形状和顺序
from torch.utils.tensorboard import SummaryWriter
from PIL import Image
import numpy as np


def _load_hwc(path):
    """Read an image file into a numpy array and close the file afterwards."""
    with Image.open(path) as img:
        return np.array(img)


# Create the writer; event files go to the "logs" directory
writer = SummaryWriter("logs")

# Write scalars
for i in range(100):
    writer.add_scalar("y = x ** 2 + 1", i ** 2 + 1, i)
    writer.add_scalar("y = x", i, i)

# Write images (feature maps can be logged the same way).
# The arrays are passed with dataformats="HWC" because add_image otherwise
# expects channel-first data.
# Forward slashes are used so the paths work on every OS, not only Windows.
writer.add_image("img", _load_hwc("hymenoptera/train/ants/0013035.jpg"), 1, dataformats="HWC")
writer.add_image("img", _load_hwc("hymenoptera/train/ants/5650366_e22b7e1065.jpg"), 2, dataformats="HWC")

# Close the writer
writer.close()

# View the logs by running this in a terminal:
#     tensorboard --logdir=??? [--port=???]
# For example, with the default port 6006:
#     tensorboard --logdir=logs
# With an explicit port 6007:
#     tensorboard --logdir=logs --port=6007
# Read data back from TensorBoard event files
from tensorboard.backend.event_processing import event_accumulator

# Load the logs; the argument is the directory that holds the event files
ea = event_accumulator.EventAccumulator("logs")
ea.Reload()

# Titles (tags) of every scalar series stored in the logs
keys = ea.scalars.Keys()

# Fetch the scalar series that belongs to the first tag
scalarsTable = ea.scalars.Items(keys[0])

# Print the first 3 records
print(scalarsTable[0:3])
# [ScalarEvent(wall_time=1653532538.0441616, step=0, value=1.0),
#  ScalarEvent(wall_time=1653532539.0454786, step=1, value=2.0),
#  ScalarEvent(wall_time=1653532540.0469015, step=2, value=5.0)]

# Print step and value of the first 3 records
for i in scalarsTable[0:3]:
    print(i.step, i.value)
# 0 1.0
# 1 2.0
# 2 5.0
(重要)opencv PIL SummaryWriter读写图片的形状和顺序
from PIL import Image
import numpy as np
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

writer = SummaryWriter("logs")

# ToTensor
# Converts a numpy array or PIL Image of shape HWC, range [0, 255]
# into a tensor of shape CHW, range [0.0, 1.0].
img_path = "hymenoptera/train/ants/0013035.jpg"
trans_totensor = transforms.ToTensor()

img1 = Image.open(img_path)
img_tensor1 = trans_totensor(img1)
writer.add_image("ToTensor", img_tensor1, 1)

img_tensor2 = trans_totensor(np.array(img1))
writer.add_image("ToTensor", img_tensor2, 2)

# Normalize
# Per channel: subtract the mean, then divide by the standard deviation
# (not the variance).
trans_norm = transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
img_norm = trans_norm(img_tensor1)
writer.add_image("Normalize", img_norm, 1)

# Resize: change the image size
trans_resize = transforms.Resize((512, 512))
img_resize = trans_resize(img1)
img_tensor = trans_totensor(img_resize)
writer.add_image("Resize", img_tensor, 1)

# Compose: chain several transforms
trans_pose = transforms.Compose([transforms.Resize((512, 512)),
                                 transforms.ToTensor()])
img_tensor = trans_pose(img1)
writer.add_image("Compose", img_tensor, 1)

# RandomCrop: crop a region of the given (height, width) at a random position
# NOTE(review): the crop is 500 x 1000 (H x W); confirm the sample image is at
# least that large, or pass pad_if_needed=True.
trans_pose2 = transforms.Compose([transforms.RandomCrop((500, 1000)),
                                  transforms.ToTensor()])
for i in range(10):
    img_crop = trans_pose2(img1)
    writer.add_image("RandomCrop", img_crop, i)

writer.close()
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os
from torchvision import transforms
from torch.utils.tensorboard import SummaryWriter


# Uses the hymenoptera dataset as the example: ants -> label 0, bees -> label 1
class MyData(Dataset):
    """Image-folder dataset laid out as ``<data_path>/<train|val>/<ants|bees>/*``.

    Args:
        data_path: root directory of the dataset.
        train: read the ``train`` split when True, otherwise the ``val`` split.
        transform: optional callable applied to each loaded PIL image.
        target_transform: optional callable applied to each integer label.
    """

    # (sub-directory, label) pairs, in the order the samples are collected.
    _CLASSES = (("ants", 0), ("bees", 1))

    def __init__(self, data_path, train=False, transform=None, target_transform=None):
        super(MyData, self).__init__()
        self.transform = transform
        self.target_transform = target_transform
        self.inputs = []  # image file paths
        self.labels = []  # integer label for the path at the same index

        split = "train" if train else "val"
        for class_name, label in self._CLASSES:
            class_dir = os.path.join(data_path, split, class_name)
            # os.listdir order is arbitrary; sort it so indices are reproducible.
            for file_name in sorted(os.listdir(class_dir)):
                self.inputs.append(os.path.join(class_dir, file_name))
                self.labels.append(label)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx`` after the optional transforms."""
        img_name = self.inputs[idx]
        label = self.labels[idx]
        img = Image.open(img_name)
        # Some files (e.g. GIF rather than JPG/PNG) are not in RGB mode; convert them.
        if img.mode != "RGB":
            img = img.convert("RGB")
        if self.transform is not None:
            img = self.transform(img)
        if self.target_transform is not None:
            label = self.target_transform(label)
        return img, label

    def __len__(self):
        # inputs and labels are always appended together, so either length works.
        return len(self.inputs)


dataset_transform = transforms.Compose([transforms.Resize((512, 512)),
                                        transforms.ToTensor()])


def one_hot(x, class_count=2):
    """Return row ``x`` of a ``class_count`` x ``class_count`` identity matrix."""
    return torch.eye(class_count)[x, :]


train_loader = DataLoader(dataset=MyData("hymenoptera", True,
                                         transform=dataset_transform,
                                         target_transform=one_hot),
                          batch_size=64, shuffle=True, num_workers=0, drop_last=True)
test_loader = DataLoader(dataset=MyData("hymenoptera", False,
                                        transform=dataset_transform,
                                        target_transform=one_hot),
                         batch_size=64, shuffle=True, num_workers=0, drop_last=True)

# Log every batch of images to TensorBoard, one step per batch.
writer = SummaryWriter("logs")
for i, data in enumerate(train_loader):
    inputs, labels = data
    writer.add_images("train", inputs, i)

for i, data in enumerate(test_loader):
    inputs, labels = data
    writer.add_images("test", inputs, i)

writer.close()
import torch  # explicit: `torch` was previously bound only as a side effect of the next import
import torch.nn.functional
from torch.utils.data import DataLoader
from torchvision import transforms, datasets

dataset_transform = transforms.Compose([transforms.ToTensor()])


def one_hot(x, class_count=10):
    """Return row ``x`` of a ``class_count`` x ``class_count`` identity matrix."""
    return torch.eye(class_count)[x, :]


test_set = datasets.CIFAR10(root="./dataset", train=False,
                            transform=dataset_transform,
                            target_transform=one_hot, download=True)

# Arguments: dataset, batch_size, whether to shuffle, number of worker
# processes, whether to drop the last incomplete batch
test_loader = DataLoader(dataset=test_set, batch_size=64, shuffle=True,
                         num_workers=0, drop_last=True)

# Two ways to pull batches: iterate over the whole loader ...
for i in test_loader:
    pass

# ... or fetch a single batch from a fresh iterator.
next(iter(test_loader))
F.conv2d,需要自己定义和初始化weight和bias的Tensor传入
nn.Conv2d,只需要指定配置参数即可,故该种较为常用
import torch
from torch.nn.parameter import Parameter
from torch import nn
from torch.nn import functional as F


class Net(nn.Module):
    """Single convolution implemented with the functional API.

    With ``F.conv2d`` the weight and bias tensors have to be created and
    initialised by hand and passed in on every call.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Weight layout: (out_channels, in_channels, kH, kW)
        self.weight = Parameter(torch.randn(6, 3, 5, 5))
        # Bias layout: (out_channels,)
        self.bias = Parameter(torch.zeros(6))

    def forward(self, x):
        """Convolve ``x`` with stride (2, 3) and zero padding (4, 6)."""
        x = F.conv2d(x, self.weight, self.bias, stride=(2, 3), padding=(4, 6))
        return x


# Input layout: (minibatch, in_channels, iH, iW)
# Named `inputs` rather than `input` so the builtin is not shadowed.
inputs = torch.randn(64, 3, 256, 256)
net = Net()
output = net(inputs)
# torch.Size([64, 6, 130, 88])
#   H: (256 + 2*4 - 5) // 2 + 1 = 130
#   W: (256 + 2*6 - 5) // 3 + 1 = 88
print(output.shape)
print(output)
import torch
from torch.nn.parameter import Parameter
from torch import nn
from torch.nn import functional as F


# NOTE(review): this snippet repeats the F.conv2d example verbatim. The text
# above it also describes an nn.Conv2d variant, which appears to be missing
# from the page. Confirm against the original article.
class Net(nn.Module):
    """Convolution layer built on ``F.conv2d`` with hand-made parameters."""

    def __init__(self):
        super(Net, self).__init__()
        # Weight layout: (out_channels, in_channels, kH, kW)
        self.weight = Parameter(torch.randn(6, 3, 5, 5))
        # Bias layout: (out_channels,)
        self.bias = Parameter(torch.zeros(6))

    def forward(self, x):
        """Apply the convolution with stride (2, 3) and padding (4, 6)."""
        x = F.conv2d(x, self.weight, self.bias, stride=(2, 3), padding=(4, 6))
        return x


# Input layout: (minibatch, in_channels, iH, iW)
# Named `inputs` rather than `input` so the builtin is not shadowed.
inputs = torch.randn(64, 3, 256, 256)
net = Net()
output = net(inputs)
# torch.Size([64, 6, 130, 88])
print(output.shape)
print(output)
import torch  # required by torch.ones below; the snippet never imported it
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.tensorboard import SummaryWriter


class Tudui(nn.Module):
    """CNN for 3x32x32 inputs, written with ``Sequential``; outputs 10 scores.

    Each conv keeps the spatial size (5x5 kernel, padding 2) and each pooling
    step halves it, so ``Flatten`` yields 64 * 4 * 4 = 1024 features.
    """

    def __init__(self):
        super(Tudui, self).__init__()
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10),
        )

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the result."""
        x = self.model1(x)
        return x


tudui = Tudui()
# Dummy batch used to check that the layer shapes fit together.
# Named `inputs` rather than `input` so the builtin is not shadowed.
inputs = torch.ones((64, 3, 32, 32))
output = tudui(inputs)
import torchvision
from torch import nn

# NOTE: newer torchvision releases deprecate `pretrained=` in favour of
# `weights=`; the calls below keep the older spelling used by this tutorial.

# Build the architecture only, without loading pretrained parameters
vgg16_false = torchvision.models.vgg16(pretrained=False)
# Build the architecture, load the pretrained parameters and show download progress
vgg16_true = torchvision.models.vgg16(pretrained=True, progress=True)
# vgg16_false and vgg16_true are identical except that one holds freshly
# initialised parameters and the other pretrained ones.

# Print the model structure
# print(vgg16_true)
# VGG(
#   ...
#   (classifier): Sequential(
#     (0): Linear(in_features=25088, out_features=4096, bias=True)
#     (1): ReLU(inplace=True)
#     (2): Dropout(p=0.5, inplace=False)
#     (3): Linear(in_features=4096, out_features=4096, bias=True)
#     (4): ReLU(inplace=True)
#     (5): Dropout(p=0.5, inplace=False)
#     (6): Linear(in_features=4096, out_features=1000, bias=True)
#   )
# )

# 1. Adding layers
# 1.1 Add a linear layer at the outermost level
# NOTE(review): this registers the layer on the VGG module, but VGG.forward
# presumably does not call it. Confirm before relying on its output.
# vgg16_true.add_module('add_linear', nn.Linear(1000, 10))
# print(vgg16_true)
# VGG(
#   ...
#   (classifier): Sequential(
#     (0): Linear(in_features=25088, out_features=4096, bias=True)
#     (1): ReLU(inplace=True)
#     (2): Dropout(p=0.5, inplace=False)
#     (3): Linear(in_features=4096, out_features=4096, bias=True)
#     (4): ReLU(inplace=True)
#     (5): Dropout(p=0.5, inplace=False)
#     (6): Linear(in_features=4096, out_features=1000, bias=True)
#   )
#   (add_linear): Linear(in_features=1000, out_features=10, bias=True)
# )

# 1.2 Add a linear layer inside (classifier)
# vgg16_true.classifier.add_module('add_linear', nn.Linear(1000, 10))
# print(vgg16_true)
# VGG(
#   ...
#   (classifier): Sequential(
#     (0): Linear(in_features=25088, out_features=4096, bias=True)
#     (1): ReLU(inplace=True)
#     (2): Dropout(p=0.5, inplace=False)
#     (3): Linear(in_features=4096, out_features=4096, bias=True)
#     (4): ReLU(inplace=True)
#     (5): Dropout(p=0.5, inplace=False)
#     (6): Linear(in_features=4096, out_features=1000, bias=True)
#     (add_linear): Linear(in_features=1000, out_features=10, bias=True)
#   )
# )

# 2. Modifying a layer
# vgg16_true.classifier[6] = nn.Linear(4096, 10)
# print(vgg16_true)
# VGG(
#   ...
#   (classifier): Sequential(
#     (0): Linear(in_features=25088, out_features=4096, bias=True)
#     (1): ReLU(inplace=True)
#     (2): Dropout(p=0.5, inplace=False)
#     (3): Linear(in_features=4096, out_features=4096, bias=True)
#     (4): ReLU(inplace=True)
#     (5): Dropout(p=0.5, inplace=False)
#     (6): Linear(in_features=4096, out_features=10, bias=True)   <- this line changed
#   )
# )

# 3. Removing layers
# Replace the layers to remove with Identity (output equals input; the layer
# has no parameters).
# vgg16_true.classifier[4] = nn.Identity()
# vgg16_true.classifier[5] = nn.Identity()
# print(vgg16_true)
# VGG(
#   ...
#   (classifier): Sequential(
#     (0): Linear(in_features=25088, out_features=4096, bias=True)
#     (1): ReLU(inplace=True)
#     (2): Dropout(p=0.5, inplace=False)
#     (3): Linear(in_features=4096, out_features=4096, bias=True)
#     (4): Identity()
#     (5): Identity()
#     (6): Linear(in_features=4096, out_features=1000, bias=True)
#   )
# )
保存 模型结构+模型参数
陷阱:对于自己定义的模型,加载时需要能够找到模型的定义(在load()所在文件中定义,或者import模型定义文件)
import torch
import torchvision
# Method 1: write the whole module object (architecture + parameters) to disk.
vgg16 = torchvision.models.vgg16(pretrained=False)
torch.save(obj=vgg16, f="vgg16_method1.pth")
import torch

# Load a checkpoint that stores the whole model object (method 1).
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
# NOTE(review): recent PyTorch releases default to weights_only=True, which
# rejects whole-model pickles; weights_only=False may then be needed. Confirm
# for the installed version.
model = torch.load("vgg16_method1.pth")
print(model)
# VGG(
#   ...
#   (classifier): Sequential(
#     (0): Linear(in_features=25088, out_features=4096, bias=True)
#     (1): ReLU(inplace=True)
#     (2): Dropout(p=0.5, inplace=False)
#     (3): Linear(in_features=4096, out_features=4096, bias=True)
#     (4): ReLU(inplace=True)
#     (5): Dropout(p=0.5, inplace=False)
#     (6): Linear(in_features=4096, out_features=1000, bias=True)
#   )
# )
import torch
from torch import nn


class Net(nn.Module):
    """Minimal custom model: one 3x3 convolution from 3 to 64 channels."""

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3)

    def forward(self, x):
        """Apply the convolution to ``x`` and return the result."""
        x = self.conv1(x)
        return x


net = Net()
# Method 1: save the whole model object. Loading it later requires the
# definition of Net to be available in the loading script.
torch.save(net, "net_method1.pth")
import torch
from torch import nn

# Loading before the class is defined fails:
# model = torch.load("net_method1.pth")
# AttributeError: Can't get attribute 'Net' on <module '__main__' from 'E:/Z-D盘桌面/Learn/A小土堆/Code/test2.py'>


class Net(nn.Module):
    """Same definition as used when saving; needed to unpickle the model."""

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3)

    def forward(self, x):
        """Apply the convolution to ``x`` and return the result."""
        x = self.conv1(x)
        return x


# With Net defined (or imported) the whole-model checkpoint loads.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
model = torch.load("net_method1.pth")
print(model)
# Net(
#   (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1))
# )
# Saved this way, the model can be loaded onto cpu or gpu regardless of
# whether it was on cpu or gpu when it was saved.
import torch
from torch import nn


class MyNet(nn.Module):
    """Single linear layer mapping 100 features to 10 outputs."""

    def __init__(self):
        super(MyNet, self).__init__()
        self.fc = nn.Linear(100, 10)

    def forward(self, input):
        """Apply the linear layer to ``input`` and return the result."""
        output = self.fc(input)
        return output


data = torch.randn(64, 100)

# Case 1: saved on cpu, loaded on cpu, data on cpu
# model = torch.load("MyNet_10_cpu.pth")
# print(torch.argmax(model(data), dim=1))

# Case 2: saved on gpu, loaded on gpu, data on cpu -> move the data to gpu
# model = torch.load("MyNet_10_gpu.pth")
# print(torch.argmax(model(data.cuda()), dim=1))

# Case 3: saved on gpu, loaded on gpu, data on cpu -> move the model to cpu
# Option 1: move after loading
# model = torch.load("MyNet_10_gpu.pth")
# model = model.cpu()
# print(torch.argmax(model(data), dim=1))
# Option 2: map to cpu while loading
# model = torch.load("MyNet_10_gpu.pth", map_location=torch.device('cpu'))
# print(torch.argmax(model(data), dim=1))
推荐的原因是其保存的文件比较小
仅保存参数(以字典的方式,不保存模型)
import torch
import torchvision
# Method 2: save only the parameters (the state_dict mapping), not the module object.
vgg16 = torchvision.models.vgg16(pretrained=False)
vgg16_params = vgg16.state_dict()
torch.save(vgg16_params, "vgg16_method2.pth")
import torch
import torchvision
# Rebuild the architecture first, then copy the saved parameters into it.
vgg16 = torchvision.models.vgg16(pretrained=False)
saved_params = torch.load("vgg16_method2.pth")
vgg16.load_state_dict(saved_params)
# Saved this way, the model can be loaded onto cpu or gpu regardless of
# whether it was on cpu or gpu when it was saved.
import torch
from torch import nn


class MyNet(nn.Module):
    """Single linear layer mapping 100 features to 10 outputs."""

    def __init__(self):
        super(MyNet, self).__init__()
        self.fc = nn.Linear(100, 10)

    def forward(self, input):
        """Apply the linear layer to ``input`` and return the result."""
        output = self.fc(input)
        return output


data = torch.randn(64, 100)

# cpu checkpoint -> cpu model, data on cpu
# model = MyNet()
# model.load_state_dict(torch.load("MyNet_10_cpu.pth"))
# print(torch.argmax(model(data), dim=1))

# gpu checkpoint -> cpu model, data on cpu
# model = MyNet()
# model.load_state_dict(torch.load("MyNet_10_gpu.pth"))
# print(torch.argmax(model(data), dim=1))

# cpu checkpoint -> gpu model, data moved to gpu
# model = MyNet().cuda()
# model.load_state_dict(torch.load("MyNet_10_cpu.pth"))
# print(torch.argmax(model(data.cuda()), dim=1))

# gpu checkpoint -> gpu model, data moved to gpu
# model = MyNet().cuda()
# model.load_state_dict(torch.load("MyNet_10_gpu.pth"))
# print(torch.argmax(model(data.cuda()), dim=1))
需要转移到cuda上的部分包括:网络模型;数据(输入,标注);
nn.CrossEntropyLoss()和F.cross_entropy(),因为(在未传入weight张量的情况下)不包含任何的参数,故不用移动到cuda
特别注意事项
import torch
from torch import nn
from torch.nn import Parameter


# The variants below deliberately reuse the name MyNet; each one shows a
# different way of holding extra state. Only the last definition stays bound.

# LayerNorm owns Parameters. Assigning it to the member self.layerNorm binds
# those parameters to MyNet, so myNet.cuda() moves them to cuda automatically.
class MyNet(nn.Module):
    def __init__(self):
        super(MyNet, self).__init__()
        self.fc = nn.Linear(100, 10)
        self.layerNorm = nn.LayerNorm(10)

    def forward(self, input):
        output = self.fc(input)
        return self.layerNorm(output)


# If the layer is not a member, its parameters do not follow myNet.cuda()
# and have to be moved by hand.
class MyNet(nn.Module):
    def __init__(self):
        super(MyNet, self).__init__()
        self.fc = nn.Linear(100, 10)

    def forward(self, input):
        output = self.fc(input)
        return nn.LayerNorm(10).cuda()(output)


# A plain tensor is not bound to the model just by being created or assigned
# as a member, so it needs a manual .cuda().
# A Parameter can be bound to the model through a member and needs no manual .cuda().
# One special case: a scalar tensor needs no handling.
# NOTE: a plain tensor can also be attached with self.register_buffer(...),
# after which it does follow .cuda()/.to().
class MyNet(nn.Module):
    def __init__(self):
        super(MyNet, self).__init__()
        self.fc = nn.Linear(100, 10)

    def forward(self, input):
        w = torch.randn(64, 10)
        output = self.fc(input)
        return output * w.cuda()


class MyNet(nn.Module):
    def __init__(self):
        super(MyNet, self).__init__()
        self.fc = nn.Linear(100, 10)
        self.w = torch.randn(64, 10).cuda()

    def forward(self, input):
        output = self.fc(input)
        return output * self.w


class MyNet(nn.Module):
    def __init__(self):
        super(MyNet, self).__init__()
        self.fc = nn.Linear(100, 10)
        self.w = Parameter(torch.randn(64, 10))

    def forward(self, input):
        output = self.fc(input)
        return output * self.w
转移到cuda上有两种方式
import torch
from torch import nn


class MyNet(nn.Module):
    """Linear layer (100 -> 10) followed by LayerNorm over the 10 outputs."""

    def __init__(self):
        super(MyNet, self).__init__()
        self.fc = nn.Linear(100, 10)
        self.layerNorm = nn.LayerNorm(10)

    def forward(self, input):
        """Apply the linear layer, then layer normalisation."""
        output = self.fc(input)
        return self.layerNorm(output)


# A freshly built model lives on the cpu
myNet = MyNet()

# Option 1: .cuda()
if torch.cuda.is_available():
    myNet = myNet.cuda()  # move to cuda

# Option 2: .to(device=...)
if torch.cuda.is_available():
    myNet = myNet.to(device='cuda')
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。