赞
踩
下列代码对原始数据集进行了处理,方便了对数据集的访问
import os

import torch
from PIL import Image
from torch.utils.data import Dataset


class MyData(Dataset):
    """Image dataset whose label is the name of the folder holding the images.

    Args:
        root_dir: Directory that contains one sub-folder per label.
        label_dir: Name of the sub-folder to read; it doubles as the label
            returned for every image in that folder.
    """

    def __init__(self, root_dir, label_dir):
        self.root_dir = root_dir
        self.label_dir = label_dir
        self.path = os.path.join(self.root_dir, self.label_dir)
        # os.listdir returns entries in arbitrary order; sorting makes an
        # index refer to the same file on every run and every platform.
        self.img_path = sorted(os.listdir(self.path))

    def __getitem__(self, idex):
        """Return ``(PIL image, label)`` for the file at position ``idex``."""
        img_name = self.img_path[idex]
        img_item_path = os.path.join(self.path, img_name)
        img = Image.open(img_item_path)
        label = self.label_dir
        return img, label

    def __len__(self):
        """Return the number of files found in the label folder."""
        return len(self.img_path)


root_dir = "dataset/train"
positive_label_dir = "positive"
negative_label_dir = "negative"
positive_dataset = MyData(root_dir, positive_label_dir)
negative_dataset = MyData(root_dir, negative_label_dir)

# Adding two Dataset objects concatenates them into a single dataset.
train_dataset = positive_dataset + negative_dataset
上述情况,label是体现在文件夹的名称内,而使用最多的情况通常是这样的

positive_label文件夹内包含了与图片同名的txt文件,打开txt文件就是该图片的label,实现给图片创建对应txt的代码如下:
import os

# Write one "<image stem>.txt" file per image; each file contains the label,
# which is simply the name of the folder the image lives in.
root_dir = "dataset/train"
target_dir = "positive"
img_path = os.listdir(os.path.join(root_dir, target_dir))
label = target_dir
out_dir = "positive_label"

# open(..., "w") does not create missing directories, so make sure the
# output folder exists before writing into it.
os.makedirs(os.path.join(root_dir, out_dir), exist_ok=True)

for i in img_path:
    # splitext strips whatever extension the file has, not only ".png".
    file_name = os.path.splitext(i)[0]
    txt_path = os.path.join(root_dir, out_dir, "{}.txt".format(file_name))
    with open(txt_path, "w", encoding="utf-8") as f:
        f.write(label)
tensorboard我理解为一个看板,对结果进行可视化展示
from torch.utils.tensorboard import SummaryWriter

# Event files are stored in the "logs" folder. Leaving the ``with`` block
# closes the writer.
with SummaryWriter("logs") as writer:
    # add_scalar(tag, scalar_value, global_step): the value is the y axis and
    # the step is the x axis. Use a different tag for each curve, otherwise
    # several curves are drawn on top of each other in the same chart.
    for step in range(0, 200, 2):
        writer.add_scalar("y=0.5x", step // 2, step)

# To view the event files, run in a terminal:
#     tensorboard --logdir=logs
# To serve on a different port:
#     tensorboard --logdir=logs --port=6007
import numpy as np
from PIL import Image
from torch.utils.tensorboard import SummaryWriter

image_path = "dataset/train/positive/1 (6).png"

# Event files (images etc.) are stored in the "logs" folder.
with SummaryWriter("logs") as writer:
    img_PIL = Image.open(image_path)
    # add_image does not accept a PIL image, so convert it to a numpy array.
    img_array = np.array(img_PIL)
    # An array built from a PIL image is laid out as (H, W, C), which has to
    # be declared through ``dataformats``. The third argument is the
    # global_step; logging under successive steps lets TensorBoard show how
    # the image changes from step to step.
    writer.add_image("test", img_array, global_step=2, dataformats="HWC")

# To view the event files, run in a terminal:
#     tensorboard --logdir=logs
# To serve on a different port:
#     tensorboard --logdir=logs --port=6007
transform可以理解为一个工具箱,里面有很多工具(class类)
import cv2
from PIL import Image
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

# ``transforms`` is a toolbox of classes. This script uses ToTensor to show
#   1. how a transform is used (instantiate it, then call the instance), and
#   2. how to turn an image into the tensor type the rest of PyTorch expects.

img_path = "dataset/train/positive/1 (1).png"

# PIL route: open the file, then convert the PIL image to a tensor.
# NOTE(review): the author's note reports this file as an RGBA PNG of size
# 96x160 -- confirm against the actual data.
img = Image.open(img_path)
tensor_trans = transforms.ToTensor()
tensor_img = tensor_trans(img)

# OpenCV route: imread returns a numpy array (not used further below).
cv_img = cv2.imread(img_path)

with SummaryWriter("logs") as writer:
    writer.add_image("Tensor_img", tensor_img, 1)
下面介绍python中类的概念,关于__call__函数的用法
class Person:
    """Minimal class contrasting ``__call__`` with an ordinary method."""

    def __call__(self, name):
        """Print a greeting; runs when the instance itself is called."""
        print("__call__hello " + name)

    def hello(self, name):
        """Print a greeting; has to be invoked by name through the dot syntax."""
        print("hello " + name)


person = Person()
# Calling the instance directly forwards the argument to __call__.
person("zyj")
# A regular method must be selected explicitly with ".hello".
person.hello("zy")
下面介绍了transforms中常见的类的用法
from PIL import Image
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

writer = SummaryWriter("logs")
# Normalize below is given three-channel statistics, so force the image to
# RGB in case the file is stored as grayscale or CMYK.
img = Image.open("dataset/train/positive/1 (14).jpg").convert("RGB")

# --- ToTensor: PIL image -> float tensor with values in [0, 1] ---
trans_totensor = transforms.ToTensor()
img_tensor = trans_totensor(img)
writer.add_image("To_Tensor", img_tensor)

# --- Normalize ---
# First list: per-channel mean; second list: per-channel standard deviation.
#     output[channel] = (input[channel] - mean[channel]) / std[channel]
# With mean = std = 0.5 an input range of [0, 1] is mapped to [-1, 1].
trans_norm = transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
img_norm = trans_norm(img_tensor)
writer.add_image("Normalize", img_norm, 1)

# --- Resize ---
# A (height, width) pair resizes to exactly that size; a single int matches
# the shorter edge of the image to that number instead.
# NOTE(review): calling Resize on a tensor needs a torchvision version that
# supports tensor inputs -- confirm for the installed version.
print(img.size)
trans_resize = transforms.Resize((512, 512))
img_resize = trans_resize(img_tensor)
writer.add_image("Resize", img_resize)

# --- RandomCrop, chained with ToTensor through Compose ---
# NOTE(review): assumes the image is at least 512 px on each side -- confirm.
trans_random = transforms.RandomCrop(512)
trans_compose2 = transforms.Compose([trans_random, trans_totensor])
for i in range(10):
    img_crop = trans_compose2(img)
    writer.add_image("Randcrop", img_crop, i)

writer.close()

# A DataLoader fetches samples from a Dataset and groups them into batches.
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

test_data = torchvision.datasets.CIFAR10(
    "./dataset",
    train=False,
    transform=torchvision.transforms.ToTensor(),
)

# batch_size : how many samples are drawn per batch.
# shuffle    : whether the sample order is reshuffled on every pass.
# num_workers: number of loader processes; 0 means load in the main process.
# drop_last  : whether to discard the final batch when the dataset size is
#              not divisible by batch_size.
test_loader = DataLoader(
    dataset=test_data,
    batch_size=4,
    shuffle=True,
    num_workers=0,
    drop_last=False,
)

# First image of the test set and its target.
img, target = test_data[0]
print(img.shape)
print(target)

writer = SummaryWriter("dataloader")
# Per the author's notes a batch looks like imgs.shape == [4, 3, 32, 32] and
# targets == tensor([0, 8, 6, 5]).
for step, (imgs, targets) in enumerate(test_loader):
    writer.add_images("test_data", imgs, step)
writer.close()


1d,2d,3d表示维度,图片是二维的

input需要有四个信息参数,(minibatch,in_channels,iH,iW)
因此需要利用torch.reshape更改尺寸
import torch
import torch.nn.functional as F

input = torch.tensor(
    [
        [1, 2, 0, 3, 1],
        [0, 1, 2, 3, 1],
        [1, 2, 1, 0, 1],
        [5, 2, 3, 1, 1],
        [2, 1, 0, 1, 1],
    ]
)
kernel = torch.tensor(
    [
        [1, 2, 1],
        [0, 1, 0],
        [2, 1, 0],
    ]
)

# F.conv2d expects 4-D operands: (minibatch, in_channels, H, W) for the input
# and (out_channels, in_channels, kH, kW) for the weight.
input = input.reshape(1, 1, 5, 5)
kernel = kernel.reshape(1, 1, 3, 3)

output = F.conv2d(input, kernel, stride=1)
print(output)
# The result is 4-D because of the reshape above:
# tensor([[[[10, 12, 12],
#           [18, 16, 16],
#           [13,  9,  4]]]])

# padding=1 surrounds the image with one ring of zeros before convolving, so
# the output keeps the 5x5 spatial size of the input.
output2 = F.conv2d(input, kernel, stride=1, padding=1)
print(output2)
# tensor([[[[ 1,  3,  4, 10,  8],
#           [ 5, 10, 12, 12,  7],
#           [ 7, 18, 16, 16,  9],
#           [11, 13,  9,  4,  6],
#           [14, 13,  9,  7,  4]]]])
上下两个区别是什么?


只需要指定kernel_size(卷积核的尺寸);卷积核内部的具体数值不需要给出,会在训练过程中自动调整
in_channel很好理解,就是图片的通道数
out_channel需要自己设置,它决定了卷积核的个数:例如in_channel=1、out_channel=2时,会用两个卷积核分别对输入做卷积,得到两个输出通道
import torch
import torchvision
from torch import nn
from torch.nn import Conv2d
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10(
    "./dataset",
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=False,
)
dataloader = DataLoader(dataset, batch_size=64)


class ZYJ(nn.Module):
    """A single 3x3 convolution that maps 3 input channels to 6 output channels."""

    def __init__(self):
        super().__init__()
        self.conv1 = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        """Apply the convolution to ``x`` and return the result."""
        return self.conv1(x)


zyj = ZYJ()
print(zyj)
# Prints:
# ZYJ(
#   (conv1): Conv2d(3, 6, kernel_size=(3, 3), stride=(1, 1))
# )

writer = SummaryWriter("juanji")
for step, (imgs, targets) in enumerate(dataloader):
    output = zyj(imgs)
    # Per the author's notes, a batch has imgs.shape == [64, 3, 32, 32] and
    # output.shape == [64, 6, 30, 30].

    # Use add_images (plural) for a batch; add_image fails here with
    # "AssertionError: size of input tensor and input format are different."
    writer.add_images("input", imgs, step)

    # A 6-channel tensor cannot be rendered as an image. As a rough
    # workaround, fold the extra channels into the batch dimension; -1 lets
    # reshape work out the resulting batch size.
    output = torch.reshape(output, (-1, 3, 30, 30))
    writer.add_images("output", output, step)
writer.close()


上图为vgg16架构
1->2经过了一次卷积+函数激活操作,前后尺寸不变,说明对padding进行了调试,input_channel = 3, output_channel = 64
具体调试(计算方法)如下图

1.理解池化


参数:

(stride的默认值是核的尺寸)

dilation 空洞卷积。如图

ceil_mode可以选择Floor 和 Ceiling模式,默认为False(Floor)模式

计算公式

import torch
from torch import nn
from torch.nn import MaxPool2d

# dtype must be floating point; with the default integer dtype the author hit
# the error: "max_pool2d" not implemented for 'Long'.
input = torch.tensor(
    [
        [1, 2, 0, 3, 1],
        [0, 1, 2, 3, 1],
        [1, 2, 1, 0, 0],
        [5, 2, 3, 1, 1],
        [2, 1, 0, 1, 1],
    ],
    dtype=torch.float32,
)
# Reshape to 4-D with one channel and a 5x5 image; -1 infers the batch size.
input = torch.reshape(input, (-1, 1, 5, 5))


class ZYJ(nn.Module):
    """Wraps one 3x3 max-pooling layer that runs in ceil mode."""

    def __init__(self):
        super().__init__()
        # ceil_mode=True keeps the partial windows at the right/bottom border.
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=True)

    def forward(self, input):
        """Return the max-pooled version of ``input``."""
        return self.maxpool1(input)


zyj = ZYJ()
output = zyj(input)
print(output)
ReLU函数

import torch
from torch import nn
from torch.nn import ReLU

input = torch.tensor([[1, -0.5], [-1, 3]])
# Reshape to (batch, channel, H, W). The result must be stored back in
# ``input`` so that the reshaped tensor is what gets fed to the network.
input = torch.reshape(input, (-1, 1, 2, 2))
print(input.shape)


class ZYJ(nn.Module):
    """Wraps a single ReLU activation."""

    def __init__(self):
        super().__init__()
        # inplace defaults to False, i.e. the input tensor is left untouched
        # and a new tensor is returned.
        self.relu1 = ReLU()

    def forward(self, input):
        """Return ``input`` with every negative entry replaced by zero."""
        output = self.relu1(input)
        return output


zyj = ZYJ()
output = zyj(input)
print(output)
以下为对图像处理
import torch
import torchvision
from torch import nn
from torch.nn import ReLU, Sigmoid
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10(
    'dataset',
    train=False,
    download=False,
    transform=torchvision.transforms.ToTensor(),
)
dataloader = DataLoader(dataset, batch_size=64)


class ZYJ(nn.Module):
    """Holds a ReLU and a Sigmoid layer; only the Sigmoid is applied in forward."""

    def __init__(self):
        super().__init__()
        # inplace defaults to False: the input tensor is not modified.
        self.relu1 = ReLU()
        self.sigmoid1 = Sigmoid()

    def forward(self, input):
        """Return the element-wise sigmoid of ``input``."""
        return self.sigmoid1(input)


zyj = ZYJ()

writer = SummaryWriter("logs_sigmoid")
for step, (imgs, target) in enumerate(dataloader):
    writer.add_images("input", imgs, global_step=step)
    output = zyj(imgs)
    writer.add_images("output", output, global_step=step)
writer.close()


用得不多
用得不多
下图为线性层,对应的in_feature = d, out_feature = L ,bias = True


import torch
import torchvision
from torch import nn
from torch.nn import Linear
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10(
    "dataset",
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=False,
)
# drop_last=True discards an incomplete final batch, so every batch flattens
# to the same number of values for the linear layer below.
dataloader = DataLoader(dataset, batch_size=64, drop_last=True)


class ZYJ(nn.Module):
    """One linear layer mapping 196608 input features to 10 outputs."""

    def __init__(self):
        super().__init__()
        self.linear1 = Linear(196608, 10)

    def forward(self, input):
        """Return the linear projection of ``input``."""
        return self.linear1(input)


zyj = ZYJ()

for imgs, targets in dataloader:
    print(imgs.shape)
    # Collapse the whole batch into a single row of features.
    # torch.flatten(imgs) can be used instead; the values are the same but
    # the result has a different number of dimensions.
    output = torch.reshape(imgs, (1, 1, 1, -1))
    print(output.shape)
    output = zyj(output)
    print(output.shape)
防止过拟合
自然语言处理使用得多
计算两个值的误差
…
网络结构搭建

import torch
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.tensorboard import SummaryWriter


class ZYJ(nn.Module):
    """Three conv + max-pool stages followed by two linear layers (10 outputs)."""

    def __init__(self):
        super().__init__()
        layers = [
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10),
        ]
        self.model = Sequential(*layers)

    def forward(self, input):
        """Run ``input`` through the sequential stack and return the result."""
        return self.model(input)


zyj = ZYJ()

# Dummy batch of 64 three-channel 32x32 images, used to check the output
# shape and to trace the graph for TensorBoard.
input = torch.ones(64, 3, 32, 32)
output = zyj(input)
print(output.shape)

writer = SummaryWriter("logs_seq")
writer.add_graph(zyj, input)
writer.close()

import torch
from torch import nn
from torch.nn import L1Loss, MSELoss

# --- L1Loss ---
inputs = torch.tensor([1, 2, 3], dtype=torch.float32).reshape(1, 1, 1, 3)
targets = torch.tensor([4, 2, 3], dtype=torch.float32).reshape(1, 1, 1, 3)

# reduction='sum' adds up the absolute differences instead of averaging them.
loss = L1Loss(reduction='sum')
result = loss(inputs, targets)
print(result)

# --- MSELoss ---
loss_mse = MSELoss()
result_mse = loss_mse(inputs, targets)
print(result_mse)

# --- CrossEntropyLoss ---
# x holds one sample with a score for each of 3 classes, shaped
# (batch, classes); y is the index of the correct class for that sample.
x = torch.tensor([0.1, 0.2, 0.3]).reshape(1, 3)
y = torch.tensor([1])
loss_cross = nn.CrossEntropyLoss()
result_cross = loss_cross(x, y)
print(result_cross)
下面搭建了一个神经网络
import torch
import torchvision
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential, ReLU, Sigmoid
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

dataset = torchvision.datasets.CIFAR10(
    root="dataset",
    train=False,
    transform=torchvision.transforms.ToTensor(),
    download=False,
)
dataloader = DataLoader(dataset, batch_size=1)


class ZYJ(nn.Module):
    """Conv/pool stack with ReLU and Sigmoid activations and a 10-way linear head."""

    def __init__(self):
        super().__init__()
        layers = [
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            ReLU(),
            Conv2d(32, 32, 5, padding=2),
            Sigmoid(),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10),
        ]
        self.model = Sequential(*layers)

    def forward(self, input):
        """Run ``input`` through the sequential stack and return the result."""
        return self.model(input)


zyj = ZYJ()
loss = nn.CrossEntropyLoss()

for imgs, targets in dataloader:
    output = zyj(imgs)
    result_loss = loss(output, targets)
    # backward() only computes the gradients; an optimizer is needed
    # afterwards to actually update the parameters.
    result_loss.backward()


# Train the network with plain SGD and print the summed loss of each epoch.
loss = nn.CrossEntropyLoss()
optim = torch.optim.SGD(zyj.parameters(), lr=0.01)

for epoch in range(50):
    running_loss = 0.0
    for data in dataloader:
        imgs, targets = data
        output = zyj(imgs)
        result_loss = loss(output, targets)
        # Clear the gradients left over from the previous step; otherwise
        # backward() would add the new gradients on top of them.
        optim.zero_grad()
        # Back-propagate to obtain gradients, then let the optimizer update
        # the parameters.
        result_loss.backward()
        optim.step()
        # .item() extracts a plain Python float. Adding the tensor itself
        # would keep every batch's autograd graph alive for the whole epoch.
        running_loss += result_loss.item()
    print(running_loss)
pretrained为true时,就是带参数的

改模型两个思路:1.将最后一个线性层的out_feature改成10

2.添加一个线性层


方式一保存的是模型的结构+模型的参数
方式二保存的是模型的参数

载入方法 ,推荐第二种,第一种更简单
import torch
from torch.utils.tensorboard import SummaryWriter
from model_self import *  # provides the Lyy network class
import torchvision
from torch.optim import SGD
from torch.utils.data import DataLoader
from torch import nn
from torch.nn import Sequential, Conv2d, MaxPool2d, Flatten, Linear

# Prepare the datasets.
train_data = torchvision.datasets.CIFAR10(
    './cifar10', True, transform=torchvision.transforms.ToTensor(), download=False
)
test_data = torchvision.datasets.CIFAR10(
    './cifar10', False, transform=torchvision.transforms.ToTensor(), download=False
)

# Dataset sizes.
train_data_size = len(train_data)
test_data_size = len(test_data)
print("训练数据及长度:{}".format(train_data_size))
print("测试数据集长度:{}".format(test_data_size))

# Wrap the datasets in loaders.
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

# Build the network, the loss function and the optimizer.
lyy = Lyy()
loss_fn = nn.CrossEntropyLoss()
learning_rate = 1e-2
optimizer = torch.optim.SGD(lyy.parameters(), lr=learning_rate)

# Bookkeeping: number of optimisation steps taken, number of evaluation
# rounds completed, and number of epochs to run.
total_train_step = 0
total_test_step = 0
epoch = 10

# TensorBoard logging.
writer = SummaryWriter("logs")

for i in range(epoch):
    print("-----第{}轮训练开始了-----".format(i+1))

    # ---- training phase ----
    # NOTE(review): train()/eval() assume Lyy is an nn.Module, which the
    # lyy.parameters() call above suggests -- confirm in model_self.
    lyy.train()
    for data in train_dataloader:
        imgs, targets = data
        output = lyy(imgs)
        loss = loss_fn(output, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_step += 1
        if total_train_step % 100 == 0:
            print("训练次数:{},Loss:{}".format(total_train_step, loss.item()))
            writer.add_scalar("train_loss", loss.item(), total_train_step)

    # ---- evaluation phase ----
    lyy.eval()
    total_test_loss = 0.0
    total_accuracy = 0
    # No gradients are needed while evaluating.
    with torch.no_grad():
        for data in test_dataloader:
            imgs, targets = data
            output = lyy(imgs)
            loss = loss_fn(output, targets)
            total_test_loss += loss.item()
            # Count the samples whose highest-scoring class equals the target.
            accuracy = (output.argmax(1) == targets).sum().item()
            total_accuracy += accuracy

    print("整体测试机上误差:{}".format(total_test_loss))
    print("整体测试机上的正确率:{}".format(total_accuracy/test_data_size))
    writer.add_scalar("test_loss", total_test_loss, total_test_step)
    # Accuracy is the fraction of correctly classified test samples, logged
    # against the evaluation round.
    writer.add_scalar("test_accuracy", total_accuracy / test_data_size, total_test_step)
    total_test_step += 1

    # To keep a checkpoint for every epoch:
    #     torch.save(lyy, "lyy_{}.pth".format(i))

writer.close()

第一种方法:

第二种方法:torch.device("cpu") 或 torch.device("cuda")

import torch
import torchvision
from PIL import Image
from model import *  # the class of the saved model must be importable to unpickle it

img_path = "test_imgs/1.jpg"
image = Image.open(img_path)
print(image)
# Force three channels; the pipeline below reshapes to (1, 3, 32, 32).
image = image.convert('RGB')

resize_step = torchvision.transforms.Resize((32, 32))
to_tensor_step = torchvision.transforms.ToTensor()
transform = torchvision.transforms.Compose([resize_step, to_tensor_step])

image = transform(image)
print(image.shape)

# map_location lets the checkpoint be loaded on a machine that only has a CPU.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code -- only load checkpoints from a trusted source.
model = torch.load("model/tudui_9.pth", map_location=torch.device('cpu'))
print(model)

# Add the batch dimension the network expects.
image = torch.reshape(image, (1, 3, 32, 32))
print(image.shape)

model.eval()
with torch.no_grad():
    # On a GPU machine, move the input first: image = image.cuda()
    output = model(image)
print(output)
print(output.argmax(1))

# CIFAR-10 class indices:
#   0 airplane   1 automobile   2 bird   3 cat    4 deer
#   5 dog        6 frog         7 horse  8 ship   9 truck
完结撒花(历时一个月,我可真够拖延的)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。