PyTorch NLP tutorial videos and hands-on projects:
Link: https://pan.baidu.com/s/1tbvLeUTPvxKy_gcVhiT8rw
Extraction code: hgkk
PyTorch tutorials:
http://pytorchchina.com/
https://pytorch-cn.readthedocs.io/zh/latest/
http://pytorch123.com/
Bilibili video link: https://www.bilibili.com/video/av66421076
import time
import torch

print(torch.__version__)
# is_available is a function; it must be called with parentheses to get
# True/False. (The console below shows the function object because the
# original snippet omitted the parentheses.)
print(torch.cuda.is_available())

a = torch.randn(10000, 1000)
b = torch.randn(1000, 2000)

t0 = time.time()
c = torch.matmul(a, b)
t1 = time.time()
print(a.device, t1 - t0, c.norm(2))

device = torch.device('cuda')
a = a.to(device)
b = b.to(device)

# The first run on CUDA pays a one-time environment-initialization cost,
# so it takes noticeably longer.
t0 = time.time()
c = torch.matmul(a, b)
t2 = time.time()
print(a.device, t2 - t0, c.norm(2))

# The second run shows the actual GPU-accelerated speed.
t0 = time.time()
c = torch.matmul(a, b)
t2 = time.time()
print(a.device, t2 - t0, c.norm(2))

console:
1.2.0
<function is_available at 0x0000014EA22A72F0>
cpu 0.484722375869751 tensor(141163.7188)
cuda:0 2.04582142829895 tensor(141564.8281, device='cuda:0')
cuda:0 0.007996797561645508 tensor(141564.8281, device='cuda:0')
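Note that CUDA kernels are launched asynchronously, so wall-clock timing around torch.matmul can misstate the true GPU cost. A minimal sketch of more reliable timing, assuming a CUDA device is available:

import time
import torch

device = torch.device('cuda')
a = torch.randn(10000, 1000, device=device)
b = torch.randn(1000, 2000, device=device)

torch.matmul(a, b)        # warm-up: triggers the one-time CUDA initialization
torch.cuda.synchronize()  # wait for all queued GPU work to finish

t0 = time.time()
c = torch.matmul(a, b)
torch.cuda.synchronize()  # make sure the matmul has actually completed
print('gpu matmul:', time.time() - t0, 's')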
import torch

x = torch.tensor(1.)
# requires_grad=True tells PyTorch to track gradients for a, b and c
a = torch.tensor(1., requires_grad=True)
b = torch.tensor(2., requires_grad=True)
c = torch.tensor(3., requires_grad=True)

y = a ** 2 * x + b * x + c

print('before: ', a.grad, b.grad, c.grad)
# differentiate y with respect to a, b and c
grads = torch.autograd.grad(y, [a, b, c])
print('after: ', grads[0], grads[1], grads[2])

console:
before: None None None
after: tensor(2.) tensor(1.) tensor(1.)
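The same gradients can be obtained by calling y.backward() and reading the accumulated .grad attributes instead of using torch.autograd.grad; a quick sketch (dy/da = 2ax = 2, dy/db = x = 1, dy/dc = 1):

import torch

x = torch.tensor(1.)
a = torch.tensor(1., requires_grad=True)
b = torch.tensor(2., requires_grad=True)
c = torch.tensor(3., requires_grad=True)

y = a ** 2 * x + b * x + c
y.backward()  # accumulates the gradients into a.grad, b.grad, c.grad

print(a.grad, b.grad, c.grad)  # tensor(2.) tensor(1.) tensor(1.)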
Tensors are similar to NumPy's ndarrays, with the addition that Tensors can also be used on a GPU for computation.
# After this import, print must be used as a function with parentheses,
# even on Python 2.x. Tip: in Python 2.x print is a statement and takes
# no parentheses, while in Python 3.x it is a function and requires them.
from __future__ import print_function
import torch
x = torch.empty(5, 3)
print(x)
console:
tensor(1.00000e-04 *
[[-0.0000, 0.0000, 1.5135],
[ 0.0000, 0.0000, 0.0000],
[ 0.0000, 0.0000, 0.0000],
[ 0.0000, 0.0000, 0.0000],
[ 0.0000, 0.0000, 0.0000]])
x = torch.rand(5, 3)
print(x)
console:
tensor([[ 0.6291, 0.2581, 0.6414],
[ 0.9739, 0.8243, 0.2276],
[ 0.4184, 0.1815, 0.5131],
[ 0.5533, 0.5440, 0.0718],
[ 0.2908, 0.1850, 0.5297]])
x = torch.zeros(5, 3, dtype=torch.long)
print(x)
console:
tensor([[ 0, 0, 0],
[ 0, 0, 0],
[ 0, 0, 0],
[ 0, 0, 0],
[ 0, 0, 0]])
x = torch.tensor([5.5, 3])
print(x)
console:
tensor([ 5.5000, 3.0000])
x = x.new_ones(5, 3, dtype=torch.double)  # new_* methods take in sizes
print(x)
x = torch.randn_like(x, dtype=torch.float)  # override the dtype!
print(x)  # the result has the same size

console:
tensor([[ 1., 1., 1.],
        [ 1., 1., 1.],
        [ 1., 1., 1.],
        [ 1., 1., 1.],
        [ 1., 1., 1.]], dtype=torch.float64)
tensor([[-0.2183, 0.4477, -0.4053],
        [ 1.7353, -0.0048, 1.2177],
        [-1.1111, 1.0878, 0.9722],
        [-0.7771, -0.2174, 0.0412],
        [-2.1750, 1.3609, -0.3322]])
print(x.size())
console:
torch.Size([5, 3])
Note: torch.Size is in fact a tuple, so it supports all tuple operations.
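For instance, a small sketch of treating torch.Size as an ordinary tuple:

size = x.size()            # torch.Size([5, 3])
rows, cols = size          # tuple unpacking
print(rows, cols)          # 5 3
print(size[0], len(size))  # 5 2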
y = torch.rand(5, 3)
print(x + y)
console:
tensor([[-0.1859, 1.3970, 0.5236],
[ 2.3854, 0.0707, 2.1970],
[-0.3587, 1.2359, 1.8951],
[-0.1189, -0.1376, 0.4647],
[-1.8968, 2.0164, 0.1092]])
print(torch.add(x, y))
console:
tensor([[-0.1859, 1.3970, 0.5236],
[ 2.3854, 0.0707, 2.1970],
[-0.3587, 1.2359, 1.8951],
[-0.1189, -0.1376, 0.4647],
[-1.8968, 2.0164, 0.1092]])
result = torch.empty(5, 3)
# write the result of the addition into result
torch.add(x, y, out=result)
print(result)
console:
tensor([[-0.1859, 1.3970, 0.5236],
[ 2.3854, 0.0707, 2.1970],
[-0.3587, 1.2359, 1.8951],
[-0.1189, -0.1376, 0.4647],
[-1.8968, 2.0164, 0.1092]])
# adds x to y
y.add_(x)
print(y)
console:
tensor([[-0.1859, 1.3970, 0.5236],
[ 2.3854, 0.0707, 2.1970],
[-0.3587, 1.2359, 1.8951],
[-0.1189, -0.1376, 0.4647],
[-1.8968, 2.0164, 0.1092]])
Note: any operation that mutates a tensor in-place is post-fixed with an underscore `_`. For example: x.copy_(y) and x.t_() will change x.
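A quick sketch of the convention: the underscore variant mutates its receiver, while the plain variant returns a new tensor:

x = torch.ones(2, 2)
y = x.add(1)  # out-of-place: x is unchanged, y is all 2s
x.add_(1)     # in-place: x itself is now all 2s
print(x)      # tensor([[2., 2.], [2., 2.]])
print(y)      # tensor([[2., 2.], [2., 2.]])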
print(x[:, 1])
console:
tensor([ 0.4477, -0.0048, 1.0878, -0.2174, 1.3609])
x = torch.randn(4, 4)
y = x.view(16)
z = x.view(-1, 8) # the size -1 is inferred from other dimensions
print(x.size(), y.size(), z.size())
console:
torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])
x = torch.randn(1)
print(x)
print(x.item())
console:
tensor([ 0.9422])
0.9422121644020081
# convert a torch tensor to a numpy array
a = torch.ones(5)
b = a.numpy()
print(a)
print(b)

# The tensor and the numpy array share the underlying memory, so modifying
# the tensor in place also changes the associated numpy array.
a.add_(1)
print(a)
print(b)

console:
tensor([1., 1., 1., 1., 1.])
[1. 1. 1. 1. 1.]
tensor([2., 2., 2., 2., 2.])
[2. 2. 2. 2. 2.]
import numpy as np
import torch
a = np.ones(5)
# torch.Tensor(a) copies the data, so b does not see later changes to a
b = torch.Tensor(a)
np.add(a, 1, out=a)
print(a)
print(b)
console:
[2. 2. 2. 2. 2.]
tensor([1., 1., 1., 1., 1.])
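To get the shared-memory behavior in this direction (numpy to torch), use torch.from_numpy instead; a minimal sketch:

import numpy as np
import torch

a = np.ones(5)
b = torch.from_numpy(a)  # shares memory with a
np.add(a, 1, out=a)
print(a)  # [2. 2. 2. 2. 2.]
print(b)  # tensor([2., 2., 2., 2., 2.], dtype=torch.float64)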
if torch.cuda.is_available():
    x = torch.rand(5, 5)
    # a CUDA device object
    device = torch.device("cuda")
    # create a tensor directly on the GPU
    y = torch.ones_like(x, device=device)
    # or move an existing tensor there with .to("cuda")
    x = x.to(device)
    z = x + y
    print(z)
    print(z.to("cpu", torch.double))

console:
tensor([[1.4933, 1.9654, 1.8140, 1.1782, 1.9465],
        [1.4439, 1.9591, 1.0066, 1.6454, 1.2359],
        [1.2481, 1.5360, 1.9592, 1.3101, 1.6361],
        [1.0741, 1.6382, 1.2640, 1.9733, 1.7078],
        [1.8020, 1.4749, 1.4589, 1.8869, 1.2460]], device='cuda:0')
tensor([[1.4933, 1.9654, 1.8140, 1.1782, 1.9465],
        [1.4439, 1.9591, 1.0066, 1.6454, 1.2359],
        [1.2481, 1.5360, 1.9592, 1.3101, 1.6361],
        [1.0741, 1.6382, 1.2640, 1.9733, 1.7078],
        [1.8020, 1.4749, 1.4589, 1.8869, 1.2460]], dtype=torch.float64)
x = torch.ones(2, 2, requires_grad=True)
print(x)

# perform an operation on the tensor
y = x + 2
print(y)

# y was created as the result of an operation, so it has a grad_fn.
# Every such tensor has a .grad_fn attribute referencing the Function
# that created it.
print(y.grad_fn)

# do more operations on the tensor
z = y * y * 3
out = z.mean()
print(z, out)

console:
tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x000001989F20D7B8>
tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>) tensor(27., grad_fn=<MeanBackward0>)
import torch
a = torch.randn(2, 2)
a = ((a * .3) / (a - 1))
print(a.requires_grad)
a.requires_grad_(True)
print(a.requires_grad)
b = (a * a).sum()
print(b.grad_fn)
console:
False
True
<SumBackward0 object at 0x000001B498D8D7B8>
backward should be called only on a scalar (i.e., a 1-element tensor) or with a gradient w.r.t. the variable.
import torch

x = torch.ones(2, 2, requires_grad=True)
print(x)

# perform an operation on the tensor
y = x + 2
print(y)

# y was created as the result of an operation, so it has a grad_fn
print(y.grad_fn)

# do more operations on the tensor
z = y * y * 3
out = z.mean()
print(z)
print(out)

out.backward()
print(x.grad)

console:
tensor([[1., 1.],
        [1., 1.]], requires_grad=True)
tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x00000210E6595080>
tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>)
tensor(27., grad_fn=<MeanBackward0>)
tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])
import torch

# autograd tracks tensors with .requires_grad=True
x = torch.randn(3, requires_grad=True)
y = x * 2
while y.data.norm() < 1000:
    y = y * 2
print(x)
print(y)

# Now y is no longer a scalar. torch.autograd cannot compute the full
# Jacobian directly, but if we only want a vector-Jacobian product we can
# simply pass the vector to backward as an argument.
v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float)
y.backward(v)
print(x.grad)

# You can stop autograd from tracking history on tensors with
# .requires_grad=True by wrapping the code in `with torch.no_grad():`.
print(x.requires_grad)
print((x ** 2).requires_grad)
with torch.no_grad():
    print((x ** 2).requires_grad)

console:
tensor([ 0.2052, 0.6057, -0.6355], requires_grad=True)
tensor([ 420.3014, 1240.4666, -1301.4670], grad_fn=<MulBackward0>)
tensor([2.0480e+02, 2.0480e+03, 2.0480e-01])
True
True
False
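Another way to get a tensor with the same contents but excluded from gradient tracking is .detach(); a short sketch:

import torch

x = torch.randn(3, requires_grad=True)
y = x.detach()          # same data, no gradient tracking
print(y.requires_grad)  # False
print(x.eq(y).all())    # tensor(True)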
Neural networks can be constructed using the torch.nn package.
Now that you have had a glimpse of autograd, nn depends on autograd to define models and differentiate them. An nn.Module contains layers, and a method forward(input) that returns the output.
For example, look at this network that classifies digit images:
It is a simple feed-forward network. It takes the input, feeds it through several layers one after another, and finally gives the output.
A typical training procedure for a neural network is as follows:
- Define the neural network with some learnable parameters (or weights)
- Iterate over a dataset of inputs
- Process the input through the network
- Compute the loss (how far the output is from being correct)
- Propagate gradients back into the network's parameters
- Update the weights of the network, typically using a simple update rule: weight = weight - learning_rate * gradient
# -*- coding: utf-8 -*-
"""
Neural Networks
===============

Neural networks can be constructed using the ``torch.nn`` package.

Now that you had a glimpse of ``autograd``, ``nn`` depends on ``autograd``
to define models and differentiate them. An ``nn.Module`` contains layers,
and a method ``forward(input)`` that returns the ``output``.

For example, look at this network that classifies digit images:

.. figure:: /_static/img/mnist.png
   :alt: convnet

   convnet

It is a simple feed-forward network. It takes the input, feeds it through
several layers one after the other, and then finally gives the output.

A typical training procedure for a neural network is as follows:

- Define the neural network that has some learnable parameters (or weights)
- Iterate over a dataset of inputs
- Process input through the network
- Compute the loss (how far is the output from being correct)
- Propagate gradients back into the network's parameters
- Update the weights of the network, typically using a simple update rule:
  ``weight = weight - learning_rate * gradient``

Define the network
------------------

Let's define this network:
"""
import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the size is a square you can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


net = Net()
print(net)
print('---------------------')

# You just have to define the ``forward`` function; the ``backward``
# function (where gradients are computed) is automatically defined for you
# using ``autograd``. You can use any of the Tensor operations in ``forward``.

# The learnable parameters of a model are returned by ``net.parameters()``:
params = list(net.parameters())
print(len(params))
print(params[0].size())  # conv1's .weight
print('---------------------')

# Let's try a random 32x32 input.
# Note: the expected input size of this net (LeNet) is 32x32. To use this
# net on the MNIST dataset, resize the images from the dataset to 32x32.
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)
print('---------------------')

# Zero the gradient buffers of all parameters and backprop with random gradients:
net.zero_grad()
out.backward(torch.randn(1, 10))
print('---------------------')

# Note: ``torch.nn`` only supports mini-batches. The entire ``torch.nn``
# package only supports inputs that are a mini-batch of samples, not a
# single sample. For example, ``nn.Conv2d`` takes a 4D Tensor of
# ``nSamples x nChannels x Height x Width``. If you have a single sample,
# use ``input.unsqueeze(0)`` to add a fake batch dimension.

# Recap of the classes seen so far:
#  - ``torch.Tensor`` - a multi-dimensional array with support for autograd
#    operations like ``backward()``; also holds the gradient w.r.t. the tensor.
#  - ``nn.Module`` - neural network module; a convenient way of encapsulating
#    parameters, with helpers for moving them to GPU, exporting, loading, etc.
#  - ``nn.Parameter`` - a kind of Tensor that is automatically registered as
#    a parameter when assigned as an attribute to a ``Module``.
#  - ``autograd.Function`` - implements forward and backward definitions of
#    an autograd operation. Every ``Tensor`` operation creates at least one
#    ``Function`` node that connects to the functions that created a
#    ``Tensor`` and encodes its history.
#
# At this point, we covered: defining a neural network, processing inputs
# and calling backward. Still left: computing the loss and updating the
# weights of the network.

# Loss Function
# -------------
# A loss function takes the (output, target) pair of inputs and computes a
# value that estimates how far the output is from the target. There are
# several different loss functions under the nn package
# (https://pytorch.org/docs/nn.html#loss-functions). A simple one is
# ``nn.MSELoss``, which computes the mean-squared error between the input
# and the target. For example:
output = net(input)
target = torch.randn(10)     # a dummy target, for example
target = target.view(1, -1)  # make it the same shape as output
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)
print('---------------------')

# Now, if you follow ``loss`` in the backward direction using its
# ``.grad_fn`` attribute, you will see a graph of computations like:
#
#   input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d
#         -> view -> linear -> relu -> linear -> relu -> linear
#         -> MSELoss
#         -> loss
#
# So, when we call ``loss.backward()``, the whole graph is differentiated
# w.r.t. the loss, and all Tensors in the graph with ``requires_grad=True``
# will have their ``.grad`` Tensor accumulated with the gradient.
# For illustration, let us follow a few steps backward:
print(loss.grad_fn)                                            # MSELoss
print(loss.grad_fn.next_functions[0][0])                       # Linear
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # ReLU
print('---------------------')

# Backprop
# --------
# To backpropagate the error all we have to do is call ``loss.backward()``.
# You need to clear the existing gradients first, or gradients will be
# accumulated onto the existing ones. Look at conv1's bias gradients before
# and after the backward pass:
net.zero_grad()  # zeroes the gradient buffers of all parameters

print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)

loss.backward()

print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)
print('---------------------')

# Read later: the neural network package contains various modules and loss
# functions that form the building blocks of deep neural networks; a full
# list with documentation is at https://pytorch.org/docs/nn.

# Update the weights
# ------------------
# The simplest update rule used in practice is Stochastic Gradient Descent:
#
#   weight = weight - learning_rate * gradient
#
# We could implement this in plain Python:
#
#   learning_rate = 0.01
#   for f in net.parameters():
#       f.data.sub_(f.grad.data * learning_rate)
#
# However, as you use neural networks you want various update rules such as
# SGD, Nesterov-SGD, Adam, RMSProp, etc. The ``torch.optim`` package
# implements all these methods. Using it is very simple:
import torch.optim as optim

# create your optimizer
optimizer = optim.SGD(net.parameters(), lr=0.01)

# in your training loop:
optimizer.zero_grad()  # zero the gradient buffers
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step()       # does the update
print('---------------------')

# Note: the gradient buffers had to be manually set to zero using
# ``optimizer.zero_grad()`` because gradients are accumulated, as explained
# in the Backprop section.

console:
Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)
---------------------
10
torch.Size([6, 1, 5, 5])
---------------------
tensor([[-0.0588, -0.0427, -0.1616, 0.0437, 0.0163, 0.0543, -0.1478,
         -0.0592, -0.0509, 0.0549]], grad_fn=<AddmmBackward>)
---------------------
---------------------
tensor(0.4980, grad_fn=<MseLossBackward>)
---------------------
<MseLossBackward object at 0x0000024FE0C6A8D0>
<AddmmBackward object at 0x0000024F8313D4A8>
<AccumulateGrad object at 0x0000024FE0C6A8D0>
---------------------
conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([-5.8280e-03, 1.1338e-02, 1.7925e-03, -6.9680e-07, 9.8157e-03,
        2.1737e-03])
---------------------
---------------------
You only have to define the forward function; the backward function (where gradients are computed) is automatically defined for you by autograd. You can use any tensor operation in the forward function.
output = net(input)
target = torch.randn(10) # a dummy target, for example
target = target.view(1, -1) # make it the same shape as output
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)
console:
tensor(0.6660, grad_fn=<MseLossBackward>)
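To see what nn.MSELoss computes, a quick sketch verifying it by hand (with the default reduction='mean'):

manual = ((output - target) ** 2).mean()
print(manual)  # matches criterion(output, target)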
input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d
-> view -> linear -> relu -> linear -> relu -> linear
-> MSELoss
-> loss
print(loss.grad_fn) # MSELoss
print(loss.grad_fn.next_functions[0][0]) # Linear
print(loss.grad_fn.next_functions[0][0].next_functions[0][0]) # ReLU
console:
<MseLossBackward object at 0x0000019183144C18>
<AddmmBackward object at 0x0000019183144D30>
<AccumulateGrad object at 0x0000019183144D30>
net.zero_grad()  # zeroes the gradient buffers of all parameters

print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)

loss.backward()

print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

console:
conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([-5.8280e-03, 1.1338e-02, 1.7925e-03, -6.9680e-07, 9.8157e-03, 2.1737e-03])
weight = weight - learning_rate * gradient
learning_rate = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)
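As a side note, mutating .data is a legacy idiom; an equivalent sketch with the now-preferred torch.no_grad() guard:

learning_rate = 0.01
with torch.no_grad():  # keep the update itself out of the autograd graph
    for f in net.parameters():
        f -= f.grad * learning_rate  # in-place SGD step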
import torch.optim as optim
# create your optimizer
optimizer = optim.SGD(net.parameters(), lr=0.01)
# in your training loop:
optimizer.zero_grad() # zero the gradient buffers
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step() # Does the update
Generally, when you have to deal with image, text, audio or video data, you can use standard Python packages that load the data into a numpy array, and then convert that array into a torch.*Tensor.
Specifically for vision, there is a package called torchvision, which has data loaders for common datasets such as Imagenet, CIFAR10 and MNIST (torchvision.datasets) and data transformers for images, used together with torch.utils.data.DataLoader.
This provides a huge convenience and avoids writing boilerplate code.
For this tutorial we will use the CIFAR10 dataset. It has ten classes: 'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'. The images in CIFAR-10 are of size 3x32x32, i.e. 3-channel RGB color images of 32x32 pixels.
# -*- coding: utf-8 -*-
"""
Training a Classifier
=====================

This is it. You have seen how to define neural networks, compute loss and
make updates to the weights of the network. Now you might be thinking:
what about data?

Generally, when you have to deal with image, text, audio or video data,
you can use standard python packages that load data into a numpy array,
then convert this array into a ``torch.*Tensor``.

- For images, packages such as Pillow and OpenCV are useful
- For audio, packages such as scipy and librosa
- For text, either raw Python or Cython based loading, or NLTK and SpaCy

Specifically for vision, we have created a package called ``torchvision``
that has data loaders for common datasets such as Imagenet, CIFAR10,
MNIST, etc. and data transformers for images, viz. ``torchvision.datasets``
and ``torch.utils.data.DataLoader``. This provides a huge convenience and
avoids writing boilerplate code.

For this tutorial, we will use the CIFAR10 dataset. It has the classes:
'airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
'ship', 'truck'. The images in CIFAR-10 are of size 3x32x32, i.e.
3-channel color images of 32x32 pixels in size.

.. figure:: /_static/img/cifar10.png
   :alt: cifar10

   cifar10

Training an image classifier
----------------------------

We will do the following steps in order:

1. Load and normalize the CIFAR10 training and test datasets using ``torchvision``
2. Define a Convolutional Neural Network
3. Define a loss function
4. Train the network on the training data
5. Test the network on the test data

1. Loading and normalizing CIFAR10: using ``torchvision``, it's extremely
easy to load CIFAR10.
"""
import torch
import torchvision
import torchvision.transforms as transforms

# The output of torchvision datasets are PILImage images of range [0, 1].
# We transform them to Tensors of normalized range [-1, 1].
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')

# Let us show some of the training images, for fun.
import matplotlib.pyplot as plt
import numpy as np


# function to show an image
def imshow(img):
    img = img / 2 + 0.5  # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


# get some random training images
dataiter = iter(trainloader)
images, labels = dataiter.next()

# show images
imshow(torchvision.utils.make_grid(images))
# print labels
print(' '.join('%5s' % classes[labels[j]] for j in range(4)))

# 2. Define a Convolutional Neural Network: copy the network from the
# Neural Networks section and modify it to take 3-channel images
# (instead of the 1-channel images it was defined for).
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()

# 3. Define a loss function and optimizer: let's use classification
# cross-entropy loss and SGD with momentum.
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# 4. Train the network: this is when things start to get interesting.
# We simply loop over our data iterator, feed the inputs to the network
# and optimize.
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

# 5. Test the network on the test data: we have trained the network for
# 2 passes over the training dataset, but we need to check whether it has
# learnt anything at all. We check this by predicting the class label that
# the network outputs and comparing it against the ground truth. First,
# display an image from the test set to get familiar.
dataiter = iter(testloader)
images, labels = dataiter.next()

# print images
imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))

# Now see what the network thinks these examples are. The outputs are
# energies for the 10 classes: the higher the energy for a class, the more
# the network thinks the image is of that class. So take the index of the
# highest energy:
outputs = net(images)
_, predicted = torch.max(outputs, 1)
print('Predicted: ', ' '.join('%5s' % classes[predicted[j]] for j in range(4)))

# The results seem pretty good. Let us look at how the network performs on
# the whole dataset.
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

# That looks waaay better than chance, which is 10% accuracy (randomly
# picking a class out of 10 classes). Seems like the network learnt
# something. Which classes performed well, and which did not?
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

# Training on GPU: just like you transfer a Tensor onto the GPU, you
# transfer the neural net onto the GPU. First define our device as the
# first visible cuda device if we have CUDA available:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Assuming we are on a CUDA machine, this should print a CUDA device:
print(device)

# The rest of this section assumes ``device`` is a CUDA device. These
# methods recursively go over all modules and convert their parameters and
# buffers to CUDA tensors:
#
#   net.to(device)
#
# Remember that you will have to send the inputs and targets at every step
# to the GPU too:
#
#   inputs, labels = inputs.to(device), labels.to(device)
#
# Why don't I notice a MASSIVE speedup compared to CPU? Because your
# network is really small. Exercise: try increasing the width of your
# network (argument 2 of the first ``nn.Conv2d`` and argument 1 of the
# second ``nn.Conv2d`` need to be the same number) and see what kind of
# speedup you get.
#
# Goals achieved:
#  - Understanding PyTorch's Tensor library and neural networks at a high level
#  - Train a small neural network to classify images
#
# Training on multiple GPUs: if you want to see even more speedup using
# all of your GPUs, check out the data_parallel_tutorial.
#
# Where do I go next?
#  - Train neural nets to play video games (intermediate/reinforcement_q_learning)
#  - Train a state-of-the-art ResNet network on imagenet:
#    https://github.com/pytorch/examples/tree/master/imagenet
#  - Train a face generator using Generative Adversarial Networks:
#    https://github.com/pytorch/examples/tree/master/dcgan
#  - Train a word-level language model using Recurrent LSTM networks:
#    https://github.com/pytorch/examples/tree/master/word_language_model
#  - More examples: https://github.com/pytorch/examples
#  - More tutorials: https://github.com/pytorch/tutorials
#  - Discuss PyTorch on the Forums: https://discuss.pytorch.org/
#  - Chat with other users on Slack: https://pytorch.slack.com/messages/beginner/

console:
Files already downloaded and verified
Files already downloaded and verified
plane cat deer horse
[1,  2000] loss: 2.286
[1,  4000] loss: 1.921
[1,  6000] loss: 1.731
[1,  8000] loss: 1.616
[1, 10000] loss: 1.568
[1, 12000] loss: 1.484
[2,  2000] loss: 1.408
[2,  4000] loss: 1.382
[2,  6000] loss: 1.345
[2,  8000] loss: 1.322
[2, 10000] loss: 1.304
[2, 12000] loss: 1.271
Finished Training
GroundTruth: cat ship ship plane
Predicted: cat ship plane plane
Accuracy of the network on the 10000 test images: 55 %
Accuracy of plane : 63 %
Accuracy of   car : 50 %
Accuracy of  bird : 38 %
Accuracy of   cat : 41 %
Accuracy of  deer : 37 %
Accuracy of   dog : 41 %
Accuracy of  frog : 77 %
Accuracy of horse : 63 %
Accuracy of  ship : 70 %
Accuracy of truck : 71 %
cuda:0
import torch
import torchvision
import torchvision.transforms as transforms

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')
import matplotlib.pyplot as plt
import numpy as np


# function to show an image
def imshow(img):
    print(img)
    img = img / 2 + 0.5  # unnormalize
    npimg = img.numpy()
    print(npimg)
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


# get some random training images
dataiter = iter(trainloader)
images, labels = dataiter.next()

# show images
imshow(torchvision.utils.make_grid(images, nrow=2, padding=1))
# print labels
print(' '.join('%5s' % classes[labels[j]] for j in range(4)))

console:
plane dog car cat
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

console:
[1,  2000] loss: 2.210
[1,  4000] loss: 1.856
[1,  6000] loss: 1.638
[1,  8000] loss: 1.578
[1, 10000] loss: 1.514
[1, 12000] loss: 1.471
[2,  2000] loss: 1.391
[2,  4000] loss: 1.380
[2,  6000] loss: 1.381
[2,  8000] loss: 1.333
[2, 10000] loss: 1.293
[2, 12000] loss: 1.299
Finished Training
dataiter = iter(testloader)
images, labels = dataiter.next()

# print images
imshow(torchvision.utils.make_grid(images))
print('GroundTruth: ', ' '.join('%5s' % classes[labels[j]] for j in range(4)))

outputs = net(images)
_, predicted = torch.max(outputs, 1)
print('Predicted: ', ' '.join('%5s' % classes[predicted[j]] for j in range(4)))

console:
GroundTruth: cat ship ship plane
Predicted: cat ship plane plane
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))
console:
Accuracy of the network on the 10000 test images: 55 %
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

console:
Accuracy of plane : 63 %
Accuracy of   car : 50 %
Accuracy of  bird : 38 %
Accuracy of   cat : 41 %
Accuracy of  deer : 37 %
Accuracy of   dog : 41 %
Accuracy of  frog : 77 %
Accuracy of horse : 63 %
Accuracy of  ship : 70 %
Accuracy of truck : 71 %
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Assume that we are on a CUDA machine, then this should print a CUDA device:
print(device)
net.to(device)
# Remember that you must also send the inputs and targets to the GPU at every step:
inputs, labels = inputs.to(device), labels.to(device)
console:
cuda:0
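Putting the device handling together, a minimal sketch of the training loop adapted for the GPU (reusing net, criterion, optimizer and trainloader from above):

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net.to(device)  # recursively moves all parameters and buffers to the GPU

for epoch in range(2):
    for inputs, labels in trainloader:
        # every mini-batch must live on the same device as the model
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()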