Contents:
1. CRNN-based image sequence prediction in PyTorch: loading your own dataset
2. CRNN-based image sequence prediction in PyTorch: the models
3. CRNN-based image sequence prediction in PyTorch: training and common errors
This post introduces the convolutional-recurrent models: a simple ConvLSTM with a single convolutional layer, plus VGG_LSTM and RESNET_LSTM, whose convolutional parts are transfer-learned from pretrained VGG and ResNet models.
The key is matching sizes between layers: each frame's flattened convolutional feature map becomes one time step of the LSTM input, so the 9 frames give seq_len = 9 and the flattened feature map gives input_size. Note that for a 224x224 input, VGG's feature extractor always outputs 512 * 7 * 7, so the LSTM's input_size = 512 * 7 * 7.
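You can verify that 512 * 7 * 7 figure yourself; a minimal sanity check (my own sketch, assuming torchvision's vgg16 is available):

import torch
import torch.nn as nn
from torchvision import models

net = models.vgg16(pretrained=True)
net.classifier = nn.Sequential()   # drop the FC head; forward then ends at the flatten
x = torch.randn(1, 3, 224, 224)    # one dummy 224x224 RGB image
print(net(x).shape)                # torch.Size([1, 25088]), i.e. 512*7*7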
The ConvLSTM code is as follows:
import torch
import torch.nn as nn

class ConvLSTM(nn.Module):
    def __init__(self, lstm_hidden_size=256, num_lstm_layers=1, bidirectional=True):
        super(ConvLSTM, self).__init__()
        self.num_directions = 2 if bidirectional else 1
        self.num_lstm_layers = num_lstm_layers
        self.lstm_hidden_size = lstm_hidden_size
        # [B, 3, 224, 224] -> [B, 16, 112, 112]
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        # Defined but not used in forward(); would give [B, 32, 56, 56].
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(2))
        # Each frame's flattened conv feature map is one LSTM time step.
        # Note: dropout only takes effect when num_lstm_layers > 1.
        self.lstm1 = nn.LSTM(input_size=16 * 112 * 112,
                             hidden_size=lstm_hidden_size,
                             num_layers=num_lstm_layers,
                             batch_first=True,
                             dropout=0.5,
                             bidirectional=bidirectional)  # output: [B, 9, lstm_hidden_size * num_directions]
        self.linear1 = nn.Sequential(
            nn.Linear(lstm_hidden_size * self.num_directions * num_lstm_layers, 64),
            nn.ReLU(inplace=True))
        self.output_layer = nn.Linear(64, 3)

    def init_hidden(self, x):
        # Zero-initialized (h, c) on the same device/dtype as the input.
        batch_size = x.size(0)
        h = x.new_zeros(self.num_directions * self.num_lstm_layers, batch_size, self.lstm_hidden_size)
        c = x.new_zeros(self.num_directions * self.num_lstm_layers, batch_size, self.lstm_hidden_size)
        return h, c

    def forward(self, x):
        # x: [B, 9, 3, 224, 224] -- 9 frames per sample
        B = x.size(0)
        x = x.view(B * 9, 3, 224, 224)    # fold the frame dim into the batch
        output = self.conv1(x)            # [B*9, 16, 112, 112]
        output = output.view(B, 9, -1)    # [B, 9, 16*112*112]: one feature vector per frame
        h, c = self.init_hidden(output)
        output, (h, c) = self.lstm1(output, (h, c))
        # h: [num_layers * num_directions, B, lstm_hidden_size]
        h = h.transpose(0, 1).contiguous().view(B, -1)  # [B, num_layers*num_directions*lstm_hidden_size]
        output = self.linear1(h)          # [B, 64]
        output = self.output_layer(output)  # [B, 3]
        return output
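A quick way to confirm the shapes line up is to push a dummy batch through the model; a hypothetical smoke test (not from the original post), runnable on CPU:

model = ConvLSTM(lstm_hidden_size=256, num_lstm_layers=1, bidirectional=True)
dummy = torch.randn(2, 9, 3, 224, 224)  # batch of 2 sequences, 9 frames each
out = model(dummy)
print(out.shape)                        # torch.Size([2, 3])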
The VGG_LSTM code is as follows:
from torchvision import models

class VGG_LSTM(nn.Module):
    def __init__(self, lstm_hidden_size=256, num_lstm_layers=1, bidirectional=True):
        super(VGG_LSTM, self).__init__()
        net = models.vgg16(pretrained=True)
        net.classifier = nn.Sequential()  # drop the FC head, keep conv features + avgpool
        self.num_directions = 2 if bidirectional else 1
        self.num_lstm_layers = num_lstm_layers
        self.lstm_hidden_size = lstm_hidden_size
        # [B, 3, 224, 224] -> 512*7*7 features per frame
        self.features = net
        self.lstm1 = nn.LSTM(input_size=512 * 7 * 7,
                             hidden_size=lstm_hidden_size,
                             num_layers=num_lstm_layers,
                             batch_first=True,
                             dropout=0.5,
                             bidirectional=bidirectional)  # output: [B, 9, lstm_hidden_size * num_directions]
        self.linear1 = nn.Sequential(
            nn.Linear(lstm_hidden_size * self.num_directions * num_lstm_layers, 64),
            nn.ReLU(inplace=True))
        self.output_layer = nn.Linear(64, 3)

    def init_hidden(self, x):
        batch_size = x.size(0)
        h = x.new_zeros(self.num_directions * self.num_lstm_layers, batch_size, self.lstm_hidden_size)
        c = x.new_zeros(self.num_directions * self.num_lstm_layers, batch_size, self.lstm_hidden_size)
        return h, c

    def forward(self, x):
        # x: [B, 9, 3, 224, 224]
        B = x.size(0)
        x = x.view(B * 9, 3, 224, 224)
        output = self.features(x)       # [B*9, 25088]; VGG flattens after avgpool since the classifier was emptied
        output = output.view(B, 9, -1)  # [B, 9, 512*7*7]
        h, c = self.init_hidden(output)
        output, (h, c) = self.lstm1(output, (h, c))
        # h: [num_layers * num_directions, B, lstm_hidden_size]
        h = h.transpose(0, 1).contiguous().view(B, -1)
        output = self.linear1(h)        # [B, 64]
        output = self.output_layer(output)  # [B, 3]
        return output
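Since the VGG part is transfer-learned, a common choice (which the original code does not make, so treat this as an optional sketch) is to freeze the pretrained features and train only the LSTM and the head:

model = VGG_LSTM()
for p in model.features.parameters():
    p.requires_grad = False  # keep the pretrained VGG weights fixed
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-4)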
For RESNET_LSTM and similar variants, only the backbone line needs to change, e.g. net = models.resnet18(pretrained=True); any pretrained model that ships with PyTorch can be transferred the same way. Keep in mind that the LSTM's input_size must match the new backbone's feature size, as in the sketch below.
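A minimal RESNET_LSTM sketch following the same pattern (my adaptation, not code from the original post): once resnet18's fc layer is removed, its pooled features are 512-dimensional, so input_size drops from 512*7*7 to 512 (it would be 2048 for resnet50 and larger):

class RESNET_LSTM(nn.Module):
    def __init__(self, lstm_hidden_size=256, num_lstm_layers=1, bidirectional=True):
        super(RESNET_LSTM, self).__init__()
        net = models.resnet18(pretrained=True)
        net.fc = nn.Sequential()  # drop the classifier; resnet18 then outputs [B, 512]
        self.num_directions = 2 if bidirectional else 1
        self.num_lstm_layers = num_lstm_layers
        self.lstm_hidden_size = lstm_hidden_size
        self.features = net
        self.lstm1 = nn.LSTM(input_size=512,  # pooled feature size of resnet18/34
                             hidden_size=lstm_hidden_size,
                             num_layers=num_lstm_layers,
                             batch_first=True,
                             dropout=0.5,
                             bidirectional=bidirectional)
        self.linear1 = nn.Sequential(
            nn.Linear(lstm_hidden_size * self.num_directions * num_lstm_layers, 64),
            nn.ReLU(inplace=True))
        self.output_layer = nn.Linear(64, 3)

    def init_hidden(self, x):
        batch_size = x.size(0)
        h = x.new_zeros(self.num_directions * self.num_lstm_layers, batch_size, self.lstm_hidden_size)
        c = x.new_zeros(self.num_directions * self.num_lstm_layers, batch_size, self.lstm_hidden_size)
        return h, c

    def forward(self, x):
        # x: [B, 9, 3, 224, 224]
        B = x.size(0)
        x = x.view(B * 9, 3, 224, 224)
        output = self.features(x).view(B, 9, -1)  # [B, 9, 512]
        h, c = self.init_hidden(output)
        output, (h, c) = self.lstm1(output, (h, c))
        h = h.transpose(0, 1).contiguous().view(B, -1)
        return self.output_layer(self.linear1(h))  # [B, 3]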