
Transformer (CPU) Code Reproduction

Commented line by line, explained line by line. The code can be run directly.
code from https://github.com/graykode/nlp-tutorial/tree/master/5-1.Transformer
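The listing below references several module-level names (sentences, src_vocab, tgt_vocab, tgt_vocab_size, d_model, n_layers, and the other hyperparameters) that are defined at the top of the original script but fall outside the excerpt shown here. A minimal sketch of those definitions, assuming the same toy translation example as the linked graykode tutorial, would look like this:

# Assumed setup (a sketch; values follow the linked graykode tutorial and the
# shape comments in the listing, not text recovered from the truncated post)
sentences = ['ich mochte ein bier P', 'S i want a beer', 'i want a beer E']
src_vocab = {'P': 0, 'ich': 1, 'mochte': 2, 'ein': 3, 'bier': 4}
src_vocab_size = len(src_vocab)   # 5
tgt_vocab = {'P': 0, 'i': 1, 'want': 2, 'a': 3, 'beer': 4, 'S': 5, 'E': 6}
tgt_vocab_size = len(tgt_vocab)   # 7
src_len = 5        # encoder input length
tgt_len = 5        # decoder input length
d_model = 512      # embedding / model dimension
d_ff = 2048        # feed-forward hidden dimension
d_k = d_v = 64     # per-head dimension of K (= Q) and V
n_layers = 6       # number of encoder / decoder layers
n_heads = 8        # number of attention heads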

import numpy as np
import torch
import torch.nn as nn
import math
import time
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# 13. MyDataset
class MyDataset(Dataset):
    # Store the data
    def __init__(self, enc_inputs, dec_inputs, target_batch):
        self.enc_inputs = enc_inputs
        self.dec_inputs = dec_inputs
        self.target_batch = target_batch
    # Return the length of the dataset (how many rows of data there are)
    def __len__(self):
        return len(self.enc_inputs)
        # return self.enc_inputs.shape[0]
    # Return the elements at the given index; each item has one dimension
    # fewer than the tensors returned by make_batch
    def __getitem__(self, idx):
        return self.enc_inputs[idx], self.dec_inputs[idx], self.target_batch[idx]

# 12. make_batch
def make_batch(sentences):
    input_batch = [[src_vocab[n] for n in sentences[0].split()]]   # [[1, 2, 3, 4, 0]]
    output_batch = [[tgt_vocab[n] for n in sentences[1].split()]]  # [[5, 1, 2, 3, 4]]
    target_batch = [[tgt_vocab[n] for n in sentences[2].split()]]  # [[1, 2, 3, 4, 6]]
    return torch.LongTensor(input_batch), torch.LongTensor(output_batch), torch.LongTensor(target_batch)
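# Usage sketch (an illustrative addition, not from the original listing): make_batch
# builds single-sample batches from the toy `sentences`, which can then be wrapped
# in the MyDataset/DataLoader pair imported and defined above, e.g.
#   enc_inputs, dec_inputs, target_batch = make_batch(sentences)
#   loader = DataLoader(MyDataset(enc_inputs, dec_inputs, target_batch), batch_size=1)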
# 11. get_attn_subsequent_mask
def get_attn_subsequent_mask(seq):
    attn_shape = [seq.size(0), seq.size(1), seq.size(1)]  # [1, 5, 5]
    subsequence_mask = np.triu(np.ones(attn_shape), k=1)  # ndarray [1, 5, 5]
    # .byte() is equivalent to self.to(torch.uint8)
    subsequence_mask = torch.from_numpy(subsequence_mask).byte()  # [1, 5, 5]
    return subsequence_mask
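# Illustrative example (an added sketch, not from the original listing): for a
# batch of one length-5 sequence the mask is strictly upper-triangular, so each
# position can only attend to itself and earlier positions:
#   get_attn_subsequent_mask(torch.zeros(1, 5))
#   -> tensor([[[0, 1, 1, 1, 1],
#               [0, 0, 1, 1, 1],
#               [0, 0, 0, 1, 1],
#               [0, 0, 0, 0, 1],
#               [0, 0, 0, 0, 0]]], dtype=torch.uint8)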
# 10. DecoderLayer: three parts: masked multi-head self-attention,
# encoder-decoder (cross) attention, and a position-wise feed-forward network
class DecoderLayer(nn.Module):
    def __init__(self):
        super(DecoderLayer, self).__init__()
        self.dec_self_attn = MultiHeadAttention()
        self.dec_enc_attn = MultiHeadAttention()
        self.pos_fnn = PoswiseFeedForwardNet()
    # [1, 5, 512] [1, 5, 512] [1, 5, 5] [1, 5, 5]
    def forward(self, dec_inputs, enc_outputs, dec_self_attn_mask, dec_enc_attn_mask):
        # dec_self_attn === [1, 8, 5, 5], dec_outputs === [1, 5, 512]
        dec_outputs, dec_self_attn = self.dec_self_attn(dec_inputs, dec_inputs, dec_inputs, dec_self_attn_mask)
        # dec_enc_attn === [1, 8, 5, 5], dec_outputs === [1, 5, 512]
        dec_outputs, dec_enc_attn = self.dec_enc_attn(dec_outputs, enc_outputs, enc_outputs, dec_enc_attn_mask)
        dec_outputs = self.pos_fnn(dec_outputs)  # [1, 5, 512]
        return dec_outputs, dec_self_attn, dec_enc_attn

# 9. Decoder: three parts: token embedding, positional encoding, and a stack of
# DecoderLayers (masked self-attention, cross attention, feed-forward network)
class Decoder(nn.Module):
    def __init__(self):
        super(Decoder, self).__init__()
        self.tgt_emb = nn.Embedding(tgt_vocab_size, d_model)  # [7, 512]
        self.pos_emb = PositionalEncoding(d_model)
        self.layers = nn.ModuleList([DecoderLayer() for _ in range(n_layers)])  # 10.
    def forward(self, dec_inputs, enc_inputs, enc_outputs):
        dec_outputs = self.tgt_emb(dec_inputs)  # [1, 5, 512]
        dec_outputs = self.pos_emb(dec_outputs.transpose(0, 1)).transpose(0, 1)  # [1, 5, 512] -> [5, 1, 512] -> [1, 5, 512]
        #