Converting a PyTorch model (.pt file) to a TensorFlow model (.pb file)
Need: convert a PyTorch 1.x model to TensorFlow 2.x and deploy it with TF Serving.
Goal: experiment and train in PyTorch, then deploy the final model with TF Serving.
Requirement: convert the PyTorch 1.x model (.pt file) to a TensorFlow 1.x/2.x model (.pb file, SavedModel format).
Approach (a minimal sketch follows these two steps):
a. First convert the PyTorch model (.pt) to an ONNX model with torch.onnx.export (this works for common architectures that use no framework-specific custom ops).
b. Then convert the ONNX model to a TensorFlow SavedModel with onnx_tf (the variables.data file may come out nearly empty, but this does not affect deployment).
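Both steps take only a few lines. Here is a minimal, generic sketch; the stand-in model and file names are illustrative assumptions only, and the full BERT-specific script appears further below:

# Minimal two-step sketch. The stand-in `model` and the file names are
# illustrative assumptions, not part of the original script.
import torch
import onnx
from onnx_tf.backend import prepare

model = torch.nn.Sequential(torch.nn.Embedding(100, 16), torch.nn.Linear(16, 7))
model.eval()
dummy_ids = torch.randint(0, 100, (1, 128), dtype=torch.int64)

# step a: pytorch -> onnx
torch.onnx.export(model, (dummy_ids,), "model.onnx",
                  input_names=["input_ids"], output_names=["outputs"],
                  opset_version=10, do_constant_folding=True,
                  dynamic_axes={"input_ids": {0: "batch", 1: "sequence"},
                                "outputs": {0: "batch"}})

# step b: onnx -> tensorflow SavedModel
tf_rep = prepare(onnx.load("model.onnx"), device="CPU")
tf_rep.export_graph("model_save_path/tensorflow")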
Environment:
python==3.8
tensorflow==2.8.0
tensorflow-addons==0.16.1
tensorflow-probability==0.16.0
keras==2.8.0
torch==1.8.0
transformers==4.15.0
onnx==1.8.1
onnx-tf==1.8.0
protobuf==3.19.2
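These pinned versions can be installed in one go, e.g.:

pip install tensorflow==2.8.0 tensorflow-addons==0.16.1 tensorflow-probability==0.16.0 keras==2.8.0 torch==1.8.0 transformers==4.15.0 onnx==1.8.1 onnx-tf==1.8.0 protobuf==3.19.2

Note that the ONNX test step in the script additionally imports onnxruntime, which is not pinned above.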
Usage:
1. Configure the pretrained-model path, e.g. pretrained_model_name_or_path = "../ernie-tiny"
2. Run: python t11_pytorch_to_onnx_to_tensorflow.py
Example output:

Some weights of the model checkpoint at E:/DATA/bert-model/00_pytorch/ernie-tiny were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).

pytorch-model-predict:
[[0.5206347107887268, 0.5481777787208557, 0.7332082986831665, 0.5, 0.5749790668487549, 0.5696589946746826, 0.5643221139907837]]

model_save_path\onnx\tc_model.onnx
onnx-model-predict:
[array([[0.52063483, 0.54817796, 0.73320824, 0.5, 0.5749791, 0.569659, 0.564322]], dtype=float32)]

2022-12-12 09:39:15.005585: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX AVX2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-12-12 09:39:15.390812: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 2497 MB memory: -> device: 0, name: NVIDIA GeForce RTX 2060, pci bus id: 0000:01:00.0, compute capability: 7.5
2022-12-12 09:39:23.588536: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
WARNING:absl:Found untraced functions such as gen_tensor_dict while saving (showing 1 of 1). These functions will not be directly callable after loading.

tensorflow_model_predict:
WARNING:tensorflow:SavedModel saved prior to TF 2.5 detected when loading Keras model. Please ensure that you are saving the model with model.save() or tf.keras.models.save_model(), *NOT* tf.saved_model.save(). To confirm, there should be a file named "keras_metadata.pb" in the SavedModel directory.
WARNING:tensorflow:SavedModel saved prior to TF 2.5 detected when loading Keras model. Please ensure that you are saving the model with model.save() or tf.keras.models.save_model(), *NOT* tf.saved_model.save(). To confirm, there should be a file named "keras_metadata.pb" in the SavedModel directory.
['serving_default']
{'output_0': TensorSpec(shape=(None, 7), dtype=tf.float32, name='output_0')}
[<tf.Tensor: shape=(1, 7), dtype=float32, numpy=array([[0.52063483, 0.54817796, 0.73320824, 0.5, 0.5749791, 0.569659, 0.56432205]], dtype=float32)>]
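The three backends agree to within float32 rounding. To assert that programmatically instead of eyeballing the log, a quick sketch using the values printed above:

# Consistency check between the PyTorch and TensorFlow predictions above;
# the tolerance is a judgment call, small float32 drift is expected.
import numpy as np

pt_out = np.array([[0.5206347107887268, 0.5481777787208557, 0.7332082986831665,
                    0.5, 0.5749790668487549, 0.5696589946746826, 0.5643221139907837]])
tf_out = np.array([[0.52063483, 0.54817796, 0.73320824,
                    0.5, 0.5749791, 0.569659, 0.56432205]])
assert np.allclose(pt_out, tf_out, atol=1e-5), "conversion changed the predictions"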
# !/usr/bin/python
# -*- coding: utf-8 -*-
# @time    : 2022/12/7 21:00
# @author  : Mo
# @function: pytorch to onnx


import json
import os

from transformers import BertConfig, BertTokenizer, BertModel
from argparse import Namespace
from torch import nn
import numpy as np
import torch


class FCLayer(nn.Module):
    def __init__(self, input_dim, output_dim, dropout_rate=0.1, is_active=True, is_dropout=True, active_type="mish"):
        """
        FC-Layer, mostly the last output layer of the model
        Args:
            input_dim: input dimension, e.g. 768
            output_dim: output dimension, e.g. 32
            dropout_rate: dropout rate, e.g. 0.1
            is_dropout: whether to use dropout, e.g. True
            is_active: whether to use an activation such as tanh, e.g. True
            active_type: type of activation function, e.g. "tanh", "relu"
        Returns:
            Tensor of batch.
        """
        super(FCLayer, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)
        self.dropout = nn.Dropout(dropout_rate)  # probability of an element to be zeroed
        self.is_dropout = is_dropout
        self.active_type = active_type
        self.is_active = is_active
        self.softmax = nn.Softmax(1)
        self.sigmoid = nn.Sigmoid()
        self.relu = nn.ReLU(inplace=True)
        self.tanh = nn.Tanh()
        self.gelu = nn.GELU()

    def forward(self, x):
        if self.is_dropout:
            x = self.dropout(x)
        x = self.linear(x)
        if self.is_active:
            if self.active_type.upper() == "MISH":
                x = x * torch.tanh(nn.functional.softplus(x))
            elif self.active_type.upper() == "SWISH":
                x = x * torch.sigmoid(x)
            elif self.active_type.upper() == "TANH":
                x = self.tanh(x)
            elif self.active_type.upper() == "GELU":
                x = self.gelu(x)
            elif self.active_type.upper() == "RELU":
                x = self.relu(x)
            else:
                x = self.relu(x)
        return x


class TCGraph(nn.Module):
    def __init__(self, graph_config, tokenizer):
        # load the pretrained language model
        self.graph_config = graph_config
        pretrained_config, pretrained_model = BertConfig, BertModel
        self.pretrained_config = pretrained_config.from_pretrained(
            graph_config.pretrained_model_name_or_path,
            output_hidden_states=graph_config.output_hidden_states)
        self.pretrained_config.update({"gradient_checkpointing": True})
        super(TCGraph, self).__init__()
        if self.graph_config.is_train:
            self.pretrain_model = pretrained_model.from_pretrained(
                graph_config.pretrained_model_name_or_path, config=self.pretrained_config)
            self.pretrain_model.resize_token_embeddings(len(tokenizer))
        else:
            self.pretrain_model = pretrained_model(self.pretrained_config)
            self.pretrain_model.resize_token_embeddings(len(tokenizer))
        # if hidden-state outputs are used, concat the selected layers
        if self.graph_config.output_hidden_states:
            self.dense = FCLayer(
                int(self.pretrained_config.hidden_size * len(self.graph_config.output_hidden_states)),
                self.graph_config.num_labels,
                is_dropout=self.graph_config.is_dropout,
                is_active=self.graph_config.is_active,
                active_type=self.graph_config.active_type)
        else:
            self.dense = FCLayer(
                self.pretrained_config.hidden_size,
                self.graph_config.num_labels,
                is_dropout=self.graph_config.is_dropout,
                is_active=self.graph_config.is_active,
                active_type=self.graph_config.active_type)
        # loss function
        self.loss_bce = torch.nn.BCELoss()
        # activation layer
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, input_ids, attention_mask, token_type_ids, labels=None):
        output = self.pretrain_model(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        if self.graph_config.output_hidden_states:
            x = output[2]
            hidden_states_idx = [i for i in range(len(x))]
            # concat the CLS vectors of the selected hidden layers
            cls = torch.cat([x[i][:, 0, :] for i in self.graph_config.output_hidden_states
                             if i in hidden_states_idx], dim=-1)
        else:
            cls = output[0][:, 0, :]  # CLS
        logits = self.dense(cls)  # fully connected layer: FCLayer
        if labels is not None:  # loss
            logits_sigmoid = self.sigmoid(logits)
            loss = self.loss_bce(logits_sigmoid.view(-1), labels.view(-1))
            return loss, logits
        else:
            logits = self.sigmoid(logits)
            return logits


def save_json(lines, path: str, encoding: str = "utf-8", indent: int = 4):
    """ Write a list of json-serializable lines to a file
    Args:
        lines: lines of list[str] which need saving
        path: path of the save file, such as "json.txt"
        encoding: type of encoding, such as "utf-8", "gbk"
    """
    with open(path, "w", encoding=encoding) as fj:
        fj.write(json.dumps(lines, ensure_ascii=False, indent=indent))


def t11_pytorch_model_to_onnx():
    """ Convert the PyTorch model to ONNX format """
    model_save_path = "model_save_path"
    num_labels = 7
    path_onnx = os.path.join(model_save_path, "onnx", "tc_model.onnx")
    path_onnx_dir = os.path.split(path_onnx)[0]
    if not os.path.exists(path_onnx_dir):
        os.makedirs(path_onnx_dir)
    model_config["pretrained_model_name_or_path"] = pretrained_model_name_or_path
    model_config["path_onnx"] = path_onnx
    model_config["num_labels"] = num_labels

    tokenizer = BertTokenizer.from_pretrained(pretrained_model_name_or_path)
    tc_config = Namespace(**model_config)
    tc_model = TCGraph(graph_config=tc_config, tokenizer=tokenizer)
    device = "cuda:{}".format(tc_config.CUDA_VISIBLE_DEVICES) if (torch.cuda.is_available()
             and tc_config.is_cuda and tc_config.CUDA_VISIBLE_DEVICES != "-1") else "cpu"

    # batch_data = [[[1, 2, 3, 4]*32]*32, [[1,0]*64]*32, [[0,1]*64]*32]
    text = "macropodus"
    tokens = tokenizer.encode_plus(text, max_length=128, truncation=True)
    batch_data = {name: np.atleast_2d(value).astype(np.int64) for name, value in tokens.items()}
    tc_model.to(device)
    tc_model.eval()
    with torch.no_grad():
        inputs = {"input_ids": torch.tensor(batch_data.get("input_ids")).to(device),
                  "attention_mask": torch.tensor(batch_data.get("attention_mask")).to(device),
                  "token_type_ids": torch.tensor(batch_data.get("token_type_ids")).to(device),
                  }
        output = tc_model(**inputs)
        print("\npytorch-model-predict:")
        print(output.detach().cpu().numpy().tolist())

    input_names = ["input_ids", "attention_mask", "token_type_ids"]
    output_names = ["outputs"]
    torch.onnx.export(model=tc_model,
                      args=(inputs["input_ids"], inputs["attention_mask"], inputs["token_type_ids"]),
                      f=path_onnx,
                      input_names=input_names,
                      output_names=output_names,  # be careful: these names must stay consistent
                      opset_version=10,  # 9, 10, 11, 12
                      do_constant_folding=True,
                      use_external_data_format=True,
                      dynamic_axes={"input_ids": {0: "batch", 1: "sequence"},
                                    "attention_mask": {0: "batch", 1: "sequence"},
                                    "token_type_ids": {0: "batch", 1: "sequence"},
                                    output_names[0]: {0: "batch"},
                                    })


def t111_tet_onnx():
    """ Test the ONNX model """
    from onnxruntime import ExecutionMode, InferenceSession, SessionOptions
    from transformers import BertTokenizer
    import numpy as np

    pretrained_model_name_or_path = model_config["pretrained_model_name_or_path"]
    path_onnx = model_config["path_onnx"]
    print(path_onnx)
    # create the tokenizer and the InferenceSession
    tokenizer = BertTokenizer.from_pretrained(pretrained_model_name_or_path)
    options = SessionOptions()
    options.intra_op_num_threads = 1
    options.execution_mode = ExecutionMode.ORT_SEQUENTIAL
    sess = InferenceSession(path_onnx, options,
                            providers=["CPUExecutionProvider"],  # ["CUDAExecutionProvider"]
                            )
    text = "macropodus"
    tokens = tokenizer.encode_plus(text, max_length=128, truncation=True)
    tokens = {name: np.atleast_2d(value).astype(np.int64) for name, value in tokens.items()}
    output = sess.run(None, tokens)
    print("\nonnx-model-predict:")
    print(output)


def t12_onnx_to_tensorflow():
    """ Convert the ONNX model to a TensorFlow SavedModel """
    from onnx_tf.backend import prepare
    import onnx

    model_save_path = model_config["model_save_path"]
    path_tensorflow = os.path.join(model_save_path, "tensorflow")
    path_onnx = model_config["path_onnx"]
    model_config["path_tensorflow"] = path_tensorflow
    model_onnx = onnx.load(path_onnx)
    tf_rep = prepare(model_onnx, device="CPU")
    tf_rep.export_graph(path_tensorflow)


def t121_tet_tensorflow():
    """ Load and test the TensorFlow model """
    from transformers import BertTokenizerFast
    import numpy as np
    import keras

    pretrained_model_name_or_path = model_config["pretrained_model_name_or_path"]
    path_tensorflow = model_config["path_tensorflow"]
    print("\ntensorflow_model_predict: ")
    new_model = keras.models.load_model(path_tensorflow)
    print(list(new_model.signatures.keys()))
    infer = new_model.signatures["serving_default"]
    print(infer.structured_outputs)

    text = "macropodus"
    tokenizer = BertTokenizerFast.from_pretrained(pretrained_model_name_or_path)
    tokens = tokenizer.encode_plus(text, max_length=128, truncation=True)
    tokens = {name: np.atleast_2d(value).astype(np.int64) for name, value in tokens.items()}
    output = new_model(**tokens)
    print(output)


model_config = {
    "path_finetune": "",
    "path_onnx": "",
    "path_tensorflow": "",
    "CUDA_VISIBLE_DEVICES": "0",  # device, GPU/CPU, "-1"/"0"/"1"/"2"...
    "USE_TORCH": "1",  # transformers uses torch, because this script is written with torch
    "output_hidden_states": None,  # which transformer hidden layers to take, list, e.g. [6, 11]
    "pretrained_model_name_or_path": "",  # path of the pretrained model
    "model_save_path": "model_save_path",  # directory where the trained model is saved
    "config_name": "tc.config",  # file name of the saved hyper-parameters
    "model_name": "tc.model",  # file name of the full saved model
    "path_train": None,  # path of the training corpus, required, string
    "path_dev": None,  # path of the dev corpus, required, may be None
    "path_tet": None,  # path of the test corpus, required, may be None
    "task_type": "TC-MULTI-CLASS",  # task type, updated automatically from the data: "TC-MULTI-CLASS", "TC-MULTI-LABEL"; TC is short for text-classification
    "model_type": "BERT",  # pretrained model type, e.g. bert, roberta, ernie
    "loss_type": "BCE",  # loss function type;
                         # multi-class: None(BCE), BCE, BCE_LOGITS, MSE, FOCAL_LOSS, DICE_LOSS, LABEL_SMOOTH, MIX;
                         # multi-label: SOFT_MARGIN_LOSS, PRIOR_MARGIN_LOSS, FOCAL_LOSS, CIRCLE_LOSS, DICE_LOSS, MIX, etc.
    "batch_size": 32,  # batch size
    "num_labels": 0,  # number of classes, updated automatically
    "max_len": 0,  # max text length; -1 = the length covering 95% of the data, 0 = the maximum text length found
    "epochs": 21,  # training epochs
    "lr": 1e-5,  # learning rate
    "grad_accum_steps": 1,  # gradient accumulation steps
    "max_grad_norm": 1.0,  # max norm for gradient clipping
    "weight_decay": 5e-4,  # l2 weight decay of model parameters
    "dropout_rate": 0.1,  # dropout probability
    "adam_eps": 1e-8,  # adam optimizer epsilon
    "seed": 2021,  # random seed, e.g. 3407, 2021
    "stop_epochs": 4,  # early-stopping epochs
    "evaluate_steps": 320,  # evaluation interval in steps
    "save_steps": 320,  # save interval in steps
    "warmup_steps": -1,  # warm-up steps
    "ignore_index": 0,  # index to ignore
    "max_steps": -1,  # max steps, -1 means run the full number of epochs
    "is_train": True,  # whether to train (otherwise predict)
    "is_cuda": True,  # whether to use the gpu (otherwise cpu)
    "is_adv": False,  # whether to use adversarial training (FGM by default)
    "is_dropout": True,  # whether the last output layers use dropout
    "is_active": True,  # whether the last output layers (e.g. FCLayer/SpanLayer) use an activation
    "active_type": "RELU",  # activation of the last output layers: RELU/SIGMOID/TANH/MISH/SWISH/GELU
    "save_best_mertics_key": ["micro_avg", "f1-score"],  # metric used to save the best model;
                         # index-1: [micro_avg, macro_avg, weighted_avg]; index-2: [precision, recall, f1-score]
    "multi_label_threshold": 0.5,  # multi-label only: predictions above this threshold count as positive
    "xy_keys": ["text", "label"],  # keys of text/label in the data file
    "label_sep": "|myz|",  # label separator used in multi-label classification corpora
    "len_rate": 1,  # ratio of training/dev data actually used, float in [0, 1]
    "adv_emb_name": "word_embeddings.",  # name of the embedding parameter in the model, e.g. model.embeddings.word_embeddings.weight
    "adv_eps": 1.0,  # gradient-weight epsilon
    "ADDITIONAL_SPECIAL_TOKENS": ["[macropodus]", "[macadam]"],  # extra special tokens
    "prior": None,  # class prior distribution, set automatically; a list with num_labels elements (json cannot store np.array)
    "l2i": None,
    "i2l": None,
    "len_corpus": None,  # size of the training corpus
    "prior_count": None,  # sample frequency of each class
}


if __name__ == '__main__':
    pretrained_model_name_or_path = "E:/DATA/bert-model/00_pytorch/ernie-tiny"  # a bert-like model
    model_config["pretrained_model_name_or_path"] = pretrained_model_name_or_path

    ### convert the pytorch model to onnx format
    t11_pytorch_model_to_onnx()
    ### test the onnx model
    t111_tet_onnx()
    ### convert the onnx model to tensorflow
    t12_onnx_to_tensorflow()
    ### test the saved_model
    t121_tet_tensorflow()
    save_json(model_config, os.path.join(model_config["model_save_path"], "model_config.json"))
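With the SavedModel written to model_save_path/tensorflow, the remaining step of the stated goal is TF Serving. TF Serving expects each model directory to contain numeric version subdirectories, so the exported files usually have to end up under something like tc_model/1/. One common setup (the model name tc_model, the port, and the mount paths are assumptions, not outputs of the script above):

docker run -p 8501:8501 --mount type=bind,source=$PWD/model_save_path/tensorflow,target=/models/tc_model/1 -e MODEL_NAME=tc_model -t tensorflow/serving

Once the container is up, the REST endpoint can be queried; the input names must match the serving_default signature shown in the log (input_ids, attention_mask, token_type_ids). A hypothetical client sketch:

# Hypothetical REST client for the served model; the URL and model name
# match the docker command above and are assumptions, not part of the script.
import requests
from transformers import BertTokenizerFast

tokenizer = BertTokenizerFast.from_pretrained("E:/DATA/bert-model/00_pytorch/ernie-tiny")
tokens = tokenizer.encode_plus("macropodus", max_length=128, truncation=True)
payload = {"inputs": {name: [values] for name, values in tokens.items()}}  # batch of 1
resp = requests.post("http://localhost:8501/v1/models/tc_model:predict", json=payload)
print(resp.json())  # expect the 7 sigmoid scores in the JSON reply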