赞
踩
在网上下载了60多幅包含西瓜和冬瓜的图像组成melon数据集,使用 EISeg 工具进行标注,然后使用 eiseg2yolov8 脚本将.json文件转换成YOLOv8支持的.txt文件,并自动生成YOLOv8支持的目录结构,包括melon.yaml文件,其内容如下:
- path: ../datasets/melon_seg # dataset root dir
- train: images/train # train images (relative to 'path')
- val: images/val # val images (relative to 'path')
- test: # test images (optional)
-
- # Classes
- names:
- 0: watermelon
- 1: wintermelon
对melon数据集进行训练的Python实现如下:最终生成的模型文件有best.pt、best.onnx、best.torchscript
- import argparse
- import colorama
- from ultralytics import YOLO
-
def parse_args():
    """Parse the command-line options of the YOLOv8 training script.

    Returns:
        argparse.Namespace carrying ``yaml`` (str), ``epochs`` (int) and
        ``task`` (either "detect" or "segment"). All three options are required.
    """
    cli = argparse.ArgumentParser(description="YOLOv8 train")

    # (flag, extra keyword arguments) in the order they are registered
    option_table = (
        ("--yaml", {"type": str, "help": "yaml file"}),
        ("--epochs", {"type": int, "help": "number of training"}),
        ("--task", {"type": str, "choices": ["detect", "segment"], "help": "specify what kind of task"}),
    )
    for flag, spec in option_table:
        cli.add_argument(flag, required=True, **spec)

    return cli.parse_args()
-
def train(task, yaml, epochs):
    """Fine-tune a pretrained YOLOv8 nano model, validate it and export it.

    Args:
        task: "detect" or "segment"; selects which pretrained checkpoint is loaded.
        yaml: dataset description file, forwarded to ``model.train`` as ``data``.
        epochs: number of training epochs.

    Raises:
        ValueError: if ``task`` is neither "detect" nor "segment".
    """
    if task == "detect":
        model = YOLO("yolov8n.pt")  # load a pretrained model
    elif task == "segment":
        model = YOLO("yolov8n-seg.pt")  # load a pretrained model
    else:
        print(colorama.Fore.RED + "Error: unsupported task:", task)
        # A bare `raise` outside an `except` block only produces
        # "RuntimeError: No active exception to reraise"; raise a descriptive error instead.
        raise ValueError(f"unsupported task: {task}")

    model.train(data=yaml, epochs=epochs, imgsz=640)  # train the model

    # Evaluates the data that was just trained on; no arguments needed,
    # the dataset and settings are remembered by the model.
    model.val()

    # Export without dynamic=True: OpenCV does not support dynamic shapes.
    model.export(format="onnx")
    # model.export(format="onnx", opset=12, simplify=True, dynamic=False, imgsz=640)
    model.export(format="torchscript")  # libtorch
-
if __name__ == "__main__":
    # Script entry point: enable colored console output, read the CLI options, run training.
    colorama.init()
    cli_args = parse_args()
    train(task=cli_args.task, yaml=cli_args.yaml, epochs=cli_args.epochs)
    print(colorama.Fore.GREEN + "====== execution completed ======")

以下是使用onnxruntime接口加载onnx文件进行实例分割的C++实现代码:
- namespace {
-
// Inference configuration shared by the helpers in this file.
constexpr bool cuda_enabled{ false }; // true: append the CUDA execution provider, false: run on CPU
constexpr int input_size[2]{ 640, 640 }; // {height,width}, input shape (1, 3, 640, 640) BCHW and output shape(s): detect:(1,6,8400); segment:(1,38,8400),(1,32,160,160)
constexpr float confidence_threshold{ 0.45f }; // confidence threshold
constexpr float iou_threshold{ 0.50f }; // iou threshold
constexpr float mask_threshold{ 0.50f }; // segment mask threshold

// String literals are arrays of const char, so the pointee must be const:
// `constexpr char*` only makes the pointer itself constant and is rejected by conforming compilers.
#ifdef _MSC_VER
constexpr const char* onnx_file{ "../../../data/best.onnx" };
constexpr const char* torchscript_file{ "../../../data/best.torchscript" };
constexpr const char* images_dir{ "../../../data/images/predict" };
constexpr const char* result_dir{ "../../../data/result" };
constexpr const char* classes_file{ "../../../data/images/labels.txt" };
#else
constexpr const char* onnx_file{ "data/best.onnx" };
constexpr const char* torchscript_file{ "data/best.torchscript" };
constexpr const char* images_dir{ "data/images/predict" };
constexpr const char* result_dir{ "data/result" };
constexpr const char* classes_file{ "data/images/labels.txt" };
#endif
-
// Reads the class names from a labels file.
//
// Each non-blank line contributes one class name: the first whitespace-delimited
// token of the line (e.g. "watermelon 0" -> "watermelon"). Blank lines are skipped
// and a trailing '\r' (CRLF files) is ignored, so the returned count matches the
// number of real entries.
//
// @param name path of the labels file
// @return class names in file order; empty if the file cannot be opened
std::vector<std::string> parse_classes_file(const char* name)
{
    std::vector<std::string> classes;

    if (name == nullptr) {
        std::cerr << "Error: fail to open classes file: (null)" << std::endl;
        return classes;
    }

    std::ifstream file(name);
    if (!file.is_open()) {
        std::cerr << "Error: fail to open classes file: " << name << std::endl;
        return classes;
    }

    constexpr const char* blanks{ " \t" };
    std::string line;
    while (std::getline(file, line)) {
        // files written on Windows keep the '\r' when read in binary-like mode elsewhere
        if (!line.empty() && line.back() == '\r')
            line.pop_back();

        const auto first = line.find_first_not_of(blanks);
        if (first == std::string::npos)
            continue; // blank line: would otherwise become an empty class name

        const auto last = line.find_first_of(blanks, first);
        const auto count = (last == std::string::npos) ? std::string::npos : last - first;
        classes.emplace_back(line.substr(first, count));
    }

    return classes; // the stream is closed by its destructor
}
-
// Lists the regular files directly inside a directory (no recursion).
//
// @param name directory to scan
// @return map from file name to the entry's path as reported by the iterator, sorted by file name
auto get_dir_images(const char* name)
{
    namespace fs = std::filesystem;

    std::map<std::string, std::string> found; // image name, image path + image name
    for (const fs::directory_entry& entry : fs::directory_iterator(name)) {
        if (!entry.is_regular_file())
            continue;

        const fs::path& file = entry.path();
        found[file.filename().string()] = file.string();
    }

    return found;
}
-
// Converts a narrow (multibyte) C string to a wide string.
//
// The conversion uses the current C locale. Unlike a fixed-size buffer, the
// result is sized from the input, so long paths are converted completely.
//
// @param str null-terminated narrow string; nullptr yields an empty result
// @return the wide-character equivalent of str
std::wstring ctow(const char* str)
{
    if (str == nullptr)
        return {};

    // First pass with a null destination only measures the required length.
    std::mbstate_t state{};
    const char* cursor = str;
    const std::size_t count = std::mbsrtowcs(nullptr, &cursor, 0, &state);

    if (count == static_cast<std::size_t>(-1)) {
        // Bytes that are not valid in the current locale: widen them one by one
        // so that the caller still receives a usable string.
        std::wstring widened;
        for (const char* p = str; *p != '\0'; ++p)
            widened.push_back(static_cast<wchar_t>(static_cast<unsigned char>(*p)));
        return widened;
    }

    std::wstring wide(count, L'\0');
    state = std::mbstate_t{};
    cursor = str;
    std::mbsrtowcs(wide.data(), &cursor, count, &state);

    return wide;
}
-
- float image_preprocess(const cv::Mat& src, cv::Mat& dst)
- {
- cv::cvtColor(src, dst, cv::COLOR_BGR2RGB);
-
- float scalex = src.cols * 1.f / input_size[1];
- float scaley = src.rows * 1.f / input_size[0];
-
- if (scalex > scaley)
- cv::resize(dst, dst, cv::Size(input_size[1], static_cast<int>(src.rows / scalex)));
- else
- cv::resize(dst, dst, cv::Size(static_cast<int>(src.cols / scaley), input_size[0]));
-
- cv::Mat tmp = cv::Mat::zeros(input_size[0], input_size[1], CV_8UC3);
- dst.copyTo(tmp(cv::Rect(0, 0, dst.cols, dst.rows)));
- dst = tmp;
-
- return (scalex > scaley) ? scalex : scaley;
- }
-
- template<typename T>
- void image_to_blob(const cv::Mat& src, T* blob)
- {
- for (auto c = 0; c < 3; ++c) {
- for (auto h = 0; h < src.rows; ++h) {
- for (auto w = 0; w < src.cols; ++w) {
- blob[c * src.rows * src.cols + h * src.cols + w] = (src.at<cv::Vec3b>(h, w)[c]) / 255.f;
- }
- }
- }
- }
-
- void get_masks(const cv::Mat& features, const cv::Mat& proto, const std::vector<int>& output1_sizes, const cv::Mat& frame, const cv::Rect box, cv::Mat& mk)
- {
- const cv::Size shape_src(frame.cols, frame.rows), shape_input(input_size[1], input_size[0]), shape_mask(output1_sizes[3], output1_sizes[2]);
-
- cv::Mat res = (features * proto).t();
- res = res.reshape(1, { shape_mask.height, shape_mask.width });
- // apply sigmoid to the mask
- cv::exp(-res, res);
- res = 1.0 / (1.0 + res);
- cv::resize(res, res, shape_input);
-
- float scalex = shape_src.width * 1.0 / shape_input.width;
- float scaley = shape_src.height * 1.0 / shape_input.height;
- cv::Mat tmp;
- if (scalex > scaley)
- cv::resize(res, tmp, cv::Size(shape_src.width, static_cast<int>(shape_input.height * scalex)));
- else
- cv::resize(res, tmp, cv::Size(static_cast<int>(shape_input.width * scaley), shape_src.height));
-
- cv::Mat dst = tmp(cv::Rect(0, 0, shape_src.width, shape_src.height));
- mk = dst(box) > mask_threshold;
- }
-
- void draw_boxes_mask(const std::vector<std::string>& classes, const std::vector<int>& ids, const std::vector<float>& confidences,
- const std::vector<cv::Rect>& boxes, const std::vector<cv::Mat>& masks, const std::string& name, cv::Mat& frame)
- {
- std::cout << "image name: " << name << ", number of detections: " << ids.size() << std::endl;
-
- std::random_device rd;
- std::mt19937 gen(rd());
- std::uniform_int_distribution<int> dis(100, 255);
- cv::Mat mk = frame.clone();
-
- std::vector<cv::Scalar> colors;
- for (auto i = 0; i < classes.size(); ++i)
- colors.emplace_back(cv::Scalar(dis(gen), dis(gen), dis(gen)));
-
- for (auto i = 0; i < ids.size(); ++i) {
- cv::rectangle(frame, boxes[i], colors[ids[i]], 2);
-
- std::string class_string = classes[ids[i]] + ' ' + std::to_string(confidences[i]).substr(0, 4);
- cv::Size text_size = cv::getTextSize(class_string, cv::FONT_HERSHEY_DUPLEX, 1, 2, 0);
- cv::Rect text_box(boxes[i].x, boxes[i].y - 40, text_size.width + 10, text_size.height + 20);
-
- cv::rectangle(frame, text_box, colors[ids[i]], cv::FILLED);
- cv::putText(frame, class_string, cv::Point(boxes[i].x + 5, boxes[i].y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 2, 0);
-
- mk(boxes[i]).setTo(colors[ids[i]], masks[i]);
- }
-
- cv::addWeighted(frame, 0.5, mk, 0.5, 0, frame);
-
- //cv::imshow("Inference", frame);
- //cv::waitKey(-1);
-
- std::string path(result_dir);
- cv::imwrite(path + "/" + name, frame);
- }
-
- void post_process_mask(const cv::Mat& output0, const cv::Mat& output1, const std::vector<int>& output1_sizes, const std::vector<std::string>& classes, const std::string& name, cv::Mat& frame)
- {
- std::vector<int> class_ids;
- std::vector<float> confidences;
- std::vector<cv::Rect> boxes;
- std::vector<std::vector<float>> masks;
-
- float scalex = frame.cols * 1.f / input_size[1]; // note: image_preprocess function
- float scaley = frame.rows * 1.f / input_size[0];
- auto scale = (scalex > scaley) ? scalex : scaley;
-
- const float* data = (float*)output0.data;
- for (auto i = 0; i < output0.rows; ++i) {
- cv::Mat scores(1, classes.size(), CV_32FC1, (float*)data + 4);
- cv::Point class_id;
- double max_class_score;
-
- cv::minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
-
- if (max_class_score > confidence_threshold) {
- confidences.emplace_back(max_class_score);
- class_ids.emplace_back(class_id.x);
- masks.emplace_back(std::vector<float>(data + 4 + classes.size(), data + output0.cols)); // 32
-
- float x = data[0];
- float y = data[1];
- float w = data[2];
- float h = data[3];
-
- int left = std::max(0, std::min(int((x - 0.5 * w) * scale), frame.cols));
- int top = std::max(0, std::min(int((y - 0.5 * h) * scale), frame.rows));
- int width = std::max(0, std::min(int(w * scale), frame.cols - left));
- int height = std::max(0, std::min(int(h * scale), frame.rows - top));
- boxes.emplace_back(cv::Rect(left, top, width, height));
- }
-
- data += output0.cols;
- }
-
- std::vector<int> nms_result;
- cv::dnn::NMSBoxes(boxes, confidences, confidence_threshold, iou_threshold, nms_result);
-
- cv::Mat proto = output1.reshape(0, { output1_sizes[1], output1_sizes[2] * output1_sizes[3] });
-
- std::vector<int> ids;
- std::vector<float> confs;
- std::vector<cv::Rect> rects;
- std::vector<cv::Mat> mks;
- for (size_t i = 0; i < nms_result.size(); ++i) {
- auto index = nms_result[i];
- ids.emplace_back(class_ids[index]);
- confs.emplace_back(confidences[index]);
- boxes[index] = boxes[index] & cv::Rect(0, 0, frame.cols, frame.rows);
-
- cv::Mat mk;
- get_masks(cv::Mat(masks[index]).t(), proto, output1_sizes, frame, boxes[index], mk);
- mks.emplace_back(mk);
- rects.emplace_back(boxes[index]);
- }
-
- draw_boxes_mask(classes, ids, confs, rects, mks, name, frame);
- }
-
- } // namespace
-
- int test_yolov8_segment_onnxruntime()
- {
- try {
- Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "Yolo");
- Ort::SessionOptions session_option;
-
- if (cuda_enabled) {
- OrtCUDAProviderOptions cuda_option;
- cuda_option.device_id = 0;
- session_option.AppendExecutionProvider_CUDA(cuda_option);
- }
-
- session_option.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
- session_option.SetIntraOpNumThreads(1);
- session_option.SetLogSeverityLevel(3);
-
- Ort::Session session(env, ctow(onnx_file).c_str(), session_option);
- Ort::AllocatorWithDefaultOptions allocator;
- std::vector<const char*> input_node_names, output_node_names;
- std::vector<std::string> input_node_names_, output_node_names_;
-
- for (auto i = 0; i < session.GetInputCount(); ++i) {
- Ort::AllocatedStringPtr input_node_name = session.GetInputNameAllocated(i, allocator);
- input_node_names_.emplace_back(input_node_name.get());
- }
-
- for (auto i = 0; i < session.GetOutputCount(); ++i) {
- Ort::AllocatedStringPtr output_node_name = session.GetOutputNameAllocated(i, allocator);
- output_node_names_.emplace_back(output_node_name.get());
- }
-
- for (auto i = 0; i < input_node_names_.size(); ++i)
- input_node_names.emplace_back(input_node_names_[i].c_str());
- for (auto i = 0; i < output_node_names_.size(); ++i)
- output_node_names.emplace_back(output_node_names_[i].c_str());
-
- std::unique_ptr<float[]> blob(new float[input_size[0] * input_size[1] * 3]);
- std::vector<int64_t> input_node_dims{ 1, 3, input_size[1], input_size[0] };
-
- auto classes = parse_classes_file(classes_file);
- if (classes.size() == 0) {
- std::cerr << "Error: fail to parse classes file: " << classes_file << std::endl;
- return -1;
- }
-
- if (!std::filesystem::exists(result_dir)) {
- std::filesystem::create_directories(result_dir);
- }
-
- for (const auto& [key, val] : get_dir_images(images_dir)) {
- cv::Mat frame = cv::imread(val, cv::IMREAD_COLOR);
- if (frame.empty()) {
- std::cerr << "Warning: unable to load image: " << val << std::endl;
- continue;
- }
-
- auto tstart = std::chrono::high_resolution_clock::now();
- cv::Mat rgb;
- image_preprocess(frame, rgb);
- image_to_blob(rgb, blob.get());
- Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
- Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob.get(), 3 * input_size[1] * input_size[0], input_node_dims.data(), input_node_dims.size());
- auto output_tensors = session.Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, input_node_names.size(), output_node_names.data(), output_node_names.size());
- if (output_tensors.size() != 2) {
- std::cerr << "Error: output must have 2 layers: " << output_tensors.size() << std::endl;
- return -1;
- }
-
- // output0
- std::vector<int64_t> output0_node_dims = output_tensors[0].GetTypeInfo().GetTensorTypeAndShapeInfo().GetShape();
- auto output0 = output_tensors[0].GetTensorMutableData<float>();
- cv::Mat data0 = cv::Mat(output0_node_dims[1], output0_node_dims[2], CV_32F, output0);
- data0 = data0.t();
-
- // output1
- std::vector<int64_t> output1_node_dims = output_tensors[1].GetTypeInfo().GetTensorTypeAndShapeInfo().GetShape();
- auto output1 = output_tensors[1].GetTensorMutableData<float>();
- std::vector<int> sizes;
- for (auto val : output1_node_dims)
- sizes.emplace_back(val);
- cv::Mat data1 = cv::Mat(sizes, CV_32F, output1);
-
- auto tend = std::chrono::high_resolution_clock::now();
- std::cout << "elapsed millisenconds: " << std::chrono::duration_cast<std::chrono::milliseconds>(tend - tstart).count() << " ms" << std::endl;
-
- post_process_mask(data0, data1, sizes, classes, key, frame);
- }
- }
- catch (const std::exception& e) {
- std::cerr << "Error: " << e.what() << std::endl;
- return -1;
- }
-
- return 0;
- }

labels.txt文件内容如下:仅2类
- watermelon 0
- wintermelon 1
说明:
1.这里使用的onnxruntime版本为1.18.0;
2.windows下,onnxruntime库在debug和release为同一套库,在debug和release下均可执行;
3.通过指定变量cuda_enabled判断走cpu还是gpu流程 ;
4.windows下,onnxruntime中有些接口参数为wchar_t*,而linux下为char*,因此在windows下需要单独做转换,这里通过ctow函数实现从char*到wchar_t*(std::wstring)的转换。
执行结果如下图所示:同样的预测图像集,与opencv dnn结果相似,它们具有相同的后处理流程;下面显示的耗时是在cpu下,gpu下仅20毫秒左右
其中一幅图像的分割结果如下图所示:
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。