YOLO数据集txt格式标签转换代码

作者：神奇cpp | 2024-07-23 10:47:31

踩

YOLO数据集txt格式标签转换代码

如题，本文提供将 labelme 标注的 json 文件和 labelimg 标注的 xml 文件转换成 YOLO txt 格式标签文件的代码；其他数据集标签处理问题，见如下博文：

作物计数方法汇总_作物计数+聚类-CSDN博客

作物计数方法之合并信息生成json标签的方法_fsc147-CSDN博客

FSC147数据集格式解析_fsc-147-CSDN博客

（1）json文件转yolo txt标签


import json
import os
 
def convert(img_size, box):
    """Convert a pixel-space corner box into a normalised YOLO box.

    Args:
        img_size: ``(width, height)`` of the image in pixels.
        box: ``(x1, y1, x2, y2)``, two opposite corners of the rectangle in
            pixels. The corners may be supplied in either order.

    Returns:
        A tuple ``(x_center, y_center, width, height)`` where the x values
        are divided by the image width and the y values by the image height.
    """
    # The two corners are not guaranteed to arrive as (top-left, bottom-right),
    # e.g. when a rectangle was drawn by dragging up/left. Order them first so
    # the width and height can never come out negative.
    x_min, x_max = min(box[0], box[2]), max(box[0], box[2])
    y_min, y_max = min(box[1], box[3]), max(box[1], box[3])

    box_w = x_max - x_min
    box_h = y_max - y_min

    x_center = x_min + box_w / 2.0
    y_center = y_min + box_h / 2.0

    return (
        x_center / img_size[0],
        y_center / img_size[1],
        box_w / img_size[0],
        box_h / img_size[1],
    )
 
 
def decode_json(json_floder_path, json_name,
                txt_dir='D:/TEST/label_yolo_test/txt/', class_ids=None):
    """Convert one labelme-style JSON annotation file into a YOLO txt label file.

    Only shapes whose ``shape_type`` is ``'rectangle'`` and whose ``label`` is
    listed in ``class_ids`` are exported; every other shape is skipped. The
    txt file is always written, so a file without matching shapes yields an
    empty label file.

    Args:
        json_floder_path: Directory that contains ``json_name``.
        json_name: File name of the JSON annotation (e.g. ``'img_001.json'``).
        txt_dir: Directory the txt label is written to; created if missing.
            Change the default to your own label output directory.
        class_ids: Mapping ``label name -> YOLO class id``. Defaults to
            ``{'cotton_flower': 0, 'potato_flower': 1}``; pass your own
            mapping to support other or additional classes.

    Raises:
        KeyError: If the JSON lacks ``imageWidth``, ``imageHeight``,
            ``shapes`` or a shape lacks ``shape_type``/``label``/``points``.
        OSError: If the JSON cannot be read or the txt cannot be written.
    """
    if class_ids is None:
        class_ids = {'cotton_flower': 0, 'potato_flower': 1}

    # Parse the JSON before touching the output, so that an unreadable or
    # malformed annotation does not leave a stray empty txt file behind.
    json_path = os.path.join(json_floder_path, json_name)
    with open(json_path, 'r', encoding='utf-8') as json_file:
        data = json.load(json_file)

    img_size = (data['imageWidth'], data['imageHeight'])

    lines = []
    for shape in data['shapes']:
        if shape['shape_type'] != 'rectangle' or shape['label'] not in class_ids:
            continue

        first, second = shape['points'][0], shape['points'][1]
        corners = (float(first[0]), float(first[1]),
                   float(second[0]), float(second[1]))
        bbox = convert(img_size, corners)
        lines.append(str(class_ids[shape['label']]) + ' '
                     + ' '.join(str(value) for value in bbox) + '\n')

    if txt_dir:
        os.makedirs(txt_dir, exist_ok=True)
    # splitext copes with any extension length, unlike slicing off 5 chars.
    txt_name = os.path.join(txt_dir, os.path.splitext(json_name)[0] + '.txt')
    with open(txt_name, 'w') as txt_file:
        txt_file.writelines(lines)
 
 
if __name__ == "__main__":

    json_floder_path = 'D:/TEST/label_yolo_test/Json'  # change to your own JSON annotation directory
    # os.listdir returns every directory entry; keep only the .json files so
    # that stray images, sub-folders or other files do not crash the run.
    json_names = [name for name in os.listdir(json_floder_path)
                  if name.lower().endswith('.json')]
    for json_name in json_names:
        decode_json(json_floder_path, json_name)

（2）xml文件转yolo txt标签（注：下方代码实际是按比例将图片和 txt 标签划分为 train/val/test 的数据集划分脚本，并非 xml 转换代码）


import os, shutil, random
from tqdm import tqdm
 
def split_img(img_path, label_path, split_list,
              output_dir=r'Cotton_flower_dataset/flower'):
    """Randomly split images and their txt labels into train/val/test folders.

    Files are copied (not moved) into ``<output_dir>/images/{train,val,test}``
    and ``<output_dir>/labels/{train,val,test}``. The label path of each image
    is derived with ``toLabelPath``.

    Args:
        img_path: Directory containing the images.
        label_path: Directory containing the txt label files.
        split_list: ``[train, val, test]`` ratios. ``train`` is taken as a
            fraction of all images; the rest is divided between val and test
            in the proportion ``val : test``.
        output_dir: Root directory of the generated dataset.

    Raises:
        OSError: If a directory cannot be created or a file cannot be copied,
            e.g. when the label of an image is missing.
    """
    train, val, test = split_list

    train_img_dir = os.path.join(output_dir, 'images', 'train')
    val_img_dir = os.path.join(output_dir, 'images', 'val')
    test_img_dir = os.path.join(output_dir, 'images', 'test')

    train_label_dir = os.path.join(output_dir, 'labels', 'train')
    val_label_dir = os.path.join(output_dir, 'labels', 'val')
    test_label_dir = os.path.join(output_dir, 'labels', 'test')

    # exist_ok=True makes re-runs safe and still creates any directory that is
    # missing; real failures (e.g. permissions) are raised instead of hidden.
    for directory in (train_img_dir, train_label_dir, val_img_dir,
                      val_label_dir, test_img_dir, test_label_dir):
        os.makedirs(directory, exist_ok=True)

    all_img_path = [os.path.join(img_path, img) for img in os.listdir(img_path)]

    train_img = random.sample(all_img_path, int(train * len(all_img_path)))
    picked = set(train_img)
    remaining = [path for path in all_img_path if path not in picked]

    # Guard against val == test == 0 (everything goes to train).
    holdout = val + test
    val_share = val / holdout if holdout else 0
    val_img = random.sample(remaining, int(val_share * len(remaining)))
    picked = set(val_img)
    test_img = [path for path in remaining if path not in picked]

    subsets = (
        ('train ', train_img, train_img_dir, train_label_dir),
        ('val ', val_img, val_img_dir, val_label_dir),
        ('test ', test_img, test_img_dir, test_label_dir),
    )
    for desc, images, img_dir, label_dir in subsets:
        for img in tqdm(images, desc=desc, ncols=80, unit='img'):
            _copy(img, img_dir)
            _copy(toLabelPath(img, label_path), label_dir)
 
 
def _copy(from_path, to_path):
    shutil.copy(from_path, to_path)
 
def toLabelPath(img_path, label_path):
    """Return the txt label path that corresponds to an image path.

    The image's file name (without directory and without extension) is
    combined with ``label_path`` and a ``.txt`` suffix.

    Args:
        img_path: Path of the image; ``/`` and ``\\`` are both accepted as
            directory separators, in any mix.
        label_path: Directory that holds the txt label files.

    Returns:
        ``os.path.join(label_path, '<image stem>.txt')``.
    """
    # Normalise separators so the result is the same for 'dir/a.jpg',
    # 'dir\\a.jpg' and the mixed form os.path.join can produce on Windows.
    file_name = img_path.replace('\\', '/').rsplit('/', 1)[-1]
    # splitext drops whatever extension the image has, not only '.jpg'.
    stem = os.path.splitext(file_name)[0]
    return os.path.join(label_path, stem + '.txt')
 
def main():
    """Run ``split_img`` on the example dataset with a 0.7/0.2/0.1 split."""
    split_img(
        img_path=r"Cotton_flower_dataset/image",  # directory holding the images
        label_path=r"Cotton_flower_dataset/label_txt",  # directory holding the txt labels
        split_list=[0.7, 0.2, 0.1],  # dataset ratios [train, val, test]
    )
 
# Script entry point: perform the split only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()

声明：本文内容由网友自发贡献，不代表【wpsshop博客】立场，版权归原作者所有，本站不承担相应法律责任。如您发现有侵权的内容，请联系我们。转载请注明出处：https://www.wpsshop.cn/w/神奇cpp/article/detail/869506