赞
踩
我的图片是人眼图片,平均每个人约有40张。同一个人的图片文件名前缀相同(即下划线“_”之前的部分),后面的编号按顺序递增;全部图片放在同一个文件夹下,并有一个txt文档列出全部图片。
目标:将图片分成3大部分,按照7:2:1的比例,分别作为训练集、验证集以及测试集,并实现一个人一个文件夹,文件夹名称是此人图片名称中一样的部分。
话不多说,先上代码~ 好东西要大家分享!
# Script 1: split the image list into train/valid/test label files.
#
# Reads a tab-separated list file whose first column is an image path, groups
# the image names by person (the part of the file name before the first '_'),
# shuffles each person's images and writes them to three label files in a
# 7:2:1 ratio. Each output line is "<image name>\t<person label>".
import os

import numpy as np

file_path = r'E:\datasets\DataPrePro\12.04\box.txt'
save_train_path = r'E:\datasets\DataPrePro\12.04\label_train.txt'
save_valid_path = r'E:\datasets\DataPrePro\12.04\label_valid.txt'
save_test_path = r'E:\datasets\DataPrePro\12.04\label_test.txt'


def label_of(img_name):
    """Return the person label: the file-name part before the first '_'."""
    return img_name.split('_')[0]


def group_images_by_person(list_path):
    """Group the image names listed in *list_path* by person label.

    Returns a pair ``(person_dict, label_dict)``: ``person_dict`` maps each
    label to the list of its image names (in file order) and ``label_dict``
    maps each label to a running index in order of first appearance.
    """
    person_dict = {}
    label_dict = {}
    with open(list_path, 'r') as fp:
        for line in fp:
            fields = line.strip().split('\t')
            # Only the first column is used; keep the base name of the path.
            name = fields[0].split('/')[-1]
            if not name:
                # Blank lines would otherwise create a bogus '' person.
                continue
            label = label_of(name)
            person_dict.setdefault(label, []).append(name)
            # len() is evaluated before insertion, so new labels get 0, 1, 2...
            label_dict.setdefault(label, len(label_dict))
    return person_dict, label_dict


def split_names(img_names, train_ratio=0.7, valid_ratio=0.2):
    """Shuffle *img_names* and split them into (train, valid, test) lists.

    The train and valid sizes are truncated towards zero; the test split
    receives whatever is left. The input sequence is not modified.
    """
    names = list(img_names)
    np.random.shuffle(names)
    num_train = int(len(names) * train_ratio)
    num_valid = int(len(names) * valid_ratio)
    train = names[:num_train]
    valid = names[num_train:num_train + num_valid]
    test = names[num_train + num_valid:]
    return train, valid, test


def write_labels(fh, img_names):
    """Write one "<image name>\\t<label>" line per image to the open file."""
    fh.writelines(name + '\t' + label_of(name) + '\n' for name in img_names)


def main():
    """Build the three label files from the configured list file."""
    person_dict, label_dict = group_images_by_person(file_path)
    print(label_dict)
    print(label_dict.values())
    with open(save_train_path, 'w') as strp, \
            open(save_valid_path, 'w') as svp, \
            open(save_test_path, 'w') as step:
        for img_names in person_dict.values():
            img_train, img_valid, img_test = split_names(img_names)
            write_labels(strp, img_train)
            write_labels(svp, img_valid)
            write_labels(step, img_test)


if __name__ == '__main__':
    main()
# Script 2: split the image list 7:2:1 per person, write the three label
# files AND copy every image into the matching train/valid/test folder.
#
# The list file is tab-separated with the image path in the first column.
# The person label is the part of the file name before the first '_'.
import os
import shutil

import numpy as np

file_path = r'E:\datasets\DataPrePro\12.04\box.txt'
path = r'E:\datasets\DataPrePro\12.04\imgs'
save_train_path = r'E:\datasets\DataPrePro\12.04\label_train.txt'
save_valid_path = r'E:\datasets\DataPrePro\12.04\label_valid.txt'
save_test_path = r'E:\datasets\DataPrePro\12.04\label_test.txt'
train_path = r'E:\datasets\DataPrePro\12.04\train'
valid_path = r'E:\datasets\DataPrePro\12.04\valid'
test_path = r'E:\datasets\DataPrePro\12.04\test'


def label_of(img_name):
    """Return the person label: the file-name part before the first '_'."""
    return img_name.split('_')[0]


def group_images_by_person(list_path):
    """Group the image names listed in *list_path* by person label.

    Returns a pair ``(person_dict, label_dict)``: ``person_dict`` maps each
    label to the list of its image names (in file order) and ``label_dict``
    maps each label to a running index in order of first appearance.
    """
    person_dict = {}
    label_dict = {}
    with open(list_path, 'r') as fp:
        for line in fp:
            fields = line.strip().split('\t')
            # Only the first column is used; keep the base name of the path.
            name = fields[0].split('/')[-1]
            if not name:
                # Blank lines would otherwise create a bogus '' person.
                continue
            label = label_of(name)
            person_dict.setdefault(label, []).append(name)
            # len() is evaluated before insertion, so new labels get 0, 1, 2...
            label_dict.setdefault(label, len(label_dict))
    return person_dict, label_dict


def split_names(img_names, train_ratio=0.7, valid_ratio=0.2):
    """Shuffle *img_names* and split them into (train, valid, test) lists.

    The train and valid sizes are truncated towards zero; the test split
    receives whatever is left. The input sequence is not modified.
    """
    names = list(img_names)
    np.random.shuffle(names)
    num_train = int(len(names) * train_ratio)
    num_valid = int(len(names) * valid_ratio)
    train = names[:num_train]
    valid = names[num_train:num_train + num_valid]
    test = names[num_train + num_valid:]
    return train, valid, test


def write_labels(fh, img_names):
    """Write one "<image name>\\t<label>" line per image to the open file."""
    fh.writelines(name + '\t' + label_of(name) + '\n' for name in img_names)


def copy_images(img_names, src_dir, dst_dir):
    """Copy each named image from *src_dir* into *dst_dir*.

    *dst_dir* is created when it does not exist yet. Files are looked up by
    their exact name instead of scanning the whole source folder for
    substring matches, so an image such as 'A_1.jpg' can no longer drag
    'AA_1.jpg' along, and the work is linear in the number of images.
    Names that are not present in *src_dir* are reported and skipped.
    """
    if not os.path.exists(dst_dir):
        print("Create new folder:" + dst_dir)
        os.makedirs(dst_dir)
    for img_name in img_names:
        src = os.path.join(src_dir, img_name)
        if os.path.isfile(src):
            shutil.copy(src, os.path.join(dst_dir, img_name))
        else:
            print("Missing image, skipped:" + src)


def main():
    """Write the label files and populate the train/valid/test folders."""
    person_dict, label_dict = group_images_by_person(file_path)
    print(label_dict)
    print(label_dict.values())
    with open(save_train_path, 'w') as strp, \
            open(save_valid_path, 'w') as svp, \
            open(save_test_path, 'w') as step:
        for img_names in person_dict.values():
            img_train, img_valid, img_test = split_names(img_names)
            targets = (
                (img_train, strp, train_path),
                (img_valid, svp, valid_path),
                (img_test, step, test_path),
            )
            for names, label_file, out_dir in targets:
                write_labels(label_file, names)
                copy_images(names, path, out_dir)


if __name__ == '__main__':
    main()
# Script 3: inside each of the train/valid/test folders, move every image
# into a sub-folder named after its person label.
#
# The labels come from the second tab-separated column of the matching label
# file; an image belongs to a person when the part of its file name before
# the first '_' equals that label.
import os
import shutil

train_path = r'E:\datasets\DataPrePro\12.04\label_train.txt'
valid_path = r'E:\datasets\DataPrePro\12.04\label_valid.txt'
test_path = r'E:\datasets\DataPrePro\12.04\label_test.txt'
path_01 = r'E:\datasets\DataPrePro\12.04\train'
path_02 = r'E:\datasets\DataPrePro\12.04\valid'
path_03 = r'E:\datasets\DataPrePro\12.04\test'


def read_labels(label_path):
    """Return the set of person labels found in column two of *label_path*.

    Lines with fewer than two tab-separated fields (for example blank lines)
    are ignored instead of raising IndexError.
    """
    labels = set()
    with open(label_path, 'r') as f:
        for line in f:
            fields = line.strip().split('\t')
            if len(fields) >= 2:
                labels.add(fields[1])
    return labels


def sort_into_person_folders(label_path, folder):
    """Move the images in *folder* into one sub-folder per person label.

    A sub-folder is created for every label listed in *label_path*. The
    folder is listed once, and a file is moved only when the prefix before
    its first '_' is one of those labels, so label 'A' no longer captures
    'BA_0.jpg' the way a plain substring test would.
    """
    labels = read_labels(label_path)
    for label in labels:
        person_dir = os.path.join(folder, label)
        if not os.path.exists(person_dir):
            os.mkdir(person_dir)
    for file in os.listdir(folder):
        src = os.path.join(folder, file)
        if not os.path.isfile(src):
            # Skips the per-person sub-folders themselves.
            continue
        label = file.split('_')[0]
        if label in labels:
            shutil.move(src, os.path.join(folder, label, file))


def main():
    """Sort the train, valid and test folders using their label files."""
    for label_path, folder in (
        (train_path, path_01),
        (valid_path, path_02),
        (test_path, path_03),
    ):
        sort_into_person_folders(label_path, folder)


if __name__ == '__main__':
    main()
这是本人第一次制作数据集,如有不妥之处,希望大家多多指教,一起进步~
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。