制作自己的图片数据集（附代码）_收集图片形成数据集

作者：不正经 | 2024-03-18 03:21:02

踩

收集图片形成数据集

自从入坑深度学习以来，我一直都是用现有的数据集来训练网络。今天想制作一个属于自己的数据集，于是把收集到的图片整理成了数据集。

我的图片是人眼图片，平均每个人约有40张。同一个人的图片文件名前缀相同（下划线“_”之前的部分），后面的部分按顺序递增；全部图片放在同一个文件夹下，另有一个txt文档列出了全部图片。
目标：将图片分成3大部分，按照7:2:1的比例，分别作为训练集、验证集以及测试集，并实现一个人一个文件夹，文件夹名称是此人图片名称中一样的部分。
话不多说，先上代码～好东西要大家分享！

实现将图片进行比例分割，并生成相应的txt，并提取出名称

import os
import numpy as np

# Input listing: one image per line, tab-separated, first field is the image path.
file_path = r'E:\datasets\DataPrePro\12.04\box.txt'
# Output label files, one per split; each line is "<image name>\t<person label>".
save_train_path = r'E:\datasets\DataPrePro\12.04\label_train.txt'
save_valid_path = r'E:\datasets\DataPrePro\12.04\label_valid.txt'
save_test_path = r'E:\datasets\DataPrePro\12.04\label_test.txt'


def group_images_by_person(lines):
    """Group image file names by person label.

    Args:
        lines: iterable of text lines. Only the first tab-separated field
            (the image path) is used; its last '/'-separated component is
            the image name.

    Returns:
        dict mapping each label (the part of the image name before the
        first '_') to the list of image names carrying that label, in
        input order.
    """
    person_dict = {}
    for line in lines:
        name = line.strip().split('\t')[0].split('/')[-1]
        if not name:
            # Blank lines would otherwise create a bogus '' person.
            continue
        label = name.split('_')[0]
        person_dict.setdefault(label, []).append(name)
    return person_dict


def split_names(img_names, train_ratio=0.7, valid_ratio=0.2):
    """Shuffle *img_names* and split them into train/valid/test lists.

    Args:
        img_names: sequence of image names belonging to one person.
        train_ratio: fraction of the images assigned to the training split.
        valid_ratio: fraction of the images assigned to the validation split.

    Returns:
        Tuple ``(train, valid, test)`` of lists. The train and valid sizes
        are truncated with ``int()``; the test split receives every
        remaining image.
    """
    shuffled = np.array(img_names)
    np.random.shuffle(shuffled)
    num_train = int(len(shuffled) * train_ratio)
    num_valid = int(len(shuffled) * valid_ratio)
    shuffled = shuffled.tolist()
    return (
        shuffled[:num_train],
        shuffled[num_train:num_train + num_valid],
        shuffled[num_train + num_valid:],
    )


def write_labels(fp, img_names):
    """Write one "<image name>\\t<label>\\n" line to *fp* per image name."""
    for img_name in img_names:
        label = img_name.split('_')[0]
        fp.write(img_name + '\t' + label + '\n')


def main():
    """Read the image listing and write the three per-split label files."""
    with open(file_path, 'r') as fp:
        person_dict = group_images_by_person(fp)

    # Label -> running index, numbered in order of first appearance.
    label_dict = {label: index for index, label in enumerate(person_dict)}
    print(label_dict)
    print(label_dict.values())

    # Split per person so every person appears in each split at roughly 7:2:1.
    with open(save_train_path, 'w') as strp, \
            open(save_valid_path, 'w') as svp, \
            open(save_test_path, 'w') as step:
        for img_names in person_dict.values():
            img_train, img_valid, img_test = split_names(img_names)
            write_labels(strp, img_train)
            write_labels(svp, img_valid)
            write_labels(step, img_test)


if __name__ == '__main__':
    main()

下面的代码在此基础上增加了一步：按照txt文档中的划分结果，将图片复制到相应的文件夹中：

import os
import numpy as np
import shutil
# Input listing: one image per line, tab-separated, first field is the image path.
file_path = r'E:\datasets\DataPrePro\12.04\box.txt'
# Folder holding every source image.
path = r'E:\datasets\DataPrePro\12.04\imgs'
# Output label files, one per split; each line is "<image name>\t<person label>".
save_train_path = r'E:\datasets\DataPrePro\12.04\label_train.txt'
save_valid_path = r'E:\datasets\DataPrePro\12.04\label_valid.txt'
save_test_path = r'E:\datasets\DataPrePro\12.04\label_test.txt'

# Destination folders the images of each split are copied into.
train_path = r'E:\datasets\DataPrePro\12.04\train'
valid_path = r'E:\datasets\DataPrePro\12.04\valid'
test_path = r'E:\datasets\DataPrePro\12.04\test'


def group_images_by_person(lines):
    """Group image file names by person label.

    Args:
        lines: iterable of text lines. Only the first tab-separated field
            (the image path) is used; its last '/'-separated component is
            the image name.

    Returns:
        dict mapping each label (the part of the image name before the
        first '_') to the list of image names carrying that label, in
        input order.
    """
    person_dict = {}
    for line in lines:
        name = line.strip().split('\t')[0].split('/')[-1]
        if not name:
            # Blank lines would otherwise create a bogus '' person.
            continue
        label = name.split('_')[0]
        person_dict.setdefault(label, []).append(name)
    return person_dict


def split_names(img_names, train_ratio=0.7, valid_ratio=0.2):
    """Shuffle *img_names* and split them into (train, valid, test) lists.

    The train and valid sizes are truncated with ``int()``; the test split
    receives every remaining image.
    """
    shuffled = np.array(img_names)
    np.random.shuffle(shuffled)
    num_train = int(len(shuffled) * train_ratio)
    num_valid = int(len(shuffled) * valid_ratio)
    shuffled = shuffled.tolist()
    return (
        shuffled[:num_train],
        shuffled[num_train:num_train + num_valid],
        shuffled[num_train + num_valid:],
    )


def index_image_files(folder):
    """Index the regular files of *folder* by full name and by stem.

    The folder is listed once instead of once per image. Each file is
    reachable under its full name and under its name without extension,
    so listings with or without extensions both resolve.

    Returns:
        dict mapping a lookup key to the list of matching file names.
    """
    index = {}
    for entry in os.listdir(folder):
        if not os.path.isfile(os.path.join(folder, entry)):
            continue
        index.setdefault(entry, []).append(entry)
        stem = os.path.splitext(entry)[0]
        if stem != entry:
            index.setdefault(stem, []).append(entry)
    return index


def export_split(img_names, label_fp, src_dir, dst_dir, file_index):
    """Write the label lines of one split and copy its images.

    Args:
        img_names: image names assigned to this split.
        label_fp: writable text file receiving "<name>\\t<label>\\n" lines.
        src_dir: folder the images are copied from.
        dst_dir: folder the images are copied to; created if missing.
        file_index: result of ``index_image_files(src_dir)``.
    """
    if not os.path.exists(dst_dir):
        print("Create new folder:" + dst_dir)
        os.makedirs(dst_dir)
    for img_name in img_names:
        label = img_name.split('_')[0]
        label_fp.write(img_name + '\t' + label + '\n')
        # Exact name/stem lookup: a substring test would let "p1_1" also
        # pull in "p1_10.jpg" and leak images between the splits.
        for file in file_index.get(img_name, ()):
            shutil.copy(os.path.join(src_dir, file),
                        os.path.join(dst_dir, file))


def main():
    """Split the listing 7:2:1 per person, write label files, copy images."""
    with open(file_path, 'r') as fp:
        person_dict = group_images_by_person(fp)

    # Label -> running index, numbered in order of first appearance.
    label_dict = {label: index for index, label in enumerate(person_dict)}
    print(label_dict)
    print(label_dict.values())

    file_index = index_image_files(path)
    with open(save_train_path, 'w') as strp, \
            open(save_valid_path, 'w') as svp, \
            open(save_test_path, 'w') as step:
        for img_names in person_dict.values():
            img_train, img_valid, img_test = split_names(img_names)
            export_split(img_train, strp, path, train_path, file_index)
            export_split(img_valid, svp, path, valid_path, file_index)
            export_split(img_test, step, path, test_path, file_index)


if __name__ == '__main__':
    main()

为了实现将每个人的图片放到同一个文件夹下，代码如下：

import os
import shutil

# Label files; each line is "<image name>\t<person label>".
train_path = r'E:\datasets\DataPrePro\12.04\label_train.txt'
valid_path = r'E:\datasets\DataPrePro\12.04\label_valid.txt'
test_path = r'E:\datasets\DataPrePro\12.04\label_test.txt'
# Split folders whose images get sorted into per-person sub-folders.
path_01 = r'E:\datasets\DataPrePro\12.04\train'
path_02 = r'E:\datasets\DataPrePro\12.04\valid'
path_03 = r'E:\datasets\DataPrePro\12.04\test'


def read_labels(label_file):
    """Return the distinct person labels of *label_file*, in file order.

    The label is the second tab-separated field of each line; lines with
    fewer than two fields (e.g. blank lines) are skipped.
    """
    labels = {}
    with open(label_file, 'r') as f:
        for line in f:
            fields = line.strip().split('\t')
            if len(fields) < 2:
                continue
            # A dict is used as an insertion-ordered set.
            labels[fields[1]] = None
    return list(labels)


def group_into_person_folders(label_file, split_dir):
    """Move the images in *split_dir* into one sub-folder per person.

    A sub-folder is created for every label listed in *label_file*. Each
    regular file directly inside *split_dir* whose name prefix (the part
    before the first '_') equals one of those labels is moved into the
    matching sub-folder.

    Args:
        label_file: path of the "<image name>\\t<label>" listing.
        split_dir: folder holding the images of that split.
    """
    labels = read_labels(label_file)
    for label in labels:
        os.makedirs(os.path.join(split_dir, label), exist_ok=True)

    known = set(labels)
    # One directory listing in total instead of one per label line.
    for file in os.listdir(split_dir):
        src = os.path.join(split_dir, file)
        if not os.path.isfile(src):
            continue
        # Compare the exact prefix: a substring test would file
        # "p10_001.jpg" under person "p1".
        label = file.split('_')[0]
        if label in known:
            shutil.move(src, os.path.join(split_dir, label, file))


if __name__ == '__main__':
    for label_file, split_dir in ((train_path, path_01),
                                  (valid_path, path_02),
                                  (test_path, path_03)):
        group_into_person_folders(label_file, split_dir)

这是本人第一次制作数据集，如有不妥之处，希望大家多多指教，一起进步～

声明：本文内容由网友自发贡献，不代表【wpsshop博客】立场，版权归原作者所有，本站不承担相应法律责任。如您发现有侵权的内容，请联系我们。转载请注明出处：https://www.wpsshop.cn/w/不正经/article/detail/260008