当前位置:   article > 正文

制作自己的图片数据集(附代码)_收集图片形成数据集

收集图片形成数据集
自从入坑深度学习,一直都是用现有的数据集训练网络。今天想制作自己的数据集,因此把收集到的图片整理成数据集。

我的图片是人眼睛的图片,平均每个人有40张。命名规则是:同一个人的图片名称前缀相同,后面的编号按顺序递增。全部图片放在同一个文件夹下,并有一个txt文档列出了全部图片。
目标:将图片分成3大部分,按照7:2:1的比例,分别作为训练集、验证集以及测试集,并实现一个人一个文件夹,文件夹名称是此人图片名称中一样的部分。
话不多说,先上代码~ 好东西要大家分享!

实现将图片进行比例分割,并生成相应的txt,并提取出名称

import os
import numpy as np

# Input listing: one image per line, tab-separated, with the image path in the
# first column. Only the first column is used by this script.
file_path = r'E:\datasets\DataPrePro\12.04\box.txt'
# Output listings, one "<image name>\t<person label>" line per image.
save_train_path = r'E:\datasets\DataPrePro\12.04\label_train.txt'
save_valid_path = r'E:\datasets\DataPrePro\12.04\label_valid.txt'
save_test_path = r'E:\datasets\DataPrePro\12.04\label_test.txt'

# NOTE: the four lists below are never filled by this script; they are kept
# only so the module-level names of the original script still exist.
all_person_labels = []
person_dict = {}        # person label -> list of that person's image names
person_dict_train = []
person_dict_valid = []
person_dict_test = []
label_dict = {}         # person label -> running integer index (first-seen order)

# Group every image name by person. The person label is the part of the file
# name in front of the first underscore.
with open(file_path, 'r') as fp:
    for raw_line in fp:
        fields = raw_line.strip().split('\t')
        name = fields[0].split('/')[-1]
        if not name:
            # Blank line: skip it instead of registering an empty-name person.
            continue
        label = name.split('_')[0]
        person_dict.setdefault(label, []).append(name)
        if label not in label_dict:
            label_dict[label] = len(label_dict)
print(label_dict)
print(label_dict.values())


def _write_split(handle, img_names, label):
    """Write one "<image name>\\t<label>" line per image to an open text file."""
    for img_name in img_names:
        handle.write('%s\t%s\n' % (img_name, label))


# For each person: shuffle the images, then split them 7:2:1 into the
# train / validation / test listings. Whatever is left after the train and
# validation shares (including rounding remainders) goes to the test set.
with open(save_train_path, 'w') as strp, \
        open(save_valid_path, 'w') as svp, \
        open(save_test_path, 'w') as step:
    for person, names in person_dict.items():
        num_img = len(names)
        num_train = int(num_img * 0.7)
        num_valid = int(num_img * 0.2)
        shuffled = np.array(names)
        # Unseeded shuffle: the split differs from run to run.
        np.random.shuffle(shuffled)
        _write_split(strp, shuffled[:num_train], person)
        _write_split(svp, shuffled[num_train:num_train + num_valid], person)
        _write_split(step, shuffled[num_train + num_valid:], person)
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68

下面的代码在此基础上增加了一步:按照txt文档中的划分结果,将图片复制到相应的文件夹中:

import os
import numpy as np
import shutil
# Input listing: one image per line, tab-separated, with the image path in the
# first column. Only the first column is used by this script.
file_path = r'E:\datasets\DataPrePro\12.04\box.txt'
# Folder that holds all source images.
path = r'E:\datasets\DataPrePro\12.04\imgs'
# Output listings, one "<image name>\t<person label>" line per image.
save_train_path = r'E:\datasets\DataPrePro\12.04\label_train.txt'
save_valid_path = r'E:\datasets\DataPrePro\12.04\label_valid.txt'
save_test_path = r'E:\datasets\DataPrePro\12.04\label_test.txt'

# Destination folders the images of each split are copied into.
train_path = r'E:\datasets\DataPrePro\12.04\train'
valid_path = r'E:\datasets\DataPrePro\12.04\valid'
test_path = r'E:\datasets\DataPrePro\12.04\test'
# NOTE: the four lists below are never filled by this script; they are kept
# only so the module-level names of the original script still exist.
all_person_labels = []
person_dict = {}        # person label -> list of that person's image names
person_dict_train = []
person_dict_valid = []
person_dict_test = []
label_dict = {}         # person label -> running integer index (first-seen order)


# Group every image name by person. The person label is the part of the file
# name in front of the first underscore.
with open(file_path, 'r') as fp:
    for raw_line in fp:
        fields = raw_line.strip().split('\t')
        name = fields[0].split('/')[-1]
        if not name:
            # Blank line: skip it instead of registering an empty-name person.
            continue
        label = name.split('_')[0]
        person_dict.setdefault(label, []).append(name)
        if label not in label_dict:
            label_dict[label] = len(label_dict)
print(label_dict)
print(label_dict.values())


# Scan the source folder once and index its files by full name and by name
# without extension. The original re-listed the folder for every image and
# used a substring test, which also copied unrelated files (for example
# "1_1.jpg" is a substring of "11_1.jpg").
source_files = {}
for file in os.listdir(path):
    if not os.path.isfile(os.path.join(path, file)):
        continue
    source_files.setdefault(file, []).append(file)
    stem = os.path.splitext(file)[0]
    if stem != file:
        source_files.setdefault(stem, []).append(file)


def _ensure_dir(folder):
    """Create *folder* if it does not exist yet, announcing the creation."""
    if not os.path.exists(folder):
        print("Create new folder:" + folder)
        os.mkdir(folder)


def _export_split(handle, img_names, label, target_dir):
    """Write the listing lines for *img_names* and copy the images.

    Each image gets one "<image name>\\t<label>" line in *handle*; every source
    file whose name (with or without extension) equals the image name is
    copied from the source folder into *target_dir*.
    """
    for img_name in img_names:
        img_name = str(img_name)
        handle.write(img_name + '\t' + label + '\n')
        for file in source_files.get(img_name, ()):
            shutil.copy(os.path.join(path, file), os.path.join(target_dir, file))


# The destination folders do not depend on the person, so create them once.
for folder in (train_path, valid_path, test_path):
    _ensure_dir(folder)

# For each person: shuffle the images, then split them 7:2:1 into the
# train / validation / test sets. Whatever is left after the train and
# validation shares (including rounding remainders) goes to the test set.
with open(save_train_path, 'w') as strp, \
        open(save_valid_path, 'w') as svp, \
        open(save_test_path, 'w') as step:
    for person, names in person_dict.items():
        num_img = len(names)
        num_train = int(num_img * 0.7)
        num_valid = int(num_img * 0.2)
        shuffled = np.array(names)
        # Unseeded shuffle: the split differs from run to run.
        np.random.shuffle(shuffled)
        _export_split(strp, shuffled[:num_train], person, train_path)
        _export_split(svp, shuffled[num_train:num_train + num_valid], person, valid_path)
        _export_split(step, shuffled[num_train + num_valid:], person, test_path)
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95

为了实现将每个人的图片放到同一个文件夹下,代码如下:

import os
import shutil

# Listings with one "<image name>\t<person label>" line per image.
train_path = r'E:\datasets\DataPrePro\12.04\label_train.txt'
valid_path = r'E:\datasets\DataPrePro\12.04\label_valid.txt'
test_path = r'E:\datasets\DataPrePro\12.04\label_test.txt'
# Folders holding the images of the train / validation / test splits.
path_01 = r'E:\datasets\DataPrePro\12.04\train'
path_02 = r'E:\datasets\DataPrePro\12.04\valid'
path_03 = r'E:\datasets\DataPrePro\12.04\test'


def _group_images_by_person(list_path, image_dir):
    """Move the images in *image_dir* into one sub-folder per person.

    The person labels are read from the second tab-separated column of
    *list_path*. A sub-folder named after each label is created inside
    *image_dir*, and every regular file whose name prefix (the text before the
    first underscore) equals a listed label is moved into that sub-folder.
    """
    labels = set()
    # "with" closes the listing; the original left the handle open.
    with open(list_path, 'r') as list_file:
        for raw_line in list_file:
            fields = raw_line.strip().split('\t')
            if len(fields) < 2:
                # Blank or malformed line: no label column to read.
                continue
            labels.add(fields[1])

    for label in labels:
        person_dir = os.path.join(image_dir, label)
        if not os.path.exists(person_dir):
            os.mkdir(person_dir)

    # One directory scan is enough. Match on the exact name prefix rather than
    # a substring, so that label "1" does not also grab "11_2.jpg" or "2_1.jpg".
    for file in os.listdir(image_dir):
        source = os.path.join(image_dir, file)
        if not os.path.isfile(source):
            continue
        label = file.split('_')[0]
        if label in labels:
            shutil.move(source, os.path.join(image_dir, label, file))


_group_images_by_person(train_path, path_01)
_group_images_by_person(valid_path, path_02)
_group_images_by_person(test_path, path_03)
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45

这是本人第一次制作数据集,如有不妥之处,希望大家多多指教,一起进步~

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/不正经/article/detail/260008
推荐阅读
相关标签
  

闽ICP备14008679号