当前位置:   article > 正文

PaddleOCR自制身份证数据集训练用于身份证识别

身份证数据集
仅学习交流

其中中文字体为方正黑体(方正黑体.ttf),身份证号字体为 OCR-B 10 BT.ttf。
add_txt() 中的 size(字体大小)以及 draw_x、draw_y(文字坐标)需要根据自己的模板图片进行调整。

import os
import cv2
import random
import numpy as np
from tqdm import tqdm
from PIL import Image, ImageDraw, ImageFont


def mkdir(path):
    """
    Create directory ``path`` (including missing parents) if it is absent.

    ``exist_ok=True`` replaces the separate existence check, so a directory
    that appears between the check and the creation no longer raises.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)


class Person:
    """
    Container for the text fields printed on one generated ID card.

    Every field except ``public`` is stored as a one-element tuple holding
    ``str(value)``; ``public`` is stored as a plain ``str``. Code that reads
    these attributes (e.g. ``person.month[0]``) relies on that shape.
    """

    def __init__(self, name, sex, national, years, month, day, address1, address2, id_card, public):
        # One-element tuples, written explicitly.
        self.name = (str(name),)
        self.sex = (str(sex),)
        self.national = (str(national),)
        self.years = (str(years),)
        self.month = (str(month),)
        self.day = (str(day),)
        self.address1 = (str(address1),)
        self.address2 = (str(address2),)
        self.id_card = (str(id_card),)
        # The only field kept as a bare string.
        self.public = str(public)
        
        
def first_name():
    """
    Return a random surname.

    Reads every line of ``ROOT + 'first_name.txt'`` (trailing newline removed)
    and returns one line chosen uniformly at random. The original note
    describes the file as the "Hundred Family Surnames" list.
    """
    with open(ROOT + 'first_name.txt', 'r', encoding='utf-8') as surname_file:
        surnames = [row.rstrip('\n') for row in surname_file]

    return surnames[random.randrange(len(surnames))]


def name():
    """
    Return one random entry from ``ROOT + 'name.txt'``.

    Each line of the file (trailing newline removed) is one candidate. The
    original note says the file follows the Chinese character list shipped
    with PaddleOCR for recognition.
    """
    candidates = []
    with open(ROOT + 'name.txt', 'r', encoding='utf-8') as char_file:
        for raw in char_file:
            candidates.append(raw.rstrip('\n'))

    pick = random.randrange(len(candidates))
    return candidates[pick]


def GBK2312():
    """
    Return one random Chinese character decoded from a two-byte GB2312 code.

    Per the original note, this can be used when every character should be
    fully random.
    """
    lead_byte = random.randint(0xb0, 0xf7)
    # Original note: the last five characters of area 55 are garbled, so the
    # second-byte range is cut short at 0xf9 for simplicity.
    trail_byte = random.randint(0xa1, 0xf9)
    return bytes((lead_byte, trail_byte)).decode('gb2312')


def second_name():
    """
    Return the optional middle part of a generated name.

    A roll in 0..30 decides the result: 0-14 gives a random character from
    ``name()``, 15-28 gives the empty string (no middle part), and 29-30
    gives the middle dot ``'\u00B7'`` that the original note associates with
    Xinjiang names.
    """
    # Drawn up front on every call, even when the roll discards it.
    middle_char = name()
    roll = random.randint(0, 30)

    if roll < 15:
        return middle_char
    if roll < 29:
        return ''
    return '\u00B7'


def last_name():
    """
    Return the final character of a generated name by delegating to ``name()``.
    """
    final_char = name()
    return final_char


def create_name():
    """
    Build a random full name: surname, optional middle part, final character.
    """
    # The tuple is evaluated left to right, so the helpers run in name order.
    parts = (first_name(), second_name(), last_name())
    return ''.join(parts)


def sex_word():
    """
    Return '男' or '女', each with equal probability.
    """
    coin = random.randrange(2)
    return '女' if coin else '男'


def national_name():
    """
    Return a random ethnicity string.

    Reads every line of ``ROOT + 'nation.txt'`` (trailing newline removed)
    and returns one line chosen uniformly at random.
    """
    with open(ROOT + 'nation.txt', 'r', encoding='utf-8') as nation_file:
        nations = [entry.rstrip('\n') for entry in nation_file]

    chosen = random.randrange(len(nations))
    return nations[chosen]


def address_line1():
    """
    Return the first address line: a random prefix padded to 11 characters.

    One line is picked at random from ``ROOT + 'address.txt'`` (the original
    note describes it as province/city data that may be customised). If the
    prefix is shorter than 11 characters it is padded with random characters
    from ``name()``; longer prefixes are returned unchanged.

    The index range now follows the real number of lines in the file. The
    previous hard-coded upper bound of 327 raised IndexError for shorter
    files and ignored any lines after the 328th.

    Raises:
        IndexError: If ``address.txt`` contains no lines.
    """
    with open(ROOT + 'address.txt', 'r', encoding='utf-8') as f:
        prefixes = [line.rstrip('\n') for line in f]

    prefix = random.choice(prefixes)
    # The first address line holds 11 characters; a negative count pads nothing.
    padding = 11 - len(prefix)
    return prefix + ''.join(name() for _ in range(padding))


def address_line2():
    """
    Return the second address line: 5 to 7 random characters from ``name()``.
    """
    char_count = random.randint(5, 7)
    return ''.join(name() for _ in range(char_count))


def random_id_card():
    """
    Return a random 18-character ID number string.

    The first 17 characters are random digits. The last character is 'X'
    when a roll in 0..10 equals 10, otherwise another random digit. It is
    not computed as a checksum of the preceding digits.
    """
    body = [str(random.randint(0, 9)) for _ in range(17)]

    if random.randint(0, 10) == 10:
        tail = 'X'
    else:
        tail = str(random.randint(0, 9))

    return ''.join(body) + tail


def public():
    """
    Return a random issuing-authority string for the national-emblem side.

    The result is 3 to 6 random characters from ``name()`` followed by the
    fixed suffix '公安局'.
    """
    prefix_len = random.randint(3, 6)
    prefix = ''.join(name() for _ in range(prefix_len))
    return prefix + '公安局'


def to_str(per):
    """
    Join the string items of ``per`` (e.g. a tuple of strings) into one str.

    A plain string passed in is returned with the same content.
    """
    return ''.join(per)


# Text-drawing helper
def add_txt(image, size, draw_x, draw_y, txt, Font='方正黑体.ttf'):
    """
    Draw ``txt`` in black onto ``image`` and return that same image object.

    Args:
        image: PIL image to draw on.
        size: Font size passed to ``ImageFont.truetype``.
        draw_x, draw_y: Position passed to ``ImageDraw.text``.
        txt: Text to draw.
        Font: Font file name, loaded from ``ROOT + 'IDTemplate/'``.
    """
    font_path = ROOT + 'IDTemplate/%s' % Font
    pen_font = ImageFont.truetype(font_path, size)
    ImageDraw.Draw(image).text((draw_x, draw_y), txt, font=pen_font, fill=(0, 0, 0))

    return image
 

def draw_txt(ori_image, img):
    """
    Composite the dark (text) pixels of ``img`` onto ``ori_image``.

    ``img`` is converted from RGB to BGR, then to grayscale, blurred and
    thresholded at 200. Pixels above the threshold are taken from
    ``ori_image``; all others are taken from ``img``.

    Args:
        ori_image: BGR template image (as loaded by ``cv2.imread``).
        img: RGB image carrying the drawn text; presumably the same size as
            ``ori_image`` (TODO confirm against callers).

    Returns:
        The combined BGR image.
    """
    text_bgr = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    # Gaussian blur on the grayscale image softens the text edges.
    blurred = cv2.GaussianBlur(cv2.cvtColor(text_bgr, cv2.COLOR_BGR2GRAY), (3, 3), 0)
    # Binarise: 255 where the template should show through, 0 on the text.
    _, keep_template = cv2.threshold(blurred, 200, 255, cv2.THRESH_BINARY)
    keep_text = cv2.bitwise_not(keep_template)
    # Text layer: only the text pixels of the drawn image survive.
    text_layer = cv2.bitwise_and(text_bgr, text_bgr, mask=keep_text)
    # Template layer: everything except the text pixels survives.
    template_layer = cv2.bitwise_and(ori_image, ori_image, mask=keep_template)

    # The two layers do not overlap, so adding them merges them directly.
    return cv2.add(text_layer, template_layer)

# Photo side
def make_maskA(person, nums, template_path, output_path):
    """
    Render the photo side of one ID card and write it as ``A_<nums>.jpg``.

    A template ``IDA1.jpg`` or ``IDA2.jpg`` is picked at random from
    ``template_path``. The person's fields are drawn on it at fixed
    coordinates, and the result is saved under ``output_path``.

    Changes from the earlier version: the never-read white ``mask_image``
    array is no longer allocated, an unreadable template raises a clear
    error instead of failing later inside OpenCV, and month/day are read
    through ``to_str`` instead of indexing ``[0]``.

    Args:
        person: Object exposing name, sex, national, years, month, day,
            address1, address2 and id_card, each readable via ``to_str``.
        nums: Sample identifier used in the output file name.
        template_path: Directory holding the ``IDA*.jpg`` templates.
        output_path: Directory the rendered image is written to.

    Raises:
        FileNotFoundError: If the chosen template cannot be read.
    """
    template_file = f'{template_path}/IDA{random.randint(1, 2)}.jpg'
    ori_image = cv2.imread(template_file)
    if ori_image is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError('template image missing or unreadable: %s' % template_file)

    # Text is drawn with PIL; per the original note, OpenCV garbles Chinese text.
    img = Image.fromarray(cv2.cvtColor(ori_image, cv2.COLOR_BGR2RGB))
    img = add_txt(img, 58, 210, 95, to_str(person.name))
    img = add_txt(img, 50, 210, 205, to_str(person.sex))
    img = add_txt(img, 50, 490, 205, to_str(person.national))
    img = add_txt(img, 50, 210, 308, to_str(person.years))

    # Two-digit values start further left than one-digit values.
    month = to_str(person.month)
    if int(month) > 9:
        img = add_txt(img, 50, 410, 308, month)
    else:
        img = add_txt(img, 50, 425, 308, month)

    day = to_str(person.day)
    if int(day) > 9:
        img = add_txt(img, 50, 535, 308, day)
    else:
        # NOTE(review): size 52 here vs. 50 for the other date parts; kept
        # as in the original, confirm whether it is intentional.
        img = add_txt(img, 52, 550, 308, day)

    img = add_txt(img, 50, 210, 415, to_str(person.address1))
    img = add_txt(img, 50, 210, 485, to_str(person.address2))
    img = add_txt(img, 58, 420, 682, to_str(person.id_card), 'OCR-B 10 BT.ttf')

    final = draw_txt(ori_image, img)
    cv2.imwrite(f'{output_path}/A_{nums}.jpg', final)
    
# National-emblem side
def make_maskB(person, nums, template_path, output_path):
    """
    Render the national-emblem side of one ID card as ``B_<nums>.jpg``.

    A template ``IDB1.jpg`` or ``IDB2.jpg`` is picked at random from
    ``template_path``. The issuing authority and a validity range are drawn
    on it, and the result is saved under ``output_path``.

    The validity range is ``YYYY.MM.DD-YYYY.MM.DD``. It starts at the
    person's years/month/day fields and ends a random 10 to 30 years later
    on the same month and day.

    Changes from the earlier version: the never-read white ``mask_image``
    array is no longer allocated, an unreadable template raises a clear
    error instead of failing later inside OpenCV, and the date fields are
    read through ``to_str`` instead of indexing ``[0]``.

    Args:
        person: Object exposing public, years, month and day, each readable
            via ``to_str``.
        nums: Sample identifier used in the output file name.
        template_path: Directory holding the ``IDB*.jpg`` templates.
        output_path: Directory the rendered image is written to.

    Raises:
        FileNotFoundError: If the chosen template cannot be read.
    """
    template_file = f'{template_path}/IDB{random.randint(1, 2)}.jpg'
    ori_image = cv2.imread(template_file)
    if ori_image is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError('template image missing or unreadable: %s' % template_file)

    img = Image.fromarray(cv2.cvtColor(ori_image, cv2.COLOR_BGR2RGB))
    img = add_txt(img, 48, 515, 583, to_str(person.public))

    years = to_str(person.years)
    # Month and day are always printed with two digits on this side.
    month = to_str(person.month).zfill(2)
    day = to_str(person.day).zfill(2)
    expiry_year = int(years) + random.randint(10, 30)

    date = f'{years}.{month}.{day}-{expiry_year}.{month}.{day}'
    img = add_txt(img, 48, 515, 688, date)

    final = draw_txt(ori_image, img)
    cv2.imwrite(f'{output_path}/B_{nums}.jpg', final)
    
    
if __name__ == '__main__':
    # Local import: only this script entry point needs it.
    import calendar

    # Dataset root. The helper functions in this file read ROOT as a module
    # global, and it is only assigned here.
    ROOT = '../PaddleOCR-2.7.1/dataset/'

    template_path = ROOT + 'IDTemplate'  # template images
    output_path = ROOT + 'test'    # train or test
    mkdir(template_path)
    mkdir(output_path)

    for i in tqdm(range(0, 1000)):
        years = random.randint(1940, 2024)
        month = random.randint(1, 12)
        # Cap the day at the real length of the month so impossible dates
        # such as 2.31 are never generated.
        day = random.randint(1, calendar.monthrange(years, month)[1])

        person = Person(name=create_name(), sex=sex_word(), national=national_name(),
                        years=years, month=month, day=day,
                        address1=address_line1(), address2=address_line2(),
                        id_card=random_id_card(), public=public())

        # Zero-padded five-digit sample id shared by both sides of the card.
        sample_id = str(i).zfill(5)
        make_maskA(person, sample_id, template_path, output_path)
        make_maskB(person, sample_id, template_path, output_path)
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
  • 71
  • 72
  • 73
  • 74
  • 75
  • 76
  • 77
  • 78
  • 79
  • 80
  • 81
  • 82
  • 83
  • 84
  • 85
  • 86
  • 87
  • 88
  • 89
  • 90
  • 91
  • 92
  • 93
  • 94
  • 95
  • 96
  • 97
  • 98
  • 99
  • 100
  • 101
  • 102
  • 103
  • 104
  • 105
  • 106
  • 107
  • 108
  • 109
  • 110
  • 111
  • 112
  • 113
  • 114
  • 115
  • 116
  • 117
  • 118
  • 119
  • 120
  • 121
  • 122
  • 123
  • 124
  • 125
  • 126
  • 127
  • 128
  • 129
  • 130
  • 131
  • 132
  • 133
  • 134
  • 135
  • 136
  • 137
  • 138
  • 139
  • 140
  • 141
  • 142
  • 143
  • 144
  • 145
  • 146
  • 147
  • 148
  • 149
  • 150
  • 151
  • 152
  • 153
  • 154
  • 155
  • 156
  • 157
  • 158
  • 159
  • 160
  • 161
  • 162
  • 163
  • 164
  • 165
  • 166
  • 167
  • 168
  • 169
  • 170
  • 171
  • 172
  • 173
  • 174
  • 175
  • 176
  • 177
  • 178
  • 179
  • 180
  • 181
  • 182
  • 183
  • 184
  • 185
  • 186
  • 187
  • 188
  • 189
  • 190
  • 191
  • 192
  • 193
  • 194
  • 195
  • 196
  • 197
  • 198
  • 199
  • 200
  • 201
  • 202
  • 203
  • 204
  • 205
  • 206
  • 207
  • 208
  • 209
  • 210
  • 211
  • 212
  • 213
  • 214
  • 215
  • 216
  • 217
  • 218
  • 219
  • 220
  • 221
  • 222
  • 223
  • 224
  • 225
  • 226
  • 227
  • 228
  • 229
  • 230
  • 231
  • 232
  • 233
  • 234
  • 235
  • 236
  • 237
  • 238
  • 239
  • 240
  • 241
  • 242
  • 243
  • 244
  • 245
  • 246
  • 247
  • 248
  • 249
  • 250
  • 251
  • 252
  • 253
  • 254
  • 255
  • 256
  • 257
  • 258
  • 259
  • 260
  • 261
  • 262
  • 263
  • 264
  • 265
  • 266
  • 267
  • 268
  • 269
  • 270
  • 271
  • 272
  • 273
  • 274
  • 275
  • 276
  • 277
  • 278
  • 279
  • 280
  • 281
  • 282
  • 283
  • 284
  • 285
  • 286
  • 287
  • 288
参考链接

https://aistudio.baidu.com/projectdetail/2338420

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/煮酒与君饮/article/detail/754279
推荐阅读
相关标签
  

闽ICP备14008679号