Preprocessing code for building the Case Western Reserve University (CWRU) bearing dataset.
Improved from open-source code.
import os
from scipy.io import loadmat
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from datasets.SequenceDatasets import dataset
from datasets.sequence_aug import *
from tqdm import tqdm


def get_files(root, N):
    '''
    Collect the signal windows and labels used to build the final training and test sets.
    root: the location of the data set
    N: list of working-condition keys into `dataname` (0-3, one per motor speed)
    '''
    data = []
    lab = []
    for k in range(len(N)):
        for n in tqdm(range(len(dataname[N[k]]))):
            if n == 0:
                # The first file of each condition is the healthy recording,
                # stored under "Normal Baseline Data".
                path1 = os.path.join(root, datasetname[3], dataname[N[k]][n]).replace("\\", "/")
            else:
                path1 = os.path.join(root, datasetname[0], dataname[N[k]][n]).replace("\\", "/")
            data1, lab1 = data_load(path1, dataname[N[k]][n], label=label[n])
            data += data1
            lab += lab1
    return [data, lab]


def data_load(filename, axisname, label):
    '''
    Slice one .mat recording into non-overlapping windows of signal_size samples.
    filename: data location
    axisname: file name, used to build the channel key ----> "_DE_time", "_FE_time", "_BA_time"
    '''
    datanumber = axisname.split(".")
    # Keys inside the .mat files are zero-padded to three digits, e.g. "X097_DE_time".
    if int(datanumber[0]) < 100:
        realaxis = "X0" + datanumber[0] + axis[0]
    else:
        realaxis = "X" + datanumber[0] + axis[0]
    fl = loadmat(filename)[realaxis]
    data = []
    lab = []
    start, end = 0, signal_size
    while end <= fl.shape[0]:
        data.append(fl[start:end])
        lab.append(label)
        start += signal_size
        end += signal_size
    return data, lab


def data_split(data_dir, transfer_task, normlizetype="0-1", transfer_learning=True):
    source_N = transfer_task[0]
    target_N = transfer_task[1]
    data_transforms = {
        'train': Compose([
            Reshape(),
            Normalize(normlizetype),
            # RandomAddGaussian(),
            # RandomScale(),
            # RandomStretch(),
            # RandomCrop(),
            Retype(),
            # Scale(1)
        ]),
        'val': Compose([
            Reshape(),
            Normalize(normlizetype),
            Retype(),
            # Scale(1)
        ])
    }
    if transfer_learning:
        # Source train/val split.
        list_data = get_files(data_dir, source_N)
        data_pd = pd.DataFrame({"data": list_data[0], "label": list_data[1]})
        train_pd, val_pd = train_test_split(data_pd, test_size=0.2, random_state=40, stratify=data_pd["label"])
        source_train = dataset(list_data=train_pd, transform=data_transforms['train'])
        source_val = dataset(list_data=val_pd, transform=data_transforms['val'])
        # Target train/val split.
        list_data = get_files(data_dir, target_N)
        data_pd = pd.DataFrame({"data": list_data[0], "label": list_data[1]})
        train_pd, val_pd = train_test_split(data_pd, test_size=0.2, random_state=40, stratify=data_pd["label"])
        target_train = dataset(list_data=train_pd, transform=data_transforms['train'])
        target_val = dataset(list_data=val_pd, transform=data_transforms['val'])
        return source_train, source_val  # , target_train, target_val
    else:
        # Source-only split: 20% held out as test, the remaining 80% halved
        # into train and validation (40/40/20 overall).
        list_data = get_files(data_dir, source_N)
        data_pd = pd.DataFrame({"data": list_data[0], "label": list_data[1]})
        trval_pd, test_pd = train_test_split(data_pd, test_size=0.2, random_state=40)  # , stratify=data_pd["label"]
        train_pd, val_pd = train_test_split(trval_pd, test_size=0.5, random_state=40)
        xtrain = train_pd['data'].values
        ytrain = train_pd['label'].values
        xval = val_pd['data'].values
        yval = val_pd['label'].values
        # Fixed: the original read the test arrays from val_pd, so the test
        # set was a duplicate of the validation set.
        xtest = test_pd['data'].values
        ytest = test_pd['label'].values
        # Earlier variant (commented out in the original): wrap the splits in
        # dataset objects and use the target condition as the test set.
        # source_train = dataset(list_data=train_pd, transform=data_transforms['train'])
        # source_val = dataset(list_data=val_pd, transform=data_transforms['val'])
        # list_data = get_files(data_dir, target_N)
        # data_pd = pd.DataFrame({"data": list_data[0], "label": list_data[1]})
        # xtest = data_pd['data'].values
        # ytest = data_pd['label'].values
        # target_val = dataset(list_data=data_pd, transform=data_transforms['val'])
        # Note: data_transforms is built but never applied in this branch, so
        # the returned windows are raw, un-normalized amplitudes.
        return xtrain, ytrain, xval, yval, xtest, ytest  # source_train, source_val, target_val


if __name__ == '__main__':
    # Digital data was collected at 12,000 samples per second.
    # These names are read as module-level globals by get_files/data_load above.
    signal_size = 1024
    # One file list per motor speed; the first file of each list is the
    # healthy baseline, the rest are the nine fault classes.
    dataname = {0: ["97.mat", "105.mat", "118.mat", "130.mat", "169.mat", "185.mat", "197.mat", "209.mat", "222.mat", "234.mat"],  # 1797 rpm
                1: ["98.mat", "106.mat", "119.mat", "131.mat", "170.mat", "186.mat", "198.mat", "210.mat", "223.mat", "235.mat"],  # 1772 rpm
                2: ["99.mat", "107.mat", "120.mat", "132.mat", "171.mat", "187.mat", "199.mat", "211.mat", "224.mat", "236.mat"],  # 1750 rpm
                3: ["100.mat", "108.mat", "121.mat", "133.mat", "172.mat", "188.mat", "200.mat", "212.mat", "225.mat", "237.mat"]}  # 1730 rpm
    datasetname = ["12k Drive End Bearing Fault Data", "12k Fan End Bearing Fault Data",
                   "48k Drive End Bearing Fault Data", "Normal Baseline Data"]
    axis = ["_DE_time", "_FE_time", "_BA_time"]
    label = [i for i in range(0, 10)]  # ten classes: healthy + nine faults

    data_dir = '../cwru'
    output_dir = '../../data/CWRU'
    transfer_task = [[0], [3]]
    normlizetype = 'mean-std'  # fixed: was 'mean - std', which matches no Normalize branch

    X_train, y_train, X_val, y_val, X_test, y_test = data_split(
        data_dir, transfer_task, normlizetype, transfer_learning=False)
    print(X_train.shape)  # quick sanity check on the number of windows

    os.makedirs(output_dir, exist_ok=True)  # added: torch.save needs the directory to exist

    # Stack the (signal_size, 1) windows into a (N, signal_size, 1) tensor,
    # then permute to the (N, channels, length) layout expected downstream.
    dat_dict = dict()
    dat_dict["samples"] = torch.tensor([item for item in X_train])
    dat_dict["samples"] = dat_dict["samples"].permute(0, 2, 1)
    dat_dict["labels"] = torch.from_numpy(y_train)
    torch.save(dat_dict, os.path.join(output_dir, "train.pt"))

    dat_dict = dict()
    dat_dict["samples"] = torch.tensor([item for item in X_val])
    dat_dict["samples"] = dat_dict["samples"].permute(0, 2, 1)
    dat_dict["labels"] = torch.from_numpy(y_val)
    torch.save(dat_dict, os.path.join(output_dir, "val.pt"))

    dat_dict = dict()
    dat_dict["samples"] = torch.tensor([item for item in X_test])
    dat_dict["samples"] = dat_dict["samples"].permute(0, 2, 1)
    dat_dict["labels"] = torch.from_numpy(y_test)
    torch.save(dat_dict, os.path.join(output_dir, "test.pt"))
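The script imports `dataset` from `datasets.SequenceDatasets` and the transforms (`Compose`, `Reshape`, `Normalize`, `Retype`) from `datasets.sequence_aug`, neither of which is shown above. If you do not have the original open-source repo, the minimal stand-ins below are enough to run the script. They are a sketch reconstructed purely from how the names are called above; the actual implementations in that repo may differ.

# Stand-ins for datasets/sequence_aug.py and datasets/SequenceDatasets.py.
# Reconstructed from call sites only; assumptions, not the repo's real code.
import numpy as np
from torch.utils.data import Dataset


class Compose:
    """Apply a list of transforms to one window, in order."""
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, seq):
        for t in self.transforms:
            seq = t(seq)
        return seq


class Reshape:
    """Turn a (signal_size, 1) window into (1, signal_size)."""
    def __call__(self, seq):
        return seq.transpose()


class Normalize:
    """Per-window scaling; the type string must match one branch exactly."""
    def __init__(self, type="0-1"):
        self.type = type

    def __call__(self, seq):
        if self.type == "0-1":
            return (seq - seq.min()) / (seq.max() - seq.min())
        elif self.type == "1-1":
            return 2 * (seq - seq.min()) / (seq.max() - seq.min()) - 1
        elif self.type == "mean-std":
            return (seq - seq.mean()) / seq.std()
        raise NameError('This normalization is not included!')


class Retype:
    """Cast to float32, the dtype PyTorch models usually expect."""
    def __call__(self, seq):
        return seq.astype(np.float32)


class dataset(Dataset):
    """Pair each stored window with its label, applying the transform lazily."""
    def __init__(self, list_data, transform=None):
        self.seq_data = list_data['data'].tolist()
        self.labels = list_data['label'].tolist()
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        seq = self.seq_data[idx]
        if self.transform is not None:
            seq = self.transform(seq)
        return seq, self.labels[idx]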
Data link: https://pan.baidu.com/s/1ZKs3Ux_apfhyBL3RrpiEPQ
Extraction code: 2f9j
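After downloading the .mat files from the link above and running the script, you can sanity-check the exported splits. The path below assumes the output_dir used in the script:

import torch

# Load one exported split and confirm the layout the script produced.
train = torch.load("../../data/CWRU/train.pt")
print(train["samples"].shape)    # (num_windows, 1, 1024): N windows, 1 channel, signal_size samples
print(train["labels"].shape)     # (num_windows,)
print(train["labels"].unique())  # class ids 0-9: healthy + nine fault types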