当前位置:   article > 正文

使用 sklearn 处理 wine 和 wine_quality 数据集_wine数据集处理

wine数据集处理

import pandas as pd

import numpy as np

import os

from sklearn.decomposition import PCA

from sklearn.preprocessing import StandardScaler

from sklearn.cluster import KMeans

from sklearn.svm import SVC

from sklearn.metrics import classification_report

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import StandardScaler

from sklearn.ensemble import GradientBoostingRegressor

from sklearn.model_selection import train_test_split

# Load the two raw data sets. winequality.csv is semicolon-delimited;
# both files are decoded as GBK.
wine = pd.read_csv('./data/wine.csv', encoding='gbk')
winequality = pd.read_csv('./data/winequality.csv', sep=';', encoding='gbk')

# Split each data set into a feature table and a label column.
# wine: column 0 is used as the label, every remaining column as a feature.
wine_data = wine.iloc[:, 1:]
wine_label = wine.iloc[:, 0]
print("wine数据集的数据:\n", wine_data)
print("wine数据集的标签:\n", wine_label)

# winequality: column 11 is used as the label, so the features are
# columns 0..10. The slice stop is exclusive and therefore must be 11;
# a stop of 10 would silently drop the feature stored in column 10.
winequality_data = winequality.iloc[:, :11]
winequality_label = winequality.iloc[:, 11]
print("winequality数据集的数据:\n", winequality_data)
print("winequality数据集的标签:\n", winequality_label)

# Standardise the features of each data set with StandardScaler, then
# print the overall variance and mean of the scaled values as a sanity check.
# NOTE(review): each scaler is fitted on the complete data set, i.e. before
# the train/test split performed further down, so test-set statistics
# influence the scaling -- confirm this is acceptable.
stdScale1 = StandardScaler()
wine_Scaler = stdScale1.fit_transform(wine_data)
print('标准差标准化后wine数据集数据的方差为:', np.var(wine_Scaler))
print('标准差标准化后wine数据集数据的均值为:', np.mean(wine_Scaler))

# The name stdScale1 is rebound to a fresh scaler here, so the scaler
# fitted on the wine data is no longer reachable after this point.
stdScale1 = StandardScaler()
winequality_Scaler = stdScale1.fit_transform(winequality_data)
print('标准差标准化后winequality数据集数据的方差为:', np.var(winequality_Scaler))
print('标准差标准化后winequality数据集数据的均值为:', np.mean(winequality_Scaler))


 

# Partition each scaled data set into a training set and a test set.
# 30% of the rows are held out for testing; the fixed random_state makes
# the partition reproducible from run to run.
_split_options = {'test_size': 0.3, 'random_state': 42}

# wine: scaled features plus their labels.
(wine_data_train, wine_data_test,
 wine_label_train, wine_label_test) = train_test_split(
    wine_Scaler, wine_label, **_split_options)

# winequality: scaled features plus their labels.
(winequality_data_train, winequality_data_test,
 winequality_label_train, winequality_label_test) = train_test_split(
    winequality_Scaler, winequality_label, **_split_options)

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/爱喝兽奶帝天荒/article/detail/748891
推荐阅读
相关标签
  

闽ICP备14008679号