Cross validation is a statistical method for assessing the performance of a classifier (or any predictive model). The basic idea is to partition the original data into groups: one part serves as the training set and the other as the validation set. The model is first trained on the training set, and the trained model is then evaluated on the validation set; the resulting score is used as the performance metric. Common cross-validation schemes include simple (hold-out) cross validation, K-fold cross validation, leave-one-out cross validation, and leave-P-out cross validation.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
iris = load_iris()
train_data, test_data, train_target, test_target = train_test_split(iris.data, iris.target, test_size=0.4, random_state=0)  # random_state is the random seed
from sklearn.model_selection import KFold
kf = KFold(n_splits=5)  # instantiate 5-fold cross validation
from sklearn.model_selection import LeaveOneOut
loo = LeaveOneOut()
from sklearn.model_selection import LeavePOut
lpo = LeavePOut(p=5)
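To make it concrete what these splitter objects produce, here is a minimal sketch (an addition for illustration, using a tiny synthetic array rather than the data in the examples below) that prints the train/validation index pairs generated by KFold:
import numpy as np
from sklearn.model_selection import KFold

X = np.arange(10).reshape(5, 2)  # 5 toy samples with 2 features each
kf = KFold(n_splits=5)
for fold, (train_index, test_index) in enumerate(kf.split(X)):
    # each fold holds out one fifth of the samples as the validation set
    print(fold, "train:", train_index, "test:", test_index)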
## Simple cross validation
Simple cross validation (hold-out validation) splits the data once using the train_test_split() function:
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split  # import the data-splitting utility
# split the data: 80% for training, 20% for validation
train_data, test_data, train_target, test_target = train_test_split(train, target, train_size=0.8, random_state=0)
# define the regressor and fit it to the training data
clf = SGDRegressor(max_iter=1000, tol=1e-3)
clf.fit(train_data, train_target)
score_train = mean_squared_error(train_target, clf.predict(train_data))
score_test = mean_squared_error(test_target, clf.predict(test_data))
print("SGDRegressor train MSE: ", score_train)
print("SGDRegressor test MSE: ", score_test, "\n")
## 5-fold cross validation
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

kf = KFold(n_splits=5)
for k, (train_index, test_index) in enumerate(kf.split(train)):
    train_data, test_data, train_target, test_target = (
        train.values[train_index], train.values[test_index],
        target[train_index], target[test_index])
    clf = SGDRegressor(max_iter=1000, tol=1e-3)
    clf.fit(train_data, train_target)
    score_train = mean_squared_error(train_target, clf.predict(train_data))
    score_test = mean_squared_error(test_target, clf.predict(test_data))
    print(k, "fold", "SGDRegressor train MSE: ", score_train)
    print(k, "fold", "SGDRegressor test MSE: ", score_test, '\n')
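The manual K-fold loop above can also be written in a single call with scikit-learn's cross_val_score helper. The sketch below is an added alternative, not part of the original example; it assumes the same train and target variables and uses the neg_mean_squared_error scoring convention (scores come back negated):
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import cross_val_score

clf = SGDRegressor(max_iter=1000, tol=1e-3)
# one score per fold; negate to recover plain MSE values
scores = cross_val_score(clf, train, target, cv=5, scoring="neg_mean_squared_error")
print("5-fold test MSE per fold:", -scores)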
## Leave-one-out cross validation
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import LeaveOneOut

loo = LeaveOneOut()
for k, (train_index, test_index) in enumerate(loo.split(train)):
    train_data, test_data, train_target, test_target = (
        train.values[train_index], train.values[test_index],
        target[train_index], target[test_index])
    clf = SGDRegressor(max_iter=1000, tol=1e-3)
    clf.fit(train_data, train_target)
    score_train = mean_squared_error(train_target, clf.predict(train_data))
    score_test = mean_squared_error(test_target, clf.predict(test_data))
    print(k, "sample", "SGDRegressor train MSE: ", score_train)
    print(k, "sample", "SGDRegressor test MSE: ", score_test)
    if k >= 9:
        break  # leave-one-out yields one split per sample, so only show the first 10
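To see why the loop above is capped at the first ten samples, the short sketch below (an addition using a toy array, not the original data) counts the splits that LeaveOneOut generates, one per sample:
import numpy as np
from sklearn.model_selection import LeaveOneOut

X = np.arange(20).reshape(10, 2)  # 10 toy samples
loo = LeaveOneOut()
# leave-one-out produces exactly one split per sample
print(loo.get_n_splits(X))  # prints 10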
## Leave-P-out cross validation
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import LeavePOut

lpo = LeavePOut(p=10)
for k, (train_index, test_index) in enumerate(lpo.split(train)):
    train_data, test_data, train_target, test_target = (
        train.values[train_index], train.values[test_index],
        target[train_index], target[test_index])
    clf = SGDRegressor(max_iter=1000, tol=1e-3)
    clf.fit(train_data, train_target)
    score_train = mean_squared_error(train_target, clf.predict(train_data))
    score_test = mean_squared_error(test_target, clf.predict(test_data))
    print(k, "10 samples", "SGDRegressor train MSE: ", score_train)
    print(k, "10 samples", "SGDRegressor test MSE: ", score_test, "\n")
    if k >= 9:
        break  # added cap: LeavePOut enumerates every combination of 10 samples, far too many to run in full
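Leave-P-out is even more expensive than leave-one-out: it enumerates every combination of p held-out samples, i.e. C(n, p) splits. The sketch below is an added illustration on a toy array (not the original data) confirming that count with get_n_splits:
from math import comb

import numpy as np
from sklearn.model_selection import LeavePOut

X = np.arange(40).reshape(20, 2)  # 20 toy samples
lpo = LeavePOut(p=2)
# LeavePOut enumerates every combination of 2 held-out samples: C(20, 2) = 190 splits
print(lpo.get_n_splits(X), comb(20, 2))  # both print 190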