当前位置:   article > 正文

xgb和gbm做回归代码sklearn

xgb和gbm做回归代码sklearn

使用 sklearn 接口的 XGBoost 与 LightGBM 回归示例代码

import numpy as np 
import pandas as pd 
import re 
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
import lightgbm as lgb
import xgboost as xgb


# Load the airfoil dataset; the CSV file is read with GBK encoding.
df = pd.read_csv('翼型数据集.csv', encoding='gbk')

# Encode the airfoil-name column as integer codes.
# The distinct names are sorted (by their string form) before being numbered
# so the name -> code mapping is identical on every run. Numbering a bare
# ``set`` of strings depends on hash randomization, so the codes fed to the
# models could otherwise change between interpreter sessions.
airfoil_names = sorted(set(df['翼型名称']), key=str)
col_dict = {name: code for code, name in enumerate(airfoil_names)}
df['翼型名称'] = df['翼型名称'].map(col_dict)

print(f"the data shape is : {df.shape}")
print(df.head())
print(df.columns)

# Hold out 20% of the rows as the test set, stratified on the target column,
# then separate the target from the feature columns in both splits.
target_col = '攻角 (degrees)'
train_x, test_x = train_test_split(
    df,
    random_state=100,
    test_size=0.2,
    stratify=df[target_col],
)
train_y, test_y = train_x[target_col], test_x[target_col]
train_x = train_x.drop(target_col, axis=1)
test_x = test_x.drop(target_col, axis=1)


# LightGBM model training.
#
# The target column is numeric and the result is scored with mean absolute
# error, so a regressor is fitted here. A multiclass classifier would treat
# every distinct target value as an unrelated label, and casting its
# predictions with int() would truncate any fractional value.
model = lgb.LGBMRegressor(
    boosting_type='gbdt',  # base learner: gbdt = classic gradient-boosted trees; dart = dropout additive regression trees
    n_estimators=100,  # number of boosting iterations
    learning_rate=0.1,  # shrinkage / step size
    max_depth=4,  # maximum depth of each tree
    min_child_weight=1,  # minimum sum of instance weights required in a leaf
    # min_split_gain=0.1,  # minimum loss reduction required to split a leaf further
    subsample=1,  # fraction of rows sampled for each tree
    colsample_bytree=1,  # fraction of features sampled for each tree (typical values: 0.5-1)
    random_state=27,  # fixed seed for reproducible results
    importance_type='gain',  # feature importance measure: split = number of splits; gain = total gain
    objective='regression',
)

# "l1" is LightGBM's name for mean absolute error, matching the score printed
# below. Progress is logged every 10 iterations through a callback because
# the ``verbose`` argument of ``fit`` was removed in LightGBM 4.0.
model.fit(
    train_x,
    train_y,
    eval_metric="l1",
    eval_set=[(train_x, train_y), (test_x, test_y)],
    callbacks=[lgb.log_evaluation(period=10)],
)

# Predict once and reuse the result for both the score and the comparison table.
gbm_pred = model.predict(test_x)
print("the mae is: ", mean_absolute_error(test_y, gbm_pred))
print(pd.DataFrame({"predict": gbm_pred, 'real': test_y}))


# XGBoost model training.
#
# A regressor is used so the model matches its variable name and the MAE
# score printed below. 'multiclass' is not an XGBoost objective name;
# 'reg:squarederror' is the squared-error regression objective.
# ``eval_metric`` is given to the constructor because passing it to ``fit``
# was removed in XGBoost 2.0. ``n_jobs`` / ``random_state`` are the
# scikit-learn-style names for the former ``nthread`` / ``seed`` settings.
# ``scale_pos_weight`` (a class-balance weight, previously set to its neutral
# value 1) is omitted.
xgb_Regressor = xgb.XGBRegressor(
    learning_rate=0.01,
    n_estimators=100,
    max_depth=3,
    min_child_weight=1,
    gamma=0,
    objective='reg:squarederror',
    eval_metric='mae',
    subsample=0.8,
    colsample_bytree=0.8,
    n_jobs=4,
    random_state=27,
)

# Evaluate on both splits while training, printing the metric every 10 rounds.
xgb_Regressor.fit(
    train_x,
    train_y,
    eval_set=[(train_x, train_y), (test_x, test_y)],
    verbose=10,
)

xgb_pred = xgb_Regressor.predict(test_x)
print("the mae is: ", mean_absolute_error(test_y, xgb_pred))



  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
  • 57
  • 58
  • 59
  • 60
  • 61
  • 62
  • 63
  • 64
  • 65
  • 66
  • 67
  • 68
  • 69
  • 70
声明:本文内容由网友自发贡献,转载请注明出处:【wpsshop】
推荐阅读
相关标签
  

闽ICP备14008679号