赞
踩
银行等金融机构经常会根据客户的个人资料、财产等情况,来预测借款客户是否会违约,从而开展贷前审核、贷中管理、贷后违约处理等工作。金融处理的就是风险,需要在风险和收益之间寻求一个平衡点。现代金融在某种程度上就是一个风险定价的过程:通过个人的海量数据对其进行风险评估,并据此确定合适的借款利率,这便是一个典型的风险定价过程,也被称为大数据风控。
# Customer default prediction with LightGBM: train a baseline classifier,
# evaluate it (accuracy, ROC curve, AUC), inspect feature importances, then
# tune hyperparameters with a grid search and re-evaluate the tuned model.
import matplotlib.pyplot as plt
import pandas as pd
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve
from sklearn.model_selection import GridSearchCV, train_test_split


def _roc_report(fitted_model, features_test, labels_test):
    """Plot the ROC curve of *fitted_model* on the test data and compute AUC.

    Returns ``(proba, fpr, tpr, thresholds, auc)``. ``proba`` is the full
    ``predict_proba`` output; its column 1 is used as the ranking score for
    both the ROC curve and the AUC.
    """
    proba = fitted_model.predict_proba(features_test)
    fpr, tpr, thresholds = roc_curve(labels_test, proba[:, 1])
    plt.plot(fpr, tpr)
    plt.show()
    auc = roc_auc_score(labels_test, proba[:, 1])
    return proba, fpr, tpr, thresholds, auc


# Load the data. The '是否违约' ("defaulted or not") column is the target;
# every other column is used as a feature.
df = pd.read_excel('客户信息及违约表现.xlsx')
print(df.head())
X = df.drop(columns='是否违约')
Y = df['是否违约']

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=123
)

# Baseline model with default hyperparameters.
model = LGBMClassifier()
model.fit(X_train, y_train)

# Predict on the test set.
y_pred = model.predict(X_test)
print(y_pred)

# Side-by-side comparison of predicted and actual labels.
a = pd.DataFrame({'预测值': list(y_pred), '实际值': list(y_test)})
print(a.head())

# accuracy_score expects (y_true, y_pred), in that order.
score = accuracy_score(y_test, y_pred)
print(score)

# ROC curve and AUC of the baseline model.
y_pred_proba, fpr, tpr, thres, score = _roc_report(model, X_test, y_test)
print(score)

# Feature importances, shown as a table sorted from most to least important.
features = X.columns  # feature names
importances = model.feature_importances_  # importance of each feature
importances_df = pd.DataFrame()
importances_df['特征名称'] = features
importances_df['特征重要性'] = importances
print(importances_df.sort_values('特征重要性', ascending=False))

# Hyperparameter tuning: exhaustive grid search, scored by ROC AUC with
# 5-fold cross-validation on the training data only.
parameters = {
    'num_leaves': [10, 15, 31],
    'n_estimators': [10, 20, 30],
    'learning_rate': [0.05, 0.1, 0.2],
}
model = LGBMClassifier()
grid_search = GridSearchCV(model, parameters, scoring='roc_auc', cv=5)
grid_search.fit(X_train, y_train)
print(grid_search.best_params_)

# Rebuild the classifier from the parameters the search actually selected,
# so the tuned model always matches the search result for the current data.
model = LGBMClassifier(**grid_search.best_params_)
model.fit(X_train, y_train)

# ROC curve and AUC of the tuned model.
y_pred_proba, fpr, tpr, thres, score = _roc_report(model, X_test, y_test)
print(score)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。