1 Data description
As before, we use the Iris dataset bundled with sklearn as the example and predict the species of the flowers.
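As a quick standalone look at the data (not part of the original walkthrough; which attributes to print is my own choice), the sample size, feature names and class names can be displayed directly:
from sklearn import datasets

iris = datasets.load_iris()
print(iris.data.shape)       # (150, 4): 150 samples, 4 feature variables
print(iris.feature_names)    # sepal/petal length and width, in cm
print(iris.target_names)     # ['setosa' 'versicolor' 'virginica']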
2 Import the required modules
from sklearn.model_selection import GridSearchCV
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,precision_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.neural_network import MLPClassifier
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.metrics import roc_curve
from sklearn.metrics import RocCurveDisplay
3 Load the data and split it into training and test samples
# Load the data and separate the feature variables from the target variable
iris = datasets.load_iris()
X, y = iris.data, iris.target
# Split the data into training and test samples
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, test_size=0.7)
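To verify the split (an optional check added here; with test_size=0.7, 70% of the 150 samples go to the test set), the shapes of the resulting arrays can be printed:
# Check the sizes of the training and test samples
print(X_train.shape, X_test.shape)   # (45, 4) (105, 4) with test_size=0.7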
4 Model training
One hidden layer with 2 hidden units:
# Train the model
NNet = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(2,), random_state=1).fit(X_train, y_train)
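The structure of the fitted network can be inspected through its attributes (a small optional sketch; which attributes to print is my own choice):
# Inspect the fitted network: number of layers and weight-matrix shapes
print(NNet.n_layers_)                    # 3: input layer, one hidden layer, output layer
print([w.shape for w in NNet.coefs_])    # [(4, 2), (2, 3)]: 4 inputs -> 2 hidden units -> 3 classes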
5 Model evaluation
# Predict the target variable
y_test_pred = NNet.predict(X_test)
# Evaluate the model
print('Accuracy:', accuracy_score(y_test, y_test_pred))
print('Precision:', precision_score(y_test, y_test_pred, average=None))
Accuracy: 0.8285714285714286
Precision: [0.97058824 1. 0.69090909]
print('Classification report for the neural network on the iris data:', '\n', classification_report(y_test, y_test_pred))
# Confusion matrix
print('Confusion matrix for the neural network on the iris data:', '\n', confusion_matrix(y_test, y_test_pred))
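The roc_curve and RocCurveDisplay imports from Section 2 can also be put to use. Since ROC curves are defined for binary problems, the sketch below draws a one-vs-rest curve for a single class; the choice of class 2 (virginica) is only an illustration and not part of the original example:
# One-vs-rest ROC curve for class 2 (virginica)
y_score = NNet.predict_proba(X_test)[:, 2]
fpr, tpr, _ = roc_curve(y_test == 2, y_score)
RocCurveDisplay(fpr=fpr, tpr=tpr).plot()
plt.show()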
6 Model selection
# Model selection
parameters = {'activation': ('logistic', 'tanh', 'relu'),
              'solver': ('lbfgs', 'sgd', 'adam'),
              'hidden_layer_sizes': ((3,), (4,), (5,), (4, 1), (4, 2), (4, 3))}
Nnet_grid = GridSearchCV(MLPClassifier(), parameters, cv=5).fit(iris.data, iris.target)
print('Parameters selected by grid search:', Nnet_grid.best_params_)
print('Corresponding accuracy score:', Nnet_grid.best_score_)
Parameters selected by grid search: {'activation': 'logistic', 'hidden_layer_sizes': (4,), 'solver': 'lbfgs'}
Corresponding accuracy score: 0.9733333333333334
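The model chosen by the grid search is available as best_estimator_ and can be applied to the test sample from Section 3 (an optional follow-up; note that the grid search above was fitted on the full dataset, so the test points were already seen during selection and this is not a strict out-of-sample score):
# Evaluate the model selected by the grid search on the test sample
y_grid_pred = Nnet_grid.best_estimator_.predict(X_test)
print('Accuracy of the selected model:', accuracy_score(y_test, y_grid_pred))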
7 Graphical display of the neural network classifier
The Iris dataset has 4 feature variables, so the classification regions of the neural network cannot be drawn in 4-dimensional space. Here we take only the first 2 feature variables to illustrate the decision regions of the neural network model in 2-dimensional space.
# Plot the 2-D decision regions of the neural network
X, y = datasets.load_iris(return_X_y=True)
X = X[:, 0:2]
# Split the data into training and test samples
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, test_size=0.7)
# Train the model
NNet2 = MLPClassifier(solver='lbfgs', hidden_layer_sizes=(2, ), random_state=1).fit(X_train, y_train)
# Set the plotting colors
cm = plt.cm.RdBu
cm_bright = ListedColormap(["#FF0000", "#0000FF", "chartreuse"])
# Plot the decision regions of the fitted network
DecisionBoundaryDisplay.from_estimator(NNet2, X, cmap=cm, alpha=0.8, eps=0.5)
# Set the axis limits
x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
# Scatter plot of the training samples
plt.scatter(
X_train[:, 0],
X_train[:, 1],
c=y_train,
cmap=cm_bright,
edgecolors="k"
)
# Scatter plot of the test samples
plt.scatter(
X_test[:, 0],
X_test[:, 1],
c=y_test,
cmap=cm_bright,
edgecolors="k",
alpha=0.6,
marker='^',
)
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.show()
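For comparison with the 4-feature model, the accuracy of the 2-feature model on its test sample can also be computed (an optional check; no particular value is claimed here):
# Accuracy of the 2-feature model on the test sample
print('Accuracy with 2 features:', NNet2.score(X_test, y_test))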