【集成算法】Python对比软投票与硬投票

最新推荐文章于 2024-04-28 18:30:36 发布

不断向上的萝卜

最新推荐文章于 2024-04-28 18:30:36 发布

阅读量541

点赞数 9

文章标签：算法 python 机器学习

本文链接：https://blog.csdn.net/weixin_62100318/article/details/135624647

版权

硬投票：直接对各个分类器最终预测出的类别值采用“少数服从多数”的策略，得票最多的类别即为集成结果

软投票：对各个分类器输出的类别概率进行（加权）平均，取平均概率最大的类别作为集成结果

软投票的效果通常优于硬投票，但要求每个分类器都能够输出类别概率值

'''自主构建数据集'''
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons

# Generate a noisy two-class "moons" dataset (500 samples) and split it into
# training and test parts; both steps are seeded with random_state=42.
X, y = make_moons(n_samples=500, noise=0.30, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Scatter-plot the full dataset, one marker style per class:
# class 0 as yellow circles, class 1 as blue squares.
is_class0 = y == 0
is_class1 = y == 1
plt.plot(X[is_class0, 0], X[is_class0, 1], 'yo', alpha=0.6)
plt.plot(X[is_class1, 0], X[is_class1, 1], 'bs', alpha=0.6)
plt.show()

'''对比软投票与硬投票'''
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# Base classifiers for the hard-voting experiment, each seeded with
# random_state=42.
log_clf = LogisticRegression(random_state=42)
rnd_clf = RandomForestClassifier(random_state=42)
svm_clf = SVC(random_state=42)

# Hard-voting experiment.
# estimators: the (name, classifier) pairs that take part in the vote.
# voting: the voting strategy; 'hard' (majority vote) is also the default.
voting_clf = VotingClassifier(
    estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],
    voting='hard',
)
# NOTE: voting_clf is deliberately not fitted here. The comparison loop that
# follows calls fit() on every model, including the ensemble, right before
# scoring it, so fitting here as well would only train the ensemble twice.

from sklearn.metrics import accuracy_score

# Compare each standalone classifier with the hard-voting ensemble:
# fit on the training split, predict the test split, print the accuracy.
for clf in (log_clf, rnd_clf, svm_clf, voting_clf):
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    print(type(clf).__name__, accuracy_score(y_test, y_pred))
# Recorded output of a run:
#   LogisticRegression 0.864
#   RandomForestClassifier 0.904
#   SVC 0.896
#   VotingClassifier 0.904

# Soft-voting experiment: a separate set of base classifiers, each seeded
# with random_state=42.
log_clf_soft = LogisticRegression(random_state=42)
rnd_clf_soft = RandomForestClassifier(random_state=42)
# SVC does not produce probability estimates by default; probability=True
# turns them on, which soft voting requires.
svm_clf_soft = SVC(probability=True, random_state=42)
voting_clf_soft = VotingClassifier(
    estimators=[('lr', log_clf_soft), ('rf', rnd_clf_soft), ('svc', svm_clf_soft)],
    voting='soft',
)
# NOTE: voting_clf_soft is deliberately not fitted here. The comparison loop
# that follows calls fit() on every model, including the ensemble, right
# before scoring it, so fitting here as well would only train it twice.

# Compare each standalone classifier with the soft-voting ensemble:
# fit on the training split, predict the test split, print the accuracy.
for clf in (log_clf_soft, rnd_clf_soft, svm_clf_soft, voting_clf_soft):
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    print(type(clf).__name__, accuracy_score(y_test, y_pred))
# Recorded output of a run:
#   LogisticRegression 0.864
#   RandomForestClassifier 0.896
#   SVC 0.896
#   VotingClassifier 0.92