Hard voting

How to train a majority-rule (hard voting) classifier:
# Training a majority-rule classifier
from sklearn import datasets
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier

iris = datasets.load_iris()
X, y = iris.data[:, 1:3], iris.target  # use only sepal width and petal length

clf1 = LogisticRegression(solver='lbfgs', multi_class='multinomial', random_state=1)
# solver: optimization algorithm for the logistic regression loss; 'lbfgs' is a
# quasi-Newton method that uses an approximation of the second-derivative (Hessian)
# matrix to optimize the loss iteratively.
clf2 = RandomForestClassifier(n_estimators=50, random_state=1)
clf3 = GaussianNB()
eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)],
                        voting='hard')

for clf, label in zip([clf1, clf2, clf3, eclf],
                      ['Logistic Regression', 'Random Forest', 'Naive Bayes', 'Ensemble']):
    scores = cross_val_score(clf, X, y, cv=5, scoring='accuracy')
    print("Accuracy: mean %0.2f, standard deviation %0.2f [%s]"
          % (scores.mean(), scores.std(), label))
Out:

Accuracy: mean 0.95, standard deviation 0.04 [Logistic Regression]
Accuracy: mean 0.94, standard deviation 0.04 [Random Forest]
Accuracy: mean 0.91, standard deviation 0.04 [Naive Bayes]
Accuracy: mean 0.95, standard deviation 0.04 [Ensemble]
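
As a rough sketch of what voting='hard' does, each fitted classifier predicts a class label and the ensemble returns the label that receives the most votes. The snippet below illustrates this majority rule with made-up predictions; it is not scikit-learn's actual implementation:

import numpy as np

# Hypothetical per-sample predictions from three fitted classifiers (illustrative values)
pred_lr  = np.array([0, 1, 2, 1])
pred_rf  = np.array([0, 2, 2, 1])
pred_gnb = np.array([0, 1, 1, 1])

votes = np.vstack([pred_lr, pred_rf, pred_gnb])  # shape: (n_classifiers, n_samples)
majority = np.array([np.bincount(col).argmax() for col in votes.T])
print(majority)  # [0 1 2 1] -- the most frequent label per sample wins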

Soft voting

The following example shows how the decision regions may change when a soft VotingClassifier is built on top of a support vector machine (SVM), a decision tree, and a k-nearest neighbors (KNN) classifier:
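
With voting='soft', the ensemble averages the class probabilities reported by each classifier, optionally weighted, and predicts the class with the highest averaged probability. A minimal sketch of that calculation with made-up probability values (not the library's implementation):

import numpy as np

# Hypothetical class-probability outputs of three classifiers for one sample (illustrative values)
proba_dt  = np.array([0.2, 0.5, 0.3])
proba_knn = np.array([0.6, 0.3, 0.1])
proba_svc = np.array([0.3, 0.4, 0.3])

weights = np.array([2, 1, 2])  # same weights as in the example below
avg = np.average(np.vstack([proba_dt, proba_knn, proba_svc]), axis=0, weights=weights)
print(avg, avg.argmax())  # the class with the highest weighted average probability wins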

Plot the decision boundaries of a VotingClassifier
from sklearn import datasets
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from itertools import product
from sklearn.ensemble import VotingClassifier
import matplotlib.pyplot as plt

iris = datasets.load_iris()
X, y = iris.data[:, [0, 2]], iris.target  # use only the first and third features

# Training
clf1 = DecisionTreeClassifier(max_depth=4)
clf2 = KNeighborsClassifier(n_neighbors=7)
clf3 = SVC(gamma='scale', kernel='rbf', probability=True)
# gamma: kernel coefficient; kernel: type of kernel used by the algorithm
# ('rbf' = radial basis function / Gaussian kernel);
# probability: enable probability estimates (required for soft voting)
eclf = VotingClassifier(estimators=[('dt', clf1), ('knn', clf2), ('svc', clf3)],
                        voting='soft', weights=[2, 1, 2])
clf1 = clf1.fit(X, y)
clf2 = clf2.fit(X, y)
clf3 = clf3.fit(X, y)
eclf = eclf.fit(X, y)

# Plotting decision regions
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))
# meshgrid builds the grid-point coordinate matrices: each element of xx, paired with
# the element at the same position in yy, gives the full coordinates of one grid point.

f, axarr = plt.subplots(2, 2, sharex='col', sharey='row', figsize=(10, 8))
# sharex='col' / sharey='row': subplots in the same column/row share the x/y axis

for idx, clf, tt in zip(product([0, 1], [0, 1]),
                        [clf1, clf2, clf3, eclf],
                        ['Decision Tree (depth=4)', 'KNN (k=7)', 'Kernel SVM', 'Soft Voting']):
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    # ravel(), flatten() and squeeze() all turn a multidimensional array into a 1-D array:
    # ravel() avoids copying the source data when possible, flatten() always returns a copy,
    # and squeeze() only removes axes of length 1; reshape(-1) can also "flatten" an array.
    # np.r_ stacks arrays by rows (vertically, equal number of columns, like concat() in pandas);
    # np.c_ stacks arrays by columns (horizontally, equal number of rows, like merge() in pandas).
    Z = Z.reshape(xx.shape)
    axarr[idx[0], idx[1]].contourf(xx, yy, Z, alpha=0.4)
    # contour() and contourf() both draw contour plots; contour() only draws the contour
    # lines, while contourf() also fills the regions between them.
    axarr[idx[0], idx[1]].scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor='k')
    axarr[idx[0], idx[1]].set_title(tt)

plt.show()
Figure: decision regions of the decision tree, KNN, kernel SVM, and the soft-voting ensemble.

Voting regressor

The idea behind the voting regressor is to combine conceptually different machine learning regressors and return the average of their predicted values. Such a regressor can be useful for a set of equally well-performing models, as it balances out their individual weaknesses.
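
As a minimal sketch of the averaging itself, using made-up prediction values: with equal (default) weights the ensemble output is just the element-wise mean of the individual predictions. This is an illustration, not scikit-learn's implementation:

import numpy as np

# Hypothetical predictions of three fitted regressors for four samples (illustrative values)
pred_gb = np.array([24.1, 19.8, 33.0, 21.5])
pred_rf = np.array([23.5, 20.4, 31.2, 22.0])
pred_lr = np.array([25.0, 18.9, 34.1, 20.8])

ensemble = np.mean([pred_gb, pred_rf, pred_lr], axis=0)
print(ensemble)  # element-wise average of the three prediction vectors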

The following example shows how to fit a voting regressor:

Plot individual and voting regression predictions
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import VotingRegressor

# Loading some example data (note: load_boston was removed in scikit-learn 1.2)
boston = datasets.load_boston()
X, y = boston.data, boston.target

# Training the regressors
reg1 = GradientBoostingRegressor(random_state=1, n_estimators=10)
reg2 = RandomForestRegressor(random_state=1, n_estimators=10)
reg3 = LinearRegression()
ereg = VotingRegressor([('gb', reg1), ('rf', reg2), ('lr', reg3)])
reg1.fit(X, y)
reg2.fit(X, y)
reg3.fit(X, y)
ereg.fit(X, y)
xt = X[:20]

# Format strings set the marker styles: 'gd' green diamond, 'b^' blue triangle, 'ys' yellow square, 'r*' red star
plt.figure()
plt.plot(reg1.predict(xt), 'gd', label='GradientBoostingRegressor')
plt.plot(reg2.predict(xt), 'b^', label='RandomForestRegressor')
plt.plot(reg3.predict(xt), 'ys', label='LinearRegression')
plt.plot(ereg.predict(xt), 'r*', label='VotingRegressor')
plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
plt.ylabel('predicted')
plt.xlabel('training samples')
plt.legend(loc='best')
plt.title('Comparison of individual predictions with averaged')
plt.show()
Figure: predictions of the individual regressors and the voting regressor on the first 20 training samples.

 
