Python source code examples: sklearn.ensemble.VotingClassifier()
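Before the collected examples, a minimal self-contained sketch of typical VotingClassifier usage (the iris dataset, estimator names, and hyperparameters here are illustrative and not taken from any example below):

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB

X, y = load_iris(return_X_y=True)
# voting='soft' averages the predicted class probabilities of the base
# estimators; voting='hard' takes a majority vote of the predicted labels.
eclf = VotingClassifier(
    estimators=[('lr', LogisticRegression(max_iter=1000)),
                ('rf', RandomForestClassifier(random_state=0)),
                ('gnb', GaussianNB())],
    voting='soft')
eclf.fit(X, y)
print(eclf.predict(X[:5]))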
Example 1
def test_notfitted():
    eclf = VotingClassifier(estimators=[('lr1', LogisticRegression()),
                                        ('lr2', LogisticRegression())],
                            voting='soft')
    ereg = VotingRegressor([('dr', DummyRegressor())])
    msg = ("This %s instance is not fitted yet. Call 'fit'"
           " with appropriate arguments before using this method.")
    assert_raise_message(NotFittedError, msg % 'VotingClassifier',
                         eclf.predict, X)
    assert_raise_message(NotFittedError, msg % 'VotingClassifier',
                         eclf.predict_proba, X)
    assert_raise_message(NotFittedError, msg % 'VotingClassifier',
                         eclf.transform, X)
    assert_raise_message(NotFittedError, msg % 'VotingRegressor',
                         ereg.predict, X_r)
    assert_raise_message(NotFittedError, msg % 'VotingRegressor',
                         ereg.transform, X_r)
Example 2
def test_parallel_fit():
    """Check parallel backend of VotingClassifier on toy dataset."""
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = GaussianNB()
    X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
    y = np.array([1, 1, 2, 2])
    eclf1 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
        voting='soft',
        n_jobs=1).fit(X, y)
    eclf2 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
        voting='soft',
        n_jobs=2).fit(X, y)
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
Example 3
def __init__(self, num_features, **kwargs):
    super(VotingClassifier, self).__init__()
    kwargs = {**constants.VOTING_CLASSIFIER_PARAMS, **kwargs}
    voting = kwargs.pop('voting')
    self.num_features = num_features
    estimators = []
    for clf in constants.CLASSIFIERS_FOR_ENSEMBLE:
        model = utils.init_model(clf, num_features=num_features, **kwargs)
        estimators.append((clf, model.kernel))
    # Use sklearn's VotingClassifier as the underlying kernel.
    self.kernel = SKVotingClassifier(
        estimators=estimators, voting=voting, n_jobs=None
    )
Example 4
def __init__(self, api, lobes=False):
    """
    lobes = a dict of classifiers to use in the VotingClassifier;
            defaults to RandomForestClassifier and DecisionTreeClassifier
    """
    self.api = api
    if not lobes:
        lobes = {'rf': RandomForestClassifier(n_estimators=7,
                                              random_state=666),
                 'dt': DecisionTreeClassifier()}
    self.lobe = VotingClassifier(
        estimators=[(lobe, lobes[lobe]) for lobe in lobes],
        voting='hard',
        n_jobs=-1)
    self._trained = False
    self.split = splitTrainTestData
    self.prep = prepDataframe
Example 5
def test_voting_hard_binary(self):
    model = VotingClassifier(
        voting="hard",
        flatten_transform=False,
        estimators=[
            ("lr", LogisticRegression()),
            ("lr2", LogisticRegression(fit_intercept=False)),
        ],
    )
    # predict_proba is not defined when voting is hard.
    dump_binary_classification(
        model,
        suffix="Hard",
        comparable_outputs=[0],
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      " <= StrictVersion('0.5.0')",
        target_opset=TARGET_OPSET
    )
Example 6
def test_voting_hard_binary_weights(self):
    model = VotingClassifier(
        voting="hard",
        flatten_transform=False,
        weights=numpy.array([1000, 1]),
        estimators=[
            ("lr", LogisticRegression()),
            ("lr2", LogisticRegression(fit_intercept=False)),
        ],
    )
    # predict_proba is not defined when voting is hard.
    dump_binary_classification(
        model,
        suffix="WeightsHard",
        comparable_outputs=[0],
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      " <= StrictVersion('0.5.0')",
        target_opset=TARGET_OPSET
    )
Example 7
def test_voting_soft_binary(self):
    model = VotingClassifier(
        voting="soft",
        flatten_transform=False,
        estimators=[
            ("lr", LogisticRegression()),
            ("lr2", LogisticRegression(fit_intercept=False)),
        ],
    )
    dump_binary_classification(
        model,
        suffix="Soft",
        comparable_outputs=[0, 1],
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      " <= StrictVersion('0.2.1')",
        target_opset=TARGET_OPSET
    )
Example 8
def test_voting_soft_binary_weighted(self):
    model = VotingClassifier(
        voting="soft",
        flatten_transform=False,
        weights=numpy.array([1.8, 0.2]),
        estimators=[
            ("lr", LogisticRegression()),
            ("lr2", LogisticRegression(fit_intercept=False)),
        ],
    )
    dump_binary_classification(
        model,
        suffix="WeightedSoft",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      " <= StrictVersion('0.2.1')",
        target_opset=TARGET_OPSET
    )
Example 9
def test_voting_hard_multi(self):
    # predict_proba is not defined when voting is hard.
    model = VotingClassifier(
        voting="hard",
        flatten_transform=False,
        estimators=[
            ("lr", LogisticRegression()),
            ("lr2", DecisionTreeClassifier()),
        ],
    )
    dump_multiple_classification(
        model,
        suffix="Hard",
        comparable_outputs=[0],
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      " <= StrictVersion('0.5.0')",
        target_opset=TARGET_OPSET
    )
Example 10
def test_voting_soft_multi(self):
    model = VotingClassifier(
        voting="soft",
        flatten_transform=False,
        estimators=[
            ("lr", LogisticRegression()),
            ("lr2", LogisticRegression()),
        ],
    )
    dump_multiple_classification(
        model,
        suffix="Soft",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      " <= StrictVersion('0.2.1')",
        target_opset=TARGET_OPSET
    )
Example 11
def test_voting_soft_multi_string(self):
    model = VotingClassifier(
        voting="soft",
        flatten_transform=False,
        estimators=[
            ("lr", LogisticRegression()),
            ("lr2", LogisticRegression()),
        ],
    )
    dump_multiple_classification(
        model, label_string=True,
        suffix="Soft",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      " <= StrictVersion('0.2.1')",
        target_opset=TARGET_OPSET
    )
Example 12
def test_voting_soft_multi_weighted(self):
    model = VotingClassifier(
        voting="soft",
        flatten_transform=False,
        weights=numpy.array([1.8, 0.2]),
        estimators=[
            ("lr", LogisticRegression()),
            ("lr2", LogisticRegression()),
        ],
    )
    dump_multiple_classification(
        model,
        suffix="WeightedSoft",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      " <= StrictVersion('0.2.1')",
        target_opset=TARGET_OPSET
    )
Example 13
def test_voting_soft_multi_weighted4(self):
    model = VotingClassifier(
        voting="soft",
        flatten_transform=False,
        weights=numpy.array([2.7, 0.3, 0.5, 0.5]),
        estimators=[
            ("lr", LogisticRegression()),
            ("lra", LogisticRegression()),
            ("lrb", LogisticRegression()),
            ("lr2", LogisticRegression()),
        ],
    )
    dump_multiple_classification(
        model,
        suffix="Weighted4Soft",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      " <= StrictVersion('0.2.1')",
        target_opset=TARGET_OPSET
    )
Example 14
def test_voting_soft_multi_weighted42(self):
    model = VotingClassifier(
        voting="soft",
        flatten_transform=False,
        weights=numpy.array([27, 0.3, 0.5, 0.5]),
        estimators=[
            ("lr", LogisticRegression()),
            ("lra", LogisticRegression()),
            ("lrb", LogisticRegression()),
            ("lr2", LogisticRegression()),
        ],
    )
    dump_multiple_classification(
        model,
        suffix="Weighted42Soft",
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      " <= StrictVersion('0.2.1')",
        target_opset=TARGET_OPSET
    )
Example 15
def test_parallel_fit():
    """Check parallel backend of VotingClassifier on toy dataset."""
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = GaussianNB()
    X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
    y = np.array([1, 1, 2, 2])
    eclf1 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
        voting='soft',
        n_jobs=1).fit(X, y)
    eclf2 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
        voting='soft',
        n_jobs=2).fit(X, y)
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
Example 16
def test_set_params():
    """set_params should be able to set estimators"""
    clf1 = LogisticRegression(random_state=123, C=1.0)
    clf2 = RandomForestClassifier(random_state=123, max_depth=None)
    clf3 = GaussianNB()
    eclf1 = VotingClassifier([('lr', clf1), ('rf', clf2)], voting='soft',
                             weights=[1, 2])
    eclf1.fit(X, y)
    eclf2 = VotingClassifier([('lr', clf1), ('nb', clf3)], voting='soft',
                             weights=[1, 2])
    eclf2.set_params(nb=clf2).fit(X, y)
    assert_false(hasattr(eclf2, 'nb'))
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
    assert_equal(eclf2.estimators[0][1].get_params(), clf1.get_params())
    assert_equal(eclf2.estimators[1][1].get_params(), clf2.get_params())
    eclf1.set_params(lr__C=10.0)
    eclf2.set_params(nb__max_depth=5)
    assert_true(eclf1.estimators[0][1].get_params()['C'] == 10.0)
    assert_true(eclf2.estimators[1][1].get_params()['max_depth'] == 5)
    assert_equal(eclf1.get_params()["lr__C"],
                 eclf1.get_params()["lr"].get_params()['C'])
Example 17
def test_estimator_init():
    eclf = VotingClassifier(estimators=[])
    msg = ('Invalid `estimators` attribute, `estimators` should be'
           ' a list of (string, estimator) tuples')
    assert_raise_message(AttributeError, msg, eclf.fit, X, y)
    clf = LogisticRegression(random_state=1)
    eclf = VotingClassifier(estimators=[('lr', clf)], voting='error')
    msg = ('Voting must be \'soft\' or \'hard\'; got (voting=\'error\')')
    assert_raise_message(ValueError, msg, eclf.fit, X, y)
    eclf = VotingClassifier(estimators=[('lr', clf)], weights=[1, 2])
    msg = ('Number of `estimators` and weights must be equal'
           '; got 2 weights, 1 estimators')
    assert_raise_message(ValueError, msg, eclf.fit, X, y)
    eclf = VotingClassifier(estimators=[('lr', clf), ('lr', clf)],
                            weights=[1, 2])
    msg = "Names provided are not unique: ['lr', 'lr']"
    assert_raise_message(ValueError, msg, eclf.fit, X, y)
    eclf = VotingClassifier(estimators=[('lr__', clf)])
    msg = "Estimator names must not contain __: got ['lr__']"
    assert_raise_message(ValueError, msg, eclf.fit, X, y)
    eclf = VotingClassifier(estimators=[('estimators', clf)])
    msg = "Estimator names conflict with constructor arguments: ['estimators']"
    assert_raise_message(ValueError, msg, eclf.fit, X, y)
Example 18
def test_predictproba_hardvoting():
    eclf = VotingClassifier(estimators=[('lr1', LogisticRegression()),
                                        ('lr2', LogisticRegression())],
                            voting='hard')
    msg = "predict_proba is not available when voting='hard'"
    assert_raise_message(AttributeError, msg, eclf.predict_proba, X)
Example 19
def test_majority_label_iris():
    """Check classification by majority label on dataset iris."""
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = GaussianNB()
    eclf = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
        voting='hard')
    scores = cross_val_score(eclf, X, y, cv=5, scoring='accuracy')
    assert_almost_equal(scores.mean(), 0.95, decimal=2)
Example 20
def test_tie_situation():
    """Check voting classifier selects smaller class label in tie situation."""
    clf1 = LogisticRegression(random_state=123, multi_class='ovr',
                              solver='liblinear')
    clf2 = RandomForestClassifier(random_state=123)
    eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2)],
                            voting='hard')
    assert_equal(clf1.fit(X, y).predict(X)[73], 2)
    assert_equal(clf2.fit(X, y).predict(X)[73], 1)
    assert_equal(eclf.fit(X, y).predict(X)[73], 1)
Example 21
def test_predict_on_toy_problem():
    """Manually check predicted class labels for toy dataset."""
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = GaussianNB()
    X = np.array([[-1.1, -1.5],
                  [-1.2, -1.4],
                  [-3.4, -2.2],
                  [1.1, 1.2],
                  [2.1, 1.4],
                  [3.1, 2.3]])
    y = np.array([1, 1, 1, 2, 2, 2])
    assert_array_equal(clf1.fit(X, y).predict(X), [1, 1, 1, 2, 2, 2])
    assert_array_equal(clf2.fit(X, y).predict(X), [1, 1, 1, 2, 2, 2])
    assert_array_equal(clf3.fit(X, y).predict(X), [1, 1, 1, 2, 2, 2])
    eclf = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
        voting='hard',
        weights=[1, 1, 1])
    assert_array_equal(eclf.fit(X, y).predict(X), [1, 1, 1, 2, 2, 2])
    eclf = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
        voting='soft',
        weights=[1, 1, 1])
    assert_array_equal(eclf.fit(X, y).predict(X), [1, 1, 1, 2, 2, 2])
Example 22
def test_multilabel():
    """Check if error is raised for multilabel classification."""
    X, y = make_multilabel_classification(n_classes=2, n_labels=1,
                                          allow_unlabeled=False,
                                          random_state=123)
    clf = OneVsRestClassifier(SVC(kernel='linear'))
    eclf = VotingClassifier(estimators=[('ovr', clf)], voting='hard')
    try:
        eclf.fit(X, y)
    except NotImplementedError:
        return
Example 23
def test_gridsearch():
    """Check GridSearch support."""
    clf1 = LogisticRegression(random_state=1)
    clf2 = RandomForestClassifier(random_state=1)
    clf3 = GaussianNB()
    eclf = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
        voting='soft')
    params = {'lr__C': [1.0, 100.0],
              'voting': ['soft', 'hard'],
              'weights': [[0.5, 0.5, 0.5], [1.0, 0.5, 0.5]]}
    grid = GridSearchCV(estimator=eclf, param_grid=params, cv=5)
    grid.fit(iris.data, iris.target)
Example 24
def test_sample_weight_kwargs():
    """Check that VotingClassifier passes sample_weight as kwargs"""
    class MockClassifier(BaseEstimator, ClassifierMixin):
        """Mock Classifier to check that sample_weight is received as kwargs"""
        def fit(self, X, y, *args, **sample_weight):
            assert 'sample_weight' in sample_weight

    clf = MockClassifier()
    eclf = VotingClassifier(estimators=[('mock', clf)], voting='soft')
    # Should not raise an error.
    eclf.fit(X, y, sample_weight=np.ones((len(y),)))
Example 25
def test_set_params():
    """set_params should be able to set estimators"""
    clf1 = LogisticRegression(random_state=123, C=1.0)
    clf2 = RandomForestClassifier(random_state=123, max_depth=None)
    clf3 = GaussianNB()
    eclf1 = VotingClassifier([('lr', clf1), ('rf', clf2)], voting='soft',
                             weights=[1, 2])
    assert 'lr' in eclf1.named_estimators
    assert eclf1.named_estimators.lr is eclf1.estimators[0][1]
    assert eclf1.named_estimators.lr is eclf1.named_estimators['lr']
    eclf1.fit(X, y)
    assert 'lr' in eclf1.named_estimators_
    assert eclf1.named_estimators_.lr is eclf1.estimators_[0]
    assert eclf1.named_estimators_.lr is eclf1.named_estimators_['lr']
    eclf2 = VotingClassifier([('lr', clf1), ('nb', clf3)], voting='soft',
                             weights=[1, 2])
    eclf2.set_params(nb=clf2).fit(X, y)
    assert not hasattr(eclf2, 'nb')
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
    assert_equal(eclf2.estimators[0][1].get_params(), clf1.get_params())
    assert_equal(eclf2.estimators[1][1].get_params(), clf2.get_params())
    eclf1.set_params(lr__C=10.0)
    eclf2.set_params(nb__max_depth=5)
    assert eclf1.estimators[0][1].get_params()['C'] == 10.0
    assert eclf2.estimators[1][1].get_params()['max_depth'] == 5
    assert_equal(eclf1.get_params()["lr__C"],
                 eclf1.get_params()["lr"].get_params()['C'])
Example 26
def test_estimator_weights_format():
    # Test estimator weights inputs as list and array
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    eclf1 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2)],
        weights=[1, 2],
        voting='soft')
    eclf2 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2)],
        weights=np.array((1, 2)),
        voting='soft')
    eclf1.fit(X, y)
    eclf2.fit(X, y)
    assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
Example 27
def test_transform():
    """Check transform method of VotingClassifier on toy dataset."""
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = GaussianNB()
    X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
    y = np.array([1, 1, 2, 2])
    eclf1 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
        voting='soft').fit(X, y)
    eclf2 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
        voting='soft',
        flatten_transform=True).fit(X, y)
    eclf3 = VotingClassifier(estimators=[
        ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
        voting='soft',
        flatten_transform=False).fit(X, y)
    assert_array_equal(eclf1.transform(X).shape, (4, 6))
    assert_array_equal(eclf2.transform(X).shape, (4, 6))
    assert_array_equal(eclf3.transform(X).shape, (3, 4, 2))
    assert_array_almost_equal(eclf1.transform(X),
                              eclf2.transform(X))
    assert_array_almost_equal(
        eclf3.transform(X).swapaxes(0, 1).reshape((4, 6)),
        eclf2.transform(X)
    )
Example 28
def log(msg):
    print("[%s] %s" % (time.asctime(), msg))

#-------------------------------------------------------------------------------
# The original VotingClassifier class uses np.bincount() with an array and,
# annoyingly, it will fail with a message like "cannot cast float64 to int64".
#
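The wrapper class the comment refers to is not shown on this page. Below is a minimal sketch of what such a subclass could look like, assuming the fix is simply casting the stacked base predictions to an integer dtype before the bincount-based majority vote; note that `_predict`, `_weights_not_none`, and `le_` are private sklearn internals that may differ across versions:

import numpy as np
from sklearn.ensemble import VotingClassifier

class CPigaiosVotingClassifier(VotingClassifier):
    """Hard-voting variant that casts base predictions to int64 so
    that np.bincount() does not fail on float class labels."""

    def predict(self, X):
        # self._predict stacks the hard predictions of the fitted base
        # estimators into an (n_samples, n_estimators) array; force an
        # integer dtype before the majority vote.
        predictions = self._predict(X).astype(np.int64)
        maj = np.apply_along_axis(
            lambda x: np.argmax(
                np.bincount(x, weights=self._weights_not_none)),
            axis=1, arr=predictions)
        return self.le_.inverse_transform(maj)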
Example 29
def vote(self):
    log("Loading data...")
    self.X, self.y = self.load_data()
    estimators = []
    names = []
    for classifier, name, arg in ML_CLASSIFIERS:
        clf = classifier(arg)
        log("Creating model %s..." % classifier.__name__)
        estimators.append((classifier.__name__, clf))
        names.append(name)
    log("Fitting data with VotingClassifier('hard')")
    self.clf = CPigaiosVotingClassifier(estimators=estimators,
                                        voting='hard', n_jobs=-1)
    self.clf.fit(self.X, self.y)
    log("Predicting...")
    self.predict()
    log("Saving model...")
    joblib.dump(self.clf, "clf.pkl")
    # Unpack each (name, estimator) pair so cross_val_score receives the
    # estimator itself rather than the pair.
    for (clf_name, clf), label in zip(estimators, names):
        try:
            scores = cross_val_score(clf, self.X, self.y, cv=5,
                                     scoring='accuracy')
            print("Accuracy: %0.2f (+/- %0.2f) [%s]"
                  % (scores.mean(), scores.std(), label))
        except Exception:
            print("Error with", clf_name, ":", sys.exc_info()[1])
Example 30
def getEstimator(scorer_type):
    if scorer_type == 'grad_boost':
        clf = GradientBoostingClassifier(n_estimators=200, random_state=14128,
                                         verbose=True)
    if scorer_type == 'svm1':  # stochastic gradient descent classifier
        clf = svm.SVC(gamma=0.001, C=100., verbose=True)
    if scorer_type == 'logistic_regression':
        clf = logistic.LogisticRegression()
    if scorer_type == 'svm3':
        # class_weight only accepts 'balanced' or a dict, not 'unbalanced'.
        clf = svm.SVC(kernel='poly', C=1.0, probability=True,
                      class_weight='balanced')
    if scorer_type == "bayes":
        clf = naive_bayes.GaussianNB()
    if scorer_type == 'voting_hard_svm_gradboost_logistic':
        svm2 = svm.SVC(kernel='linear', C=1.0, probability=True,
                       class_weight='balanced', verbose=True)
        log_reg = logistic.LogisticRegression()
        gradboost = GradientBoostingClassifier(n_estimators=200,
                                               random_state=14128, verbose=True)
        clf = VotingClassifier(estimators=[  # ('gb', gb),
            ('svm', svm2),
            ('grad_boost', gradboost),
            ('logistic_regression', log_reg)
        ], n_jobs=1, voting='hard')
    if scorer_type == 'voting_hard_bayes_gradboost':
        bayes = naive_bayes.GaussianNB()
        gradboost = GradientBoostingClassifier(n_estimators=200,
                                               random_state=14128, verbose=True)
        clf = VotingClassifier(estimators=[  # ('gb', gb),
            ('bayes', bayes),
            ('grad_boost', gradboost),
        ], n_jobs=1, voting='hard')
    return clf