Python source code examples: sklearn.ensemble.BaggingClassifier()
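The snippets below are collected from scikit-learn's own test suite and from downstream projects, so most of them rely on module-level imports and fixtures (such as iris, check_random_state, assert_equal, or loadDataset) defined elsewhere in their source files. As a quick orientation, here is a minimal, self-contained usage sketch (an illustrative example, not taken from any of the projects below); the base estimator is passed positionally because older scikit-learn releases name that parameter base_estimator, as in the examples here, while releases from 1.2 onward name it estimator:

from sklearn.datasets import load_iris
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Illustrative sketch: bag 10 decision trees, each fit on a bootstrap sample.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = BaggingClassifier(DecisionTreeClassifier(), n_estimators=10, random_state=0)
clf.fit(X_train, y_train)
print(clf.score(X_test, y_test))  # mean accuracy on the held-out split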
Example 1
def test_classification():
    # Check classification for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [1, 2, 4],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyClassifier(),
                           Perceptron(tol=1e-3),
                           DecisionTreeClassifier(),
                           KNeighborsClassifier(),
                           SVC(gamma="scale")]:
        for params in grid:
            BaggingClassifier(base_estimator=base_estimator,
                              random_state=rng,
                              **params).fit(X_train, y_train).predict(X_test)
Example 2
def test_warm_start(random_state=42):
    # Test if fitting incrementally with warm start gives a forest of the
    # right size and the same results as a normal fit.
    X, y = make_hastie_10_2(n_samples=20, random_state=1)

    clf_ws = None
    for n_estimators in [5, 10]:
        if clf_ws is None:
            clf_ws = BaggingClassifier(n_estimators=n_estimators,
                                       random_state=random_state,
                                       warm_start=True)
        else:
            clf_ws.set_params(n_estimators=n_estimators)
        clf_ws.fit(X, y)
        assert_equal(len(clf_ws), n_estimators)

    clf_no_ws = BaggingClassifier(n_estimators=10, random_state=random_state,
                                  warm_start=False)
    clf_no_ws.fit(X, y)

    assert_equal(set([tree.random_state for tree in clf_ws]),
                 set([tree.random_state for tree in clf_no_ws]))
Example 3
def test_warm_start_equal_n_estimators():
    # Test that nothing happens when fitting without increasing n_estimators
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43)

    clf = BaggingClassifier(n_estimators=5, warm_start=True, random_state=83)
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)
    # modify X to nonsense values, this should not change anything
    X_train += 1.

    assert_warns_message(UserWarning,
                         "Warm-start fitting without increasing n_estimators does not",
                         clf.fit, X_train, y_train)
    assert_array_equal(y_pred, clf.predict(X_test))
Example 4
def test_warm_start_equivalence():
    # warm started classifier with 5+5 estimators should be equivalent to
    # one classifier with 10 estimators
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43)

    clf_ws = BaggingClassifier(n_estimators=5, warm_start=True,
                               random_state=3141)
    clf_ws.fit(X_train, y_train)
    clf_ws.set_params(n_estimators=10)
    clf_ws.fit(X_train, y_train)
    y1 = clf_ws.predict(X_test)

    clf = BaggingClassifier(n_estimators=10, warm_start=False,
                            random_state=3141)
    clf.fit(X_train, y_train)
    y2 = clf.predict(X_test)

    assert_array_almost_equal(y1, y2)
Example 5
def main():
    indata = np.load(inputs)
    training_data = indata['data_training']
    training_labels = indata['label_training']
    validation_data = indata['data_val']
    validation_labels = indata['label_val']

    ts = range(1, 11)
    sampling_rates = [round(0.1 * t, 1) for t in ts]
    forest_sizes = [10, 20, 50, 100]

    # For each bootstrap sampling rate, sweep the ensemble size and plot
    # validation accuracy as one curve.
    for sampling_rate in sampling_rates:
        legend_label = 'sampling rate=' + str(sampling_rate)
        accuracy_results = []
        for forest_size in forest_sizes:
            rf_clf = ensemble.BaggingClassifier(n_estimators=forest_size,
                                                max_samples=sampling_rate)
            rf_clf.fit(training_data, training_labels)
            predictions = rf_clf.predict(validation_data)
            accuracy = metrics.accuracy_score(validation_labels, predictions)
            accuracy_results.append(accuracy)
        plt.plot(range(len(forest_sizes)), accuracy_results, label=legend_label)

    plt.xticks(range(len(forest_sizes)), forest_sizes, size='small')
    plt.legend()
    plt.show()
Example 6
def main():
    # prepare data
    trainingSet = []
    testSet = []
    accuracy = 0.0
    split = 0.25
    loadDataset('../Dataset/LDAdata.csv', split, trainingSet, testSet)
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))

    trainData = np.array(trainingSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    columns = trainData.shape[1]
    X = np.array(trainData)
    y = np.array(trainingSet)[:, columns]

    clf = BaggingClassifier(LDA())
    clf.fit(X, y)

    testData = np.array(testSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    X_test = np.array(testData)
    y_test = np.array(testSet)[:, columns]

    accuracy = clf.score(X_test, y_test)
    accuracy *= 100
    print("Accuracy %:", accuracy)
Example 7
def main():
    # prepare data
    trainingSet = []
    testSet = []
    accuracy = 0.0
    split = 0.25
    loadDataset('../Dataset/combined.csv', split, trainingSet, testSet)
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))

    # generate predictions
    predictions = []
    trainData = np.array(trainingSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    columns = trainData.shape[1]
    X = np.array(trainData)
    y = np.array(trainingSet)[:, columns]

    clf = BaggingClassifier(QDA())
    clf.fit(X, y)

    testData = np.array(testSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    X_test = np.array(testData)
    y_test = np.array(testSet)[:, columns]

    accuracy = clf.score(X_test, y_test)
    accuracy *= 100
    print("Accuracy %:", accuracy)
Example 8
def main():
    # prepare data
    trainingSet = []
    testSet = []
    accuracy = 0.0
    split = 0.25
    loadDataset('../Dataset/combined.csv', split, trainingSet, testSet)
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))

    # generate predictions
    predictions = []
    trainData = np.array(trainingSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    columns = trainData.shape[1]
    X = np.array(trainData)
    y = np.array(trainingSet)[:, columns]

    clf = BaggingClassifier(KNN(n_neighbors=10, weights='uniform',
                                algorithm='auto', leaf_size=10, p=1,
                                metric='minkowski', metric_params=None,
                                n_jobs=1))
    clf.fit(X, y)

    testData = np.array(testSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    X_test = np.array(testData)
    y_test = np.array(testSet)[:, columns]

    accuracy = clf.score(X_test, y_test)
    accuracy *= 100
    print("Accuracy %:", accuracy)
Example 9
def main():
    # prepare data
    trainingSet = []
    testSet = []
    accuracy = 0.0
    split = 0.25
    loadDataset('../Dataset/combined.csv', split, trainingSet, testSet)
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))

    # generate predictions
    predictions = []
    trainData = np.array(trainingSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    columns = trainData.shape[1]
    X = np.array(trainData)
    y = np.array(trainingSet)[:, columns]

    clf = BaggingClassifier(SVC(C=1.0, kernel='linear', degree=5, gamma='auto',
                                coef0=0.0, shrinking=True, probability=False,
                                tol=0.001, cache_size=200, class_weight=None,
                                verbose=False, max_iter=-1, random_state=None))
    clf.fit(X, y)

    testData = np.array(testSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    X_test = np.array(testData)
    y_test = np.array(testSet)[:, columns]

    accuracy = clf.score(X_test, y_test)
    accuracy *= 100
    print("Accuracy %:", accuracy)
Example 10
def __init__(self,
             base_classifier=None,
             n_classifiers=100,
             combination_rule='majority_vote'):

    self.base_classifier = base_classifier
    self.n_classifiers = n_classifiers

    # using the sklearn implementation of bagging for now
    self.sk_bagging = BaggingClassifier(base_estimator=base_classifier,
                                        n_estimators=n_classifiers,
                                        max_samples=1.0,
                                        max_features=1.0)

    self.ensemble = Ensemble()
    self.combiner = Combiner(rule=combination_rule)
Example 11
def test_classification():
    # Check classification for various parameter settings.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    grid = ParameterGrid({"max_samples": [0.5, 1.0],
                          "max_features": [1, 2, 4],
                          "bootstrap": [True, False],
                          "bootstrap_features": [True, False]})

    for base_estimator in [None,
                           DummyClassifier(),
                           Perceptron(tol=1e-3),
                           DecisionTreeClassifier(),
                           KNeighborsClassifier(),
                           SVC()]:
        for params in grid:
            BaggingClassifier(base_estimator=base_estimator,
                              random_state=rng,
                              **params).fit(X_train, y_train).predict(X_test)
Example 12
def test_base():
    # Check BaseEnsemble methods.
    ensemble = BaggingClassifier(
        base_estimator=Perceptron(tol=1e-3, random_state=None), n_estimators=3)

    iris = load_iris()
    ensemble.fit(iris.data, iris.target)
    ensemble.estimators_ = []  # empty the list and create estimators manually

    ensemble._make_estimator()
    random_state = np.random.RandomState(3)
    ensemble._make_estimator(random_state=random_state)
    ensemble._make_estimator(random_state=random_state)
    ensemble._make_estimator(append=False)

    assert_equal(3, len(ensemble))
    assert_equal(3, len(ensemble.estimators_))

    assert isinstance(ensemble[0], Perceptron)
    assert_equal(ensemble[0].random_state, None)
    assert isinstance(ensemble[1].random_state, int)
    assert isinstance(ensemble[2].random_state, int)
    assert_not_equal(ensemble[1].random_state, ensemble[2].random_state)

    np_int_ensemble = BaggingClassifier(base_estimator=Perceptron(tol=1e-3),
                                        n_estimators=np.int32(3))
    np_int_ensemble.fit(iris.data, iris.target)
Example 13
def test_base_zero_n_estimators():
    # Check that instantiating a BaseEnsemble with n_estimators<=0 raises
    # a ValueError.
    ensemble = BaggingClassifier(base_estimator=Perceptron(tol=1e-3),
                                 n_estimators=0)
    iris = load_iris()
    assert_raise_message(ValueError,
                         "n_estimators must be greater than zero, got 0.",
                         ensemble.fit, iris.data, iris.target)
Example 14
def test_base_not_int_n_estimators():
    # Check that instantiating a BaseEnsemble with a string as n_estimators
    # raises a ValueError demanding n_estimators to be supplied as an integer.
    string_ensemble = BaggingClassifier(base_estimator=Perceptron(tol=1e-3),
                                        n_estimators='3')
    iris = load_iris()
    assert_raise_message(ValueError,
                         "n_estimators must be an integer",
                         string_ensemble.fit, iris.data, iris.target)

    float_ensemble = BaggingClassifier(base_estimator=Perceptron(tol=1e-3),
                                       n_estimators=3.0)
    assert_raise_message(ValueError,
                         "n_estimators must be an integer",
                         float_ensemble.fit, iris.data, iris.target)
Example 15
def test_oob_score_classification():
    # Check that oob prediction is a good estimation of the generalization
    # error.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)

    for base_estimator in [DecisionTreeClassifier(), SVC(gamma="scale")]:
        clf = BaggingClassifier(base_estimator=base_estimator,
                                n_estimators=100,
                                bootstrap=True,
                                oob_score=True,
                                random_state=rng).fit(X_train, y_train)

        test_score = clf.score(X_test, y_test)
        assert_less(abs(test_score - clf.oob_score_), 0.1)

        # Test with few estimators
        assert_warns(UserWarning,
                     BaggingClassifier(base_estimator=base_estimator,
                                       n_estimators=1,
                                       bootstrap=True,
                                       oob_score=True,
                                       random_state=rng).fit,
                     X_train,
                     y_train)
Example 16
def test_error():
    # Test that it gives proper exception on deficient input.
    X, y = iris.data, iris.target
    base = DecisionTreeClassifier()

    # Test max_samples
    assert_raises(ValueError,
                  BaggingClassifier(base, max_samples=-1).fit, X, y)
    assert_raises(ValueError,
                  BaggingClassifier(base, max_samples=0.0).fit, X, y)
    assert_raises(ValueError,
                  BaggingClassifier(base, max_samples=2.0).fit, X, y)
    assert_raises(ValueError,
                  BaggingClassifier(base, max_samples=1000).fit, X, y)
    assert_raises(ValueError,
                  BaggingClassifier(base, max_samples="foobar").fit, X, y)

    # Test max_features
    assert_raises(ValueError,
                  BaggingClassifier(base, max_features=-1).fit, X, y)
    assert_raises(ValueError,
                  BaggingClassifier(base, max_features=0.0).fit, X, y)
    assert_raises(ValueError,
                  BaggingClassifier(base, max_features=2.0).fit, X, y)
    assert_raises(ValueError,
                  BaggingClassifier(base, max_features=5).fit, X, y)
    assert_raises(ValueError,
                  BaggingClassifier(base, max_features="foobar").fit, X, y)

    # Test support of decision_function
    assert not hasattr(BaggingClassifier(base).fit(X, y), 'decision_function')
Example 17
def test_gridsearch():
    # Check that bagging ensembles can be grid-searched.
    # Transform iris into a binary classification task
    X, y = iris.data, iris.target
    y[y == 2] = 1

    # Grid search with scoring based on decision_function
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__C': (1, 2)}

    GridSearchCV(BaggingClassifier(SVC(gamma="scale")),
                 parameters,
                 scoring="roc_auc").fit(X, y)
Example 18
def test_bagging_sample_weight_unsupported_but_passed():
    estimator = BaggingClassifier(DummyZeroEstimator())
    rng = check_random_state(0)

    estimator.fit(iris.data, iris.target).predict(iris.data)
    assert_raises(ValueError, estimator.fit, iris.data, iris.target,
                  sample_weight=rng.randint(10, size=(iris.data.shape[0])))
Example 19
def test_warm_start_smaller_n_estimators():
    # Test if warm start'ed second fit with smaller n_estimators raises error.
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    clf = BaggingClassifier(n_estimators=5, warm_start=True)
    clf.fit(X, y)
    clf.set_params(n_estimators=4)
    assert_raises(ValueError, clf.fit, X, y)
Example 20
def test_oob_score_removed_on_warm_start():
    X, y = make_hastie_10_2(n_samples=2000, random_state=1)

    clf = BaggingClassifier(n_estimators=50, oob_score=True)
    clf.fit(X, y)

    clf.set_params(warm_start=True, oob_score=False, n_estimators=100)
    clf.fit(X, y)

    assert_raises(AttributeError, getattr, clf, "oob_score_")
Example 21
def test_oob_score_consistency():
    # Make sure OOB scores are identical when random_state, estimator, and
    # training data are fixed and fitting is done twice
    X, y = make_hastie_10_2(n_samples=200, random_state=1)
    bagging = BaggingClassifier(KNeighborsClassifier(), max_samples=0.5,
                                max_features=0.5, oob_score=True,
                                random_state=1)
    assert_equal(bagging.fit(X, y).oob_score_, bagging.fit(X, y).oob_score_)
Example 22
def test_estimators_samples():
    # Check that format of estimators_samples_ is correct and that results
    # generated at fit time can be identically reproduced at a later time
    # using data saved in object attributes.
    X, y = make_hastie_10_2(n_samples=200, random_state=1)
    bagging = BaggingClassifier(LogisticRegression(), max_samples=0.5,
                                max_features=0.5, random_state=1,
                                bootstrap=False)
    bagging.fit(X, y)

    # Get relevant attributes
    estimators_samples = bagging.estimators_samples_
    estimators_features = bagging.estimators_features_
    estimators = bagging.estimators_

    # Test for correct formatting
    assert_equal(len(estimators_samples), len(estimators))
    assert_equal(len(estimators_samples[0]), len(X) // 2)
    assert_equal(estimators_samples[0].dtype.kind, 'i')

    # Re-fit single estimator to test for consistent sampling
    estimator_index = 0
    estimator_samples = estimators_samples[estimator_index]
    estimator_features = estimators_features[estimator_index]
    estimator = estimators[estimator_index]

    X_train = (X[estimator_samples])[:, estimator_features]
    y_train = y[estimator_samples]

    orig_coefs = estimator.coef_
    estimator.fit(X_train, y_train)
    new_coefs = estimator.coef_

    assert_array_almost_equal(orig_coefs, new_coefs)
Example 23
def test_estimators_samples_deterministic():
    # This test is a regression test to check that with a random step
    # (e.g. SparseRandomProjection) and a given random state, the results
    # generated at fit time can be identically reproduced at a later time using
    # data saved in object attributes. Check issue #9524 for full discussion.
    iris = load_iris()
    X, y = iris.data, iris.target

    base_pipeline = make_pipeline(SparseRandomProjection(n_components=2),
                                  LogisticRegression())
    clf = BaggingClassifier(base_estimator=base_pipeline,
                            max_samples=0.5,
                            random_state=0)
    clf.fit(X, y)
    pipeline_estimator_coef = clf.estimators_[0].steps[-1][1].coef_.copy()

    estimator = clf.estimators_[0]
    estimator_sample = clf.estimators_samples_[0]
    estimator_feature = clf.estimators_features_[0]

    X_train = (X[estimator_sample])[:, estimator_feature]
    y_train = y[estimator_sample]

    estimator.fit(X_train, y_train)
    assert_array_equal(estimator.steps[-1][1].coef_, pipeline_estimator_coef)
Example 24
def test_max_samples_consistency():
    # Make sure validated max_samples and original max_samples are identical
    # when valid integer max_samples supplied by user
    max_samples = 100
    X, y = make_hastie_10_2(n_samples=2 * max_samples, random_state=1)
    bagging = BaggingClassifier(KNeighborsClassifier(),
                                max_samples=max_samples,
                                max_features=0.5, random_state=1)
    bagging.fit(X, y)
    assert_equal(bagging._max_samples, max_samples)
Example 25
def test_bagging_classifier_with_missing_inputs():
    # Check that BaggingClassifier can accept X with missing/infinite data
    X = np.array([
        [1, 3, 5],
        [2, None, 6],
        [2, np.nan, 6],
        [2, np.inf, 6],
        [2, np.NINF, 6],
    ])
    y = np.array([3, 6, 6, 6, 6])

    classifier = DecisionTreeClassifier()
    pipeline = make_pipeline(
        FunctionTransformer(replace, validate=False),
        classifier
    )
    pipeline.fit(X, y).predict(X)

    bagging_classifier = BaggingClassifier(pipeline)
    bagging_classifier.fit(X, y)
    y_hat = bagging_classifier.predict(X)
    assert_equal(y.shape, y_hat.shape)
    bagging_classifier.predict_log_proba(X)
    bagging_classifier.predict_proba(X)

    # Verify that exceptions can be raised by wrapper classifier
    classifier = DecisionTreeClassifier()
    pipeline = make_pipeline(classifier)
    assert_raises(ValueError, pipeline.fit, X, y)

    bagging_classifier = BaggingClassifier(pipeline)
    assert_raises(ValueError, bagging_classifier.fit, X, y)
Example 26
def test_bagging_small_max_features():
    # Check that Bagging estimator can accept low fractional max_features
    X = np.array([[1, 2], [3, 4]])
    y = np.array([1, 0])

    bagging = BaggingClassifier(LogisticRegression(),
                                max_features=0.3, random_state=1)
    bagging.fit(X, y)