Python源码示例:sklearn.ensemble.BaggingRegressor()
示例1
def test_regression():
    """Smoke-test ``BaggingRegressor`` across a grid of sampling settings.

    Each candidate base estimator is fitted on, and asked to predict from,
    a split of the first 50 rows of ``boston`` for every combination of
    ``max_samples``, ``max_features``, ``bootstrap`` and
    ``bootstrap_features``. Nothing is asserted about the predictions; the
    test passes as long as no call raises.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data[:50], boston.target[:50], random_state=rng)

    settings = ParameterGrid({
        "max_samples": [0.5, 1.0],
        "max_features": [0.5, 1.0],
        "bootstrap": [True, False],
        "bootstrap_features": [True, False],
    })
    candidates = [
        None,
        DummyRegressor(),
        DecisionTreeRegressor(),
        KNeighborsRegressor(),
        SVR(gamma='scale'),
    ]

    for base_estimator in candidates:
        for params in settings:
            model = BaggingRegressor(base_estimator=base_estimator,
                                     random_state=rng,
                                     **params)
            model.fit(X_train, y_train).predict(X_test)
示例2
def test_bootstrap_features():
    """Check that bootstrapping features may generate duplicate features.

    With ``bootstrap_features=False`` every estimator must have drawn as
    many distinct features as ``boston.data`` has columns; with
    ``bootstrap_features=True`` the number of distinct features must be
    strictly smaller.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=rng)

    # Features drawn without replacement.
    without_replacement = BaggingRegressor(
        base_estimator=DecisionTreeRegressor(),
        max_features=1.0,
        bootstrap_features=False,
        random_state=rng)
    without_replacement.fit(X_train, y_train)
    for drawn in without_replacement.estimators_features_:
        assert_equal(boston.data.shape[1], np.unique(drawn).shape[0])

    # Features drawn with replacement.
    with_replacement = BaggingRegressor(
        base_estimator=DecisionTreeRegressor(),
        max_features=1.0,
        bootstrap_features=True,
        random_state=rng)
    with_replacement.fit(X_train, y_train)
    for drawn in with_replacement.estimators_features_:
        assert_greater(boston.data.shape[1], np.unique(drawn).shape[0])
示例3
def test_parallel_regression():
    """Check that predictions do not depend on the ``n_jobs`` setting."""
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=rng)

    # Fit once with three jobs, then predict with one and with two jobs.
    parallel_fit = BaggingRegressor(DecisionTreeRegressor(),
                                    n_jobs=3,
                                    random_state=0)
    parallel_fit.fit(X_train, y_train)

    parallel_fit.set_params(n_jobs=1)
    pred_one_job = parallel_fit.predict(X_test)
    parallel_fit.set_params(n_jobs=2)
    pred_two_jobs = parallel_fit.predict(X_test)
    assert_array_almost_equal(pred_one_job, pred_two_jobs)

    # An ensemble fitted with a single job and the same seed must agree too.
    serial_fit = BaggingRegressor(DecisionTreeRegressor(),
                                  n_jobs=1,
                                  random_state=0)
    serial_fit.fit(X_train, y_train)
    pred_serial_fit = serial_fit.predict(X_test)
    assert_array_almost_equal(pred_one_job, pred_serial_fit)
示例4
def test_regression():
    """Run ``BaggingRegressor`` through a grid of parameter settings.

    For each base estimator and each combination in the parameter grid, an
    ensemble is fitted on a split of the first 50 rows of ``boston`` and
    used to predict the held-out part. The test only checks that these
    calls complete without raising.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data[:50], boston.target[:50], random_state=rng)

    grid = ParameterGrid({
        "max_samples": [0.5, 1.0],
        "max_features": [0.5, 1.0],
        "bootstrap": [True, False],
        "bootstrap_features": [True, False],
    })
    base_estimators = [
        None,
        DummyRegressor(),
        DecisionTreeRegressor(),
        KNeighborsRegressor(),
        SVR(),
    ]

    for base_estimator in base_estimators:
        for params in grid:
            bagger = BaggingRegressor(base_estimator=base_estimator,
                                      random_state=rng,
                                      **params)
            bagger.fit(X_train, y_train).predict(X_test)
示例5
def test_bootstrap_features():
    """Check that bootstrapping features may generate duplicate features.

    Without feature bootstrapping each estimator's feature index array must
    contain ``boston.data.shape[1]`` distinct values; with feature
    bootstrapping it must contain strictly fewer distinct values.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=rng)

    # bootstrap_features=False: all features are distinct.
    plain = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                             max_features=1.0,
                             bootstrap_features=False,
                             random_state=rng)
    plain.fit(X_train, y_train)
    for feature_idx in plain.estimators_features_:
        assert_equal(boston.data.shape[1], np.unique(feature_idx).shape[0])

    # bootstrap_features=True: some features are repeated.
    bootstrapped = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                    max_features=1.0,
                                    bootstrap_features=True,
                                    random_state=rng)
    bootstrapped.fit(X_train, y_train)
    for feature_idx in bootstrapped.estimators_features_:
        assert_greater(boston.data.shape[1], np.unique(feature_idx).shape[0])
示例6
def test_parallel_regression():
    """Check that parallel and serial regression give the same predictions."""
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=rng)

    # One ensemble fitted with n_jobs=3; only the prediction-time n_jobs varies.
    fitted_in_parallel = BaggingRegressor(DecisionTreeRegressor(),
                                          n_jobs=3,
                                          random_state=0)
    fitted_in_parallel.fit(X_train, y_train)

    fitted_in_parallel.set_params(n_jobs=1)
    y_single = fitted_in_parallel.predict(X_test)
    fitted_in_parallel.set_params(n_jobs=2)
    y_double = fitted_in_parallel.predict(X_test)
    assert_array_almost_equal(y_single, y_double)

    # Same seed, fitted with n_jobs=1 from the start.
    fitted_serially = BaggingRegressor(DecisionTreeRegressor(),
                                       n_jobs=1,
                                       random_state=0)
    fitted_serially.fit(X_train, y_train)
    y_serial = fitted_serially.predict(X_test)
    assert_array_almost_equal(y_single, y_serial)
示例7
def test_bootstrap_samples():
    """Check that bootstrapping samples generates non-perfect base estimators.

    Three things are verified: an ensemble built without sample bootstrap
    scores the same on the training set as a single tree; an ensemble built
    with bootstrap scores strictly lower; and every bootstrap resample has
    the size of the training set while no two resamples share a hash.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=rng)

    reference_tree = DecisionTreeRegressor().fit(X_train, y_train)
    reference_score = reference_tree.score(X_train, y_train)

    # Without bootstrap, all trees are perfect on the training set.
    no_bootstrap = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                    max_samples=1.0,
                                    bootstrap=False,
                                    random_state=rng)
    no_bootstrap.fit(X_train, y_train)
    assert_equal(reference_score, no_bootstrap.score(X_train, y_train))

    # With bootstrap, trees are no longer perfect on the training set.
    with_bootstrap = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                      max_samples=1.0,
                                      bootstrap=True,
                                      random_state=rng)
    with_bootstrap.fit(X_train, y_train)
    assert_greater(reference_score, with_bootstrap.score(X_train, y_train))

    # Each sampling must be a complete bootstrap resample: same size as the
    # input data, but different content (compared via the hash of the data).
    size_probe = BaggingRegressor(base_estimator=DummySizeEstimator(),
                                  bootstrap=True)
    size_probe.fit(X_train, y_train)
    seen_hashes = []
    for member in size_probe.estimators_:
        assert member.training_size_ == X_train.shape[0]
        seen_hashes.append(member.training_hash_)
    assert len(set(seen_hashes)) == len(seen_hashes)
示例8
def test_oob_score_regression():
    """Check that the OOB score is a good estimate of the generalization error.

    The out-of-bag score of a 50-estimator ensemble must lie within 0.1 of
    its score on the held-out split, and fitting a single-estimator ensemble
    with ``oob_score=True`` must emit a ``UserWarning``.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=rng)

    regressor = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                 n_estimators=50,
                                 bootstrap=True,
                                 oob_score=True,
                                 random_state=rng)
    regressor.fit(X_train, y_train)
    held_out_score = regressor.score(X_test, y_test)
    assert_less(abs(held_out_score - regressor.oob_score_), 0.1)

    # Test with few estimators.
    tiny_ensemble = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                     n_estimators=1,
                                     bootstrap=True,
                                     oob_score=True,
                                     random_state=rng)
    assert_warns(UserWarning, tiny_ensemble.fit, X_train, y_train)
示例9
def test_bagging_regressor_with_missing_inputs():
    """Check that ``BaggingRegressor`` accepts X with missing/infinite data.

    The bagged estimator is a pipeline whose first step (``replace``, a
    helper defined outside this function) pre-processes X before the tree
    sees it. Both a 1-D and a 2-D target are exercised, and the prediction
    must have the same shape as the target. Finally, a pipeline without the
    pre-processing step must raise ``ValueError`` both on its own and when
    wrapped in ``BaggingRegressor``.
    """
    # ``-np.inf`` is used instead of ``np.NINF``: the alias was removed in
    # NumPy 2.0, while the negated constant works on every NumPy version.
    X = np.array([
        [1, 3, 5],
        [2, None, 6],
        [2, np.nan, 6],
        [2, np.inf, 6],
        [2, -np.inf, 6],
    ])
    y_values = [
        np.array([2, 3, 3, 3, 3]),
        np.array([
            [2, 1, 9],
            [3, 6, 8],
            [3, 6, 8],
            [3, 6, 8],
            [3, 6, 8],
        ]),
    ]
    for y in y_values:
        regressor = DecisionTreeRegressor()
        pipeline = make_pipeline(
            FunctionTransformer(replace, validate=False),
            regressor,
        )
        pipeline.fit(X, y).predict(X)
        bagging_regressor = BaggingRegressor(pipeline)
        y_hat = bagging_regressor.fit(X, y).predict(X)
        assert_equal(y.shape, y_hat.shape)

    # Verify that exceptions can be raised by the wrapped regressor.
    # ``y`` is the last target from the loop above.
    regressor = DecisionTreeRegressor()
    pipeline = make_pipeline(regressor)
    assert_raises(ValueError, pipeline.fit, X, y)
    bagging_regressor = BaggingRegressor(pipeline)
    assert_raises(ValueError, bagging_regressor.fit, X, y)
示例10
def fit(self):
    """Scale the data and train the model selected by ``self.algorithm``.

    Do not forget to tune the hyperparameters.

    The algorithm is read from ``self.algorithm`` and is one of
    "KernelRidge", "SVM", "LinearRegression", "Lasso", "ElasticNet",
    "NeuralNet" or "BaggingNeuralNet" (the original documentation gives
    "SVM" as the default; that default is set outside this method).

    The method stores the scaled arrays (``X_train_sc``, ``y_train_sc``,
    ``X_test_sc``, ``y_test_sc``), the fitted estimator (``model``) and the
    predictions on both splits (``prediction_train``, ``prediction_test``)
    on the instance. It returns nothing.
    """
    self.X_scaler.fit(self.X_train)
    self.Y_scaler.fit(self.y_train)

    # The scaled copies are always computed, even though the fit below may
    # end up using the unscaled data.
    self.X_train_sc = self.X_scaler.transform(self.X_train)
    self.y_train_sc = self.Y_scaler.transform(self.y_train)
    self.X_test_sc = self.X_scaler.transform(self.X_test)
    self.y_test_sc = self.Y_scaler.transform(self.y_test)

    # Select the estimator; kernel and penalised models go through a
    # 5-fold grid search.
    if self.algorithm == "KernelRidge":
        kernel_ridge = KernelRidge(kernel=self.user_kernel)
        self.model = sklearn.model_selection.GridSearchCV(
            kernel_ridge, cv=5, param_grid=self.param_kr)
    elif self.algorithm == "SVM":
        support_vector = SVR(kernel=self.user_kernel)
        self.model = sklearn.model_selection.GridSearchCV(
            support_vector, cv=5, param_grid=self.param_svm)
    elif self.algorithm == "Lasso":
        lasso = sklearn.linear_model.Lasso(
            alpha=0.1, random_state=self.rand_state)
        self.model = sklearn.model_selection.GridSearchCV(
            lasso, cv=5, param_grid={"alpha": np.logspace(-5, 5, 30)})
    elif self.algorithm == "ElasticNet":
        elastic_net = sklearn.linear_model.ElasticNet(
            alpha=0.1, l1_ratio=0.5, random_state=self.rand_state)
        self.model = sklearn.model_selection.GridSearchCV(
            elastic_net, cv=5, param_grid={"alpha": np.logspace(-5, 5, 30)})
    elif self.algorithm == "LinearRegression":
        self.model = sklearn.linear_model.LinearRegression()
    elif self.algorithm == "NeuralNet":
        self.model = MLPRegressor(**self.param_neurons)
    elif self.algorithm == "BaggingNeuralNet":
        network = MLPRegressor(**self.param_neurons)
        self.model = BaggingRegressor(base_estimator=network, **self.param_bag)

    if self.scaling == True:  # noqa: E712 - comparison kept as in the original
        # Train and predict in scaled space, then map the predictions back
        # to the original target units.
        self.model.fit(self.X_train_sc, self.y_train_sc.reshape(-1,))
        scaled_train_pred = self.model.predict(self.X_train_sc)
        self.prediction_train = self.Y_scaler.inverse_transform(
            scaled_train_pred.reshape(-1, 1))
        scaled_test_pred = self.model.predict(self.X_test_sc)
        self.prediction_test = self.Y_scaler.inverse_transform(
            scaled_test_pred.reshape(-1, 1))
    else:
        self.model.fit(self.X_train, self.y_train.reshape(-1,))
        self.prediction_train = self.model.predict(self.X_train)
        self.prediction_test = self.model.predict(self.X_test)
示例11
def __init__(self, info, verbose=True, debug_mode=False):
    """Choose and instantiate a model from the dataset description.

    Parameters
    ----------
    info : mapping
        Dataset description. The keys ``'label_num'``, ``'target_num'``,
        ``'task'`` and ``'metric'`` are always read; ``'is_sparse'`` and
        ``'has_categorical'`` are read depending on the task.
    verbose : bool or int, default True
        Forwarded to the ``verbose`` argument of the selected estimator.
    debug_mode : bool or int, default False
        When ``>= 2`` a ``RandomPredictor`` is installed and no other
        model is built.

    Sets ``name``, ``model``, ``predict_method`` and ``postprocessor`` on
    the instance, in addition to the values copied from ``info``.
    """
    self.label_num = info['label_num']
    self.target_num = info['target_num']
    self.task = info['task']
    self.metric = info['metric']
    # To calibrate proba
    self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=False)

    if debug_mode >= 2:
        self.name = "RandomPredictor"
        self.model = RandomPredictor(self.target_num)
        self.predict_method = self.model.predict_proba
        return

    if info['task'] == 'regression':
        if info['is_sparse'] == True:  # noqa: E712 - comparison kept as in the original
            self.name = "BaggingRidgeRegressor"
            # unfortunately, no warm start...
            self.model = BaggingRegressor(base_estimator=Ridge(), n_estimators=1, verbose=verbose)
        else:
            self.name = "GradientBoostingRegressor"
            self.model = GradientBoostingRegressor(n_estimators=1, verbose=verbose, warm_start=True)
        # Regression uses plain ``predict`` rather than ``predict_proba``.
        self.predict_method = self.model.predict
    else:
        if info['has_categorical']:
            # Out of laziness, we do not convert categorical variables...
            self.name = "RandomForestClassifier"
            # unfortunately, no warm start...
            self.model = RandomForestClassifier(n_estimators=1, verbose=verbose)
        elif info['is_sparse']:
            self.name = "BaggingNBClassifier"
            # unfortunately, no warm start...
            self.model = BaggingClassifier(base_estimator=BernoulliNB(), n_estimators=1, verbose=verbose)
        else:
            self.name = "GradientBoostingClassifier"
            # Built with a direct call: the original assembled the same
            # call as a string and ran it through ``eval``.
            self.model = GradientBoostingClassifier(
                n_estimators=1, verbose=verbose, min_samples_split=10,
                random_state=1, warm_start=True)
        if info['task'] == 'multilabel.classification':
            self.model = MultiLabelEnsemble(self.model)
        # Classification always predicts probabilities.
        self.predict_method = self.model.predict_proba
示例12
def setClf(self):
    """Install a ``BaggingRegressor`` on ``self.clf``.

    The ensemble uses 100 estimators, each trained on half of the samples
    and half of the features, with ``verbose=100``.
    """
    # min_samples_split = 3
    settings = {
        "n_estimators": 100,
        "max_samples": 0.5,
        "max_features": 0.5,
        "verbose": 100,
    }
    self.clf = BaggingRegressor(**settings)
示例13
def __init__(self, options):
    """Build the wrapped ``_BaggingRegressor`` from ``options``.

    ``options`` is first handed to ``self.handle_options``. Its optional
    ``'params'`` entry is then passed through ``convert_params`` together
    with the names to treat as floats, bools and ints (presumably a type
    coercion - confirm against ``convert_params``), and the result is used
    as keyword arguments for the estimator stored on ``self.estimator``.
    """
    self.handle_options(options)

    raw_params = options.get('params', {})
    typed_params = convert_params(
        raw_params,
        floats=['max_samples', 'max_features'],
        bools=['bootstrap', 'bootstrap_features', 'oob_score', 'warm_start'],
        ints=['n_estimators'],
    )
    self.estimator = _BaggingRegressor(**typed_params)
示例14
def __init__(self, cols=None, predictors=None, base_estimator=None,
             n_estimators=10, max_samples=1.0, max_features=1.0,
             bootstrap=True, bootstrap_features=False, n_jobs=1,
             random_state=None, verbose=0, tmp_fill=-999., as_df=True):
    """Initialise the imputer with ``BaggingRegressor`` as its imputer class.

    Every argument is forwarded unchanged, by keyword, to the parent
    initialiser, together with ``imputer_class=BaggingRegressor``.
    """
    # Keyword arguments that share their name with BaggingRegressor options.
    estimator_options = dict(
        base_estimator=base_estimator,
        n_estimators=n_estimators,
        max_samples=max_samples,
        max_features=max_features,
        bootstrap=bootstrap,
        bootstrap_features=bootstrap_features,
        n_jobs=n_jobs,
        random_state=random_state,
        verbose=verbose,
    )
    super(BaggedRegressorImputer, self).__init__(
        imputer_class=BaggingRegressor,
        cols=cols,
        predictors=predictors,
        tmp_fill=tmp_fill,
        as_df=as_df,
        **estimator_options)
示例15
def get_ensemble_models():
    """Build the ensemble regressors to compare, with display names.

    Returns
    -------
    classifier_list : list
        Random forest, bagging, extra trees, AdaBoost and gradient boosting
        regressors, in that order.
    classifier_name_list : list of str
        Display names, aligned with ``classifier_list``.

    Notes
    -----
    The parallel estimators use about 80% of ``n_cores`` (a module-level
    value), but never fewer than one job: ``int(0.8 * n_cores)`` is 0 on a
    single-core machine and ``n_jobs=0`` is not a valid setting.
    """
    n_jobs = max(1, int(0.8 * n_cores))

    rf = RandomForestRegressor(
        n_estimators=51, min_samples_leaf=5, min_samples_split=3,
        random_state=42, n_jobs=n_jobs)
    bag = BaggingRegressor(n_estimators=51, random_state=42, n_jobs=n_jobs)
    extra = ExtraTreesRegressor(n_estimators=71, random_state=42, n_jobs=n_jobs)
    ada = AdaBoostRegressor(random_state=42)
    grad = GradientBoostingRegressor(n_estimators=101, random_state=42)

    classifier_list = [rf, bag, extra, ada, grad]
    classifier_name_list = ['Random Forests', 'Bagging',
                            'Extra Trees', 'AdaBoost', 'Gradient Boost']
    return classifier_list, classifier_name_list
示例16
def test_objectmapper(self):
    """Check that ``ModelFrame.ensemble`` exposes the ``ensemble`` estimators.

    Each listed attribute of ``df.ensemble`` must be the very same object
    as the attribute of that name on the ``ensemble`` module.
    """
    df = pdml.ModelFrame([])
    exposed_names = (
        'AdaBoostClassifier',
        'AdaBoostRegressor',
        'BaggingClassifier',
        'BaggingRegressor',
        'ExtraTreesClassifier',
        'ExtraTreesRegressor',
        'GradientBoostingClassifier',
        'GradientBoostingRegressor',
        'IsolationForest',
        'RandomForestClassifier',
        'RandomTreesEmbedding',
        'RandomForestRegressor',
        'VotingClassifier',
    )
    for name in exposed_names:
        self.assertIs(getattr(df.ensemble, name), getattr(ensemble, name))
示例17
def bagging_regressor(self, assign=True, base_estimator=DecisionTreeRegressor(), **kwargs):
    """
    Supervised-learning regressor: instantiate a BaggingRegressor.

    Without keyword arguments the regressor is built as:
        BaggingRegressor(base_estimator=base_estimator, n_estimators=200,
                         bootstrap=True, oob_score=True, random_state=1)
    With keyword arguments they are passed straight through:
        BaggingRegressor(**kwargs)
    where ``base_estimator`` is added to ``kwargs`` if it is not already there.

    :param assign: whether to keep the new BaggingRegressor on the instance,
                   default True, i.e. self.reg = reg
    :param base_estimator: defaults to DecisionTreeRegressor()
    :param kwargs: when given, initialise with BaggingRegressor(**kwargs);
                   when empty, use the default construction shown above
    :return: the instantiated BaggingRegressor object
    """
    if kwargs:
        # Caller-supplied settings win; only fill in the base estimator.
        kwargs.setdefault('base_estimator', base_estimator)
        reg = BaggingRegressor(**kwargs)
    else:
        reg = BaggingRegressor(base_estimator=base_estimator, n_estimators=200,
                               bootstrap=True, oob_score=True, random_state=1)

    if assign:
        self.reg = reg
    return reg
示例18
def bagging_regressor_best(self, x, y, param_grid=None, assign=True, n_jobs=-1, show=True):
    """
    Search for the best parameters of the BaggingRegressor constructor.

    The higher-level AbuML.bagging_regressor_best calls this with the x, y
    data held by AbuML.

    eg:
        calling bagging_regressor_best without param_grid:

        from abupy import AbuML, ml
        ttn_abu = AbuML.create_test_more_fiter()
        ttn_abu.bagging_regressor_best()

        calling bagging_regressor_best with param_grid:

        param_grid = {'max_samples': np.arange(1, 5), 'n_estimators': np.arange(100, 300, 50)}
        ttn_abu.bagging_regressor_best(param_grid=param_grid, n_jobs=-1)

        out: BaggingRegressor(max_samples=4, n_estimators=250)

    :param x: training-set x matrix, numpy matrix
    :param y: training-set y sequence, numpy sequence
    :param param_grid: dictionary of keyword parameters to search,
                eg: param_grid = {'max_samples': np.arange(1, 5), 'n_estimators': np.arange(100, 300, 50)}
    :param assign: whether to keep the learner built with the best parameters, default True
    :param n_jobs: number of parallel process tasks, default -1, i.e. as many processes as CPUs
    :param show: whether to visualise the result of the parameter search
    :return: the BaggingRegressor object built with the best parameters
    """
    # The search itself is delegated to the shared helper, which receives
    # this class's regressor factory.
    factory = self.bagging_regressor
    best = self._estimators_prarms_best(factory, x, y, param_grid, assign, n_jobs, show)
    return best
示例19
def test_bootstrap_samples():
    """Check that bootstrapping samples generates non-perfect base estimators.

    An ensemble built without sample bootstrap must score the same on the
    training set as a single tree fitted on that set; an ensemble built
    with bootstrap must score strictly lower.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=rng)

    single_tree = DecisionTreeRegressor().fit(X_train, y_train)

    # Without bootstrap, all trees are perfect on the training set.
    exact = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                             max_samples=1.0,
                             bootstrap=False,
                             random_state=rng)
    exact.fit(X_train, y_train)
    assert_equal(single_tree.score(X_train, y_train),
                 exact.score(X_train, y_train))

    # With bootstrap, trees are no longer perfect on the training set.
    resampled = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                 max_samples=1.0,
                                 bootstrap=True,
                                 random_state=rng)
    resampled.fit(X_train, y_train)
    assert_greater(single_tree.score(X_train, y_train),
                   resampled.score(X_train, y_train))
示例20
def test_oob_score_regression():
    """Check that the OOB prediction estimates the generalization error well.

    With 50 estimators the out-of-bag score must be within 0.1 of the score
    on the test split. With a single estimator, fitting must raise a
    ``UserWarning``.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=rng)

    bagger = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                              n_estimators=50,
                              bootstrap=True,
                              oob_score=True,
                              random_state=rng)
    bagger.fit(X_train, y_train)
    score_on_test = bagger.score(X_test, y_test)
    assert_less(abs(score_on_test - bagger.oob_score_), 0.1)

    # Test with few estimators.
    one_estimator = BaggingRegressor(base_estimator=DecisionTreeRegressor(),
                                     n_estimators=1,
                                     bootstrap=True,
                                     oob_score=True,
                                     random_state=rng)
    assert_warns(UserWarning, one_estimator.fit, X_train, y_train)
示例21
def test_sparse_regression():
    """Check regression for various parameter settings on sparse input.

    For both CSC and CSR input and for each parameter set, an ensemble
    trained on sparse data must predict (almost) the same values as one
    trained on the dense data, and every base estimator must have been
    handed data of the sparse type it was fitted with.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data[:50], boston.target[:50], random_state=rng)

    class CustomSVR(SVR):
        """SVR variant that records the nature of the training set"""

        def fit(self, X, y):
            super().fit(X, y)
            self.data_type_ = type(X)
            return self

    parameter_sets = [
        {"max_samples": 0.5,
         "max_features": 2,
         "bootstrap": True,
         "bootstrap_features": True},
        {"max_samples": 1.0,
         "max_features": 4,
         "bootstrap": True,
         "bootstrap_features": True},
        {"max_features": 2,
         "bootstrap": False,
         "bootstrap_features": True},
        {"max_samples": 0.5,
         "bootstrap": True,
         "bootstrap_features": False},
    ]

    for sparse_format in [csc_matrix, csr_matrix]:
        X_train_sparse = sparse_format(X_train)
        X_test_sparse = sparse_format(X_test)
        sparse_type = type(X_train_sparse)
        for params in parameter_sets:
            # Trained on sparse format
            sparse_regressor = BaggingRegressor(
                base_estimator=CustomSVR(gamma='scale'),
                random_state=1,
                **params
            ).fit(X_train_sparse, y_train)
            sparse_results = sparse_regressor.predict(X_test_sparse)

            # Trained on dense format
            dense_results = BaggingRegressor(
                base_estimator=CustomSVR(gamma='scale'),
                random_state=1,
                **params
            ).fit(X_train, y_train).predict(X_test)

            # The original repeated this comparison twice; once is enough.
            assert_array_almost_equal(sparse_results, dense_results)

            types = [est.data_type_ for est in sparse_regressor.estimators_]
            assert all(t == sparse_type for t in types)
示例22
def test_base_estimator():
    """Check ``base_estimator`` and its default values.

    For both the classifier and the regressor, the fitted
    ``base_estimator_`` must be a decision tree when ``None`` is passed and
    an instance of the given estimator's class otherwise.
    """
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(
        iris.data, iris.target, random_state=rng)
    classification_cases = [
        (None, DecisionTreeClassifier),
        (DecisionTreeClassifier(), DecisionTreeClassifier),
        (Perceptron(tol=1e-3), Perceptron),
    ]
    for given, expected_type in classification_cases:
        ensemble = BaggingClassifier(given, n_jobs=3, random_state=0)
        ensemble.fit(X_train, y_train)
        assert isinstance(ensemble.base_estimator_, expected_type)

    # Regression
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=rng)
    regression_cases = [
        (None, DecisionTreeRegressor),
        (DecisionTreeRegressor(), DecisionTreeRegressor),
        (SVR(gamma='scale'), SVR),
    ]
    for given, expected_type in regression_cases:
        ensemble = BaggingRegressor(given, n_jobs=3, random_state=0)
        ensemble.fit(X_train, y_train)
        assert isinstance(ensemble.base_estimator_, expected_type)
示例23
def lets_try(train, labels):
    """Cross-validate a panel of regressors and plot their mean R2 scores.

    Each model is scored with 5-fold shuffled cross-validation
    (``random_state=45``) using ``r2_score``. The mean score per model is
    collected into a DataFrame indexed by model label with the single
    column "R Square Score", drawn as a bar chart whose y-axis is limited
    to [0.5, 1], and returned.
    """
    def test_model(clf):
        # Mean R2 over the folds, wrapped in a list so it forms one row.
        folds = KFold(n_splits=5, shuffle=True, random_state=45)
        scorer = make_scorer(r2_score)
        fold_scores = cross_val_score(clf, train, labels, cv=folds, scoring=scorer)
        return [fold_scores.mean()]

    contenders = [
        ("Linear", linear_model.LinearRegression()),
        ("Ridge", linear_model.Ridge()),
        ("Bayesian Ridge", linear_model.BayesianRidge()),
        ("Hubber", linear_model.HuberRegressor()),
        ("Lasso", linear_model.Lasso(alpha=1e-4)),
        ("Bagging", BaggingRegressor()),
        ("RandomForest", RandomForestRegressor()),
        ("AdaBoost", AdaBoostRegressor()),
        ("SVM RBF", svm.SVR()),
        ("SVM Linear", svm.SVR(kernel="linear")),
    ]
    scores_by_model = {}
    for label, model in contenders:
        scores_by_model[label] = test_model(model)

    results = pd.DataFrame.from_dict(scores_by_model, orient='index')
    results.columns = ["R Square Score"]
    # NOTE: the rows are not sorted by score; they keep the order above.
    results.plot(kind="bar", title="Model Scores")
    axes = plt.gca()
    axes.set_ylim([0.5, 1])
    return results
示例24
def test_sparse_regression():
    """Check regression for various parameter settings on sparse input.

    For both CSC and CSR input and for each parameter set, an ensemble
    trained on sparse data must predict exactly the same values as one
    trained on the dense data, and every base estimator must have been
    handed data of the sparse type it was fitted with.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data[:50], boston.target[:50], random_state=rng)

    class CustomSVR(SVR):
        """SVR variant that records the nature of the training set"""

        def fit(self, X, y):
            super(CustomSVR, self).fit(X, y)
            self.data_type_ = type(X)
            return self

    parameter_sets = [
        {"max_samples": 0.5,
         "max_features": 2,
         "bootstrap": True,
         "bootstrap_features": True},
        {"max_samples": 1.0,
         "max_features": 4,
         "bootstrap": True,
         "bootstrap_features": True},
        {"max_features": 2,
         "bootstrap": False,
         "bootstrap_features": True},
        {"max_samples": 0.5,
         "bootstrap": True,
         "bootstrap_features": False},
    ]

    for sparse_format in [csc_matrix, csr_matrix]:
        X_train_sparse = sparse_format(X_train)
        X_test_sparse = sparse_format(X_test)
        sparse_type = type(X_train_sparse)
        for params in parameter_sets:
            # Trained on sparse format
            sparse_regressor = BaggingRegressor(
                base_estimator=CustomSVR(),
                random_state=1,
                **params
            ).fit(X_train_sparse, y_train)
            sparse_results = sparse_regressor.predict(X_test_sparse)

            # Trained on dense format
            dense_results = BaggingRegressor(
                base_estimator=CustomSVR(),
                random_state=1,
                **params
            ).fit(X_train, y_train).predict(X_test)

            # The original repeated this comparison twice; once is enough.
            assert_array_equal(sparse_results, dense_results)

            types = [est.data_type_ for est in sparse_regressor.estimators_]
            assert all(t == sparse_type for t in types)
示例25
def test_base_estimator():
    """Check ``base_estimator`` and its default values.

    Passing ``None`` must yield a decision tree as ``base_estimator_``;
    passing an estimator must yield an instance of that estimator's class.
    Both the classifier and the regressor ensembles are checked.
    """
    rng = check_random_state(0)

    # Classification
    X_train, X_test, y_train, y_test = train_test_split(
        iris.data, iris.target, random_state=rng)
    classifier_inputs = [
        (None, DecisionTreeClassifier),
        (DecisionTreeClassifier(), DecisionTreeClassifier),
        (Perceptron(tol=1e-3), Perceptron),
    ]
    for base, expected_cls in classifier_inputs:
        fitted = BaggingClassifier(base, n_jobs=3, random_state=0)
        fitted.fit(X_train, y_train)
        assert_true(isinstance(fitted.base_estimator_, expected_cls))

    # Regression
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=rng)
    regressor_inputs = [
        (None, DecisionTreeRegressor),
        (DecisionTreeRegressor(), DecisionTreeRegressor),
        (SVR(), SVR),
    ]
    for base, expected_cls in regressor_inputs:
        fitted = BaggingRegressor(base, n_jobs=3, random_state=0)
        fitted.fit(X_train, y_train)
        assert_true(isinstance(fitted.base_estimator_, expected_cls))