Python源码示例:sklearn.ensemble.AdaBoostRegressor()
示例1
def Train(data, modelcount, censhu, yanzhgdata):
    """Fit an AdaBoost regressor and report its training and validation MSE.

    :param data: training array; the last column is used as the target and
        all preceding columns as features
    :param modelcount: forwarded to AdaBoostRegressor as ``n_estimators``
    :param censhu: forwarded to DecisionTreeRegressor as ``max_depth``
    :param yanzhgdata: validation array, sliced the same way as ``data``
    :return: tuple ``(train_mse, add_mse)`` as computed by the ``mse`` helper
    """
    features, target = data[:, :-1], data[:, -1]
    base_tree = DecisionTreeRegressor(max_depth=censhu)
    booster = AdaBoostRegressor(base_tree, n_estimators=modelcount,
                                learning_rate=0.8)
    booster.fit(features, target)

    # Error on the data the model was fitted on.
    train_mse = mse(target, booster.predict(features))

    # Error on the held-out validation data.
    valid_pred = booster.predict(yanzhgdata[:, :-1])
    add_mse = mse(yanzhgdata[:, -1], valid_pred)

    print(train_mse, add_mse)
    return train_mse, add_mse
# 最终确定组合的函数
示例2
def test_gridsearch():
    """Check that the base trees of AdaBoost models can be grid-searched."""
    # AdaBoost classification: search the booster size, the depth of the
    # base tree and the boosting algorithm.
    classifier = AdaBoostClassifier(base_estimator=DecisionTreeClassifier())
    classifier_grid = {
        'n_estimators': (1, 2),
        'base_estimator__max_depth': (1, 2),
        'algorithm': ('SAMME', 'SAMME.R'),
    }
    classifier_search = GridSearchCV(classifier, classifier_grid)
    classifier_search.fit(iris.data, iris.target)

    # AdaBoost regression: search the booster size and base tree depth.
    regressor = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(),
                                  random_state=0)
    regressor_grid = {
        'n_estimators': (1, 2),
        'base_estimator__max_depth': (1, 2),
    }
    regressor_search = GridSearchCV(regressor, regressor_grid)
    regressor_search.fit(boston.data, boston.target)
示例3
def test_sample_weight_adaboost_regressor():
    """
    AdaBoostRegressor should work without sample_weights in the base estimator.

    The random weighted sampling is done internally in the _boost method in
    AdaBoostRegressor.
    """
    class DummyEstimator(BaseEstimator):
        """Stub estimator whose ``fit`` accepts no ``sample_weight``."""

        def fit(self, X, y):
            # Nothing to learn; the stub only has to be callable.
            return None

        def predict(self, X):
            # One zero prediction per input row.
            n_rows = X.shape[0]
            return np.zeros(n_rows)

    booster = AdaBoostRegressor(DummyEstimator(), n_estimators=3)
    booster.fit(X, y_regr)

    n_weights = len(booster.estimator_weights_)
    n_errors = len(booster.estimator_errors_)
    assert_equal(n_weights, n_errors)
示例4
def test_multidimensional_X():
    """
    Check that the AdaBoost estimators can work with an n-dimensional
    data matrix.
    """
    from sklearn.dummy import DummyClassifier, DummyRegressor

    # The draw order (features, class labels, regression targets) is kept so
    # the seeded generator yields the same data as before.
    generator = np.random.RandomState(0)
    samples = generator.randn(50, 3, 3)
    class_labels = generator.choice([0, 1], 50)
    reg_targets = generator.randn(50)

    classifier = AdaBoostClassifier(DummyClassifier(strategy='most_frequent'))
    classifier.fit(samples, class_labels)
    classifier.predict(samples)
    classifier.predict_proba(samples)

    regressor = AdaBoostRegressor(DummyRegressor())
    regressor.fit(samples, reg_targets)
    regressor.predict(samples)
示例5
def run_sklearn():
    """Run ``TrainSKLearn`` once per configured model and print the elapsed time.

    Each entry of ``alg_list`` pairs a short name with an unfitted
    scikit-learn estimator. The entries are processed in list order.
    """
    # https://www.analyticsvidhya.com/blog/2015/06/tuning-random-forest-model/
    alg_list = [
        ['lreg', LinearRegression()],
        ['rforest', RandomForestRegressor(n_estimators=1000, n_jobs=-1, max_depth=3)],
        # NOTE(review): this is a classifier among regressors - confirm it is
        # intentional (ExtraTreesRegressor may have been meant).
        ['extree', ExtraTreesClassifier(n_estimators=1000, max_depth=2)],
        ['adaboost', AdaBoostRegressor(base_estimator=None, n_estimators=600, learning_rate=1.0)],
        ['knn', sklearn.neighbors.KNeighborsRegressor(n_neighbors=5)],
    ]

    start_time = time.time()
    for name, alg in alg_list:
        # NOTE(review): the meaning of the "1" and False arguments is defined
        # by jhkaggle.train_sklearn.TrainSKLearn - confirm against that class.
        train = jhkaggle.train_sklearn.TrainSKLearn("1", name, alg, False)
        train.run()
        # Drop the reference to the finished trainer before building the
        # next one (presumably to release its memory).
        train = None

    elapsed_time = time.time() - start_time
    print("Elapsed time: {}".format(jhkaggle.util.hms_string(elapsed_time)))
示例6
def sample_1031_4():
    """
    10.3.1_4 Predicting stock prices with regression: using the ensemble
    learners AdaBoost and RandomForest.

    :return: None
    """
    from sklearn.ensemble import AdaBoostRegressor
    from sklearn.ensemble import RandomForestRegressor

    # sample_1031_1() yields eight values; only the regression splits are
    # needed here, the rest are discarded.
    train_x, train_y_regress, _, _, test_x, test_y_regress, _, _ = sample_1031_1()

    # AdaBoost first, then RandomForest; each is run through the same
    # regression routine and followed by showing the current figure.
    for estimator_cls in (AdaBoostRegressor, RandomForestRegressor):
        estimator = estimator_cls(n_estimators=100)
        regress_process(estimator, train_x, train_y_regress, test_x,
                        test_y_regress)
        plt.show()
示例7
def test_gridsearch():
    """Check that the base trees of AdaBoost models can be grid-searched."""
    # AdaBoost classification: search the booster size, the depth of the
    # base tree and the boosting algorithm.
    classifier = AdaBoostClassifier(base_estimator=DecisionTreeClassifier())
    classifier_grid = {
        'n_estimators': (1, 2),
        'base_estimator__max_depth': (1, 2),
        'algorithm': ('SAMME', 'SAMME.R'),
    }
    classifier_search = GridSearchCV(classifier, classifier_grid)
    classifier_search.fit(iris.data, iris.target)

    # AdaBoost regression: search the booster size and base tree depth.
    regressor = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(),
                                  random_state=0)
    regressor_grid = {
        'n_estimators': (1, 2),
        'base_estimator__max_depth': (1, 2),
    }
    regressor_search = GridSearchCV(regressor, regressor_grid)
    regressor_search.fit(boston.data, boston.target)
示例8
def test_sample_weight_adaboost_regressor():
    """
    AdaBoostRegressor should work without sample_weights in the base estimator.

    The random weighted sampling is done internally in the _boost method in
    AdaBoostRegressor.
    """
    class DummyEstimator(BaseEstimator):
        """Stub estimator whose ``fit`` accepts no ``sample_weight``."""

        def fit(self, X, y):
            # Nothing to learn; the stub only has to be callable.
            return None

        def predict(self, X):
            # One zero prediction per input row.
            n_rows = X.shape[0]
            return np.zeros(n_rows)

    booster = AdaBoostRegressor(DummyEstimator(), n_estimators=3)
    booster.fit(X, y_regr)

    n_weights = len(booster.estimator_weights_)
    n_errors = len(booster.estimator_errors_)
    assert_equal(n_weights, n_errors)
示例9
def Adaboost_First(self, data, max_depth=5, n_estimators=320):
    """Fit the first-layer AdaBoost model and record its outputs and errors.

    :param data: mapping with the keys 'train', 'test' and 'predict'; in each
        array the last column is used as the target and the rest as features
    :param max_depth: forwarded to DecisionTreeRegressor as ``max_depth``
    :param n_estimators: forwarded to AdaBoostRegressor as ``n_estimators``
    :return: None (a completion message is printed)
    """
    base_tree = DecisionTreeRegressor(max_depth=max_depth)
    booster = AdaBoostRegressor(base_tree, n_estimators=n_estimators,
                                learning_rate=0.8)
    train_x, train_y = data['train'][:, :-1], data['train'][:, -1]
    booster.fit(train_x, train_y)

    # Predictions for the training, validation ('test') and prediction sets.
    # Note that the validation and prediction outputs are stored separately.
    fitted_out = booster.predict(train_x)
    valid_out = booster.predict(data['test'][:, :-1])
    future_out = booster.predict(data['predict'][:, :-1])

    # Keep the validation and prediction outputs on the instance.
    self.yanzhneg_pr.append(valid_out)
    self.predi.append(future_out)

    # Errors for the three data sets, computed after each fold and stored
    # under the model's name.
    train_err = self.RMSE(fitted_out, train_y)
    valid_err = self.RMSE(valid_out, data['test'][:, -1])
    future_err = self.RMSE(future_out, data['predict'][:, -1])
    self.error_dict['AdaBoost'] = [train_err, valid_err, future_err]

    # True targets of the validation set.
    self.yanzhneg_real = data['test'][:, -1]
    # True targets of the prediction set.
    self.preal = data['predict'][:, -1]

    print('1层中的AdaBoost运行完毕')
    return None
# GBDT
示例10
def recspre(exstr, predata, datadict, zhe, count=100,
            savepath=r'C:\Users\GWT9\Desktop\duibi.jpg'):
    """Fit AdaBoost on one fold and plot predictions against true values.

    :param exstr: string of the form ``'<n_estimators>-<max_depth>'``; both
        parts are converted with ``int``
    :param predata: array to predict on; the last column is the true target
        and the remaining columns are the features
    :param datadict: mapping indexed by ``zhe`` whose entries hold a 'train'
        array laid out like ``predata``
    :param zhe: key selecting the fold in ``datadict``
    :param count: number of randomly chosen rows to display; capped at the
        number of available predictions
    :param savepath: file the figure is saved to (defaults to the path that
        was previously hard-coded)
    :return: the status string '预测真实对比完毕'
    """
    tree, te = exstr.split('-')
    n_trees = int(tree)
    model = AdaBoostRegressor(DecisionTreeRegressor(max_depth=int(te)),
                              n_estimators=n_trees, learning_rate=0.8)
    fold_train = datadict[zhe]['train']
    model.fit(fold_train[:, :-1], fold_train[:, -1])

    # Predict
    yucede = np.asarray(model.predict(predata[:, :-1]))

    # For readability only a random subset is displayed. Sampling without
    # replacement raises ValueError when more rows are requested than exist,
    # so the request is capped at the number of predictions.
    count = min(count, len(yucede))
    randomnum = np.random.choice(len(yucede), count, replace=False)
    yucede_se = yucede[randomnum]
    yuce_re = np.asarray(predata[:, -1])[randomnum]
    positions = np.arange(count)

    # Top panel: predicted curve against true points.
    plt.figure(figsize=(17, 9))
    plt.subplot(2, 1, 1)
    plt.plot(positions, yucede_se, 'r--', label='预测', lw=2)
    plt.scatter(positions, yuce_re, c='b', marker='.', label='真实', lw=2)
    plt.xlim(-1, count + 1)
    plt.legend()
    plt.title('预测和真实值对比[最大树数%d]' % n_trees)

    # Bottom panel: true minus predicted for the same rows.
    plt.subplot(2, 1, 2)
    plt.plot(positions, yuce_re - yucede_se, 'k--', marker='s', label='真实-预测', lw=2)
    plt.legend()
    plt.title('预测和真实值相对误差')

    plt.savefig(savepath)
    return '预测真实对比完毕'
# 主函数
示例11
def test_regression_toy():
    """Check regression on a toy dataset."""
    regressor = AdaBoostRegressor(random_state=0)
    regressor.fit(X, y_regr)
    predicted = regressor.predict(T)
    assert_array_equal(predicted, y_t_regr)
示例12
def test_boston():
    """Check consistency on the Boston house prices dataset."""
    features, target = boston.data, boston.target
    model = AdaBoostRegressor(random_state=0)
    model.fit(features, target)

    r2 = model.score(features, target)
    assert r2 > 0.85

    # Check we used multiple estimators
    members = model.estimators_
    assert len(members) > 1

    # Check for distinct random states (see issue #7408)
    distinct_seeds = set(member.random_state for member in members)
    assert_equal(len(distinct_seeds), len(members))
示例13
def test_pickle():
    """Check that fitted AdaBoost estimators can be pickled and restored."""
    import pickle

    def check_round_trip(estimator, data, target):
        # Fit, score, pickle/unpickle, then compare the type and the score.
        estimator.fit(data, target)
        score_before = estimator.score(data, target)
        restored = pickle.loads(pickle.dumps(estimator))
        assert_equal(type(restored), estimator.__class__)
        score_after = restored.score(data, target)
        assert_equal(score_before, score_after)

    # Adaboost classifier, once per boosting algorithm
    for alg in ['SAMME', 'SAMME.R']:
        check_round_trip(AdaBoostClassifier(algorithm=alg),
                         iris.data, iris.target)

    # Adaboost regressor
    check_round_trip(AdaBoostRegressor(random_state=0),
                     boston.data, boston.target)
示例14
def test_sample_weight_missing():
    """Check that fitting AdaBoost on a KMeans base estimator raises ValueError."""
    from sklearn.cluster import KMeans

    classifier = AdaBoostClassifier(KMeans(), algorithm="SAMME")
    assert_raises(ValueError, classifier.fit, X, y_regr)

    regressor = AdaBoostRegressor(KMeans())
    assert_raises(ValueError, regressor.fit, X, y_regr)
示例15
def setClf(self):
    """Store a default-configured AdaBoostRegressor on ``self.clf``."""
    # A ``min_samples_split = 3`` setting was noted here but is not applied.
    regressor = AdaBoostRegressor()
    self.clf = regressor
示例16
def __init__(self, options):
    """Build the wrapped AdaBoost regressor from an ``options`` mapping.

    :param options: mapping handed to ``self.handle_options``; its optional
        'params' entry (default: empty dict) supplies the estimator
        parameters, which ``convert_params`` processes using the string,
        float and int key lists given below
    """
    self.handle_options(options)

    raw_params = options.get('params', {})
    string_keys = ['loss', 'max_features']
    float_keys = ['learning_rate']
    int_keys = ['n_estimators']
    typed_params = convert_params(
        raw_params,
        strs=string_keys,
        floats=float_keys,
        ints=int_keys,
    )

    self.estimator = _AdaBoostRegressor(**typed_params)
示例17
def test_reg_engineer(env_boston_regression, hh_assets, opt_pro):
    """Demonstrate problem with `BayesianOptPro` specifically - same configuration is fine with all
    other `OptPro`s"""
    optimizer = opt_pro(iterations=3, random_state=32, n_initial_points=1)
    # The only search dimension is the choice of scaling step.
    scaling_step = Categorical([standard_scale, min_max_scale, normalize])
    optimizer.forge_experiment(
        model_initializer=AdaBoostRegressor,
        model_init_params={},
        feature_engineer=FeatureEngineer([scaling_step]),
    )
    optimizer.go()
示例18
def test_reg_engineer_integer_ok(env_boston_regression, hh_assets, opt_pro):
    """Identical to `test_reg_engineer`, except `Integer` dimension added to show that everything is
    fine now. Problem limited to not only `BayesianOptPro`, but also exclusively `Categorical`
    search spaces"""
    optimizer = opt_pro(iterations=3, random_state=32, n_initial_points=1)
    # Search dimensions: an integer `n_estimators` plus the scaling step.
    init_params = {"n_estimators": Integer(10, 40)}
    scaling_step = Categorical([standard_scale, min_max_scale, normalize])
    optimizer.forge_experiment(
        model_initializer=AdaBoostRegressor,
        model_init_params=init_params,
        feature_engineer=FeatureEngineer([scaling_step]),
    )
    optimizer.go()
示例19
def test_reg_engineer_categorical(env_boston_regression, hh_assets, opt_pro):
    """Demonstrate that `BayesianOptPro` breaks with multiple `Categorical`s when `FeatureEngineer`
    is included in the dimensions"""
    optimizer = opt_pro(iterations=3, random_state=32, n_initial_points=1)
    # Search dimensions: a categorical `loss` plus the scaling step.
    init_params = {"loss": Categorical(["linear", "square", "exponential"])}
    scaling_step = Categorical([standard_scale, min_max_scale, normalize])
    optimizer.forge_experiment(
        model_initializer=AdaBoostRegressor,
        model_init_params=init_params,
        feature_engineer=FeatureEngineer([scaling_step]),
    )
    optimizer.go()
示例20
def test_reg_engineer_categorical_integer_ok(env_boston_regression, hh_assets, opt_pro):
    """Identical to `test_reg_engineer_categorical`, except `Integer` added to demonstrate that all
    `OptPro`s can optimize with `FeatureEngineer` if space is not exclusively `Categorical`"""
    optimizer = opt_pro(iterations=3, random_state=32, n_initial_points=1)
    # Search dimensions: categorical `loss`, integer `n_estimators` and the
    # scaling step.
    init_params = {
        "loss": Categorical(["linear", "square", "exponential"]),
        "n_estimators": Integer(10, 40),
    }
    scaling_step = Categorical([standard_scale, min_max_scale, normalize])
    optimizer.forge_experiment(
        model_initializer=AdaBoostRegressor,
        model_init_params=init_params,
        feature_engineer=FeatureEngineer([scaling_step]),
    )
    optimizer.go()
示例21
def test_reg_categorical_ok(env_boston_regression, hh_assets, opt_pro):
    """Demonstrate that all `OptPro`s are fine with exclusively-`Categorical` space that doesn't
    include `FeatureEngineer`"""
    optimizer = opt_pro(iterations=3, random_state=32, n_initial_points=1)
    # The only search dimension is the categorical `loss`.
    init_params = {"loss": Categorical(["linear", "square", "exponential"])}
    optimizer.forge_experiment(
        model_initializer=AdaBoostRegressor,
        model_init_params=init_params,
    )
    optimizer.go()
示例22
def test_reg_integer_ok(env_boston_regression, hh_assets, opt_pro):
    """Run `opt_pro` over a lone `Integer` dimension (`n_estimators`) without a `FeatureEngineer`"""
    optimizer = opt_pro(iterations=3, random_state=32, n_initial_points=1)
    init_params = {"n_estimators": Integer(10, 40)}
    optimizer.forge_experiment(
        model_initializer=AdaBoostRegressor,
        model_init_params=init_params,
    )
    optimizer.go()
示例23
def exp_lambda_cb(lambda_cbs):
    """Build a `CVExperiment` for `AdaBoostRegressor` that uses `lambda_cbs` as its `callbacks`

    Parameters
    ----------
    lambda_cbs: `LambdaCallback`, list of `LambdaCallback`, or None
        LambdaCallback values passed to the `CVExperiment`'s `callbacks` kwarg

    Returns
    -------
    CVExperiment
        The experiment constructed with `lambda_cbs`"""
    experiment = CVExperiment(AdaBoostRegressor, callbacks=lambda_cbs)
    return experiment
##################################################
# Dummy LambdaCallbacks
##################################################
示例24
def get_ensemble_models():
    """Build the ensemble regressors together with their display names.

    The parallel estimators are given roughly 80% of ``n_cores`` workers.
    The worker count is clamped to at least 1 because ``int(0.8 * n_cores)``
    is 0 when ``n_cores`` is 1, and ``n_jobs=0`` is not a valid setting.

    :return: tuple ``(classifier_list, classifier_name_list)`` holding the
        unfitted estimators and their names in matching order
    """
    n_jobs = max(1, int(0.8 * n_cores))

    rf = RandomForestRegressor(
        n_estimators=51, min_samples_leaf=5, min_samples_split=3, random_state=42,
        n_jobs=n_jobs)
    bag = BaggingRegressor(n_estimators=51, random_state=42, n_jobs=n_jobs)
    extra = ExtraTreesRegressor(n_estimators=71, random_state=42, n_jobs=n_jobs)
    ada = AdaBoostRegressor(random_state=42)
    grad = GradientBoostingRegressor(n_estimators=101, random_state=42)

    classifier_list = [rf, bag, extra, ada, grad]
    classifier_name_list = ['Random Forests', 'Bagging',
                            'Extra Trees', 'AdaBoost', 'Gradient Boost']
    return classifier_list, classifier_name_list
示例25
def test_objectmapper(self):
    """Check that ``ModelFrame.ensemble`` exposes the ``ensemble`` estimators themselves."""
    df = pdml.ModelFrame([])

    # Checked in this order; each accessor attribute must be the very same
    # object as the attribute of that name on ``ensemble``.
    estimator_names = (
        'AdaBoostClassifier',
        'AdaBoostRegressor',
        'BaggingClassifier',
        'BaggingRegressor',
        'ExtraTreesClassifier',
        'ExtraTreesRegressor',
        'GradientBoostingClassifier',
        'GradientBoostingRegressor',
        'IsolationForest',
        'RandomForestClassifier',
        'RandomTreesEmbedding',
        'RandomForestRegressor',
        'VotingClassifier',
    )
    for estimator_name in estimator_names:
        self.assertIs(getattr(df.ensemble, estimator_name),
                      getattr(ensemble, estimator_name))
示例26
def adaboost_regressor(self, assign=True, base_estimator=DecisionTreeRegressor(), **kwargs):
    """
    Supervised-learning regressor: instantiate AdaBoostRegressor. By default:
        AdaBoostRegressor(base_estimator=base_estimator, n_estimators=100, random_state=1)
    Keyword arguments given through **kwargs are passed straight on, i.e.:
        AdaBoostRegressor(**kwargs)

    :param base_estimator: defaults to DecisionTreeRegressor()
    :param assign: whether to keep the created AdaBoostRegressor on the
                   instance, default True, self.reg = reg
    :param kwargs: with keyword arguments: AdaBoostRegressor(**kwargs)
                   without keyword arguments: AdaBoostRegressor(n_estimators=100, random_state=1)
    :return: the instantiated AdaBoostRegressor object
    """
    # NOTE: the default ``base_estimator`` instance is created once, when the
    # function is defined, and is therefore shared by every call relying on it.
    if kwargs:
        # Caller-supplied settings win; only the base estimator is filled in
        # when it is absent.
        kwargs.setdefault('base_estimator', base_estimator)
        init_params = kwargs
    else:
        init_params = {'base_estimator': base_estimator,
                       'n_estimators': 100,
                       'random_state': 1}
    reg = AdaBoostRegressor(**init_params)

    if assign:
        self.reg = reg
    return reg
示例27
def adaboost_regressor_best(self, x, y, param_grid=None, assign=True, n_jobs=-1, show=True):
    """
    Search for the best constructor parameters for AdaBoostRegressor.

    The higher-level AbuML.adaboost_regressor_best calls this directly with
    the x, y data held by AbuML.

    eg:
        calling adaboost_regressor_best without param_grid:

        from abupy import AbuML, ml
        ttn_abu = AbuML.create_test_more_fiter()
        ttn_abu.adaboost_regressor_best()

        calling adaboost_regressor_best with param_grid:

        param_grid = {'learning_rate': np.arange(0.2, 1.2, 0.2), 'n_estimators': np.arange(10, 100, 10)}
        ttn_abu.adaboost_regressor_best(param_grid=param_grid, n_jobs=-1)

        out: AdaBoostRegressor(learning_rate=0.8, n_estimators=40)

    :param x: training-set x matrix, numpy matrix
    :param y: training-set y sequence, numpy sequence
    :param param_grid: dict of candidate keyword parameters to search,
                eg:param_grid = {'learning_rate': np.arange(0.2, 1.2, 0.2),
                                 'n_estimators': np.arange(10, 100, 10)}
    :param assign: whether to keep the learner built with the best
                   parameters on the instance, default True
    :param n_jobs: number of parallel worker processes, default -1, which
                   starts as many processes as there are cpus
    :param show: whether to visualise the result of the parameter search
    :return: the AdaBoostRegressor object built with the best parameters
    """
    # Delegate to the shared search helper, using this class's own
    # adaboost_regressor as the estimator factory.
    factory = self.adaboost_regressor
    best_estimator = self._estimators_prarms_best(factory, x, y, param_grid, assign, n_jobs, show)
    return best_estimator
示例28
def __init__(self, isTrain):
    """Initialise the parent class and create the AdaBoost regression model.

    :param isTrain: forwarded unchanged to the parent class initialiser
    """
    super(RegressionAdaBoost, self).__init__(isTrain)

    # Data preprocessing is currently disabled:
    # self.dataPreprocessing()

    # AdaBoost over depth-10 decision trees, driven by a seeded random state.
    base_tree = DecisionTreeRegressor(max_depth=10)
    seeded_state = np.random.RandomState(1)
    self.adaReg = AdaBoostRegressor(base_tree,
                                    n_estimators=400,
                                    random_state=seeded_state)
示例29
def test_regression_toy():
    """Check regression on a toy dataset."""
    regressor = AdaBoostRegressor(random_state=0)
    regressor.fit(X, y_regr)
    predicted = regressor.predict(T)
    assert_array_equal(predicted, y_t_regr)
示例30
def test_boston():
    """Check consistency on the Boston house prices dataset."""
    features, target = boston.data, boston.target
    model = AdaBoostRegressor(random_state=0)
    model.fit(features, target)

    r2 = model.score(features, target)
    assert r2 > 0.85

    # Check we used multiple estimators
    members = model.estimators_
    assert_true(len(members) > 1)

    # Check for distinct random states (see issue #7408)
    distinct_seeds = set(member.random_state for member in members)
    assert_equal(len(distinct_seeds), len(members))