Python source examples: sklearn.ensemble.AdaBoostClassifier()
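The examples below are collected from real projects. For orientation first, here is a minimal self-contained sketch of the basic fit/predict workflow; the synthetic dataset and hyperparameter values are illustrative assumptions, not taken from any example below:

# Minimal sketch: train and score an AdaBoostClassifier on synthetic data.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=500, n_features=10, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

clf = AdaBoostClassifier(n_estimators=100, learning_rate=0.8, random_state=0)
clf.fit(X_train, y_train)
print("test accuracy:", clf.score(X_test, y_test))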
Example 1
def buildModel(dataset, method, parameters):
    """
    Build the final model for predicting the real test data.
    """
    features = dataset.columns[0:-1]

    if method == 'RNN':
        clf = performRNNlass(dataset[features], dataset['UpDown'])
        return clf
    elif method == 'RF':
        clf = RandomForestClassifier(n_estimators=1000, n_jobs=-1)
    elif method == 'KNN':
        clf = neighbors.KNeighborsClassifier()
    elif method == 'SVM':
        c = parameters[0]
        g = parameters[1]
        clf = SVC(C=c, gamma=g)
    elif method == 'ADA':
        clf = AdaBoostClassifier()

    return clf.fit(dataset[features], dataset['UpDown'])
Example 2
def Train(data, modelcount, censhu, yanzhgdata):
    model = AdaBoostClassifier(DecisionTreeClassifier(max_depth=censhu),
                               algorithm="SAMME",
                               n_estimators=modelcount, learning_rate=0.8)
    model.fit(data[:, :-1], data[:, -1])

    # Predictions on the training data
    train_out = model.predict(data[:, :-1])
    # Compute the training MSE
    train_mse = fmse(data[:, -1], train_out)[0]

    # Predictions on the validation data
    add_yan = model.predict(yanzhgdata[:, :-1])
    # Compute the validation error with the same fmse helper
    add_mse = fmse(yanzhgdata[:, -1], add_yan)[0]
    print(train_mse, add_mse)
    return train_mse, add_mse

# Function that finalizes the ensemble combination
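Note that fmse is a project-specific helper that is not included in these excerpts. Judging from the call sites (the [0] index on its return value), a compatible stand-in might look like the following hypothetical sketch:

# Hypothetical stand-in for the project's fmse helper (not from the source).
# The call sites only require that element [0] be the error value.
import numpy as np

def fmse(y_true, y_pred):
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    mse = float(np.mean((y_true - y_pred) ** 2))
    return mse, y_pred  # the remaining elements are a guess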
Example 3
def recspre(estrs, predata, datadict, zhe):
    mo, ze = estrs.split('-')
    model = AdaBoostClassifier(DecisionTreeClassifier(max_depth=int(ze)),
                               algorithm="SAMME",
                               n_estimators=int(mo), learning_rate=0.8)
    model.fit(datadict[zhe]['train'][:, :-1], datadict[zhe]['train'][:, -1])

    # Predict
    yucede = model.predict(predata[:, :-1])
    # Compute the confusion matrix
    print(ConfuseMatrix(predata[:, -1], yucede))
    return fmse(predata[:, -1], yucede)

# Main function
Example 4
def test_gridsearch():
    # Check that base trees can be grid-searched.
    # AdaBoost classification
    boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier())
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2),
                  'algorithm': ('SAMME', 'SAMME.R')}
    clf = GridSearchCV(boost, parameters)
    clf.fit(iris.data, iris.target)

    # AdaBoost regression
    boost = AdaBoostRegressor(base_estimator=DecisionTreeRegressor(),
                              random_state=0)
    parameters = {'n_estimators': (1, 2),
                  'base_estimator__max_depth': (1, 2)}
    clf = GridSearchCV(boost, parameters)
    clf.fit(boston.data, boston.target)
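This test targets an older scikit-learn API: base_estimator was deprecated in favor of estimator in scikit-learn 1.2 and removed in 1.4, the SAMME.R algorithm was deprecated in 1.4, and load_boston was removed in 1.2. A rough modern equivalent of the classification half, assuming scikit-learn 1.4 or 1.5 (in the newest releases the algorithm parameter itself is being phased out):

# Rough modern equivalent (assumes scikit-learn >= 1.4; parameter renamed).
from sklearn.datasets import load_iris
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

iris = load_iris()
boost = AdaBoostClassifier(estimator=DecisionTreeClassifier(), algorithm="SAMME")
parameters = {'n_estimators': (1, 2),
              'estimator__max_depth': (1, 2)}
clf = GridSearchCV(boost, parameters)
clf.fit(iris.data, iris.target)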
Example 5
def test_importances():
    # Check variable importances.
    X, y = datasets.make_classification(n_samples=2000,
                                        n_features=10,
                                        n_informative=3,
                                        n_redundant=0,
                                        n_repeated=0,
                                        shuffle=False,
                                        random_state=1)

    for alg in ['SAMME', 'SAMME.R']:
        clf = AdaBoostClassifier(algorithm=alg)
        clf.fit(X, y)
        importances = clf.feature_importances_

        assert_equal(importances.shape[0], 10)
        assert_equal((importances[:3, np.newaxis] >= importances[3:]).all(),
                     True)
Example 6
def test_multidimensional_X():
    """
    Check that the AdaBoost estimators can work with n-dimensional
    data matrix
    """
    from sklearn.dummy import DummyClassifier, DummyRegressor

    rng = np.random.RandomState(0)

    X = rng.randn(50, 3, 3)
    yc = rng.choice([0, 1], 50)
    yr = rng.randn(50)

    boost = AdaBoostClassifier(DummyClassifier(strategy='most_frequent'))
    boost.fit(X, yc)
    boost.predict(X)
    boost.predict_proba(X)

    boost = AdaBoostRegressor(DummyRegressor())
    boost.fit(X, yr)
    boost.predict(X)
Example 7
def __init__(self, classifier=FaceClassifierModels.DEFAULT):
    self._clf = None
    if classifier == FaceClassifierModels.LINEAR_SVM:
        self._clf = SVC(C=1.0, kernel="linear", probability=True)
    elif classifier == FaceClassifierModels.NAIVE_BAYES:
        self._clf = GaussianNB()
    elif classifier == FaceClassifierModels.RBF_SVM:
        self._clf = SVC(C=1, kernel='rbf', probability=True, gamma=2)
    elif classifier == FaceClassifierModels.NEAREST_NEIGHBORS:
        self._clf = KNeighborsClassifier(1)
    elif classifier == FaceClassifierModels.DECISION_TREE:
        self._clf = DecisionTreeClassifier(max_depth=5)
    elif classifier == FaceClassifierModels.RANDOM_FOREST:
        self._clf = RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)
    elif classifier == FaceClassifierModels.NEURAL_NET:
        self._clf = MLPClassifier(alpha=1)
    elif classifier == FaceClassifierModels.ADABOOST:
        self._clf = AdaBoostClassifier()
    elif classifier == FaceClassifierModels.QDA:
        self._clf = QuadraticDiscriminantAnalysis()
    print("classifier={}".format(FaceClassifierModels(classifier)))
Example 8
def getModels():
    result = []
    result.append("LinearRegression")
    result.append("BayesianRidge")
    result.append("ARDRegression")
    result.append("ElasticNet")
    result.append("HuberRegressor")
    result.append("Lasso")
    result.append("LassoLars")
    result.append("Rigid")  # sic in the source; presumably the sklearn Ridge regressor
    result.append("SGDRegressor")
    result.append("SVR")
    result.append("MLPClassifier")
    result.append("KNeighborsClassifier")
    result.append("SVC")
    result.append("GaussianProcessClassifier")
    result.append("DecisionTreeClassifier")
    result.append("RandomForestClassifier")
    result.append("AdaBoostClassifier")
    result.append("GaussianNB")
    result.append("LogisticRegression")
    result.append("QuadraticDiscriminantAnalysis")
    return result
Example 9
def main():
    # prepare data
    trainingSet = []
    testSet = []
    accuracy = 0.0
    split = 0.20
    loadDataset('../Dataset/med.data', split, trainingSet, testSet)
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))

    trainData = np.array(trainingSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    columns = trainData.shape[1]
    X = np.array(trainData)
    y = np.array(trainingSet)[:, columns]
    clf = AdaBoostClassifier()
    clf.fit(X, y)

    testData = np.array(testSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    X_test = np.array(testData)
    y_test = np.array(testSet)[:, columns]
    accuracy = clf.score(X_test, y_test)
    accuracy *= 100
    print("Accuracy %:", accuracy)
Example 10
def learn(x, y, test_x):
    # set sample weight
    weight_list = []
    for j in range(len(y)):
        if y[j] == "0":
            weight_list.append(variables.weight_0_ada)
        if y[j] == "1000":
            weight_list.append(variables.weight_1000_ada)
        if y[j] == "1500":
            weight_list.append(variables.weight_1500_ada)
        if y[j] == "2000":
            weight_list.append(variables.weight_2000_ada)

    clf = AdaBoostClassifier(n_estimators=variables.n_estimators_ada,
                             learning_rate=variables.learning_rate_ada)
    clf.fit(x, y, np.asarray(weight_list))

    prediction_list = clf.predict(test_x)
    prediction_list_prob = clf.predict_proba(test_x)

    return prediction_list, prediction_list_prob
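The label-to-weight loop above can be written more compactly with a dict lookup. A small equivalent sketch, reusing the same project-specific variables module that the example assumes:

# Equivalent, more compact construction of the sample weights (sketch;
# `variables` is the same project-specific module as in the example above).
weight_by_label = {"0": variables.weight_0_ada,
                   "1000": variables.weight_1000_ada,
                   "1500": variables.weight_1500_ada,
                   "2000": variables.weight_2000_ada}
weight_list = np.asarray([weight_by_label[label] for label in y])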
Example 11
def _train_adaboost(self, X, y):
    # Define hyperparams.
    # http://scikit-learn.org/stable/modules/ensemble.html#adaboost
    self._get_or_set_hyperparam('base_estimator')
    self._get_or_set_hyperparam('n_estimators')
    self._get_or_set_hyperparam('learning_rate')
    self._get_or_set_hyperparam('adaboost_algorithm')
    self._get_or_set_hyperparam('n_jobs')
    self._get_or_set_hyperparam('class_weight')
    self._get_or_set_hyperparam('scoring')

    # Build initial model.
    self._model = AdaBoostClassifier(
        base_estimator=DecisionTreeClassifier(class_weight='balanced'),
        n_estimators=self._hyperparams['n_estimators'],
        learning_rate=self._hyperparams['learning_rate'],
        algorithm=self._hyperparams['adaboost_algorithm'],
        random_state=self._hyperparams['random_state']
    )

    # Tune hyperparams.
    self._tune_hyperparams(self._hyperparam_search_space, X, y)
Example 12
def test_ada_boost_classifier_samme_r(self):
    model, X_test = fit_classification_model(AdaBoostClassifier(
        n_estimators=10, algorithm="SAMME.R", random_state=42,
        base_estimator=DecisionTreeClassifier(
            max_depth=2, random_state=42)), 3)
    model_onnx = convert_sklearn(
        model,
        "AdaBoost classification",
        [("input", FloatTensorType((None, X_test.shape[1])))],
        target_opset=10
    )
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(
        X_test,
        model,
        model_onnx,
        basename="SklearnAdaBoostClassifierSAMMER",
        allow_failure="StrictVersion("
                      "onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')",
    )
Example 13
def test_ada_boost_classifier_samme_r_decision_function(self):
    model, X_test = fit_classification_model(AdaBoostClassifier(
        n_estimators=10, algorithm="SAMME.R", random_state=42,
        base_estimator=DecisionTreeClassifier(
            max_depth=2, random_state=42)), 4)
    options = {id(model): {'raw_scores': True}}
    model_onnx = convert_sklearn(
        model,
        "AdaBoost classification",
        [("input", FloatTensorType((None, X_test.shape[1])))],
        target_opset=10,
        options=options,
    )
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(
        X_test,
        model,
        model_onnx,
        basename="SklearnAdaBoostClassifierSAMMERDecisionFunction",
        allow_failure="StrictVersion("
                      "onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')",
        methods=['predict', 'decision_function'],
    )
Example 14
def test_ada_boost_classifier_samme_r_logreg(self):
    model, X_test = fit_classification_model(AdaBoostClassifier(
        n_estimators=5, algorithm="SAMME.R",
        base_estimator=LogisticRegression(
            solver='liblinear')), 4)
    model_onnx = convert_sklearn(
        model,
        "AdaBoost classification",
        [("input", FloatTensorType((None, X_test.shape[1])))],
        target_opset=10
    )
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(
        X_test,
        model,
        model_onnx,
        basename="SklearnAdaBoostClassifierSAMMERLogReg",
        allow_failure="StrictVersion("
                      "onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')",
    )
Example 15
def test_ada_boost_classifier_samme(self):
    model, X_test = fit_classification_model(AdaBoostClassifier(
        n_estimators=5, algorithm="SAMME", random_state=42,
        base_estimator=DecisionTreeClassifier(
            max_depth=6, random_state=42)), 2)
    model_onnx = convert_sklearn(
        model,
        "AdaBoostClSamme",
        [("input", FloatTensorType((None, X_test.shape[1])))],
        target_opset=10,
    )
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(
        X_test,
        model,
        model_onnx,
        basename="SklearnAdaBoostClassifierSAMMEDT",
        allow_failure="StrictVersion("
                      "onnxruntime.__version__)"
                      "< StrictVersion('0.5.0')",
    )
Example 16
def test_ada_boost_classifier_samme_decision_function(self):
    model, X_test = fit_classification_model(AdaBoostClassifier(
        n_estimators=5, algorithm="SAMME", random_state=42,
        base_estimator=DecisionTreeClassifier(
            max_depth=6, random_state=42)), 2)
    options = {id(model): {'raw_scores': True}}
    model_onnx = convert_sklearn(
        model,
        "AdaBoostClSamme",
        [("input", FloatTensorType((None, X_test.shape[1])))],
        target_opset=10,
        options=options,
    )
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(
        X_test,
        model,
        model_onnx,
        basename="SklearnAdaBoostClassifierSAMMEDTDecisionFunction",
        allow_failure="StrictVersion("
                      "onnxruntime.__version__)"
                      "< StrictVersion('0.5.0')",
        methods=['predict', 'decision_function_binary'],
    )
Example 17
def test_ada_boost_classifier_bool(self):
    model, X_test = fit_classification_model(
        AdaBoostClassifier(random_state=42), 3,
        is_bool=True)
    model_onnx = convert_sklearn(
        model,
        "AdaBoost classification",
        [("input", BooleanTensorType((None, X_test.shape[1])))],
        target_opset=10,
    )
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(
        X_test,
        model,
        model_onnx,
        basename="SklearnAdaBoostClassifierBool",
        allow_failure="StrictVersion("
                      "onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')",
    )
Example 18
def adaboost_classifier(self, assign=True, base_estimator=DecisionTreeClassifier(), **kwargs):
    """
    Supervised learning classifier; instantiates an AdaBoostClassifier. By default uses:
    AdaBoostClassifier(base_estimator=base_estimator, n_estimators=100, random_state=1)
    Keyword arguments are passed straight through to AdaBoostClassifier, i.e.:
    AdaBoostClassifier(**kwargs)
    :param base_estimator: defaults to DecisionTreeClassifier()
    :param assign: whether to store the instantiated AdaBoostClassifier on the instance
                   (self.clf = clf); defaults to True
    :param kwargs: with kwargs, initialize as AdaBoostClassifier(**kwargs);
                   without kwargs, initialize as AdaBoostClassifier(n_estimators=100, random_state=1)
    :return: the instantiated AdaBoostClassifier object
    """
    if kwargs is not None and len(kwargs) > 0:
        if 'base_estimator' not in kwargs:
            kwargs['base_estimator'] = base_estimator
        clf = AdaBoostClassifier(**kwargs)
    else:
        clf = AdaBoostClassifier(base_estimator=base_estimator, n_estimators=100, random_state=1)
    if assign:
        self.clf = clf
    return clf
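A hypothetical usage of both code paths, assuming an instance obj of the owning class (which is not shown in the excerpt):

# Hypothetical usage; `obj` is an instance of the class this method belongs to.
clf_default = obj.adaboost_classifier()  # n_estimators=100, random_state=1
clf_custom = obj.adaboost_classifier(n_estimators=200, learning_rate=0.5)
# base_estimator is injected into kwargs automatically when not supplied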
Example 19
def __init__(
        self, data_block, predictors=[], cv_folds=10,
        scoring_metric='accuracy', additional_display_metrics=[]):

    base_classification.__init__(
        self, alg=AdaBoostClassifier(), data_block=data_block,
        predictors=predictors, cv_folds=cv_folds,
        scoring_metric=scoring_metric,
        additional_display_metrics=additional_display_metrics
    )

    self.model_output = pd.Series(self.default_parameters)
    self.model_output['Feature_Importance'] = "-"

    # Set parameters to default values:
    self.set_parameters(set_default=True)
Example 20
def __init__(self, isTrain, isOutlierRemoval):
    super(ClassificationAdaBoost, self).__init__(isTrain, isOutlierRemoval)
    # data preprocessing
    self.dataPreprocessing()

    self.dt_stump = DecisionTreeClassifier(max_depth=10)
    self.ada = AdaBoostClassifier(
        base_estimator=self.dt_stump,
        learning_rate=1,
        n_estimators=7,
        algorithm="SAMME.R")

    # self.dt_stump = DecisionTreeClassifier(max_depth=14)
    # self.ada = AdaBoostClassifier(
    #     base_estimator=self.dt_stump,
    #     learning_rate=1,
    #     n_estimators=50,
    #     algorithm="SAMME")
Example 21
def test_different_scorer():
    # Test if static_selection can select the best classifier according to a
    # metric that needs to be minimized.
    X, y = make_classification(n_samples=100, random_state=42)
    X_val, y_val = make_classification(n_samples=25, random_state=123)
    pool = AdaBoostClassifier(n_estimators=10).fit(X, y)
    performances = []
    for clf in pool:
        preds = clf.predict_proba(X_val)
        performances.append(log_loss(y_val.ravel(), preds[:, -1]))
    id_best = np.argsort(performances)
    ss = StaticSelection(pool_classifiers=pool, scoring='neg_log_loss')
    ss.fit(X_val, y_val)
    assert (id_best[:ss.n_classifiers_ensemble_] == ss.clf_indices_).all()
Example 22
def define_clfs_params(self):
    '''
    Defines all relevant parameters and classes for classifier objects.
    Edit these if you wish to change parameters.
    '''
    # These are the classifiers
    self.clfs = {
        'RF': RandomForestClassifier(n_estimators=50, n_jobs=-1),
        'ET': ExtraTreesClassifier(n_estimators=10, n_jobs=-1, criterion='entropy'),
        # max_depth must be a scalar here (the source passed a list, which raises
        # at fit time); candidate depths belong in the self.params grids below
        'AB': AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", n_estimators=200),
        'LR': LogisticRegression(penalty='l1', C=1e5),
        'SVM': svm.SVC(kernel='linear', probability=True, random_state=0),
        'GB': GradientBoostingClassifier(learning_rate=0.05, subsample=0.5, max_depth=6, n_estimators=10),
        'NB': GaussianNB(),
        'DT': DecisionTreeClassifier(),
        'SGD': SGDClassifier(loss='log', penalty='l2'),
        'KNN': KNeighborsClassifier(n_neighbors=3)
    }
    # These are the parameters which will be run through
    self.params = {
        'RF': {'n_estimators': [1, 10, 100, 1000], 'max_depth': [10, 15, 20, 30, 40, 50, 60, 70, 100],
               'max_features': ['sqrt', 'log2'], 'min_samples_split': [2, 5, 10], 'random_state': [1]},
        'LR': {'penalty': ['l1', 'l2'], 'C': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10], 'random_state': [1]},
        'SGD': {'loss': ['log'], 'penalty': ['l2', 'l1', 'elasticnet'], 'random_state': [1]},
        'ET': {'n_estimators': [1, 10, 100, 1000], 'criterion': ['gini', 'entropy'], 'max_depth': [1, 3, 5, 10, 15],
               'max_features': ['sqrt', 'log2'], 'min_samples_split': [2, 5, 10], 'random_state': [1]},
        'AB': {'algorithm': ['SAMME', 'SAMME.R'], 'n_estimators': [1, 10, 100, 1000], 'random_state': [1]},
        'GB': {'n_estimators': [1, 10, 100, 1000], 'learning_rate': [0.001, 0.01, 0.05, 0.1, 0.5],
               'subsample': [0.1, 0.5, 1.0], 'max_depth': [1, 3, 5, 10, 20, 50, 100], 'random_state': [1]},
        'NB': {},
        'DT': {'criterion': ['gini', 'entropy'], 'max_depth': [1, 2, 15, 20, 30, 40, 50],
               'max_features': ['sqrt', 'log2'], 'min_samples_split': [2, 5, 10], 'random_state': [1]},
        'SVM': {'C': [0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10], 'kernel': ['linear'], 'random_state': [1]},
        'KNN': {'n_neighbors': [1, 5, 10, 25, 50, 100], 'weights': ['uniform', 'distance'],
                'algorithm': ['auto', 'ball_tree', 'kd_tree']}
    }
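The two dicts are meant to be consumed together, with each classifier's parameter grid searched over it. A minimal sketch of the assumed driver loop (not part of the excerpt; X_train and y_train stand in for the project's data):

# Minimal sketch of pairing self.clfs with self.params via grid search
# (assumed usage; the actual driver loop is not shown in the excerpt).
from sklearn.model_selection import GridSearchCV

for name in ('RF', 'AB', 'KNN'):
    search = GridSearchCV(self.clfs[name], self.params[name], cv=5)
    search.fit(X_train, y_train)
    print(name, search.best_params_, search.best_score_)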
Example 23
def performAdaBoostClass(X_train, y_train, X_test, y_test, parameters, savemodel):
    """
    AdaBoost binary classification
    """
    # n = parameters[0]
    # l = parameters[1]
    clf = AdaBoostClassifier()
    clf.fit(X_train, y_train)
    accuracy = clf.score(X_test, y_test)
    return accuracy
Example 24
def Adaboost_First(self, data, max_depth=5, n_estimators=300):
    model = AdaBoostClassifier(DecisionTreeClassifier(max_depth=max_depth),
                               algorithm="SAMME",
                               n_estimators=n_estimators, learning_rate=0.8)
    model.fit(data['train'][:, :-1], data['train'][:, -1])

    # Store the validation-set and prediction-set results
    # Predictions on the training set
    xul = model.predict(data['train'][:, :-1])
    # Predictions on the validation set
    yanre = model.predict(data['test'][:, :-1])
    # Predictions on the prediction set
    prer = model.predict(data['predict'][:, :-1])

    # After each fold, compute the training, validation, and prediction errors
    xx = self.F1(xul, data['train'][:, -1])
    yy = self.F1(yanre, data['test'][:, -1])
    pp = self.F1(prer, data['predict'][:, -1])

    # Start combining
    self.yanzhneg_pr.append(yanre)
    self.yanzhneg_real = data['test'][:, -1]
    self.predi.append(prer)
    self.preal = data['predict'][:, -1]

    # Store the errors
    self.error_dict['AdaBoost'] = [xx, yy, pp]
    print('AdaBoost in layer 1 finished')

# GBDT
Example 25
def __init__(self, **kwargs):
    super(AdaBoost, self).__init__()
    super(AdaBoost, self).SetModel(AdaBoostClassifier(random_state=RANDOM_SEED[CLASSIFIER_AB], **kwargs))
Example 26
def test_oneclass_adaboost_proba():
    # Test predict_proba robustness for one class label input.
    # In response to issue #7501
    # https://github.com/scikit-learn/scikit-learn/issues/7501
    y_t = np.ones(len(X))
    clf = AdaBoostClassifier().fit(X, y_t)
    assert_array_almost_equal(clf.predict_proba(X), np.ones((len(X), 1)))
Example 27
def test_classification_toy():
    # Check classification on a toy dataset.
    for alg in ['SAMME', 'SAMME.R']:
        clf = AdaBoostClassifier(algorithm=alg, random_state=0)
        clf.fit(X, y_class)
        assert_array_equal(clf.predict(T), y_t_class)
        assert_array_equal(np.unique(np.asarray(y_t_class)), clf.classes_)
        assert_equal(clf.predict_proba(T).shape, (len(T), 2))
        assert_equal(clf.decision_function(T).shape, (len(T),))
Example 28
def test_iris():
    # Check consistency on dataset iris.
    classes = np.unique(iris.target)
    clf_samme = prob_samme = None

    for alg in ['SAMME', 'SAMME.R']:
        clf = AdaBoostClassifier(algorithm=alg)
        clf.fit(iris.data, iris.target)
        assert_array_equal(classes, clf.classes_)
        proba = clf.predict_proba(iris.data)
        if alg == "SAMME":
            clf_samme = clf
            prob_samme = proba
        assert_equal(proba.shape[1], len(classes))
        assert_equal(clf.decision_function(iris.data).shape[1], len(classes))

        score = clf.score(iris.data, iris.target)
        assert score > 0.9, "Failed with algorithm %s and score = %f" % \
            (alg, score)

        # Check we used multiple estimators
        assert_greater(len(clf.estimators_), 1)
        # Check for distinct random states (see issue #7408)
        assert_equal(len(set(est.random_state for est in clf.estimators_)),
                     len(clf.estimators_))

    # Somewhat hacky regression test: prior to
    # ae7adc880d624615a34bafdb1d75ef67051b8200,
    # predict_proba returned SAMME.R values for SAMME.
    clf_samme.algorithm = "SAMME.R"
    assert_array_less(0,
                      np.abs(clf_samme.predict_proba(iris.data) - prob_samme))