Python源码示例:sklearn.metrics.brier_score_loss()
示例1
def test_brier_score_loss():
    """Exercise ``brier_score_loss`` on a small binary problem."""
    y_true = np.array([0, 1, 1, 0, 1, 1])
    y_pred = np.array([0.1, 0.8, 0.9, 0.3, 1., 0.95])
    # Reference value: mean squared difference between labels and forecasts.
    true_score = linalg.norm(y_true - y_pred) ** 2 / len(y_true)

    # Scoring the labels against themselves gives exactly zero.
    assert_almost_equal(brier_score_loss(y_true, y_true), 0.0)

    # The same score is expected for the {0, 1}, {1, 2} and {-1, 1}
    # encodings of the two classes.
    for encoded_labels in (y_true, 1. + y_true, 2 * y_true - 1):
        assert_almost_equal(brier_score_loss(encoded_labels, y_pred),
                            true_score)

    # A length mismatch and probabilities shifted outside [0, 1] must
    # all be rejected with ValueError.
    for invalid_pred in (y_pred[1:], y_pred + 1., y_pred - 1.):
        assert_raises(ValueError, brier_score_loss, y_true, invalid_pred)

    # calculate even if only single class in y_true (#6980)
    for lone_label in ([0], [1]):
        assert_almost_equal(brier_score_loss(lone_label, [0.5]), 0.25)
示例2
def test_brier_score_loss():
    """Exercise ``brier_score_loss``: values, validation and pos_label."""
    y_true = np.array([0, 1, 1, 0, 1, 1])
    y_pred = np.array([0.1, 0.8, 0.9, 0.3, 1., 0.95])
    # Reference value: mean squared difference between labels and forecasts.
    true_score = linalg.norm(y_true - y_pred) ** 2 / len(y_true)

    # Scoring the labels against themselves gives exactly zero.
    assert_almost_equal(brier_score_loss(y_true, y_true), 0.0)

    # The same score is expected for the {0, 1}, {1, 2} and {-1, 1}
    # encodings of the two classes.
    for encoded_labels in (y_true, 1. + y_true, 2 * y_true - 1):
        assert_almost_equal(brier_score_loss(encoded_labels, y_pred),
                            true_score)

    # A length mismatch and probabilities shifted outside [0, 1] must
    # all be rejected with ValueError.
    for invalid_pred in (y_pred[1:], y_pred + 1., y_pred - 1.):
        assert_raises(ValueError, brier_score_loss, y_true, invalid_pred)

    # ensure to raise an error for multiclass y_true
    multiclass_true = np.array([0, 1, 2, 0])
    multiclass_pred = np.array([0.8, 0.6, 0.4, 0.2])
    error_message = ("Only binary classification is supported. Labels "
                     "in y_true: {}".format(np.array([0, 1, 2])))
    assert_raise_message(ValueError, error_message, brier_score_loss,
                         multiclass_true, multiclass_pred)

    # calculate correctly when there's only one class in y_true:
    # each row is (labels, extra keyword arguments, expected score).
    single_class_cases = [
        ([-1], {}, 0.16),
        ([0], {}, 0.16),
        ([1], {}, 0.36),
        (['foo'], {'pos_label': 'bar'}, 0.16),
        (['foo'], {'pos_label': 'foo'}, 0.36),
    ]
    for labels, extra_kwargs, expected in single_class_cases:
        assert_almost_equal(
            brier_score_loss(labels, [0.4], **extra_kwargs), expected)
示例3
def test_calibration_prefit():
    """Test calibration for prefitted classifiers"""
    n_samples = 50
    X, y = make_classification(n_samples=3 * n_samples, n_features=6,
                               random_state=42)
    rng = np.random.RandomState(seed=42)
    sample_weight = rng.uniform(size=y.size)

    X -= X.min()  # MultinomialNB only allows positive X

    # Three consecutive blocks of rows: train / calibration / test.
    train_rows = slice(None, n_samples)
    calib_rows = slice(n_samples, 2 * n_samples)
    test_rows = slice(2 * n_samples, None)

    X_train, y_train = X[train_rows], y[train_rows]
    sw_train = sample_weight[train_rows]
    X_calib, y_calib = X[calib_rows], y[calib_rows]
    sw_calib = sample_weight[calib_rows]
    X_test, y_test = X[test_rows], y[test_rows]

    # Naive-Bayes
    clf = MultinomialNB()
    clf.fit(X_train, y_train, sw_train)
    prob_pos_clf = clf.predict_proba(X_test)[:, 1]

    # Naive Bayes with calibration, on dense and then sparse inputs
    dense_inputs = (X_calib, X_test)
    sparse_inputs = (sparse.csr_matrix(X_calib), sparse.csr_matrix(X_test))
    for this_X_calib, this_X_test in [dense_inputs, sparse_inputs]:
        for method in ['isotonic', 'sigmoid']:
            pc_clf = CalibratedClassifierCV(clf, method=method, cv="prefit")

            # Calibrate once with sample weights and once without.
            for sw in [sw_calib, None]:
                pc_clf.fit(this_X_calib, y_calib, sample_weight=sw)
                y_prob = pc_clf.predict_proba(this_X_test)
                y_pred = pc_clf.predict(this_X_test)
                prob_pos_pc_clf = y_prob[:, 1]

                # predict() must agree with the arg-max of predict_proba().
                expected_pred = np.array([0, 1])[np.argmax(y_prob, axis=1)]
                assert_array_equal(y_pred, expected_pred)

                # The calibrated Brier score must be strictly lower.
                uncalibrated = brier_score_loss(y_test, prob_pos_clf)
                calibrated = brier_score_loss(y_test, prob_pos_pc_clf)
                assert_greater(uncalibrated, calibrated)
示例4
def score(self,
          actual: np.array,
          predicted: np.array,
          sample_weight: typing.Optional[np.array] = None,
          labels: typing.Optional[np.array] = None,
          **kwargs) -> float:
    """Return the Brier score loss of ``predicted`` against ``actual``.

    ``labels`` is fitted with a ``LabelEncoder`` and ``actual`` is mapped
    through the same encoder before scoring.  The encoded value of the
    second entry of ``labels`` is used as the positive class.

    Parameters
    ----------
    actual : observed class labels; must only contain values that also
        appear in ``labels``.
    predicted : values forwarded unchanged to ``brier_score_loss`` as the
        probability argument.
    sample_weight : optional per-sample weights.
    labels : the class labels; needs at least two entries because index 1
        is read.
    **kwargs : accepted but not used by this method.

    Returns
    -------
    The value returned by ``brier_score_loss``.
    """
    # NOTE(review): ``labels`` defaults to None but is handed straight to
    # the encoder, so callers presumably always supply it -- confirm.
    encoder = LabelEncoder()
    labels = encoder.fit_transform(labels)
    actual = encoder.transform(actual)
    # ``sample_weight`` is keyword-only in current scikit-learn releases;
    # passing it positionally raises TypeError there, so name it.
    return brier_score_loss(actual, predicted,
                            sample_weight=sample_weight,
                            pos_label=labels[1])
示例5
def __init__(self, scoring_method=None):
    """Store the scoring method and resolve it to a callable.

    ``scoring_method`` may be a callable, which is used directly, or a key
    of ``scoring_name_function_map``.  ``None`` selects
    ``'brier_score_loss'``.
    """
    chosen = 'brier_score_loss' if scoring_method is None else scoring_method
    self.scoring_method = chosen
    # Callables are used as-is; anything else is looked up by name.
    self.scoring_func = (
        chosen if callable(chosen) else scoring_name_function_map[chosen]
    )
示例6
def score(self, estimator, X, y, advanced_scoring=False):
    """Score ``estimator`` on ``(X, y)`` and return the negated score.

    Parameters
    ----------
    estimator : fitted classifier exposing ``predict_proba``.
    X : feature matrix passed to ``estimator.predict_proba``.
    y : target values.
    advanced_scoring : when true, also return the predictions that were
        finally scored.

    Returns
    -------
    ``-1 * score``, or the tuple ``(-1 * score, predictions)`` when
    ``advanced_scoring`` is true.

    If the scoring function raises ``ValueError``, rows whose ``str(y)``
    value is listed in ``bad_vals_as_strings`` are dropped and scoring is
    retried; if that fails too, the predictions are passed through
    ``self.clean_probas`` and scored one last time.
    """
    X, y = utils.drop_missing_y_vals(X, y, output_column=None)

    # NOTE(review): presumably X is sparse at this point and this estimator
    # needs a dense array -- confirm against the callers.
    if isinstance(estimator, GradientBoostingClassifier):
        X = X.toarray()

    predictions = estimator.predict_proba(X)

    if self.scoring_method == 'brier_score_loss':
        # At the moment, Microsoft's LightGBM returns probabilities > 1 and
        # < 0, which can break some scoring functions. So we keep only the
        # second column of each row and clamp it into [0, 1].
        predictions = [max(min(row[1], 1), 0) for row in predictions]

    try:
        score = self.scoring_func(y, predictions)
    except ValueError:
        # A set keeps the two filters below linear instead of quadratic.
        bad_val_indices = {
            idx for idx, val in enumerate(y)
            if str(val) in bad_vals_as_strings
        }

        predictions = [val for idx, val in enumerate(predictions)
                       if idx not in bad_val_indices]
        y = [val for idx, val in enumerate(y) if idx not in bad_val_indices]

        print('Found ' + str(len(bad_val_indices)) + ' null or infinity values in the y values. We will ignore these, and report the score on the rest of the dataset')
        try:
            score = self.scoring_func(y, predictions)
        except ValueError:
            # Sometimes, particularly for a badly fit model using either
            # too little data, or a really bad set of hyperparameters
            # during a grid search, we can predict probas that are > 1 or
            # < 0. We'll cap those here, while warning the user about
            # them, because they're unlikely to occur in a model that's
            # properly trained with enough data and reasonable params
            predictions = self.clean_probas(predictions)
            score = self.scoring_func(y, predictions)

    if advanced_scoring:
        return (-1 * score, predictions)
    return -1 * score
示例7
def calculate_brier_score_loss(actuals, probas):
    """Return the Brier score loss of ``probas`` vs ``actuals``, negated."""
    loss = brier_score_loss(actuals, probas)
    return -1 * loss
示例8
def calculate_brier_score_loss(actuals, probas):
    """Compute ``brier_score_loss(actuals, probas)`` and flip its sign."""
    raw_loss = brier_score_loss(actuals, probas)
    negated_loss = -1 * raw_loss
    return negated_loss
示例9
def __call__(self, y_true_proba, y_proba):
    """Return ``brier_score_loss(y_true_proba, y_proba)``."""
    brier_score = brier_score_loss(y_true_proba, y_proba)
    return brier_score
示例10
def __call__(self, y_true_proba, y_proba):
    """Return one minus the ratio of forecast to reference Brier score.

    The reference forecast assigns the mean of ``y_true_proba`` to every
    sample; ``y_proba`` is scored relative to it.
    """
    # Constant forecast equal to the mean of the observed values.
    reference_forecast = y_true_proba.mean() * np.ones(y_true_proba.size)
    forecast_score = brier_score_loss(y_true_proba, y_proba)
    reference_score = brier_score_loss(y_true_proba, reference_forecast)
    skill = 1 - forecast_score / reference_score
    return skill
示例11
def test_calibration_prefit():
    """Test calibration for prefitted classifiers"""
    n_samples = 50
    X, y = make_classification(n_samples=3 * n_samples, n_features=6,
                               random_state=42)
    weight_rng = np.random.RandomState(seed=42)
    sample_weight = weight_rng.uniform(size=y.size)

    X -= X.min()  # MultinomialNB only allows positive X

    # Boundaries of the train / calibration / test blocks of rows.
    first_cut, second_cut = n_samples, 2 * n_samples

    X_train = X[:first_cut]
    y_train = y[:first_cut]
    sw_train = sample_weight[:first_cut]

    X_calib = X[first_cut:second_cut]
    y_calib = y[first_cut:second_cut]
    sw_calib = sample_weight[first_cut:second_cut]

    X_test = X[second_cut:]
    y_test = y[second_cut:]

    # Naive-Bayes
    clf = MultinomialNB()
    clf.fit(X_train, y_train, sw_train)
    prob_pos_clf = clf.predict_proba(X_test)[:, 1]

    # Naive Bayes with calibration, first on dense then on sparse inputs
    input_pairs = [
        (X_calib, X_test),
        (sparse.csr_matrix(X_calib), sparse.csr_matrix(X_test)),
    ]
    for this_X_calib, this_X_test in input_pairs:
        for method in ['isotonic', 'sigmoid']:
            pc_clf = CalibratedClassifierCV(clf, method=method, cv="prefit")

            # Calibrate once with sample weights and once without.
            for sw in [sw_calib, None]:
                pc_clf.fit(this_X_calib, y_calib, sample_weight=sw)
                y_prob = pc_clf.predict_proba(this_X_test)
                y_pred = pc_clf.predict(this_X_test)
                prob_pos_pc_clf = y_prob[:, 1]

                # Predicted classes must match the most probable column.
                most_probable = np.argmax(y_prob, axis=1)
                assert_array_equal(y_pred, np.array([0, 1])[most_probable])

                # The calibrated Brier score must be strictly lower.
                assert_greater(
                    brier_score_loss(y_test, prob_pos_clf),
                    brier_score_loss(y_test, prob_pos_pc_clf),
                )
示例12
def check_calibration(method):
    """Check ``CalibratedClassifierCV`` with the given calibration method."""
    # Adapted from sklearn/tests/test_calibration.py
    # Authors: Alexandre Gramfort
    # License: BSD 3 clause
    n_samples = 100
    X, y = make_classification(n_samples=2 * n_samples, n_features=6,
                               random_state=42)

    X -= X.min()  # MultinomialNB only allows positive X

    # First half of the rows is for training, second half for testing.
    X_train, X_test = X[:n_samples], X[n_samples:]
    y_train, y_test = y[:n_samples], y[n_samples:]

    # Naive-Bayes
    clf = MultinomialNB().fit(X_train, y_train)
    prob_pos_clf = clf.predict_proba(X_test)[:, 1]
    uncalibrated_score = brier_score_loss(y_test, prob_pos_clf)

    # Asking for more folds than there are samples must be rejected.
    too_many_folds = CalibratedClassifierCV(clf, cv=y.size + 1)
    assert_raises(ValueError, too_many_folds.fit, X, y)

    pc_clf = CalibratedClassifierCV(clf, method=method, cv=2)
    # Note that this fit overwrites the fit on the entire training set
    pc_clf.fit(X_train, y_train)
    prob_pos_pc_clf = pc_clf.predict_proba(X_test)[:, 1]

    # Check that brier score has improved after calibration
    assert_greater(uncalibrated_score,
                   brier_score_loss(y_test, prob_pos_pc_clf))

    # Check invariance against relabeling [0, 1] -> [1, 2]
    # and then against relabeling [0, 1] -> [-1, 1]
    for relabeled_train in (y_train + 1, 2 * y_train - 1):
        pc_clf.fit(X_train, relabeled_train)
        prob_pos_pc_clf_relabeled = pc_clf.predict_proba(X_test)[:, 1]
        assert_array_almost_equal(prob_pos_pc_clf,
                                  prob_pos_pc_clf_relabeled)

    # Check invariance against relabeling [0, 1] -> [1, 0]
    pc_clf.fit(X_train, (y_train + 1) % 2)
    prob_pos_pc_clf_relabeled = pc_clf.predict_proba(X_test)[:, 1]
    if method != "sigmoid":
        # Isotonic calibration is not invariant against relabeling
        # but should improve in both cases
        swapped_score = brier_score_loss((y_test + 1) % 2,
                                         prob_pos_pc_clf_relabeled)
        assert_greater(uncalibrated_score, swapped_score)
    else:
        assert_array_almost_equal(prob_pos_pc_clf,
                                  1 - prob_pos_pc_clf_relabeled)