Python source code examples: sklearn.metrics.average_precision_score()
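The examples below all embed average_precision_score in larger evaluation routines. As a quick orientation, a minimal, self-contained call looks like this (the toy labels and scores are invented purely for illustration):

import numpy as np
from sklearn.metrics import average_precision_score

y_true = np.array([0, 0, 1, 1])               # binary ground-truth labels
y_scores = np.array([0.1, 0.4, 0.35, 0.8])    # classifier scores or probabilities
print(average_precision_score(y_true, y_scores))  # approximately 0.83 for this toy input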
Example 1
def plot_precision_recall_curve(y_true, y_score, size=None):
    """plot_precision_recall_curve."""
    precision, recall, thresholds = precision_recall_curve(y_true, y_score)
    if size is not None:
        plt.figure(figsize=(size, size))
        plt.axis('equal')
    plt.plot(recall, precision, lw=2, color='navy')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([-0.05, 1.05])
    plt.xlim([-0.05, 1.05])
    plt.grid()
    plt.title('Precision-Recall AUC={0:0.2f}'.format(average_precision_score(
        y_true, y_score)))
Example 2
def test(self, z, pos_edge_index, neg_edge_index):
    r"""Given latent variables :obj:`z`, positive edges
    :obj:`pos_edge_index` and negative edges :obj:`neg_edge_index`,
    computes area under the ROC curve (AUC) and average precision (AP)
    scores.

    Args:
        z (Tensor): The latent space :math:`\mathbf{Z}`.
        pos_edge_index (LongTensor): The positive edges to evaluate
            against.
        neg_edge_index (LongTensor): The negative edges to evaluate
            against.
    """
    pos_y = z.new_ones(pos_edge_index.size(1))
    neg_y = z.new_zeros(neg_edge_index.size(1))
    y = torch.cat([pos_y, neg_y], dim=0)

    pos_pred = self.decoder(z, pos_edge_index, sigmoid=True)
    neg_pred = self.decoder(z, neg_edge_index, sigmoid=True)
    pred = torch.cat([pos_pred, neg_pred], dim=0)

    y, pred = y.detach().cpu().numpy(), pred.detach().cpu().numpy()

    return roc_auc_score(y, pred), average_precision_score(y, pred)
Example 3
def get_all_metrics(model, eval_data, eval_labels, pred_labels):
    fpr, tpr, thresholds_keras = roc_curve(eval_labels, pred_labels)
    auc_ = auc(fpr, tpr)
    print("auc_keras:" + str(auc_))

    score = model.evaluate(eval_data, eval_labels, verbose=0)
    print("Test accuracy: " + str(score[1]))

    precision = precision_score(eval_labels, pred_labels)
    print('Precision score: {0:0.2f}'.format(precision))

    recall = recall_score(eval_labels, pred_labels)
    print('Recall score: {0:0.2f}'.format(recall))

    f1 = f1_score(eval_labels, pred_labels)
    print('F1 score: {0:0.2f}'.format(f1))

    average_precision = average_precision_score(eval_labels, pred_labels)
    print('Average precision-recall score: {0:0.2f}'.format(average_precision))

    return auc_, score[1], precision, recall, f1, average_precision, fpr, tpr
Example 4
def get_all_metrics_(eval_labels, pred_labels):
    fpr, tpr, thresholds_keras = roc_curve(eval_labels, pred_labels)
    auc_ = auc(fpr, tpr)
    print("auc_keras:" + str(auc_))

    precision = precision_score(eval_labels, pred_labels)
    print('Precision score: {0:0.2f}'.format(precision))

    recall = recall_score(eval_labels, pred_labels)
    print('Recall score: {0:0.2f}'.format(recall))

    f1 = f1_score(eval_labels, pred_labels)
    print('F1 score: {0:0.2f}'.format(f1))

    average_precision = average_precision_score(eval_labels, pred_labels)
    print('Average precision-recall score: {0:0.2f}'.format(average_precision))

    return auc_, precision, recall, f1, average_precision, fpr, tpr
Example 5
def test(data, model, epoch, args):
    model.eval()
    n_iters = 0
    ap_sum = 0.0
    progress_bar = tqdm(data)
    for batch_idx, sample_batched in enumerate(progress_bar):
        img, target = load_tensor_data(sample_batched, args.cuda, volatile=True)
        output = model(img)
        ap = average_precision_score(target.data, output.data)
        n_iters += 1
        ap_sum += ap
        if batch_idx % args.log_interval == 0:
            m_ap = ap_sum / n_iters
            progress_bar.set_postfix(dict(AP='{:.2}'.format(m_ap)))
    m_ap = ap_sum / n_iters
    print('Test Epoch {}: Avg. Precision Score = {:.2};'.format(epoch, m_ap))
Example 6
def calculate_scores(y_predicted, y_true):
    """
    Function to calculate different performance scores
    """
    accuracy = accuracy_score(y_pred=y_predicted, y_true=y_true)
    precision = precision_score(y_pred=y_predicted, y_true=y_true)
    average_precision_score1 = average_precision_score(y_score=y_predicted, y_true=y_true)
    f1_score1 = f1_score(y_pred=y_predicted, y_true=y_true)
    print("Accuracy score:", accuracy)
    print("Precision score:", precision)
    print("Average Precision score:", average_precision_score1)
    print("F1 score:", f1_score1)
    print("Outlier detection and/or treatment completed.")
    return {"accuracy": accuracy,
            "precision": precision,
            "average_precision_score": average_precision_score1,
            "f1_score": f1_score1,
            }
Example 7
def _average_precision_slow(y_true, y_score):
    """A second alternative implementation of average precision that closely
    follows the Wikipedia article's definition (see References). This should
    give identical results as `average_precision_score` for all inputs.

    References
    ----------
    .. [1] `Wikipedia entry for the Average precision
       <https://en.wikipedia.org/wiki/Average_precision>`_
    """
    precision, recall, threshold = precision_recall_curve(y_true, y_score)
    precision = list(reversed(precision))
    recall = list(reversed(recall))
    average_precision = 0
    for i in range(1, len(precision)):
        average_precision += precision[i] * (recall[i] - recall[i - 1])
    return average_precision
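The step-sum in Example 7 follows the definition AP = sum_n (R_n - R_{n-1}) * P_n, which is also what sklearn computes. A quick sanity check of the two (a sketch that assumes _average_precision_slow from Example 7 is in scope; the random data is invented for illustration) could look like:

import numpy as np
from sklearn.metrics import average_precision_score, precision_recall_curve

# Random binary labels and scores, for illustration only.
rng = np.random.RandomState(0)
y_true = rng.randint(0, 2, size=200)
y_score = rng.rand(200)

# _average_precision_slow is the function defined in Example 7 above.
assert np.isclose(_average_precision_slow(y_true, y_score),
                  average_precision_score(y_true, y_score))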
Example 8
def test_average_precision_score_pos_label_errors():
    # Raise an error when pos_label is not in binary y_true
    y_true = np.array([0, 1])
    y_pred = np.array([0, 1])
    error_message = ("pos_label=2 is invalid. Set it to a label in y_true.")
    assert_raise_message(ValueError, error_message, average_precision_score,
                         y_true, y_pred, pos_label=2)
    # Raise an error for multilabel-indicator y_true with
    # pos_label other than 1
    y_true = np.array([[1, 0], [0, 1], [0, 1], [1, 0]])
    y_pred = np.array([[0.9, 0.1], [0.1, 0.9], [0.8, 0.2], [0.2, 0.8]])
    error_message = ("Parameter pos_label is fixed to 1 for multilabel"
                     "-indicator y_true. Do not set pos_label or set "
                     "pos_label to 1.")
    assert_raise_message(ValueError, error_message, average_precision_score,
                         y_true, y_pred, pos_label=0)
Example 9
def test_score_scale_invariance():
    # Test that average_precision_score and roc_auc_score are invariant by
    # the scaling or shifting of probabilities
    # This test was expanded (added scaled_down) in response to github
    # issue #3864 (and others), where overly aggressive rounding was causing
    # problems for users with very small y_score values
    y_true, _, probas_pred = make_prediction(binary=True)

    roc_auc = roc_auc_score(y_true, probas_pred)
    roc_auc_scaled_up = roc_auc_score(y_true, 100 * probas_pred)
    roc_auc_scaled_down = roc_auc_score(y_true, 1e-6 * probas_pred)
    roc_auc_shifted = roc_auc_score(y_true, probas_pred - 10)
    assert_equal(roc_auc, roc_auc_scaled_up)
    assert_equal(roc_auc, roc_auc_scaled_down)
    assert_equal(roc_auc, roc_auc_shifted)

    pr_auc = average_precision_score(y_true, probas_pred)
    pr_auc_scaled_up = average_precision_score(y_true, 100 * probas_pred)
    pr_auc_scaled_down = average_precision_score(y_true, 1e-6 * probas_pred)
    pr_auc_shifted = average_precision_score(y_true, probas_pred - 10)
    assert_equal(pr_auc, pr_auc_scaled_up)
    assert_equal(pr_auc, pr_auc_scaled_down)
    assert_equal(pr_auc, pr_auc_shifted)
Example 10
def score_link_prediction(labels, scores):
    """
    Calculates the area under the ROC curve and the average precision score.

    Parameters
    ----------
    labels : array-like, shape [N]
        The ground truth labels
    scores : array-like, shape [N]
        The (unnormalized) scores of how likely are the instances

    Returns
    -------
    roc_auc : float
        Area under the ROC curve score
    ap : float
        Average precision score
    """
    return roc_auc_score(labels, scores), average_precision_score(labels, scores)
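Example 10 is a thin wrapper, so the main thing a usage sketch adds is the expected input shapes: a flat array of 0/1 edge labels and a same-length array of (possibly unnormalized) scores. The toy values below are illustrative only and assume score_link_prediction from Example 10 is in scope:

import numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score  # needed by the wrapper itself

labels = np.array([1, 0, 1, 0, 1])               # 1 = edge exists, 0 = non-edge
scores = np.array([2.3, -0.4, 1.1, 0.2, 0.9])    # raw similarity scores are fine
roc_auc, ap = score_link_prediction(labels, scores)
print('ROC-AUC: {:.3f}, AP: {:.3f}'.format(roc_auc, ap))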
Example 11
def average_precision_compute_fn(y_preds, y_targets, mask, activation=None):
    try:
        from sklearn.metrics import average_precision_score
    except ImportError:
        raise RuntimeError("This contrib module requires sklearn to be installed.")

    y_true = y_targets.numpy()
    if activation is not None:
        y_preds = activation(y_preds)
    y_pred = y_preds.numpy()

    if mask is not None:
        y_true = y_true[:, mask]
        y_pred = y_pred[:, mask]

    return average_precision_score(y_true, y_pred)
Example 12
def get_ap(self, q_name, sorted_idx):
    rel = self.__relevants[q_name]
    junk = self.__junk[q_name]

    # construct ground-truth and scores:
    y_scores = np.zeros(self.N_images)
    y_true = np.zeros(self.N_images)
    for e, i in enumerate(sorted_idx):
        y_scores[i] = self.N_images - e
    for i in rel:
        y_true[i] = 1

    # remove junk:
    y_scores = np.delete(y_scores, junk)
    y_true = np.delete(y_true, junk)

    # compute ap:
    return average_precision_score(y_true, y_scores)
Example 13
def plot_PR_curve(classifier):
    precision, recall, thresholds = precision_recall_curve(DataPrep.test_news['Label'], classifier)
    average_precision = average_precision_score(DataPrep.test_news['Label'], classifier)

    plt.step(recall, precision, color='b', alpha=0.2, where='post')
    plt.fill_between(recall, precision, step='post', alpha=0.2, color='b')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('2-class Random Forest Precision-Recall curve: AP={0:0.2f}'.format(
        average_precision))
Example 14
def report_metrics(y_dset, y_pred, batch_size, dset='Val'):
    # Print additional metrics involving predictions
    n_rows = (y_dset.shape[0] // batch_size) * batch_size
    y_true = y_dset[0:n_rows, :].flatten()
    y_pred = y_pred.flatten()
    val_ap = average_precision_score(y_true, y_pred)
    val_roc = roc_auc_score(y_true, y_pred)
    n = y_true.size
    n_pos = y_true.sum()
    idx_sorted = np.argsort(-y_pred)
    val_rec = []
    logging.info(dset + "-AP {:.6f}".format(val_ap))
    logging.info(dset + "-ROC {:.6f}".format(val_roc))
    for i, v in enumerate([10, 25, 50, 75, 100]):
        tp = y_true[idx_sorted[:int(v * n / 100)]].sum()
        val_rec.append(tp * 1.0 / n_pos)
        logging.info(dset + "-R{} {:.6f}".format(v, val_rec[i]))
    return val_ap, val_rec[2]
Example 15
def evaluate_embedding_link_prediction(adj_matrix, node_pairs, embedding_matrix, norm=False):
    """Evaluate the node embeddings on the link prediction task.

    :param adj_matrix: sp.csr_matrix, shape [n_nodes, n_nodes]
        Adjacency matrix of the graph
    :param node_pairs:
    :param embedding_matrix: np.ndarray, shape [n_nodes, embedding_dim]
        Embedding matrix
    :param norm: bool
        Whether to normalize the embeddings
    :return: float, float
        Area under ROC curve (AUC) score and average precision (AP) score
    """
    if norm:
        embedding_matrix = normalize(embedding_matrix)

    true = adj_matrix[node_pairs[:, 0], node_pairs[:, 1]].A1
    scores = (embedding_matrix[node_pairs[:, 0]] * embedding_matrix[node_pairs[:, 1]]).sum(1)
    auc_score, ap_score = roc_auc_score(true, scores), average_precision_score(true, scores)

    return auc_score, ap_score
Example 16
def __call__(self, args, env):
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.metrics import average_precision_score
    from sklearn.metrics import precision_recall_curve
    from vergeml.plots import load_labels, load_predictions

    try:
        labels = load_labels(env)
    except FileNotFoundError:
        raise VergeMLError("Can't plot PR curve - not supported by model.")

    nclasses = len(labels)
    if args['class'] not in labels:
        raise VergeMLError("Unknown class: " + args['class'])

    try:
        y_test, y_score = load_predictions(env, nclasses)
    except FileNotFoundError:
        raise VergeMLError("Can't plot PR curve - not supported by model.")

    # From:
    # https://scikit-learn.org/stable/auto_examples/model_selection/plot_precision_recall.html#sphx-glr-auto-examples-model-selection-plot-precision-recall-py
    ix = labels.index(args['class'])
    y_test = y_test[:, ix].astype(int)
    y_score = y_score[:, ix]

    precision, recall, _ = precision_recall_curve(y_test, y_score)
    average_precision = average_precision_score(y_test, y_score)

    plt.step(recall, precision, color='b', alpha=0.2, where='post')
    plt.fill_between(recall, precision, alpha=0.2, color='b', step='post')
    plt.xlabel('Recall ({})'.format(args['class']))
    plt.ylabel('Precision ({})'.format(args['class']))
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('Precision-Recall curve for @{0}: AP={1:0.2f}'.format(args['@AI'], average_precision))
    plt.show()
Example 17
def estimate_predictive_performance(x_y,
                                    estimator=None,
                                    n_splits=10,
                                    random_state=1):
    """estimate_predictive_performance."""
    x, y = x_y
    cv = ShuffleSplit(n_splits=n_splits,
                      test_size=0.3,
                      random_state=random_state)
    scoring = make_scorer(average_precision_score)
    scores = cross_val_score(estimator, x, y, cv=cv, scoring=scoring)
    return scores
Example 18
def perf(y_true, y_pred, y_score):
    """perf."""
    print('Accuracy: %.2f' % accuracy_score(y_true, y_pred))
    print(' AUC ROC: %.2f' % roc_auc_score(y_true, y_score))
    print('  AUC AP: %.2f' % average_precision_score(y_true, y_score))
    print()
    print('Classification Report:')
    print(classification_report(y_true, y_pred))
    print()
    plot_confusion_matrices(y_true, y_pred, size=int(len(set(y_true)) * 2.5))
    print()
    plot_aucs(y_true, y_score, size=10)
Example 19
def estimate_model(positive_data_matrix=None,
                   negative_data_matrix=None,
                   target=None,
                   estimator=None,
                   n_jobs=4):
    """estimate_model."""
    X, y = make_data_matrix(positive_data_matrix=positive_data_matrix,
                            negative_data_matrix=negative_data_matrix,
                            target=target)
    logger.info('Test set')
    logger.info(describe(X))
    logger.info('-' * 80)
    logger.info('Test Estimate')
    predictions = estimator.predict(X)
    margins = estimator.decision_function(X)
    logger.info(classification_report(y, predictions))
    apr = average_precision_score(y, margins)
    logger.info('APR: %.3f' % apr)
    roc = roc_auc_score(y, margins)
    logger.info('ROC: %.3f' % roc)

    logger.info('Cross-validated estimate')
    scoring_strings = ['accuracy', 'precision', 'recall', 'f1',
                       'average_precision', 'roc_auc']
    for scoring in scoring_strings:
        scores = cross_validation.cross_val_score(
            estimator, X, y, cv=5,
            scoring=scoring, n_jobs=n_jobs)
        logger.info('%20s: %.3f +- %.3f' % (scoring,
                                            np.mean(scores),
                                            np.std(scores)))
    return roc, apr
Example 20
def report_evaluation_metrics(y_true, y_pred):
    average_precision = average_precision_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, labels=[0, 1], pos_label=1)
    recall = recall_score(y_true, y_pred, labels=[0, 1], pos_label=1)
    f1 = f1_score(y_true, y_pred, labels=[0, 1], pos_label=1)
    print('Average precision-recall score: {0:0.2f}'.format(average_precision))
    print('Precision: {0:0.2f}'.format(precision))
    print('Recall: {0:0.2f}'.format(recall))
    print('F1: {0:0.2f}'.format(f1))
Example 21
def calculate_roc_pr(model, sequence, mask=-1, return_pred=False):
    y_true = sequence.y
    y_pred = model.predict_generator(sequence, use_multiprocessing=True, workers=6)

    if y_true.ndim == 1:
        val_roc = roc_auc_score(y_true, y_pred)
        val_pr = average_precision_score(y_true, y_pred)
    elif y_true.ndim == 2:
        y_true = y_true.transpose()
        y_pred = y_pred.transpose()
        unmask_idx = [np.where(y != mask)[0] for y in y_true]
        val_roc = [roc_auc_score(yt[idx], yp[idx])
                   for (yt, yp, idx) in zip(y_true, y_pred, unmask_idx)]
        val_pr = [average_precision_score(yt[idx], yp[idx])
                  for (yt, yp, idx) in zip(y_true, y_pred, unmask_idx)]
        val_roc = np.array(val_roc).mean()
        val_pr = np.array(val_pr).mean()
        y_pred = y_pred.transpose()
    else:
        raise ValueError("Unsupported output shape for auc calculation")

    if return_pred:
        return val_roc, val_pr, y_pred
    else:
        return val_roc, val_pr
Example 22
def auprc(labels, scores):
    ap = average_precision_score(labels, scores)
    return ap
Example 23
def map_sklearn(y_true, y_pred):
    """Returns mAP."""
    n_classes = y_true.shape[1]
    map = [average_precision_score(y_true[:, i], y_pred[:, i]) for i in range(n_classes)]
    map = np.nan_to_num(map)
    map = np.mean(map)
    return map
Example 24
def average_precision_score(conditions, prediction_scores, average='micro',
                            sample_weight=None):
    # average is one of ['micro', 'macro', 'samples', 'weighted']
    return metrics.average_precision_score(conditions, prediction_scores,
                                           average=average,
                                           sample_weight=sample_weight)
Example 25
def compute_pr(y_test, probability_predictions):
    """
    Compute Precision-Recall, thresholds and PR AUC.

    Args:
        y_test (list) : true label values corresponding to the predictions. Also length n.
        probability_predictions (list) : predictions coming from an ML algorithm of length n.

    Returns:
        dict:
    """
    _validate_predictions_and_labels_are_equal_length(probability_predictions, y_test)

    # Calculate PR
    precisions, recalls, pr_thresholds = skmetrics.precision_recall_curve(y_test, probability_predictions)
    pr_auc = skmetrics.average_precision_score(y_test, probability_predictions)

    # get ideal cutoffs for suggestions (upper right or 1,1)
    pr_distances = (precisions - 1) ** 2 + (recalls - 1) ** 2

    # To prevent the case where there are two points with the same minimum distance, return only the first
    # np.where returns a tuple (we want the first element in the first array)
    pr_index = np.where(pr_distances == np.min(pr_distances))[0][0]
    best_precision = precisions[pr_index]
    best_recall = recalls[pr_index]
    ideal_pr_cutoff = pr_thresholds[pr_index]

    return {'pr_auc': pr_auc,
            'best_pr_cutoff': ideal_pr_cutoff,
            'best_precision': best_precision,
            'best_recall': best_recall,
            'precisions': precisions,
            'recalls': recalls,
            'pr_thresholds': pr_thresholds}
Example 26
def auprc(self, data, model, tt, name):
    scores = self.get_predictions_loss(data, model, tt)[0]
    labels = [prot["label"][:, 2] for prot in data[tt]]
    close_count = 0
    auprcs = []
    for preds, lbls in zip(scores, labels):
        if np.allclose(preds[:, 0], np.zeros_like(preds[:, 0]) + np.mean(preds[:, 0])):
            close_count += 1
        auprcs.append(average_precision_score(lbls, preds))
    if close_count > 0:
        printt("For {} proteins, all predicted scores are close to each other, "
               "auprc may be based on improper sorting".format(close_count))
    med_auprc = np.median(auprcs)
    printt("{} median auprc: {:0.3f}".format(name, med_auprc))
    return ["auprc_med_" + tt], [med_auprc]
Example 27
def get_stats_detection(logit, label, n_classes=52):
    '''
    Calculate the accuracy and average precisions.
    '''
    logit = to_numpy(logit)
    label = to_numpy(label)
    scores = softmax(logit, axis=1)
    pred = np.argmax(logit, 1)
    length = label.shape[0]
    acc = np.sum(pred == label) / length

    keep_bg = label == 0
    acc_bg = np.sum(pred[keep_bg] == label[keep_bg]) / label[keep_bg].shape[0]
    ratio_bg = np.sum(keep_bg) / length

    keep_action = label != 0
    acc_action = np.sum(
        pred[keep_action] == label[keep_action]) / label[keep_action].shape[0]

    # Average precision
    y_true = np.zeros((len(label), n_classes))
    y_true[np.arange(len(label)), label] = 1
    acc = np.sum(pred == label) / label.shape[0]
    aps = average_precision_score(y_true, scores, average=None)
    aps = list(filter(lambda x: not np.isnan(x), aps))
    ap = np.mean(aps)
    return ap, acc, acc_bg, acc_action, ratio_bg, pred, label
Example 28
def test(opt, model, dataloader):
    '''Test model.'''
    # Logging
    logger = logging.Logger(opt.load_ckpt_path, opt.split)
    stats = logging.Statistics(opt.ckpt_path, opt.split)
    logger.log(opt)

    logits, labels = [], []
    model.load(opt.load_ckpt_paths, opt.load_epoch)
    for step, data in enumerate(dataloader, 1):
        inputs, label = data
        info_acc, logit = model.test(inputs, label)
        logits.append(utils.to_numpy(logit.squeeze(0)))
        labels.append(utils.to_numpy(label))
        update = stats.update(label.size(0), info_acc)
        if utils.is_due(step, opt.print_every):
            utils.info('step {}/{}: {}'.format(step, len(dataloader), update))

    logits = np.concatenate(logits, axis=0)
    length, n_classes = logits.shape
    labels = np.concatenate(labels)
    scores = utils.softmax(logits, axis=1)

    # Accuracy
    preds = np.argmax(scores, axis=1)
    acc = np.sum(preds == labels) / length

    # Average precision
    y_true = np.zeros((length, n_classes))
    y_true[np.arange(length), labels] = 1
    aps = average_precision_score(y_true, scores, average=None)
    aps = list(filter(lambda x: not np.isnan(x), aps))
    mAP = np.mean(aps)

    logger.log('[Summary]: {}'.format(stats.summarize()))
    logger.log('Acc: {}, mAP: {}'.format(acc, mAP))
Example 29
def mean_ap_metric(predicts, targets):
    predict = predicts[:, ~np.all(targets == 0, axis=0)]
    target = targets[:, ~np.all(targets == 0, axis=0)]

    mean_auc = 0
    aps = [0]
    try:
        mean_auc = metrics.roc_auc_score(target, predict)
    except ValueError:
        print('The roc_auc curve requires a sufficient number of classes '
              'which are missing in this sample.')
    try:
        aps = metrics.average_precision_score(target, predict, average=None)
    except ValueError:
        print('Average precision requires a sufficient number of samples '
              'in a batch which are missing in this sample.')

    mean_ap = np.mean(aps)
    weights = np.sum(target.astype(float), axis=0)
    weights /= np.sum(weights)
    mean_wap = np.sum(np.multiply(aps, weights))
    all_aps = np.zeros((1, targets.shape[1]))
    all_aps[:, ~np.all(targets == 0, axis=0)] = aps
    return mean_auc, mean_ap, mean_wap, all_aps.flatten()