Python源码示例:sklearn.metrics.recall_score()
示例1
def multi_class_classification(data_X, data_Y):
    '''
    Cross-validate a linear SVM and print multi-class evaluation metrics.

    A ``svm.SVC(C=1, kernel='linear')`` is evaluated with 2-fold
    ``cross_val_predict`` on ``data_X`` / ``data_Y``.  Each metric is then
    printed on its own line as ``<name> <value>``.  Nothing is returned.

    The original fitted the classifier on the full data before calling
    ``cross_val_predict``; that fit is dropped because cross-validation
    works on clones of the estimator, so the extra fit never influenced
    the predictions.

    log_loss, roc_auc_score and matthews_corrcoef were disabled in the
    original and remain omitted here.

    :param data_X: feature matrix passed straight to scikit-learn
    :param data_Y: target labels passed straight to scikit-learn
    '''
    def _emit(label, value):
        # A single pre-formatted string prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print("%s %s" % (label, value))

    svc = svm.SVC(C=1, kernel='linear')
    predicted = cross_val_predict(svc, data_X, data_Y, cv=2)

    _emit("accuracy", metrics.accuracy_score(data_Y, predicted))
    _emit("f1 score macro", metrics.f1_score(data_Y, predicted, average='macro'))
    _emit("f1 score micro", metrics.f1_score(data_Y, predicted, average='micro'))
    _emit("precision score", metrics.precision_score(data_Y, predicted, average='macro'))
    _emit("recall score", metrics.recall_score(data_Y, predicted, average='macro'))
    _emit("hamming_loss", metrics.hamming_loss(data_Y, predicted))
    _emit("classification_report", metrics.classification_report(data_Y, predicted))
    # NOTE(review): jaccard_similarity_score was deprecated and later removed
    # from scikit-learn (jaccard_score is the successor, with different
    # multi-class semantics) -- confirm against the installed version.
    _emit("jaccard_similarity_score", metrics.jaccard_similarity_score(data_Y, predicted))
    _emit("zero_one_loss", metrics.zero_one_loss(data_Y, predicted))
示例2
def evaluation_analysis(true_label, predicted):
    '''
    Print a battery of classification metrics for the given predictions.

    Each metric is printed on its own line as ``<name> <value>``.  The
    function prints only; it does not return the metrics (the original
    docstring said "return", but no value was ever returned).

    NOTE(review): log_loss and roc_auc_score receive the same ``predicted``
    values as the label-based metrics, exactly as in the original --
    presumably callers pass inputs for which that is meaningful; verify.

    :param true_label: ground-truth labels
    :param predicted: predicted labels
    '''
    def _emit(label, value):
        # A single pre-formatted string prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print("%s %s" % (label, value))

    _emit("accuracy", metrics.accuracy_score(true_label, predicted))
    _emit("f1 score macro", metrics.f1_score(true_label, predicted, average='macro'))
    _emit("f1 score micro", metrics.f1_score(true_label, predicted, average='micro'))
    _emit("precision score", metrics.precision_score(true_label, predicted, average='macro'))
    _emit("recall score", metrics.recall_score(true_label, predicted, average='macro'))
    _emit("hamming_loss", metrics.hamming_loss(true_label, predicted))
    _emit("classification_report", metrics.classification_report(true_label, predicted))
    # NOTE(review): jaccard_similarity_score was deprecated and later removed
    # from scikit-learn -- confirm against the installed version.
    _emit("jaccard_similarity_score", metrics.jaccard_similarity_score(true_label, predicted))
    _emit("log_loss", metrics.log_loss(true_label, predicted))
    _emit("zero_one_loss", metrics.zero_one_loss(true_label, predicted))
    _emit("AUC&ROC", metrics.roc_auc_score(true_label, predicted))
    _emit("matthews_corrcoef", metrics.matthews_corrcoef(true_label, predicted))
示例3
def classification_scores(gts, preds, labels):
    """Compute overall, micro, macro and per-class classification scores.

    Args:
        gts: ground-truth labels (any array-like; coerced with np.asarray).
        preds: predicted labels, aligned element-wise with ``gts``.
        labels: class labels used for the per-class accuracies and for the
            row/column order of the confusion matrix.

    Returns:
        A 12-tuple ``(accuracy, f1_micro, precision_micro, recall_micro,
        f1_macro, precision_macro, recall_macro, confusion,
        class_accuracies, f1s, precisions, recalls)``.

    NOTE(review): the class-wise f1/precision/recall are computed with
    ``average=None`` and no ``labels=`` argument, so their ordering follows
    scikit-learn's own label discovery rather than ``labels`` -- confirm
    this is what callers expect.
    """
    # Boolean masking below needs real arrays; with plain lists the
    # comparison ``gts == lab`` would collapse to a single bool.
    gts = np.asarray(gts)
    preds = np.asarray(preds)

    accuracy = metrics.accuracy_score(gts, preds)

    class_accuracies = []
    for lab in labels:  # TODO Fix (carried over from the original; labels absent from gts select nothing)
        selected = gts == lab
        class_accuracies.append(
            metrics.accuracy_score(gts[selected], preds[selected]))
    class_accuracies = np.array(class_accuracies)

    f1_micro = metrics.f1_score(gts, preds, average='micro')
    precision_micro = metrics.precision_score(gts, preds, average='micro')
    recall_micro = metrics.recall_score(gts, preds, average='micro')
    f1_macro = metrics.f1_score(gts, preds, average='macro')
    precision_macro = metrics.precision_score(gts, preds, average='macro')
    recall_macro = metrics.recall_score(gts, preds, average='macro')

    # class wise score
    f1s = metrics.f1_score(gts, preds, average=None)
    precisions = metrics.precision_score(gts, preds, average=None)
    recalls = metrics.recall_score(gts, preds, average=None)

    confusion = metrics.confusion_matrix(gts, preds, labels=labels)

    return (accuracy, f1_micro, precision_micro, recall_micro, f1_macro,
            precision_macro, recall_macro, confusion, class_accuracies,
            f1s, precisions, recalls)
示例4
def get_all_metrics(model, eval_data, eval_labels, pred_labels):
    """Print and return ROC/AUC, model accuracy and precision/recall scores.

    ``model.evaluate(eval_data, eval_labels, verbose=0)`` supplies the
    accuracy (element 1 of its result); every other value is derived from
    ``eval_labels`` and ``pred_labels``.

    Returns:
        ``(auc, accuracy, precision, recall, f1, average_precision, fpr, tpr)``
    """
    def _report(template, scorer):
        # Compute one score, print it with the given template, hand it back.
        value = scorer(eval_labels, pred_labels)
        print(template.format(value))
        return value

    false_pos, true_pos, _ = roc_curve(eval_labels, pred_labels)
    roc_area = auc(false_pos, true_pos)
    print("auc_keras:" + str(roc_area))

    test_accuracy = model.evaluate(eval_data, eval_labels, verbose=0)[1]
    print("Test accuracy: " + str(test_accuracy))

    prec = _report('Precision score: {0:0.2f}', precision_score)
    rec = _report('Recall score: {0:0.2f}', recall_score)
    f_measure = _report('F1 score: {0:0.2f}', f1_score)
    avg_prec = _report('Average precision-recall score: {0:0.2f}',
                       average_precision_score)

    return (roc_area, test_accuracy, prec, rec, f_measure, avg_prec,
            false_pos, true_pos)
示例5
def get_all_metrics_(eval_labels, pred_labels):
    """Print and return ROC/AUC plus precision, recall, F1 and average precision.

    All values are computed from ``eval_labels`` and ``pred_labels`` only.

    Returns:
        ``(auc, precision, recall, f1, average_precision, fpr, tpr)``
    """
    false_pos, true_pos, _ = roc_curve(eval_labels, pred_labels)
    roc_area = auc(false_pos, true_pos)
    print("auc_keras:" + str(roc_area))

    # Each score is printed right after it is computed, in this order.
    reported = []
    for template, scorer in (
            ('Precision score: {0:0.2f}', precision_score),
            ('Recall score: {0:0.2f}', recall_score),
            ('F1 score: {0:0.2f}', f1_score),
            ('Average precision-recall score: {0:0.2f}',
             average_precision_score)):
        value = scorer(eval_labels, pred_labels)
        print(template.format(value))
        reported.append(value)

    prec, rec, f_measure, avg_prec = reported
    return roc_area, prec, rec, f_measure, avg_prec, false_pos, true_pos
示例6
def run_evaluate(self, test):
    """Evaluate the model on a test set, batch by batch.

    Args:
        test: dataset consumed by ``minibatches``; each batch unpacks into
            (words, pos1, pos2, pos, labels).

    Returns:
        dict with keys "acc", "p", "r" and "f1"; precision, recall and F1
        are macro-averaged.
    """
    gold, guessed = [], []
    for words, pos1, pos2, pos, batch_labels in minibatches(
            test, self.config.batch_size):
        batch_pred = self.predict_batch(words, pos1, pos2, pos)
        assert len(batch_pred) == len(batch_labels)
        gold.extend(batch_labels)
        guessed.extend(batch_pred.tolist())

    return {
        "acc": accuracy_score(gold, guessed),
        "p": precision_score(gold, guessed, average='macro'),
        "r": recall_score(gold, guessed, average='macro'),
        "f1": f1_score(gold, guessed, average='macro'),
    }
示例7
def evaluate(trueValues, predicted, decimals, note):
    """Print accuracy, weighted precision/recall/F1 and per-class F1.

    Args:
        trueValues: ground-truth labels.
        predicted: predicted labels.
        decimals: number of decimals every printed value is rounded to.
        note: heading printed before the metrics.

    The per-class F1 scores of the first two classes are printed together
    with their unweighted mean, so at least two classes must be present.
    Nothing is returned.
    """
    def _emit(prefix, number):
        # Every prefix ends in a tab, after which the Python 2 print
        # statement adds no separating space; plain concatenation gives
        # the same output on Python 2 and Python 3.
        print(prefix + str(round(number, decimals)))

    print(note)
    label = 1
    avg = 'weighted'
    a = accuracy_score(trueValues, predicted)
    p = precision_score(trueValues, predicted, pos_label=label, average=avg)
    r = recall_score(trueValues, predicted, pos_label=label, average=avg)
    avg_f1 = f1_score(trueValues, predicted, pos_label=label, average=avg)
    fclasses = f1_score(trueValues, predicted, average=None)
    f1c1, f1c2 = fclasses[0], fclasses[1]
    fw = (f1c1 + f1c2) / 2.0

    _emit('accuracy:\t', a)
    _emit('precision:\t', p)
    _emit('recall:\t', r)
    _emit('avg f1:\t', avg_f1)
    _emit('c1 f1:\t', f1c1)
    _emit('c2 f1:\t', f1c2)
    _emit('avg(c1,c2):\t', fw)
    print('------------')
###################################################################################
# split a parallel or comparable corpus into two parts
示例8
def accuracy(y_true, y_pred):
    """Compute multi-class recall, precision, F1 and the confusion matrix.

    Both inputs are reduced to class indices with a row-wise argmax, so
    they are expected to be 2-D (one row per sample, one column per class).

    Args:
        y_true: per-sample class scores / one-hot rows of the ground truth.
        y_pred: per-sample class scores of the prediction; only the first
            ``len(y_true)`` rows are used, as in the original loop.

    Returns:
        ``(Sens, Prec, F1, cnf_mat)``: macro recall, macro precision,
        weighted F1 and the confusion matrix.
    """
    n_samples = len(y_true)
    # Row-wise argmax in one vectorised call instead of a Python loop.
    # The float dtype mirrors the np.zeros buffers the original filled in.
    y = np.argmax(y_true, axis=1).astype(float)
    y_ = np.argmax(y_pred[:n_samples], axis=1).astype(float)

    # Confusion matrix over the class indices.
    cnf_mat = confusion_matrix(y, y_)

    # Multi-class evaluation scores.  The original also computed
    # precision_recall_fscore_support(beta=0.5) into a variable that was
    # never used; that dead computation is removed.
    Sens = recall_score(y, y_, average='macro')
    Prec = precision_score(y, y_, average='macro')
    F1 = f1_score(y, y_, average='weighted')
    return Sens, Prec, F1, cnf_mat
示例9
def test_precision_recall_f_ignored_labels():
    # Recall restricted to labels [1, 3] must match hand-computed values
    # and must differ from recall over all labels, for both the plain
    # label lists and their binarized (indicator) form.
    truth = [1, 1, 2, 3]
    guess = [1, 3, 3, 3]
    all_classes = np.arange(5)
    cases = [
        (truth, guess),
        (label_binarize(truth, classes=all_classes),
         label_binarize(guess, classes=all_classes)),
    ]

    for case_true, case_pred in cases:
        recall_subset = partial(recall_score, case_true, case_pred,
                                labels=[1, 3])
        recall_full = partial(recall_score, case_true, case_pred,
                              labels=None)

        assert_array_almost_equal([.5, 1.], recall_subset(average=None))
        assert_almost_equal((.5 + 1.) / 2, recall_subset(average='macro'))
        assert_almost_equal((.5 * 2 + 1. * 1) / 3,
                            recall_subset(average='weighted'))
        assert_almost_equal(2. / 3, recall_subset(average='micro'))

        # The checks above only mean something if restricting the labels
        # actually changes the averaged result.
        for mode in ('macro', 'weighted', 'micro'):
            assert_not_equal(recall_subset(average=mode),
                             recall_full(average=mode))
示例10
def test_recall_warnings():
    # recall_score must stay silent when true samples exist, and must warn
    # with the expected message when there are none (micro and binary).
    all_ones = np.array([[1, 1], [1, 1]])
    all_zeros = np.array([[0, 0], [0, 0]])
    expected_message = ('Recall is ill-defined and '
                        'being set to 0.0 due to no true samples.')

    assert_no_warnings(recall_score, all_ones, all_zeros, average='micro')

    clean_warning_registry()
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')

        # Multilabel indicator input without any true sample.
        recall_score(all_zeros, all_ones, average='micro')
        assert_equal(str(caught.pop().message), expected_message)

        # Label input without any positive true sample.
        recall_score([0, 0], [0, 0])
        assert_equal(str(caught.pop().message), expected_message)
示例11
def test_prf_average_binary_data_non_binary():
    # Precision/recall/F-measures must raise ValueError with a helpful
    # message when the target is not binary and ``average`` is left at its
    # default.
    multiclass_case = (
        [1, 2, 3, 3],
        [1, 2, 3, 1],
        ("Target is multiclass but average='binary'. Please "
         "choose another average setting, one of ["
         "None, 'micro', 'macro', 'weighted']."),
    )
    indicator_case = (
        np.array([[0, 1, 1], [1, 0, 0], [0, 0, 1]]),
        np.array([[0, 1, 0], [1, 0, 0], [0, 0, 1]]),
        ("Target is multilabel-indicator but average='binary'. Please "
         "choose another average setting, one of ["
         "None, 'micro', 'macro', 'weighted', 'samples']."),
    )
    prf_metrics = [precision_score, recall_score, f1_score,
                   partial(fbeta_score, beta=2)]

    for targets, predictions, expected_msg in (multiclass_case,
                                               indicator_case):
        for metric in prf_metrics:
            assert_raise_message(ValueError, expected_msg,
                                 metric, targets, predictions)
示例12
def test_grid_search_cv_results_multimetric():
    # Fit one multi-metric and two single-metric grid searches per ``iid``
    # setting and hand them to the comparison helper.
    X, y = make_classification(n_samples=50, n_features=4, random_state=42)

    n_splits = 3
    params = [
        {'kernel': ['rbf'], 'C': [1, 10], 'gamma': [0.1, 1]},
        {'kernel': ['poly'], 'degree': [1, 2]},
    ]

    def _fitted_search(scoring, iid):
        # Build, fit and sanity-check a single GridSearchCV.
        search = GridSearchCV(SVC(gamma='scale'), cv=n_splits,
                              iid=iid, param_grid=params,
                              scoring=scoring, refit=False)
        search.fit(X, y)
        assert_equal(search.iid, iid)
        return search

    for iid in (False, True):
        scorings = (
            {'accuracy': make_scorer(accuracy_score),
             'recall': make_scorer(recall_score)},
            'accuracy',
            'recall',
        )
        grid_searches = [_fitted_search(scoring, iid)
                         for scoring in scorings]
        compare_cv_results_multimetric_with_single(*grid_searches, iid=iid)
示例13
def test_ovr_multilabel_dataset():
    # One-vs-rest MultinomialNB on a generated multilabel problem must hit
    # the reference micro precision/recall, with and without unlabeled
    # samples allowed.
    base_clf = MultinomialNB(alpha=1)
    # (allow_unlabeled, expected precision, expected recall)
    scenarios = [(True, 0.51, 0.51), (False, 0.66, 0.80)]

    for allow_unlabeled, expected_prec, expected_recall in scenarios:
        X, Y = datasets.make_multilabel_classification(
            n_samples=100,
            n_features=20,
            n_classes=5,
            n_labels=2,
            length=50,
            allow_unlabeled=allow_unlabeled,
            random_state=0)

        # First 80 samples train, the remaining 20 test.
        X_train, X_test = X[:80], X[80:]
        Y_train, Y_test = Y[:80], Y[80:]

        clf = OneVsRestClassifier(base_clf).fit(X_train, Y_train)
        Y_pred = clf.predict(X_test)

        assert clf.multilabel_
        micro_precision = precision_score(Y_test, Y_pred, average="micro")
        assert_almost_equal(micro_precision, expected_prec, decimal=2)
        micro_recall = recall_score(Y_test, Y_pred, average="micro")
        assert_almost_equal(micro_recall, expected_recall, decimal=2)
示例14
def get_score(self, model, texta, textb, labels, score_type='f1'):
    """Score the model's arg-max predictions for a pair of text batches.

    Args:
        model: callable invoked as ``model(texta, textb)``; its output is
            soft-maxed over dim 1 and the arg-max index is the prediction.
        texta, textb: input tensors; their size along dim 1 must equal
            ``len(labels)``.
        labels: tensor of gold labels (flattened before scoring).
        score_type: one of 'f1', 'p', 'r', 'acc'; any other value falls
            back to F1.

    Returns:
        The micro-averaged F1/precision/recall, or the plain accuracy.
    """
    metrics_map = {
        'f1': f1_score,
        'p': precision_score,
        'r': recall_score,
        'acc': accuracy_score
    }
    metric_func = metrics_map.get(score_type, f1_score)
    assert texta.size(1) == textb.size(1) == len(labels)
    vec_predict = model(texta, textb)
    soft_predict = torch.softmax(vec_predict, dim=1)
    _, predict_index = torch.max(soft_predict.cpu().data, dim=1)
    labels = labels.view(-1).cpu().data.numpy()
    # accuracy_score has no ``average`` parameter; passing it raised
    # TypeError for score_type='acc'.
    if metric_func is accuracy_score:
        return metric_func(labels, predict_index)
    # scikit-learn expects (y_true, y_pred); micro-averaged scores are
    # unchanged by the corrected order.
    return metric_func(labels, predict_index, average='micro')
示例15
def get_score(self, model, texta, textb, labels, score_type='f1'):
    """Score thresholded predictions for a pair of text batches.

    Args:
        model: callable invoked as ``model(texta, textb)``; outputs greater
            than 0.5 count as positive predictions.
        texta, textb: input tensors; their size along dim 1 must equal
            ``len(labels)``.
        labels: tensor of gold labels (flattened before scoring).
        score_type: one of 'f1', 'p', 'r', 'acc'; any other value falls
            back to F1.

    Returns:
        The micro-averaged F1/precision/recall, or the plain accuracy.
    """
    metrics_map = {
        'f1': f1_score,
        'p': precision_score,
        'r': recall_score,
        'acc': accuracy_score
    }
    metric_func = metrics_map.get(score_type, f1_score)
    assert texta.size(1) == textb.size(1) == len(labels)
    predict_prob = model(texta, textb)
    predict_labels = torch.gt(predict_prob, 0.5)
    predict_labels = predict_labels.view(-1).cpu().data.numpy()
    labels = labels.view(-1).cpu().data.numpy()
    # accuracy_score has no ``average`` parameter; passing it raised
    # TypeError for score_type='acc'.
    if metric_func is accuracy_score:
        return metric_func(labels, predict_labels)
    # scikit-learn expects (y_true, y_pred); micro-averaged scores are
    # unchanged by the corrected order.
    return metric_func(labels, predict_labels, average='micro')
示例16
def get_score(self, model, x, y, pos, rel, field_x, field_y, field_pos, score_type='f1'):
    """Score the model's prediction for one example given as token lists.

    Args:
        model: callable taking (x, pos, rel, length) column tensors; the
            first element of its result is used as the prediction.
        x, pos: token and tag sequences, mapped to ids through
            ``field_x.stoi`` and ``field_pos.stoi``.
        y: gold tag sequence, mapped to ids through ``field_y.stoi``.
        rel: sequence of values convertible with ``int``.
        field_x, field_y, field_pos: vocabulary holders exposing ``stoi``.
        score_type: one of 'f1', 'p', 'r', 'acc'; any other value falls
            back to F1.

    Returns:
        The micro-averaged F1/precision/recall, or the plain accuracy.
    """
    metrics_map = {
        'f1': f1_score,
        'p': precision_score,
        'r': recall_score,
        'acc': accuracy_score
    }
    metric_func = metrics_map.get(score_type, f1_score)
    vec_x = torch.tensor([field_x.stoi[token] for token in x])
    len_vec_x = torch.tensor([len(vec_x)]).to(DEVICE)
    vec_pos = torch.tensor([field_pos.stoi[tag] for tag in pos])
    # Distinct loop variable: the original reused ``x``, shadowing the
    # parameter (and overwriting it under Python 2).
    vec_rel = torch.tensor([int(value) for value in rel])
    predict_y = model(vec_x.view(-1, 1).to(DEVICE),
                      vec_pos.view(-1, 1).to(DEVICE),
                      vec_rel.view(-1, 1).to(DEVICE),
                      len_vec_x)[0]
    true_y = [field_y.stoi[tag] for tag in y]
    assert len(true_y) == len(predict_y)
    # accuracy_score has no ``average`` parameter; passing it raised
    # TypeError for score_type='acc'.
    if metric_func is accuracy_score:
        return metric_func(true_y, predict_y)
    # scikit-learn expects (y_true, y_pred); micro-averaged scores are
    # unchanged by the corrected order.
    return metric_func(true_y, predict_y, average='micro')
示例17
def get_score(self, model, x, y, score_type='f1'):
    """Score the actions the model took against the gold actions.

    Args:
        model: callable invoked as ``model(x)``, returning a 3-tuple whose
            last element is the sequence of actions taken.
        x: model input.
        y: tensor of gold actions; must have as many entries as actions
            were taken.
        score_type: one of 'f1', 'p', 'r', 'acc'; any other value falls
            back to F1.

    Returns:
        The micro-averaged F1/precision/recall, or the plain accuracy.
    """
    metrics_map = {
        'f1': f1_score,
        'p': precision_score,
        'r': recall_score,
        'acc': accuracy_score
    }
    metric_func = metrics_map.get(score_type, f1_score)
    _outputs, _dep_graph, actions_done = model(x)
    assert len(actions_done) == len(y)
    predict_y = actions_done
    true_y = y.cpu().view(-1).tolist()
    # accuracy_score has no ``average`` parameter; passing it raised
    # TypeError for score_type='acc'.
    if metric_func is accuracy_score:
        return metric_func(true_y, predict_y)
    # scikit-learn expects (y_true, y_pred); micro-averaged scores are
    # unchanged by the corrected order.
    return metric_func(true_y, predict_y, average='micro')
示例18
def get_score(self, model, texts, labels, score_type='f1'):
    """Score the model's arg-max predictions for a batch of texts.

    Args:
        model: callable invoked as ``model(texts)``; its output is
            soft-maxed over dim 1 and the arg-max index is the prediction.
        texts: input tensor whose size along dim 0 must equal
            ``len(labels)``.
        labels: tensor of gold labels (flattened before scoring).
        score_type: one of 'f1', 'p', 'r', 'acc'; any other value falls
            back to F1.

    Returns:
        The micro-averaged F1/precision/recall, or the plain accuracy.
    """
    metrics_map = {
        'f1': f1_score,
        'p': precision_score,
        'r': recall_score,
        'acc': accuracy_score
    }
    metric_func = metrics_map.get(score_type, f1_score)
    assert texts.size(0) == len(labels)
    vec_predict = model(texts)
    soft_predict = torch.softmax(vec_predict, dim=1)
    _, predict_index = torch.max(soft_predict.cpu().data, dim=1)
    labels = labels.view(-1).cpu().data.numpy()
    # accuracy_score has no ``average`` parameter; passing it raised
    # TypeError for score_type='acc'.
    if metric_func is accuracy_score:
        return metric_func(labels, predict_index)
    # scikit-learn expects (y_true, y_pred); micro-averaged scores are
    # unchanged by the corrected order.
    return metric_func(labels, predict_index, average='micro')
示例19
def get_score(self, model, texts, labels, score_type='f1'):
    """Score the model's arg-max predictions for a collection of texts.

    Args:
        model: callable invoked as ``model(texts)``; its output is
            soft-maxed over dim 1 and the arg-max index is the prediction.
        texts: model input; ``len(texts)`` must equal ``len(labels)``.
        labels: tensor of gold labels (flattened before scoring).
        score_type: one of 'f1', 'p', 'r', 'acc'; any other value falls
            back to F1.

    Returns:
        The micro-averaged F1/precision/recall, or the plain accuracy.
    """
    metrics_map = {
        'f1': f1_score,
        'p': precision_score,
        'r': recall_score,
        'acc': accuracy_score
    }
    metric_func = metrics_map.get(score_type, f1_score)
    assert len(texts) == len(labels)
    vec_predict = model(texts)
    soft_predict = torch.softmax(vec_predict, dim=1)
    _, predict_index = torch.max(soft_predict.cpu().data, dim=1)
    labels = labels.view(-1).cpu().data.numpy()
    # accuracy_score has no ``average`` parameter; passing it raised
    # TypeError for score_type='acc'.
    if metric_func is accuracy_score:
        return metric_func(labels, predict_index)
    # scikit-learn expects (y_true, y_pred); micro-averaged scores are
    # unchanged by the corrected order.
    return metric_func(labels, predict_index, average='micro')
示例20
def get_score(self, model, src, src_lens, trg, score_type='f1'):
    """Score token-level arg-max predictions of a sequence model.

    Args:
        model: callable invoked as ``model(src, src_lens, trg)``.
        src, src_lens: source batch and its lengths, forwarded to the model.
        trg: target tensor; it is transposed on its first two dims before
            being flattened.
        score_type: one of 'f1', 'p', 'r', 'acc'; any other value falls
            back to F1.

    Returns:
        The micro-averaged F1/precision/recall, or the plain accuracy,
        over all flattened positions.
    """
    metrics_map = {
        'f1': f1_score,
        'p': precision_score,
        'r': recall_score,
        'acc': accuracy_score
    }
    metric_func = metrics_map.get(score_type, f1_score)
    output = model(src, src_lens, trg)
    # Index 0 along the first dim is dropped from both the output and the
    # transposed target -- presumably the start-of-sequence step; confirm.
    output = output[1:].contiguous()
    output = output.view(-1, output.shape[-1])
    trg = trg.transpose(1, 0)
    trg = trg[1:].contiguous()
    trg = trg.view(-1)
    soft_predict = torch.softmax(output, dim=1)
    _, predict_index = torch.max(soft_predict.cpu().data, dim=1)
    labels = trg.cpu().data.numpy()
    # accuracy_score has no ``average`` parameter; passing it raised
    # TypeError for score_type='acc'.
    if metric_func is accuracy_score:
        return metric_func(labels, predict_index)
    # scikit-learn expects (y_true, y_pred); micro-averaged scores are
    # unchanged by the corrected order.
    return metric_func(labels, predict_index, average='micro')
示例21
def get_score(self, model, src, src_lens, trg, score_type='f1'):
    """Score token-level arg-max predictions of a sequence model.

    Args:
        model: callable invoked as ``model(src, src_lens, trg)``.
        src, src_lens: source batch and its lengths, forwarded to the model.
        trg: target tensor; it is transposed on its first two dims before
            being flattened.
        score_type: one of 'f1', 'p', 'r', 'acc'; any other value falls
            back to F1.

    Returns:
        The micro-averaged F1/precision/recall, or the plain accuracy,
        over all flattened positions.
    """
    metrics_map = {
        'f1': f1_score,
        'p': precision_score,
        'r': recall_score,
        'acc': accuracy_score
    }
    metric_func = metrics_map.get(score_type, f1_score)
    output = model(src, src_lens, trg)
    # Index 0 along the first dim is dropped from both the output and the
    # transposed target -- presumably the start-of-sequence step; confirm.
    output = output[1:].contiguous()
    output = output.view(-1, output.shape[-1])
    trg = trg.transpose(1, 0)
    trg = trg[1:].contiguous()
    trg = trg.view(-1)
    soft_predict = torch.softmax(output, dim=1)
    _, predict_index = torch.max(soft_predict.cpu().data, dim=1)
    labels = trg.cpu().data.numpy()
    # accuracy_score has no ``average`` parameter; passing it raised
    # TypeError for score_type='acc'.
    if metric_func is accuracy_score:
        return metric_func(labels, predict_index)
    # scikit-learn expects (y_true, y_pred); micro-averaged scores are
    # unchanged by the corrected order.
    return metric_func(labels, predict_index, average='micro')
示例22
def add_run(self, y_true, y_pred, domains_test):
    """
    Record the outcome of one completed run.

    The label pair is stored, appended to the accumulated label arrays,
    its confusion matrix is kept, and every sample whose prediction
    differs from the truth is remembered together with its domain.

    NOTE(review): ``self.scores`` is replaced on every call with
    single-element lists rather than extended -- confirm this is intended.

    :param y_true: true labels
    :param y_pred: predicted labels
    :param domains_test: per-sample domains, index-aligned with the labels
    :return: None
    """
    log.verbose('Adding run.\ny_true: {!s}\ny_pred: {!s}'.format(y_true, y_pred))

    self.ys.append((y_true, y_pred))
    self.y_true = numpy.concatenate((self.y_true, y_true))
    self.y_pred = numpy.concatenate((self.y_pred, y_pred))
    self.cms.append(confusion_matrix(y_true, y_pred))

    # Scores are evaluated in this order; each is wrapped in a list.
    scorers = (
        ('accuracy', accuracy_score),
        ('precision', precision_score),
        ('recall', recall_score),
        ('roc', roc_auc_score),
        ('f1', f1_score),
    )
    self.scores = {name: [scorer(y_true, y_pred)] for name, scorer in scorers}

    self.missclassified.extend(
        (domains_test[idx], y_true[idx])
        for idx in range(len(y_true))
        if y_true[idx] != y_pred[idx])
示例23
def test_recall_op(generator_fn, y_true_all, y_pred_all, pos_indices,
                   average):
    """Check the streaming TensorFlow recall against scikit-learn's.

    Args:
        generator_fn: generator yielding (y_true, y_pred) int batches.
        y_true_all, y_pred_all: the labels of the whole dataset.
        pos_indices: class indices treated as positive; used as ``labels``
            for scikit-learn and as ``pos_indices`` for the TF metric.
        average: averaging mode forwarded to both implementations.
    """
    # Recall on the whole dataset.  ``labels`` is passed by keyword:
    # current scikit-learn makes everything after y_pred keyword-only.
    pr_sk = recall_score(
        y_true_all, y_pred_all, labels=pos_indices, average=average)

    # Create Tensorflow graph
    ds = tf.data.Dataset.from_generator(
        generator_fn, (tf.int32, tf.int32), ([None], [None]))
    y_true, y_pred = ds.make_one_shot_iterator().get_next()
    pr_tf = tf_metrics.recall(y_true, y_pred, 4, pos_indices, average=average)

    with tf.Session() as sess:
        # Initialize and run the update op on each batch
        sess.run(tf.local_variables_initializer())
        while True:
            try:
                sess.run(pr_tf[1])
            except OutOfRangeError:
                # The dataset iterator is exhausted.
                break

        # Check final value
        assert np.allclose(sess.run(pr_tf[0]), pr_sk)
示例24
def calc_metrics(y_true, y_hat, max_steps=1000):
    """Compute log-loss, AUC and the F1-optimal threshold metrics.

    Thresholds ``i / max_steps`` for ``i`` in ``1 .. max_steps - 1`` are
    swept; the F1, precision and recall at the threshold with the highest
    F1 are reported together with that threshold.

    Returns:
        dict with keys 'Logloss', 'AUC', 'F1', 'Precision', 'Recall' and
        'Threshold'.
    """
    truth = np.array(y_true)
    scores = np.array(y_hat)

    logloss = float(log_loss(truth, scores))
    roc_area = roc_auc_score(truth, scores)

    f1_curve, precision_curve, recall_curve = [], [], []
    for step in range(1, max_steps):
        hard_pred = scores > float(step) / max_steps
        f1_curve.append(f1_score(truth, hard_pred))
        precision_curve.append(precision_score(truth, hard_pred))
        recall_curve.append(recall_score(truth, hard_pred))

    best = np.argmax(f1_curve)
    return {
        'Logloss': logloss,
        'AUC': roc_area,
        'F1': f1_curve[best],
        'Precision': precision_curve[best],
        'Recall': recall_curve[best],
        # The sweep starts at step 1, so list index ``best`` maps to
        # step ``best + 1``.
        'Threshold': float(best + 1) / max_steps,
    }
示例25
def evaluate_precision_recall(y, target, labels):
    """Compute per-class precision/recall/F1 plus the overall accuracy.

    ``target`` is truncated to ``len(y)``.  Class ids run from 0 to
    ``max(target)``; for each one both sequences are reduced with
    ``_extract_single_class`` and scored, and the resulting entry is
    printed as it is produced.

    Returns:
        ``(accuracy, results)`` where ``results`` holds one dict per class.
    """
    import sklearn.metrics as metrics

    target = target[:len(y)]
    num_classes = max(target) + 1

    results = []
    for class_id in range(num_classes):
        class_target = _extract_single_class(class_id, target)
        class_y = _extract_single_class(class_id, y)
        entry = {
            'precision': metrics.precision_score(class_target, class_y),
            'recall': metrics.recall_score(class_target, class_y),
            'f1': metrics.f1_score(class_target, class_y),
            'fraction': sum(class_target)/len(target),
            '#of_class': int(sum(class_target)),
            'label': labels[class_id],
            'label_id': class_id
        }
        results.append(entry)
        print('%d/%d' % (class_id, num_classes), entry)

    return metrics.accuracy_score(target, y), results
示例26
def report_evaluation_metrics(y_true, y_pred):
    """Print average precision, precision, recall and F1 for labels 0/1.

    All four scores are computed first and printed afterwards, each with
    two decimals.  Nothing is returned.
    """
    binary_opts = {'labels': [0, 1], 'pos_label': 1}
    lines = [
        ('Average precision-recall score: {0:0.2f}',
         average_precision_score(y_true, y_pred)),
        ('Precision: {0:0.2f}',
         precision_score(y_true, y_pred, **binary_opts)),
        ('Recall: {0:0.2f}',
         recall_score(y_true, y_pred, **binary_opts)),
        ('F1: {0:0.2f}',
         f1_score(y_true, y_pred, **binary_opts)),
    ]
    for template, value in lines:
        print(template.format(value))
示例27
def report_metrics(self, threshold):
    """Print micro- and macro-averaged precision, recall and F1.

    Scores are computed from ``self.y_true`` and ``self.y_pred``;
    ``threshold`` only appears in the printed heading.
    """
    scorers = (
        ("precision:\t{}", precision_score),
        ("recall:\t{}", recall_score),
        ("f1:\t{}", f1_score),
    )
    for average_strategy in ("micro", "macro"):
        print("{} average strategy, threshold {}".format(average_strategy, threshold))
        for template, scorer in scorers:
            value = scorer(self.y_true, self.y_pred, average=average_strategy)
            print(template.format(value))
示例28
def precision_and_recall(label_gt, label_pred, n_class):
    """Return per-class precision and recall as float32 arrays.

    Both label maps are flattened before scoring; classes
    ``0 .. n_class - 1`` are evaluated, one entry per class.
    """
    from sklearn.metrics import precision_score, recall_score

    assert len(label_gt) == len(label_pred)

    # float32 output buffers, one slot per class.
    precision = np.zeros(n_class, dtype=np.float32)
    recall = np.zeros(n_class, dtype=np.float32)

    gt_flat = np.asarray(label_gt, dtype=np.float32).ravel()
    pred_flat = np.asarray(label_pred, dtype=np.float32).ravel()
    class_ids = range(n_class)

    precision[...] = precision_score(gt_flat, pred_flat, average=None,
                                     labels=class_ids)
    recall[...] = recall_score(gt_flat, pred_flat, average=None,
                               labels=class_ids)
    return precision, recall
示例29
def recall_at_threshold(self, y_true, y_scores, threshold):
    '''
    Dynamic threshold recall: scores at or above ``threshold`` are taken
    as positive (1), everything else as negative (0).
    '''
    hard_labels = [int(score >= threshold) for score in y_scores]
    return metrics.recall_score(y_true, np.asarray(hard_labels))
示例30
def recall_at_k(self, y_true, y_scores, k):
    '''
    Dynamic k recall, where 0<k<1.

    Predictions come from ``self.k_predictions(y_scores, k)``.
    '''
    return metrics.recall_score(y_true, self.k_predictions(y_scores, k))