Python source code examples: sklearn.metrics.precision_recall_fscore_support()
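The examples below are collected from real projects. As a quick orientation first (a minimal sketch with made-up toy labels): precision_recall_fscore_support returns four values, either per-class arrays of precision, recall, F-beta and support when average=None, or scalar averages (with support returned as None) when an averaging mode such as 'macro', 'micro', 'weighted' or 'binary' is requested.

import numpy as np
from sklearn.metrics import precision_recall_fscore_support

# toy labels, purely for illustration
y_true = np.array([0, 1, 1, 0, 1, 1])
y_pred = np.array([0, 1, 0, 0, 1, 1])

# one entry per class (here: class 0 and class 1)
p, r, f, s = precision_recall_fscore_support(y_true, y_pred, average=None)

# scalar averages; the returned support is None in this case
p_w, r_w, f_w, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')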
Example 1
def accuracy(y_true, y_pred):
    # compute the confusion matrix
    y = np.zeros(len(y_true))
    y_ = np.zeros(len(y_true))
    for i in range(len(y_true)):
        y[i] = np.argmax(y_true[i, :])
        y_[i] = np.argmax(y_pred[i, :])
    cnf_mat = confusion_matrix(y, y_)

    # Acc = 1.0*(cnf_mat[1][1]+cnf_mat[0][0])/len(y_true)
    # Sens = 1.0*cnf_mat[1][1]/(cnf_mat[1][1]+cnf_mat[1][0])
    # Spec = 1.0*cnf_mat[0][0]/(cnf_mat[0][0]+cnf_mat[0][1])

    # # plot the ROC curve
    # fpr, tpr, thresholds = roc_curve(y_true[:,0], y_pred[:,0])
    # Auc = auc(fpr, tpr)

    # compute multi-class evaluation metrics
    Sens = recall_score(y, y_, average='macro')
    Prec = precision_score(y, y_, average='macro')
    F1 = f1_score(y, y_, average='weighted')
    Support = precision_recall_fscore_support(y, y_, beta=0.5, average=None)
    return Sens, Prec, F1, cnf_mat
Example 2
def calc_test_result(result, test_label, test_mask):
    true_label = []
    predicted_label = []
    for i in range(result.shape[0]):
        for j in range(result.shape[1]):
            if test_mask[i, j] == 1:
                true_label.append(np.argmax(test_label[i, j]))
                predicted_label.append(np.argmax(result[i, j]))
    print("Confusion Matrix :")
    print(confusion_matrix(true_label, predicted_label))
    print("Classification Report :")
    print(classification_report(true_label, predicted_label, digits=4))
    print("Accuracy ", accuracy_score(true_label, predicted_label))
    print("Macro Classification Report :")
    print(precision_recall_fscore_support(true_label, predicted_label, average='macro'))
    print("Weighted Classification Report :")
    print(precision_recall_fscore_support(true_label, predicted_label, average='weighted'))
    # print "Normal Classification Report :"
    # print precision_recall_fscore_support(true_label, predicted_label)
Example 3
def test_precision_recall_f1_score_binary_averaged():
    y_true = np.array([0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1])
    y_pred = np.array([1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1])

    # compute scores with default labels introspection
    ps, rs, fs, _ = precision_recall_fscore_support(y_true, y_pred,
                                                    average=None)
    p, r, f, _ = precision_recall_fscore_support(y_true, y_pred,
                                                 average='macro')
    assert_equal(p, np.mean(ps))
    assert_equal(r, np.mean(rs))
    assert_equal(f, np.mean(fs))

    p, r, f, _ = precision_recall_fscore_support(y_true, y_pred,
                                                 average='weighted')
    support = np.bincount(y_true)
    assert_equal(p, np.average(ps, weights=support))
    assert_equal(r, np.average(rs, weights=support))
    assert_equal(f, np.average(fs, weights=support))
Example 4
def test_precision_recall_f1_no_labels(beta, average):
    y_true = np.zeros((20, 3))
    y_pred = np.zeros_like(y_true)

    p, r, f, s = assert_warns(UndefinedMetricWarning,
                              precision_recall_fscore_support,
                              y_true, y_pred, average=average,
                              beta=beta)
    assert_almost_equal(p, 0)
    assert_almost_equal(r, 0)
    assert_almost_equal(f, 0)
    assert_equal(s, None)

    fbeta = assert_warns(UndefinedMetricWarning, fbeta_score,
                         y_true, y_pred,
                         beta=beta, average=average)
    assert_almost_equal(fbeta, 0)
Example 5
def test_precision_recall_f1_no_labels_average_none():
    y_true = np.zeros((20, 3))
    y_pred = np.zeros_like(y_true)

    beta = 1

    # tp = [0, 0, 0]
    # fn = [0, 0, 0]
    # fp = [0, 0, 0]
    # support = [0, 0, 0]
    # |y_hat_i inter y_i | = [0, 0, 0]
    # |y_i| = [0, 0, 0]
    # |y_hat_i| = [0, 0, 0]

    p, r, f, s = assert_warns(UndefinedMetricWarning,
                              precision_recall_fscore_support,
                              y_true, y_pred, average=None, beta=beta)
    assert_array_almost_equal(p, [0, 0, 0], 2)
    assert_array_almost_equal(r, [0, 0, 0], 2)
    assert_array_almost_equal(f, [0, 0, 0], 2)
    assert_array_almost_equal(s, [0, 0, 0], 2)

    fbeta = assert_warns(UndefinedMetricWarning, fbeta_score,
                         y_true, y_pred, beta=beta, average=None)
    assert_array_almost_equal(fbeta, [0, 0, 0], 2)
Example 6
def test_fbeta_multiclass_with_weighted_average(self, device: str):
    self.predictions = self.predictions.to(device)
    self.targets = self.targets.to(device)

    labels = [0, 1]
    fbeta = FBetaMeasure(average="weighted", labels=labels)
    fbeta(self.predictions, self.targets)
    metric = fbeta.get_metric()
    precisions = metric["precision"]
    recalls = metric["recall"]
    fscores = metric["fscore"]

    weighted_precision, weighted_recall, weighted_fscore, _ = precision_recall_fscore_support(
        self.targets.cpu().numpy(),
        self.predictions.argmax(dim=1).cpu().numpy(),
        labels=labels,
        average="weighted",
    )

    # check value
    assert_allclose(precisions, weighted_precision)
    assert_allclose(recalls, weighted_recall)
    assert_allclose(fscores, weighted_fscore)
Example 7
def print_result(fold, y, y_predicted, id_class_mapping):
    """ print result matrix """

    n_classes = len(np.unique(y))

    p, r, f, s = precision_recall_fscore_support(y, y_predicted, labels=None, pos_label=1, average=None)
    a = [(accuracy_score(y[y == c], y_predicted[y == c])) for c in range(n_classes)]

    # count occurrences of classes
    count = Counter(y)

    print("\n")
    if fold is not None:
        print("Results on fold %d" % fold)
    print("\n")
    print("%30s | %s | %5s | %4s | %4s | %4s |" % ("LABEL", "CNT", "ACC ", "PR ", "RE ", "F1 "))
    print('-' * 70)
    for c in range(n_classes):
        print("%30s | %03d | %0.3f | %.2f | %.2f | %.3f |" % (id_class_mapping[c], count[c], a[c], p[c], r[c], f[c]))
    print('-' * 70)
    print("%30s | %03d | %0.3f | %.2f | %.2f | %.3f |" % ('average', len(y), np.mean(a), np.mean(p), np.mean(r), np.mean(f)))
    print('=' * 70)
    print("Overall Accuracy: %.3f %%" % (100.0 * accuracy_score(y, y_predicted)))
    print('=' * 70)
Example 8
def _update_onco_metrics(self, y_true, y_pred, prob):
    self.onco_gene_pred = pd.Series(y_pred, self.y.index)
    self.onco_gene_score = pd.Series(prob, self.y.index)

    # compute metrics for classification
    self.onco_gene_count[self.num_pred] = sum(y_pred)
    prec, recall, fscore, support = metrics.precision_recall_fscore_support(y_true, y_pred)
    self.onco_precision[self.num_pred] = prec[self.onco_num]
    self.onco_recall[self.num_pred] = recall[self.onco_num]
    self.onco_f1_score[self.num_pred] = fscore[self.onco_num]
    self.logger.debug('Onco Iter %d: Precision=%s, Recall=%s, f1_score=%s' % (
                      self.num_pred + 1, str(prec), str(recall), str(fscore)))

    # compute ROC curve metrics
    fpr, tpr, thresholds = metrics.roc_curve(y_true, prob)
    self.onco_tpr_array[self.num_pred, :] = interp(self.onco_fpr_array, fpr, tpr)
    # self.onco_mean_tpr[0] = 0.0

    # compute Precision-Recall curve metrics
    p, r, thresh = metrics.precision_recall_curve(y_true, prob)
    p, r, thresh = p[::-1], r[::-1], thresh[::-1]  # reverse order of results
    thresh = np.insert(thresh, 0, 1.0)
    self.onco_precision_array[self.num_pred, :] = interp(self.onco_recall_array, r, p)
    self.onco_threshold_array[self.num_pred, :] = interp(self.onco_recall_array, r, thresh)
Example 9
def _update_tsg_metrics(self, y_true, y_pred, prob):
    self.tsg_gene_pred = pd.Series(y_pred, self.y.index)
    self.tsg_gene_score = pd.Series(prob, self.y.index)

    # compute metrics for classification
    self.tsg_gene_count[self.num_pred] = sum(y_pred)
    prec, recall, fscore, support = metrics.precision_recall_fscore_support(y_true, y_pred)
    tsg_col = 1  # column for metrics related to tsg
    self.tsg_precision[self.num_pred] = prec[tsg_col]
    self.tsg_recall[self.num_pred] = recall[tsg_col]
    self.tsg_f1_score[self.num_pred] = fscore[tsg_col]
    self.logger.debug('Tsg Iter %d: Precision=%s, Recall=%s, f1_score=%s' % (
                      self.num_pred + 1, str(prec), str(recall), str(fscore)))

    # compute ROC curve metrics
    fpr, tpr, thresholds = metrics.roc_curve(y_true, prob)
    self.tsg_tpr_array[self.num_pred, :] = interp(self.tsg_fpr_array, fpr, tpr)
    # self.tsg_tpr_array[0] = 0.0

    # compute Precision-Recall curve metrics
    p, r, thresh = metrics.precision_recall_curve(y_true, prob)
    p, r, thresh = p[::-1], r[::-1], thresh[::-1]  # reverse order of results
    self.tsg_precision_array[self.num_pred, :] = interp(self.tsg_recall_array, r, p)
Example 10
def metrics(y_pred, y_true):
    """ Calculate evaluation metrics for precision, recall, and f1.

    Arguments
    ---------
    y_pred: ndarray, the predicted result list
    y_true: ndarray, the ground truth label list

    Returns
    -------
    precision: float, precision value
    recall: float, recall value
    f1: float, f1 measure value
    """
    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='binary')
    return precision, recall, f1
Example 11
def validation(classifier, data, y_data, y_target, class_names, title):
    # kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
    # cv = kfold
    t = 'Confusion matrix: ' + str(title)
    x = np.transpose(data)
    if classifier is None:
        print("No accuracy to be computed")
    else:
        accuracy = model_selection.cross_val_score(classifier, x, y_target, scoring='accuracy')
        print("Accuracy: " + str(accuracy))
    # precision = model_selection.cross_val_score(self.classifier, x, target, scoring='precision')
    # precision_score(y_true, y_pred, average='macro')
    # recall = model_selection.cross_val_score(self.classifier, x, target, scoring='recall')
    precision, recall, fscore, m = precision_recall_fscore_support(y_target, y_data, average='macro')
    cnf_matrix = confusion_matrix(y_target, y_data)
    print("Precision: " + str(precision) + ", Recall:" + str(recall) + ", f-score:" + str(fscore))

    np.set_printoptions(precision=2)
    # Plot non-normalized confusion matrix
    plt.figure()
    plot_confusion_matrix(cnf_matrix, classes=class_names, title=t)
    print("... finishing matrix plot")
    plt.show()
Example 12
def evaluate(model, data_iterator, num_steps, metric_labels):
    """Evaluate the model on `num_steps` batches."""
    # set model to evaluation mode
    model.eval()

    output_labels = list()
    target_labels = list()

    # compute metrics over the dataset
    for _ in range(num_steps):
        # fetch the next evaluation batch
        batch_data, batch_labels = next(data_iterator)

        # compute model output
        batch_output = model(batch_data)  # batch_size x num_labels
        batch_output_labels = torch.max(batch_output, dim=1)[1]
        output_labels.extend(batch_output_labels.data.cpu().numpy().tolist())
        target_labels.extend(batch_labels.data.cpu().numpy().tolist())

    # Calculate precision, recall and F1 for all relation categories
    p_r_f1_s = precision_recall_fscore_support(target_labels, output_labels, labels=metric_labels, average='micro')
    p_r_f1 = {'precision': p_r_f1_s[0] * 100,
              'recall': p_r_f1_s[1] * 100,
              'f1': p_r_f1_s[2] * 100}
    return p_r_f1
Example 13
def summary_util(self, type):
    if type == "test":
        Y_hat = self.model.predict(self.X_test)
        Y = self.Y_test
    elif type == "train":
        Y_hat = self.model.predict(self.X_train)
        Y = self.Y_train
    elif type == "val":
        Y_hat = self.model.predict(self.X_val)
        Y = self.Y_val
    elif type == "forecast":
        Y_hat = self.model.predict(self.X_forecast)
        Y = self.Y_forecast

    Y_pred = Y_hat > 0.5

    precision, recall, F1, junk = precision_recall_fscore_support(Y, Y_pred)

    out = dict()
    out['precision'] = precision[1]
    out['recall'] = recall[1]
    out['F1'] = F1[1]
    return out
Example 14
def boot_human(i, sample_size=sample_size):
    np.random.seed(seed=i)
    random_pids = np.random.choice(pt_list_unique_sub, size=sample_size, replace=True)
    test = np.array([p_2_id_sub[pid] for pid in random_pids])
    boot_list = []
    for ids in test:
        size = len(ids)
        boot_list.append(np.random.choice(ids))
    y_pred_sub = human_authored[boot_list, :]
    y_true_sub = subset_y[boot_list, :]
    # evaluate model
    # print('calculating')
    output = precision_recall_fscore_support(y_true_sub.flatten(), y_pred_sub.flatten())
    precision = output[0][2]
    recall = output[1][2]
    f1 = output[2][2]
    # print('done')
    return precision, recall, f1
Example 15
def boot_human_clinic(i, sample_size=sample_size):
    np.random.seed(seed=i)
    random_pids = np.random.choice(pt_list_unique_sub, size=sample_size, replace=True)
    test = np.array([p_2_id_sub[pid] for pid in random_pids])
    boot_list = []
    for ids in test:
        size = len(ids)
        boot_list.append(np.random.choice(ids))
    y_pred_sub = subset_ClinicNet[boot_list, :]
    y_true_sub = subset_y[boot_list, :]
    auroc = roc_auc_score(y_true_sub, y_pred_sub, average='micro')
    avg_precision = average_precision_score(y_true_sub, y_pred_sub, average='micro')
    y_pred_sub[y_pred_sub < threshold_clinicnet] = 0
    y_pred_sub[y_pred_sub >= threshold_clinicnet] = 1
    # evaluate model
    # print('calculating')
    output = precision_recall_fscore_support(y_true_sub.flatten(), y_pred_sub.flatten())
    # print('done')
    precision = output[0][1]
    recall = output[1][1]
    f1 = output[2][1]
    # print('done')
    return auroc, avg_precision, precision, recall, f1
Example 16
def boot_human_logistic(i, sample_size=sample_size):
    np.random.seed(seed=i)
    random_pids = np.random.choice(pt_list_unique_sub, size=sample_size, replace=True)
    test = np.array([p_2_id_sub[pid] for pid in random_pids])
    boot_list = []
    for ids in test:
        size = len(ids)
        boot_list.append(np.random.choice(ids))
    y_pred_sub = subset_log[boot_list, :]
    y_true_sub = subset_y[boot_list, :]
    auroc = roc_auc_score(y_true_sub, y_pred_sub, average='micro')
    avg_precision = average_precision_score(y_true_sub, y_pred_sub, average='micro')
    y_pred_sub[y_pred_sub < threshold_log] = 0
    y_pred_sub[y_pred_sub >= threshold_log] = 1
    # evaluate model
    # print('calculating')
    output = precision_recall_fscore_support(y_true_sub.flatten(), y_pred_sub.flatten())
    # print('done')
    precision = output[0][1]
    recall = output[1][1]
    f1 = output[2][1]
    # print('done')
    return auroc, avg_precision, precision, recall, f1
Example 17
def eval_epoch(model, sess, eval_set):
    result, labels = [], []
    avg_loss, avg_acc, steps, total_len = 0, 0, 0, 0
    for batch in eval_set.next_batch():
        steps += 1
        predictions, batch_loss, batch_acc = sess.run([model.pred, model.loss, model.accuracy],
                                                      feed_dict={model.dropout_keep_prob: 1.0,
                                                                 model.input_x: batch.texts, model.input_y: batch.labels})
        batch_len = len(batch.texts)
        avg_loss += batch_loss * batch_len
        avg_acc += batch_acc * batch_len
        total_len += batch_len
        result.extend(predictions.tolist())
        labels.extend(batch.labels.tolist())

    avg_loss, avg_acc = avg_loss / total_len, avg_acc / total_len
    precision, recall, fscore, support = precision_recall_fscore_support(labels, result, average='weighted')
    metrics = {'loss': avg_loss, 'accuracy': avg_acc, 'precision': precision, 'recall': recall, 'fscore': fscore}
    return metrics, result
Example 18
def _get_class_stats(y_true, y_pred, labels):
    """
    Method for getting some basic statistics by class.

    Returns:
        dict: A structured dictionary containing precision, recall, f_beta, and support \
              vectors (1 x number of classes)
    """
    precision, recall, f_beta, support = score(
        y_true=y_true, y_pred=y_pred, labels=labels
    )
    stats = {
        "precision": precision,
        "recall": recall,
        "f_beta": f_beta,
        "support": support,
    }
    return stats
Example 19
def get_dice(pred, gt, num_labels):
    if num_labels != 2:
        print('Dice evaluation score is only implemented for 2 labels')
        sys.exit()
    return 1.0 - scipy.spatial.distance.dice(pred.reshape(-1), gt.reshape(-1))
    # f1 score at beta = 1 is the same as dice score
    # recall = (num detected WMH) / (num true WMH)
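The comment in the example above relies on the identity that, for binary masks, the Dice coefficient equals the F1 score (F-beta at beta = 1): both are 2*TP / (2*TP + FP + FN). A minimal sketch verifying this, using the same scipy.spatial.distance.dice as the example and an illustrative pair of masks:

import numpy as np
import scipy.spatial.distance
from sklearn.metrics import f1_score

# illustrative binary masks, not taken from the example's data
gt = np.array([1, 0, 1, 1, 0, 0, 1, 0])
pred = np.array([1, 0, 0, 1, 0, 1, 1, 0])

dice = 1.0 - scipy.spatial.distance.dice(pred.astype(bool), gt.astype(bool))  # Dice coefficient
f1 = f1_score(gt, pred)                                                       # F1, i.e. F-beta at beta = 1
assert np.isclose(dice, f1)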
Example 20
def evaluate(model: DetectModel, loader: DataLoader, config: dict) -> tuple:
    batch_size = loader.batch_size
    device = torch.device("cuda:{}".format(args.cuda) if args.cuda else 'cpu')
    criterion = nn.CrossEntropyLoss(reduction='none')
    total_loss = []
    total_output = []
    total_labels = []
    with torch.no_grad():
        initial_hidden_state = torch.zeros(config['rnn_layers'], batch_size, config['hidden_size'], dtype=torch.float).to(device)
        for step, (seq_inputs, labels) in enumerate(loader):
            seq_inputs = torch.tensor(seq_inputs, dtype=torch.float).to(device)
            labels = torch.tensor(labels, dtype=torch.long).to(device)
            output = model(seq_inputs, initial_hidden_state)
            loss = criterion(output, labels)
            total_loss.append(loss)
            output = F.softmax(output, dim=1).argmax(dim=1)
            total_output.append(output)
            total_labels.append(labels)
    total_loss = torch.cat(total_loss).contiguous().mean()
    total_output = torch.cat(total_output).cpu().numpy()
    total_labels = torch.cat(total_labels).cpu().numpy()
    precision, recall, f1, _ = precision_recall_fscore_support(total_labels, total_output, labels=[0, 1])
    return precision, recall, f1, total_loss
Example 21
def f1_weighted(y_true, y_pred):
    '''
    This method is used to suppress UndefinedMetricWarning
    in f1_score of scikit-learn.
    "filterwarnings" doesn't work in CV with multiprocessing.
    '''
    _, _, f, _ = precision_recall_fscore_support(y_true, y_pred,
                                                 beta=1,
                                                 labels=None,
                                                 pos_label=1,
                                                 average='weighted',
                                                 warn_for=(),
                                                 sample_weight=None)
    return f
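One plausible way to use such a warning-free metric during cross-validation (not shown in the original snippet; the estimator and data below are placeholders) is to wrap it with make_scorer:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import make_scorer
from sklearn.model_selection import cross_val_score

# hypothetical wiring: f1_weighted is the function defined above
X, y = make_classification(n_samples=200, random_state=0)
scores = cross_val_score(LogisticRegression(max_iter=1000), X, y,
                         scoring=make_scorer(f1_weighted), cv=5)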
Example 22
def calc_test_result(self, pred_label, test_label, test_mask):
    true_label = []
    predicted_label = []
    for i in range(pred_label.shape[0]):
        for j in range(pred_label.shape[1]):
            if test_mask[i, j] == 1:
                true_label.append(np.argmax(test_label[i, j]))
                predicted_label.append(np.argmax(pred_label[i, j]))
    print("Confusion Matrix :")
    print(confusion_matrix(true_label, predicted_label))
    print("Classification Report :")
    print(classification_report(true_label, predicted_label, digits=4))
    print('Weighted FScore: \n ', precision_recall_fscore_support(true_label, predicted_label, average='weighted'))
Example 23
def cnf_roc(y_true, y_pred, classes, isPlot, save_tag=''):
    # compute the confusion matrix
    y = np.zeros(len(y_true))
    y_ = np.zeros(len(y_true))
    for i in range(len(y_true)):
        y[i] = np.argmax(y_true[i, :])
        y_[i] = np.argmax(y_pred[i, :])
    cnf_mat = confusion_matrix(y, y_)
    print(cnf_mat)

    if isPlot:
        # plot the confusion matrix
        plot_confusion_matrix(cnf_mat, range(classes), save_tag=save_tag)
        # plot the ROC curve
        plot_roc_curve(y_true, y_pred, range(classes), save_tag)

    if classes > 2:
        # compute multi-class evaluation metrics
        Sens = recall_score(y, y_, average='macro')
        Prec = precision_score(y, y_, average='macro')
        F1 = f1_score(y, y_, average='weighted')
        Support = precision_recall_fscore_support(y, y_, beta=0.5, average=None)
        print(Support)
        return Sens, Prec, F1, cnf_mat
    else:
        Acc = 1.0 * (cnf_mat[1][1] + cnf_mat[0][0]) / len(y_true)
        Sens = 1.0 * cnf_mat[1][1] / (cnf_mat[1][1] + cnf_mat[1][0])
        Spec = 1.0 * cnf_mat[0][0] / (cnf_mat[0][0] + cnf_mat[0][1])
        # compute the AUC
        Auc = roc_auc_score(y_true[:, 1], y_pred[:, 1])
        return Acc, Sens, Spec, Auc
Example 24
def run_evaluate(self, test, report=True):
    """Evaluates performance on test set

    Args:
        test: dataset that yields tuple of (sentences, tags)

    Returns:
        metrics: (dict) metrics["acc"] = 98.4, ...
    """
    accs = []
    labs = []
    labs_pred = []
    for words, labels in minibatches(test, self.config.batch_size):
        labels_pred, document_lengths = self.predict_batch(words)

        for lab, lab_pred, length in zip(labels, labels_pred,
                                         document_lengths):
            lab = lab[:length]
            lab_pred = lab_pred[:length]
            accs += [a == b for (a, b) in zip(lab, lab_pred)]
            labs.extend(lab)
            labs_pred.extend(lab_pred)

    labs = [self.idx_to_tag[lab].split('_')[0] for lab in labs]
    labs_pred = [self.idx_to_tag[lab_pred].split('_')[0] for lab_pred in labs_pred]

    _, _, macro_f1, _ = precision_recall_fscore_support(labs, labs_pred, average='macro')
    _, _, micro_f1, _ = precision_recall_fscore_support(labs, labs_pred, average='micro')
    _, _, weighted_f1, _ = precision_recall_fscore_support(labs, labs_pred, average='weighted')
    acc = np.mean(accs)

    # build the report unconditionally so the returned dict is always complete
    class_report = classification_report(labs, labs_pred, digits=4)
    confusion = confusion_matrix(labs, labs_pred)
    if report:
        print(class_report)
        print(confusion)

    return {"acc": 100 * acc, "macro-f1": 100 * macro_f1, "micro-f1": 100 * micro_f1,
            "weighted-f1": 100 * weighted_f1, "classification-report": class_report,
            "confusion-matrix": confusion}
Example 25
def test_precision_recall_f1_score_binary():
    # Test Precision Recall and F1 Score for binary classification task
    y_true, y_pred, _ = make_prediction(binary=True)

    # detailed measures for each class
    p, r, f, s = precision_recall_fscore_support(y_true, y_pred, average=None)
    assert_array_almost_equal(p, [0.73, 0.85], 2)
    assert_array_almost_equal(r, [0.88, 0.68], 2)
    assert_array_almost_equal(f, [0.80, 0.76], 2)
    assert_array_equal(s, [25, 25])

    # individual scoring function that can be used for grid search: in the
    # binary class case the score is the value of the measure for the positive
    # class (e.g. label == 1). This is deprecated for average != 'binary'.
    for kwargs, my_assert in [({}, assert_no_warnings),
                              ({'average': 'binary'}, assert_no_warnings)]:
        ps = my_assert(precision_score, y_true, y_pred, **kwargs)
        assert_array_almost_equal(ps, 0.85, 2)

        rs = my_assert(recall_score, y_true, y_pred, **kwargs)
        assert_array_almost_equal(rs, 0.68, 2)

        fs = my_assert(f1_score, y_true, y_pred, **kwargs)
        assert_array_almost_equal(fs, 0.76, 2)

        assert_almost_equal(my_assert(fbeta_score, y_true, y_pred, beta=2,
                                      **kwargs),
                            (1 + 2 ** 2) * ps * rs / (2 ** 2 * ps + rs), 2)
Example 26
def test_precision_recall_fscore_support_errors():
    y_true, y_pred, _ = make_prediction(binary=True)

    # Bad beta
    assert_raises(ValueError, precision_recall_fscore_support,
                  y_true, y_pred, beta=0.0)

    # Bad pos_label
    assert_raises(ValueError, precision_recall_fscore_support,
                  y_true, y_pred, pos_label=2, average='binary')

    # Bad average option
    assert_raises(ValueError, precision_recall_fscore_support,
                  [0, 1, 2], [1, 2, 0], average='mega')
Example 27
def test_precision_recall_f_unused_pos_label():
    # Check warning that pos_label unused when set to non-default value
    # but average != 'binary'; even if data is binary.
    assert_warns_message(UserWarning,
                         "Note that pos_label (set to 2) is "
                         "ignored when average != 'binary' (got 'macro'). You "
                         "may use labels=[pos_label] to specify a single "
                         "positive class.", precision_recall_fscore_support,
                         [1, 2, 1], [1, 2, 2], pos_label=2, average='macro')
Example 28
def calculate_precision_and_recall(outputs, targets, pos_label=1):
    with torch.no_grad():
        _, pred = outputs.topk(1, 1, largest=True, sorted=True)
        precision, recall, _, _ = precision_recall_fscore_support(
            targets.view(-1, 1).cpu().numpy(),
            pred.cpu().numpy())

        return precision[pos_label], recall[pos_label]
Example 29
def eval_performance(y_true, y_pred):
    '''
    Evaluate the performance of a multiclass classification model.
    :param y_true: the gold-standard labels
    :param y_pred: the predictions
    :return: mean F1
    '''
    pre, rec, f1, support = metrics.precision_recall_fscore_support(y_true, y_pred, average='weighted')
    print('=== Performance ===')
    print('Mean precision: %.03f%%' % pre)  # (100*sum(pre * support)/sum(support))
    print('Mean recall:    %.03f%%' % rec)  # (100*sum(rec * support)/sum(support))
    print('Mean F1:        %.03f%%' % f1)   # mean_f1
    return pre, rec, f1, support
Example 30
def eval_performance(y_true, y_pred, tagnames):
    pre, rec, f1, support = metrics.precision_recall_fscore_support(y_true, y_pred)
    print("=== Performance (omitting 'O' class) ===")
    print("Mean precision: %.02f%%" % (100 * sum(pre[1:] * support[1:]) / sum(support[1:])))
    print("Mean recall:    %.02f%%" % (100 * sum(rec[1:] * support[1:]) / sum(support[1:])))
    print("Mean F1:        %.02f%%" % (100 * sum(f1[1:] * support[1:]) / sum(support[1:])))

    ##
    # Implement this!
    ##