Python source code examples: sklearn.metrics.f1_score()
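The examples below are excerpted from real projects. As a quick orientation, here is a minimal, self-contained sketch (toy labels, illustrative only) of how sklearn.metrics.f1_score behaves with the averaging modes used throughout these examples:

from sklearn.metrics import f1_score

y_true = [0, 1, 2, 2, 1, 0]   # toy ground-truth labels
y_pred = [0, 2, 2, 2, 1, 1]   # toy predictions

# 'macro' averages the per-class F1 scores with equal weight per class.
print(f1_score(y_true, y_pred, average='macro'))
# 'micro' pools true/false positives and false negatives across classes first.
print(f1_score(y_true, y_pred, average='micro'))
# average=None returns the per-class F1 scores as an array.
print(f1_score(y_true, y_pred, average=None))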
Example 1
def multi_class_classification(data_X, data_Y):
    '''
    calculate multi-class classification and return related evaluation metrics
    '''
    svc = svm.SVC(C=1, kernel='linear')
    # X_train, X_test, y_train, y_test = train_test_split(data_X, data_Y, test_size=0.4, random_state=0)
    clf = svc.fit(data_X, data_Y)  # svm
    # array = svc.coef_
    # print(array)
    predicted = cross_val_predict(clf, data_X, data_Y, cv=2)
    print("accuracy", metrics.accuracy_score(data_Y, predicted))
    print("f1 score macro", metrics.f1_score(data_Y, predicted, average='macro'))
    print("f1 score micro", metrics.f1_score(data_Y, predicted, average='micro'))
    print("precision score", metrics.precision_score(data_Y, predicted, average='macro'))
    print("recall score", metrics.recall_score(data_Y, predicted, average='macro'))
    print("hamming_loss", metrics.hamming_loss(data_Y, predicted))
    print("classification_report", metrics.classification_report(data_Y, predicted))
    print("jaccard_similarity_score", metrics.jaccard_similarity_score(data_Y, predicted))
    # print("log_loss", metrics.log_loss(data_Y, predicted))
    print("zero_one_loss", metrics.zero_one_loss(data_Y, predicted))
    # print("AUC&ROC", metrics.roc_auc_score(data_Y, predicted))
    # print("matthews_corrcoef", metrics.matthews_corrcoef(data_Y, predicted))
Example 2
def evaluation_analysis(true_label, predicted):
    '''
    return all metrics results
    '''
    print("accuracy", metrics.accuracy_score(true_label, predicted))
    print("f1 score macro", metrics.f1_score(true_label, predicted, average='macro'))
    print("f1 score micro", metrics.f1_score(true_label, predicted, average='micro'))
    print("precision score", metrics.precision_score(true_label, predicted, average='macro'))
    print("recall score", metrics.recall_score(true_label, predicted, average='macro'))
    print("hamming_loss", metrics.hamming_loss(true_label, predicted))
    print("classification_report", metrics.classification_report(true_label, predicted))
    print("jaccard_similarity_score", metrics.jaccard_similarity_score(true_label, predicted))
    print("log_loss", metrics.log_loss(true_label, predicted))
    print("zero_one_loss", metrics.zero_one_loss(true_label, predicted))
    print("AUC&ROC", metrics.roc_auc_score(true_label, predicted))
    print("matthews_corrcoef", metrics.matthews_corrcoef(true_label, predicted))
Example 3
def eval_class(ids_to_eval, model, z_obs):
    """
    Evaluate the model's classification performance.

    Parameters
    ----------
    ids_to_eval: np.array
        The indices of the nodes whose predictions will be evaluated.
    model: GCN
        The model to evaluate.
    z_obs: np.array
        The labels of the nodes in ids_to_eval

    Returns
    -------
    [f1_micro, f1_macro] scores
    """
    test_pred = model.predictions.eval(session=model.session, feed_dict={model.node_ids: ids_to_eval}).argmax(1)
    test_real = z_obs[ids_to_eval]
    return f1_score(test_real, test_pred, average='micro'), f1_score(test_real, test_pred, average='macro')
Example 4
def classification_scores(gts, preds, labels):
    accuracy = metrics.accuracy_score(gts, preds)
    class_accuracies = []
    for lab in labels:  # TODO Fix
        class_accuracies.append(metrics.accuracy_score(gts[gts == lab], preds[gts == lab]))
    class_accuracies = np.array(class_accuracies)
    f1_micro = metrics.f1_score(gts, preds, average='micro')
    precision_micro = metrics.precision_score(gts, preds, average='micro')
    recall_micro = metrics.recall_score(gts, preds, average='micro')
    f1_macro = metrics.f1_score(gts, preds, average='macro')
    precision_macro = metrics.precision_score(gts, preds, average='macro')
    recall_macro = metrics.recall_score(gts, preds, average='macro')
    # class-wise scores
    f1s = metrics.f1_score(gts, preds, average=None)
    precisions = metrics.precision_score(gts, preds, average=None)
    recalls = metrics.recall_score(gts, preds, average=None)
    confusion = metrics.confusion_matrix(gts, preds, labels=labels)
    # TODO confusion matrix, recall, precision
    return accuracy, f1_micro, precision_micro, recall_micro, f1_macro, precision_macro, recall_macro, confusion, class_accuracies, f1s, precisions, recalls
Example 5
def test(self, z, pos_edge_index, neg_edge_index):
    """Evaluates node embeddings :obj:`z` on positive and negative test
    edges by computing AUC and F1 scores.

    Args:
        z (Tensor): The node embeddings.
        pos_edge_index (LongTensor): The positive edge indices.
        neg_edge_index (LongTensor): The negative edge indices.
    """
    with torch.no_grad():
        pos_p = self.discriminate(z, pos_edge_index)[:, :2].max(dim=1)[1]
        neg_p = self.discriminate(z, neg_edge_index)[:, :2].max(dim=1)[1]
    pred = (1 - torch.cat([pos_p, neg_p])).cpu()
    y = torch.cat(
        [pred.new_ones((pos_p.size(0))),
         pred.new_zeros(neg_p.size(0))])
    pred, y = pred.numpy(), y.numpy()

    auc = roc_auc_score(y, pred)
    f1 = f1_score(y, pred, average='binary') if pred.sum() > 0 else 0

    return auc, f1
Example 6
def test_classification_2classes_small():
    X, y = make_classification(n_samples=1000,
                               n_features=10,
                               n_classes=2,
                               n_clusters_per_class=1,
                               random_state=0)
    X = pd.DataFrame(X)
    y = pd.Series(y)
    cls = MALSS('classification').fit(X, y,
                                      'test_classification_2classes_small')
    cls.generate_module_sample()
    from sklearn.metrics import f1_score
    pred = cls.predict(X)
    print(f1_score(y, pred, average=None))
    assert len(cls.algorithms) == 5
    assert cls.algorithms[0].best_score is not None
Example 7
def test_classification_2classes_small_jp():
    X, y = make_classification(n_samples=1000,
                               n_features=10,
                               n_classes=2,
                               n_clusters_per_class=1,
                               random_state=0)
    X = pd.DataFrame(X)
    y = pd.Series(y)
    cls = MALSS('classification',
                lang='jp').fit(X, y, 'test_classification_2classes_small_jp')
    cls.generate_module_sample()
    from sklearn.metrics import f1_score
    pred = cls.predict(X)
    print(f1_score(y, pred, average=None))
    assert len(cls.algorithms) == 5
    assert cls.algorithms[0].best_score is not None
Example 8
def test_classification_multiclass_small():
    X, y = make_classification(n_samples=1000,
                               n_features=10,
                               n_classes=3,
                               n_clusters_per_class=1,
                               random_state=0)
    X = pd.DataFrame(X)
    y = pd.Series(y)
    cls = MALSS('classification').fit(X, y,
                                      'test_classification_multiclass_small')
    cls.generate_module_sample()
    from sklearn.metrics import f1_score
    pred = cls.predict(X)
    print(f1_score(y, pred, average=None))
    assert len(cls.algorithms) == 5
    assert cls.algorithms[0].best_score is not None
Example 9
def test_classification_2classes_medium():
    X, y = make_classification(n_samples=100000,
                               n_features=10,
                               n_classes=2,
                               n_clusters_per_class=1,
                               random_state=0)
    X = pd.DataFrame(X)
    y = pd.Series(y)
    cls = MALSS('classification').fit(X, y,
                                      'test_classification_2classes_medium')
    from sklearn.metrics import f1_score
    pred = cls.predict(X)
    print(f1_score(y, pred, average=None))
    assert len(cls.algorithms) == 4
    assert cls.algorithms[0].best_score is not None
Example 10
def test_classification_2classes_big():
    X, y = make_classification(n_samples=200000,
                               n_features=20,
                               n_classes=2,
                               n_clusters_per_class=1,
                               random_state=0)
    X = pd.DataFrame(X)
    y = pd.Series(y)
    cls = MALSS('classification').fit(X, y,
                                      'test_classification_2classes_big')
    cls.generate_module_sample()
    from sklearn.metrics import f1_score
    pred = cls.predict(X)
    print(f1_score(y, pred, average=None))
    assert len(cls.algorithms) == 1
    assert cls.algorithms[0].best_score is not None
Example 11
def test_ndarray():
    data = pd.read_csv('http://faculty.marshall.usc.edu/gareth-james/ISL/Heart.csv',
                       index_col=0, na_values=[''])
    y = data['AHD']
    del data['AHD']
    cls = MALSS('classification').fit(np.array(data), np.array(y),
                                      'test_ndarray')
    cls.generate_module_sample()
    from sklearn.metrics import f1_score
    pred = cls.predict(np.array(data))
    print(f1_score(y, pred, average=None))
    assert len(cls.algorithms) == 5
    assert cls.algorithms[0].best_score is not None
Example 12
def f1_score(y_true, y_pred):
    """
    Compute the micro f(b) score with b=1.
    """
    y_true = tf.cast(y_true, "float32")
    y_pred = tf.cast(tf.round(y_pred), "float32")  # implicit 0.5 threshold via tf.round
    y_correct = y_true * y_pred
    sum_true = tf.reduce_sum(y_true, axis=1)
    sum_pred = tf.reduce_sum(y_pred, axis=1)
    sum_correct = tf.reduce_sum(y_correct, axis=1)
    precision = sum_correct / sum_pred
    recall = sum_correct / sum_true
    f_score = 2 * precision * recall / (precision + recall)
    f_score = tf.where(tf.is_nan(f_score), tf.zeros_like(f_score), f_score)
    return tf.reduce_mean(f_score)
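Note that the metric in Example 12 (repeated verbatim in Example 14) computes an F1 score per row of the batch and then averages those rows; for multilabel targets this appears to correspond to sklearn's average='samples' mode, which is generally not the same number as true micro averaging. A small sanity-check sketch with toy multilabel data (illustrative only) for comparing the two modes:

import numpy as np
from sklearn.metrics import f1_score

Y_true = np.array([[1, 0, 1],
                   [0, 1, 0],
                   [1, 1, 0]])
Y_pred = np.array([[1, 1, 1],
                   [1, 1, 0],
                   [1, 1, 0]])

# Per-sample F1 averaged over rows -- the quantity the TF metric above computes.
print(f1_score(Y_true, Y_pred, average='samples'))
# True micro averaging pools TP/FP/FN over all rows and labels first.
print(f1_score(Y_true, Y_pred, average='micro'))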
Example 13
def load_model(stamp):
    """
    """
    json_file = open(stamp + '.json', 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    model = model_from_json(loaded_model_json, {'AttentionWithContext': AttentionWithContext})
    model.load_weights(stamp + '.h5')
    print("Loaded model from disk")
    model.summary()
    adam = Adam(lr=0.001)
    model.compile(loss='binary_crossentropy',
                  optimizer=adam,
                  metrics=[f1_score])
    return model
Example 14
def f1_score(y_true, y_pred):
    """
    Compute the micro f(b) score with b=1.
    """
    y_true = tf.cast(y_true, "float32")
    y_pred = tf.cast(tf.round(y_pred), "float32")  # implicit 0.5 threshold via tf.round
    y_correct = y_true * y_pred
    sum_true = tf.reduce_sum(y_true, axis=1)
    sum_pred = tf.reduce_sum(y_pred, axis=1)
    sum_correct = tf.reduce_sum(y_correct, axis=1)
    precision = sum_correct / sum_pred
    recall = sum_correct / sum_true
    f_score = 2 * precision * recall / (precision + recall)
    f_score = tf.where(tf.is_nan(f_score), tf.zeros_like(f_score), f_score)
    return tf.reduce_mean(f_score)
Example 15
def get_all_metrics(model, eval_data, eval_labels, pred_labels):
    fpr, tpr, thresholds_keras = roc_curve(eval_labels, pred_labels)
    auc_ = auc(fpr, tpr)
    print("auc_keras:" + str(auc_))
    score = model.evaluate(eval_data, eval_labels, verbose=0)
    print("Test accuracy: " + str(score[1]))
    precision = precision_score(eval_labels, pred_labels)
    print('Precision score: {0:0.2f}'.format(precision))
    recall = recall_score(eval_labels, pred_labels)
    print('Recall score: {0:0.2f}'.format(recall))
    f1 = f1_score(eval_labels, pred_labels)
    print('F1 score: {0:0.2f}'.format(f1))
    average_precision = average_precision_score(eval_labels, pred_labels)
    print('Average precision-recall score: {0:0.2f}'.format(average_precision))
    return auc_, score[1], precision, recall, f1, average_precision, fpr, tpr
Example 16
def get_all_metrics_(eval_labels, pred_labels):
    fpr, tpr, thresholds_keras = roc_curve(eval_labels, pred_labels)
    auc_ = auc(fpr, tpr)
    print("auc_keras:" + str(auc_))
    precision = precision_score(eval_labels, pred_labels)
    print('Precision score: {0:0.2f}'.format(precision))
    recall = recall_score(eval_labels, pred_labels)
    print('Recall score: {0:0.2f}'.format(recall))
    f1 = f1_score(eval_labels, pred_labels)
    print('F1 score: {0:0.2f}'.format(f1))
    average_precision = average_precision_score(eval_labels, pred_labels)
    print('Average precision-recall score: {0:0.2f}'.format(average_precision))
    return auc_, precision, recall, f1, average_precision, fpr, tpr
Example 17
def compute_acc(emb, labels, train_nids, val_nids, test_nids):
    """
    Compute the accuracy of prediction given the labels.
    """
    emb = emb.cpu().numpy()
    train_nids = train_nids.cpu().numpy()
    train_labels = labels[train_nids].cpu().numpy()
    val_nids = val_nids.cpu().numpy()
    val_labels = labels[val_nids].cpu().numpy()
    test_nids = test_nids.cpu().numpy()
    test_labels = labels[test_nids].cpu().numpy()
    emb = (emb - emb.mean(0, keepdims=True)) / emb.std(0, keepdims=True)
    lr = lm.LogisticRegression(multi_class='multinomial', max_iter=10000)
    lr.fit(emb[train_nids], train_labels)
    pred = lr.predict(emb)
    f1_micro_eval = skm.f1_score(val_labels, pred[val_nids], average='micro')
    f1_micro_test = skm.f1_score(test_labels, pred[test_nids], average='micro')
    f1_macro_eval = skm.f1_score(val_labels, pred[val_nids], average='macro')
    f1_macro_test = skm.f1_score(test_labels, pred[test_nids], average='macro')
    return f1_micro_eval, f1_micro_test
Example 18
def run_evaluate(self, test):
    """Evaluates performance on test set

    Args:
        test: dataset that yields tuple of (sentences, relation tags)

    Returns:
        metrics: (dict) metrics["acc"] = 98.4, ...
    """
    y_true, y_pred = [], []
    for data in minibatches(test, self.config.batch_size):
        word_batch, pos1_batch, pos2_batch, pos_batch, y_batch = data
        relations_pred = self.predict_batch(word_batch, pos1_batch, pos2_batch, pos_batch)
        assert len(relations_pred) == len(y_batch)
        y_true += y_batch
        y_pred += relations_pred.tolist()
    acc = accuracy_score(y_true, y_pred)
    p = precision_score(y_true, y_pred, average='macro')
    r = recall_score(y_true, y_pred, average='macro')
    f1 = f1_score(y_true, y_pred, average='macro')
    return {"acc": acc, "p": p, "r": r, "f1": f1}
Example 19
def score(self, data, labels, batch_size=64):
    '''
    return the micro and macro f-score of predicted labels on given data

    parameters:
      - data: numpy array
        2d numpy array (doc x word ids) of input data
      - labels: numpy array
        1d numpy array of labels for given data
      - batch size: int (default: 64)
        batch size to use during inference

    outputs:
        tuple of floats (micro, macro) representing micro and macro f-score
        of predicted labels on given data
    '''
    y_pred = self.predict(data, batch_size)
    micro = f1_score(labels, y_pred, average='micro')
    macro = f1_score(labels, y_pred, average='macro')
    return micro, macro
Example 20
def score(self, data, labels, batch_size=64):
    '''
    return the micro and macro f-score of predicted labels on given data

    parameters:
      - data: numpy array
        2d numpy array (doc x word ids) of input data
      - labels: numpy array
        1d numpy array of labels for given data
      - batch size: int (default: 64)
        batch size to use during inference

    outputs:
        tuple of floats (micro, macro) representing micro and macro f-score
        of predicted labels on given data
    '''
    y_pred = self.predict(data, batch_size)
    micro = f1_score(labels, y_pred, average='micro')
    macro = f1_score(labels, y_pred, average='macro')
    return micro, macro
Example 21
def score(self, data, labels, batch_size=64):
    '''
    return the micro and macro f-score of predicted labels on given data

    parameters:
      - data: numpy array
        3d numpy array (doc x sentence x word ids) of input data
      - labels: numpy array
        1d numpy array of labels for given data
      - batch size: int (default: 64)
        batch size to use during inference

    outputs:
        tuple of floats (micro, macro) representing micro and macro f-score
        of predicted labels on given data
    '''
    y_pred = self.predict(data, batch_size)
    micro = f1_score(labels, y_pred, average='micro')
    macro = f1_score(labels, y_pred, average='macro')
    return micro, macro
Example 22
def score(self, data, labels, batch_size=64):
    '''
    return the micro and macro f-score of predicted labels on given data

    parameters:
      - data: numpy array
        3d numpy array (doc x sentence x word ids) of input data
      - labels: numpy array
        1d numpy array of labels for given data
      - batch size: int (default: 64)
        batch size to use during inference

    outputs:
        tuple of floats (micro, macro) representing micro and macro f-score
        of predicted labels on given data
    '''
    y_pred = self.predict(data, batch_size)
    micro = f1_score(labels, y_pred, average='micro')
    macro = f1_score(labels, y_pred, average='macro')
    return micro, macro
Example 23
def evaluate(trueValues, predicted, decimals, note):
    print(note)
    label = 1
    avg = 'weighted'
    a = accuracy_score(trueValues, predicted)
    p = precision_score(trueValues, predicted, pos_label=label, average=avg)
    r = recall_score(trueValues, predicted, pos_label=label, average=avg)
    avg_f1 = f1_score(trueValues, predicted, pos_label=label, average=avg)
    fclasses = f1_score(trueValues, predicted, average=None)
    f1c1 = fclasses[0]
    f1c2 = fclasses[1]
    fw = (f1c1 + f1c2) / 2.0
    print('accuracy:\t', str(round(a, decimals)))
    print('precision:\t', str(round(p, decimals)))
    print('recall:\t', str(round(r, decimals)))
    print('avg f1:\t', str(round(avg_f1, decimals)))
    print('c1 f1:\t', str(round(f1c1, decimals)))
    print('c2 f1:\t', str(round(f1c2, decimals)))
    print('avg(c1,c2):\t', str(round(fw, decimals)))
    print('------------')
Example 24
def accuracy(y_true, y_pred):
    # Compute the confusion matrix
    y = np.zeros(len(y_true))
    y_ = np.zeros(len(y_true))
    for i in range(len(y_true)):
        y[i] = np.argmax(y_true[i, :])
        y_[i] = np.argmax(y_pred[i, :])
    cnf_mat = confusion_matrix(y, y_)
    # Acc = 1.0*(cnf_mat[1][1]+cnf_mat[0][0])/len(y_true)
    # Sens = 1.0*cnf_mat[1][1]/(cnf_mat[1][1]+cnf_mat[1][0])
    # Spec = 1.0*cnf_mat[0][0]/(cnf_mat[0][0]+cnf_mat[0][1])
    # # Plot the ROC curve
    # fpr, tpr, thresholds = roc_curve(y_true[:,0], y_pred[:,0])
    # Auc = auc(fpr, tpr)
    # Compute multi-class evaluation metrics
    Sens = recall_score(y, y_, average='macro')
    Prec = precision_score(y, y_, average='macro')
    F1 = f1_score(y, y_, average='weighted')
    Support = precision_recall_fscore_support(y, y_, beta=0.5, average=None)
    return Sens, Prec, F1, cnf_mat
Example 25
def eval_fn(result):
    i = 0
    total_accuracy = 0
    label, label_id = [], []
    while True:
        try:
            eval_result = sess.run(result)
            total_accuracy += eval_result["accuracy"]
            label_id.extend(eval_result["label_ids"])
            label.extend(eval_result["pred_label"])
            i += 1
        except tf.errors.OutOfRangeError:
            print("End of dataset")
            break
    f1 = f1_score(label_id, label, average="macro")
    accuracy = accuracy_score(label_id, label)
    print("test accuracy accuracy {} {} f1 {}".format(total_accuracy / i,
                                                      accuracy, f1))
    return total_accuracy / i, f1
Example 26
def eval_fn(result):
    i = 0
    total_accuracy = 0
    total_loss = 0.0
    pred_prob = []
    label, label_id = [], []
    while True:
        try:
            eval_result = sess.run(result)
            total_accuracy += eval_result["accuracy"]
            label_id.extend(eval_result["label_ids"])
            label.extend(eval_result["pred_label"])
            total_loss += eval_result["loss"]
            pred_prob.extend(eval_result["pred_prob"])
            i += 1
        except tf.errors.OutOfRangeError:
            print("End of dataset")
            break
    f1 = f1_score(label_id, label, average="macro")
    accuracy = accuracy_score(label_id, label)
    print("test accuracy {} accuracy {} loss {} f1 {}".format(total_accuracy / i,
                                                              accuracy, total_loss / i, f1))
    return pred_prob
Example 27
def eval_fn(result):
    i = 0
    total_accuracy = 0
    total_loss = 0.0
    pred_prob = []
    label, label_id = [], []
    while True:
        try:
            eval_result = sess.run(result)
            total_accuracy += eval_result["accuracy"]
            label_id.extend(eval_result["label_ids"])
            label.extend(eval_result["pred_label"])
            total_loss += eval_result["loss"]
            pred_prob.extend(eval_result["pred_prob"])
            i += 1
        except tf.errors.OutOfRangeError:
            print("End of dataset")
            break
    f1 = f1_score(label_id, label, average="macro")
    accuracy = accuracy_score(label_id, label)
    print("test accuracy {} accuracy {} loss {} f1 {}".format(total_accuracy / i,
                                                              accuracy, total_loss / i, f1))
    return pred_prob
Example 28
def eval_fn(result):
    i = 0
    total_accuracy = 0
    total_loss = 0.0
    label, label_id = [], []
    while True:
        try:
            eval_result = sess.run(result)
            total_accuracy += eval_result["accuracy"]
            label_id.extend(eval_result["label_ids"])
            label.extend(eval_result["pred_label"])
            total_loss += eval_result["loss"]
            i += 1
        except tf.errors.OutOfRangeError:
            print("End of dataset")
            break
    f1 = f1_score(label_id, label, average="macro")
    accuracy = accuracy_score(label_id, label)
    print("test accuracy {} accuracy {} loss {}, f1 {}".format(total_accuracy / i,
                                                               accuracy, total_loss / i, f1))
    return total_accuracy / i
Example 29
def eval_fn(result):
    i = 0
    total_accuracy = 0
    label, label_id = [], []
    while True:
        try:
            eval_result = sess.run(result)
            total_accuracy += eval_result["accuracy"]
            label_id.extend(eval_result["label_ids"])
            label.extend(eval_result["pred_label"])
            i += 1
        except tf.errors.OutOfRangeError:
            print("End of dataset")
            break
    f1 = f1_score(label_id, label, average="macro")
    accuracy = accuracy_score(label_id, label)
    print("test accuracy accuracy {} {}, f1 {}".format(total_accuracy / i,
                                                       accuracy, f1))
    return total_accuracy / i, f1
Example 30
def eval_fn(result):
    i = 0
    total_accuracy = 0
    label, label_id = [], []
    while True:
        try:
            eval_result = sess.run(result)
            total_accuracy += eval_result["accuracy"]
            label_id.extend(eval_result["label_ids"])
            label.extend(eval_result["pred_label"])
            i += 1
        except tf.errors.OutOfRangeError:
            print("End of dataset")
            break
    f1 = f1_score(label_id, label, average="macro")
    accuracy = accuracy_score(label_id, label)
    print("test accuracy accuracy {} {} f1 {}".format(total_accuracy / i,
                                                      accuracy, f1))
    return total_accuracy / i, f1