Python code examples: sklearn.metrics.classification_report()
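classification_report() summarizes precision, recall, F1-score and support per class, together with accuracy and macro/weighted averages, either as formatted text or (with output_dict=True) as a nested dict. Before the project examples below, a minimal self-contained sketch; the labels and target names are made up for illustration:

from sklearn.metrics import classification_report

y_true = [0, 1, 2, 2, 1, 0]
y_pred = [0, 2, 2, 2, 1, 0]
print(classification_report(y_true, y_pred, target_names=['cat', 'dog', 'bird']))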
Example 1
def multi_class_classification(data_X, data_Y):
    '''
    calculate multi-class classification and return related evaluation metrics
    '''
    svc = svm.SVC(C=1, kernel='linear')
    # X_train, X_test, y_train, y_test = train_test_split(data_X, data_Y, test_size=0.4, random_state=0)
    clf = svc.fit(data_X, data_Y)  # svm
    # array = svc.coef_
    # print array
    predicted = cross_val_predict(clf, data_X, data_Y, cv=2)
    print "accuracy", metrics.accuracy_score(data_Y, predicted)
    print "f1 score macro", metrics.f1_score(data_Y, predicted, average='macro')
    print "f1 score micro", metrics.f1_score(data_Y, predicted, average='micro')
    print "precision score", metrics.precision_score(data_Y, predicted, average='macro')
    print "recall score", metrics.recall_score(data_Y, predicted, average='macro')
    print "hamming_loss", metrics.hamming_loss(data_Y, predicted)
    print "classification_report", metrics.classification_report(data_Y, predicted)
    print "jaccard_similarity_score", metrics.jaccard_similarity_score(data_Y, predicted)
    # print "log_loss", metrics.log_loss(data_Y, predicted)
    print "zero_one_loss", metrics.zero_one_loss(data_Y, predicted)
    # print "AUC&ROC", metrics.roc_auc_score(data_Y, predicted)
    # print "matthews_corrcoef", metrics.matthews_corrcoef(data_Y, predicted)
Example 2
def evaluation_analysis(true_label, predicted):
    '''
    return all metrics results
    '''
    print "accuracy", metrics.accuracy_score(true_label, predicted)
    print "f1 score macro", metrics.f1_score(true_label, predicted, average='macro')
    print "f1 score micro", metrics.f1_score(true_label, predicted, average='micro')
    print "precision score", metrics.precision_score(true_label, predicted, average='macro')
    print "recall score", metrics.recall_score(true_label, predicted, average='macro')
    print "hamming_loss", metrics.hamming_loss(true_label, predicted)
    print "classification_report", metrics.classification_report(true_label, predicted)
    print "jaccard_similarity_score", metrics.jaccard_similarity_score(true_label, predicted)
    print "log_loss", metrics.log_loss(true_label, predicted)
    print "zero_one_loss", metrics.zero_one_loss(true_label, predicted)
    print "AUC&ROC", metrics.roc_auc_score(true_label, predicted)
    print "matthews_corrcoef", metrics.matthews_corrcoef(true_label, predicted)
Example 3
def train_and_evaluate(clf, X_train, X_test, y_train, y_test):
    clf.fit(X_train, y_train)
    print("Accuracy on training set:")
    print(clf.score(X_train, y_train))
    print("Accuracy on testing set:")
    print(clf.score(X_test, y_test))
    y_pred = clf.predict(X_test)
    print("Classification Report:")
    print(metrics.classification_report(y_test, y_pred))
    print("Confusion Matrix:")
    print(metrics.confusion_matrix(y_test, y_pred))

# ===============================================================================
# from FaceDetectPredict.py
# ===============================================================================
Example 4
def eval_batch_col(classifier, val_dataset, batch_size, device):
    val_batch_generator = datasets.generate_batches_col(val_dataset,
                                                        batch_size=batch_size,
                                                        shuffle=False,
                                                        drop_last=True,
                                                        device=device)
    y_pred, y_true = [], []
    for batch_idx, batch_dict in enumerate(val_batch_generator):
        y = batch_dict["label"]
        X = batch_dict["data"]
        # Pred
        pred = classifier(X)
        y_pred.extend(pred.cpu().numpy())
        y_true.extend(y.cpu().numpy())
    report = classification_report(y_true, np.argmax(y_pred, axis=1), output_dict=True)
    return report

# evaluate and return prediction & true labels of a table batch
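With output_dict=True (as in Example 4), classification_report returns a nested dict instead of a string, keyed by the string form of each label plus 'accuracy', 'macro avg' and 'weighted avg'. A small sketch with made-up labels:

from sklearn.metrics import classification_report

report = classification_report([0, 1, 1, 2], [0, 1, 2, 2], output_dict=True)
macro_f1 = report["macro avg"]["f1-score"]  # macro-averaged F1
class_1_recall = report["1"]["recall"]      # per-class keys are label strings
print(macro_f1, class_1_recall)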
Example 5
def score_binary_classification(y, y_hat, report=True):
    """
    Create binary classification output
    :param y: true value
    :param y_hat: class 1 probabilities
    :param report:
    :return:
    """
    y_hat_class = [1 if x >= 0.5 else 0 for x in y_hat]  # convert probability to class for classification report
    report_string = "---Binary Classification Score--- \n"
    report_string += classification_report(y, y_hat_class)
    score = roc_auc_score(y, y_hat)
    report_string += "\nAUC = " + str(score)
    if report:
        print(report_string)
    return score, report_string
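Example 5 builds the report from hard class assignments (probabilities thresholded at 0.5) while the AUC is computed from the raw scores. A small usage sketch, assuming score_binary_classification and the sklearn imports it relies on (classification_report, roc_auc_score) are already in scope; the labels and probabilities are made up:

y_true = [0, 0, 1, 1]
y_prob = [0.1, 0.6, 0.4, 0.9]  # hypothetical class-1 probabilities
auc, text = score_binary_classification(y_true, y_prob, report=True)
# the report is printed because report=True; auc comes from roc_auc_score on y_prob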
Example 6
def score_multiclass_classification(y, y_hat, report=True):
    """
    Create multiclass classification score
    :param y:
    :param y_hat:
    :return:
    """
    report_string = "---Multiclass Classification Score--- \n"
    report_string += classification_report(y, y_hat)
    score = accuracy_score(y, y_hat)
    report_string += "\nAccuracy = " + str(score)
    if report:
        print(report_string)
    return score, report_string
Example 7
def print_evaluation(model, data, ls, log=None):
    features, actual = data
    predictions = predict(model, features, 500).data.numpy().reshape(-1).tolist()
    labels = [ls.idx[i] for i, _ in enumerate(ls.idx)]
    actual = [labels[i] for i in actual]
    predictions = [labels[i] for i in predictions]
    print(accuracy_score(actual, predictions))
    print(classification_report(actual, predictions))
    print(confusion_matrix(actual, predictions))
    data = zip(actual, predictions)
    if log is not None:
        f = open(log, "w+")
        for a, p in data:
            f.write(json.dumps({"actual": a, "predicted": p}) + "\n")
        f.close()
Example 8
def evaluate(config, model, data_iter, test=False):
    model.eval()
    loss_total = 0
    predict_all = np.array([], dtype=int)
    labels_all = np.array([], dtype=int)
    with torch.no_grad():
        for texts, labels in data_iter:
            outputs = model(texts)
            loss = F.cross_entropy(outputs, labels)
            loss_total += loss
            labels = labels.data.cpu().numpy()
            predic = torch.max(outputs.data, 1)[1].cpu().numpy()
            labels_all = np.append(labels_all, labels)
            predict_all = np.append(predict_all, predic)
    acc = metrics.accuracy_score(labels_all, predict_all)
    if test:
        report = metrics.classification_report(labels_all, predict_all, target_names=config.class_list, digits=4)
        confusion = metrics.confusion_matrix(labels_all, predict_all)
        return acc, loss_total / len(data_iter), report, confusion
    return acc, loss_total / len(data_iter)
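In the test branch above, target_names=config.class_list maps integer label ids to readable class names in the printed report. A minimal self-contained sketch of that call; the label arrays and class names below are made up for illustration:

import numpy as np
from sklearn import metrics

labels_all = np.array([0, 1, 2, 1, 0])
predict_all = np.array([0, 2, 2, 1, 0])
class_list = ["sports", "finance", "tech"]  # hypothetical class names

print(metrics.classification_report(labels_all, predict_all,
                                     target_names=class_list, digits=4))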
Example 9
def calc_test_result(result, test_label, test_mask):
    true_label = []
    predicted_label = []
    for i in range(result.shape[0]):
        for j in range(result.shape[1]):
            if test_mask[i, j] == 1:
                true_label.append(np.argmax(test_label[i, j]))
                predicted_label.append(np.argmax(result[i, j]))
    print("Confusion Matrix :")
    print(confusion_matrix(true_label, predicted_label))
    print("Classification Report :")
    print(classification_report(true_label, predicted_label, digits=4))
    print("Accuracy ", accuracy_score(true_label, predicted_label))
    print("Macro Classification Report :")
    print(precision_recall_fscore_support(true_label, predicted_label, average='macro'))
    print("Weighted Classification Report :")
    print(precision_recall_fscore_support(true_label, predicted_label, average='weighted'))
    # print "Normal Classification Report :"
    # print precision_recall_fscore_support(true_label, predicted_label)
Example 10
def main():
    args = parse_args()
    features_extractor = FaceFeaturesExtractor()
    embeddings, labels, class_to_idx = load_data(args, features_extractor)
    clf = train(args, embeddings, labels)
    idx_to_class = {v: k for k, v in class_to_idx.items()}
    target_names = map(lambda i: i[1], sorted(idx_to_class.items(), key=lambda i: i[0]))
    print(metrics.classification_report(labels, clf.predict(embeddings), target_names=list(target_names)))
    if not os.path.isdir(MODEL_DIR_PATH):
        os.mkdir(MODEL_DIR_PATH)
    model_path = os.path.join('model', 'face_recogniser.pkl')
    joblib.dump(FaceRecogniser(features_extractor, clf, idx_to_class), model_path)
Example 11
def learn_structure(self, samples):
    X_train, X_train_label, X_test, X_test_label = \
        self._generate_train_test_sets(samples, 0.75)
    logger.info('Training with ' + str(len(X_train)) +
                ' samples; testing with ' + str(len(X_test)) + ' samples.')
    svc_detector = self._get_best_detector(X_train, X_train_label)
    Y_test = svc_detector.predict(X_test)
    num_anomalies = Y_test[Y_test == ANOMALY].size
    logger.info('Found ' + str(num_anomalies) +
                ' anomalies in testing set')
    logger.info('Classification Report: \n{}'.
                format(classification_report(
                    X_test_label,
                    Y_test,
                    target_names=['no', 'yes'])))
    return svc_detector
Example 12
def learn_structure(self, samples):
    X_train, X_train_label, X_test, X_test_label = \
        self._generate_train_test_sets(samples, 0.75)
    logger.info('Training with ' + str(len(X_train)) +
                ' samples; testing with ' + str(len(X_test)) + ' samples.')
    dt_detector = self._get_best_detector(X_train, X_train_label)
    Y_test = dt_detector.predict(X_test)
    num_anomalies = Y_test[Y_test == ANOMALY].size
    logger.info('Found ' + str(num_anomalies) +
                ' anomalies in testing set')
    logger.info('Classification Report: \n{}'.
                format(classification_report(
                    X_test_label,
                    Y_test,
                    target_names=['no', 'yes'])))
    return dt_detector
Example 13
def learn_structure(self, samples):
    X_train, X_train_label, X_test, X_test_label = \
        self._generate_train_test_sets(samples, 0.75)
    logger.info('Training with ' + str(len(X_train)) +
                ' samples; testing with ' + str(len(X_test)) + ' samples.')
    lr_detector = self._get_best_detector(X_train, X_train_label)
    Y_test = lr_detector.predict(X_test)
    num_anomalies = Y_test[Y_test == ANOMALY].size
    logger.info('Found ' + str(num_anomalies) +
                ' anomalies in testing set')
    logger.info('Classification Report: \n{}'.
                format(classification_report(
                    X_test_label,
                    Y_test,
                    target_names=['no', 'yes'])))
    return lr_detector
Example 14
def learn_structure(self, samples):
    X_train, X_train_label, X_test, X_test_label = \
        self._generate_train_test_sets(samples, 0.75)
    logger.info('Training with ' + str(len(X_train)) +
                ' samples; testing with ' + str(len(X_test)) + ' samples.')
    rf_detector = self._get_best_detector(X_train, X_train_label)
    Y_test = rf_detector.predict(X_test)
    num_anomalies = Y_test[Y_test == ANOMALY].size
    logger.info('Found ' + str(num_anomalies) +
                ' anomalies in testing set')
    logger.info('Classification Report: \n{}'.
                format(classification_report(
                    X_test_label,
                    Y_test,
                    target_names=['no', 'yes'])))
    return rf_detector
Example 15
def test_classification_report_multiclass():
    # Test performance report
    iris = datasets.load_iris()
    y_true, y_pred, _ = make_prediction(dataset=iris, binary=False)

    # print classification report with class names
    expected_report = """\
              precision    recall  f1-score   support

      setosa       0.83      0.79      0.81        24
  versicolor       0.33      0.10      0.15        31
   virginica       0.42      0.90      0.57        20

    accuracy                           0.53        75
   macro avg       0.53      0.60      0.51        75
weighted avg       0.51      0.53      0.47        75
"""
    report = classification_report(
        y_true, y_pred, labels=np.arange(len(iris.target_names)),
        target_names=iris.target_names)
    assert_equal(report, expected_report)
Example 16
def test_classification_report_multiclass_balanced():
    y_true, y_pred = [0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]
    expected_report = """\
              precision    recall  f1-score   support

           0       0.33      0.33      0.33         3
           1       0.33      0.33      0.33         3
           2       0.33      0.33      0.33         3

    accuracy                           0.33         9
   macro avg       0.33      0.33      0.33         9
weighted avg       0.33      0.33      0.33         9
"""
    report = classification_report(y_true, y_pred)
    assert_equal(report, expected_report)
Example 17
def test_classification_report_multiclass_with_label_detection():
    iris = datasets.load_iris()
    y_true, y_pred, _ = make_prediction(dataset=iris, binary=False)

    # print classification report with label detection
    expected_report = """\
              precision    recall  f1-score   support

           0       0.83      0.79      0.81        24
           1       0.33      0.10      0.15        31
           2       0.42      0.90      0.57        20

    accuracy                           0.53        75
   macro avg       0.53      0.60      0.51        75
weighted avg       0.51      0.53      0.47        75
"""
    report = classification_report(y_true, y_pred)
    assert_equal(report, expected_report)
Example 18
def test_classification_report_multiclass_with_digits():
    # Test performance report with added digits in floating point values
    iris = datasets.load_iris()
    y_true, y_pred, _ = make_prediction(dataset=iris, binary=False)

    # print classification report with class names
    expected_report = """\
              precision    recall  f1-score   support

      setosa    0.82609   0.79167   0.80851        24
  versicolor    0.33333   0.09677   0.15000        31
   virginica    0.41860   0.90000   0.57143        20

    accuracy                        0.53333        75
   macro avg    0.52601   0.59615   0.50998        75
weighted avg    0.51375   0.53333   0.47310        75
"""
    report = classification_report(
        y_true, y_pred, labels=np.arange(len(iris.target_names)),
        target_names=iris.target_names, digits=5)
    assert_equal(report, expected_report)
Example 19
def test_classification_report_multiclass_with_unicode_label():
    y_true, y_pred, _ = make_prediction(binary=False)
    labels = np.array(["blue\xa2", "green\xa2", "red\xa2"])
    y_true = labels[y_true]
    y_pred = labels[y_pred]
    expected_report = """\
              precision    recall  f1-score   support

       blue\xa2       0.83      0.79      0.81        24
      green\xa2       0.33      0.10      0.15        31
        red\xa2       0.42      0.90      0.57        20

    accuracy                           0.53        75
   macro avg       0.53      0.60      0.51        75
weighted avg       0.51      0.53      0.47        75
"""
    report = classification_report(y_true, y_pred)
    assert_equal(report, expected_report)
Example 20
def test_classification_report_multiclass_with_long_string_label():
    y_true, y_pred, _ = make_prediction(binary=False)
    labels = np.array(["blue", "green" * 5, "red"])
    y_true = labels[y_true]
    y_pred = labels[y_pred]
    expected_report = """\
                           precision    recall  f1-score   support

                     blue       0.83      0.79      0.81        24
greengreengreengreengreen       0.33      0.10      0.15        31
                      red       0.42      0.90      0.57        20

                 accuracy                           0.53        75
                macro avg       0.53      0.60      0.51        75
             weighted avg       0.51      0.53      0.47        75
"""
    report = classification_report(y_true, y_pred)
    assert_equal(report, expected_report)
Example 21
def _fit_and_score(clf, domains, labels, train_index, test_index, repetition, data_set_id, fold_count):
    log.debug('Train index: {!s}\nTest index: {!s}'.format(train_index, test_index))
    clf_type = clf.clf_type
    clf.training(domains[train_index], labels[train_index])
    y_true, y_pred = clf.predict(domains[test_index], labels[test_index])
    if fold_count == -1:
        stats = Statistic(set_id=data_set_id,
                          id='logo_cv_{!s}_{!s}'.format(clf_type, data_set_id))
    else:
        stats = Statistic(set_id=data_set_id,
                          id='{!s}fold_cv_{!s}_rep{!s}_{!s}'.format(fold_count, clf_type, repetition, data_set_id))
    stats.add_run(y_true, y_pred, domains[test_index])
    log.verbose('Truth vs. Prediction: \n' + str(list(y_true)) + '\n' + str(list(y_pred)))
    log.debug('\n' + classification_report(y_true, y_pred, target_names=['Benign', 'Malicious']))
    log.debug('\n' + str(confusion_matrix(y_true, y_pred)))
    log.debug('Misclassifications: {!s}'.format(stats.missclassified))
    return stats, data_set_id
Example 22
def simple_evaluate(y_true, y_pred):
    """
    evaluate precision, recall, f1
    :param y_true:
    :param y_pred:
    :return: score
    """
    assert len(y_true) == len(y_pred), \
        "the count of pred label should be same with true label"
    classify_report = metrics.classification_report(y_true, y_pred)
    confusion_matrix = metrics.confusion_matrix(y_true, y_pred)
    overall_accuracy = metrics.accuracy_score(y_true, y_pred)
    acc_for_each_class = metrics.precision_score(y_true, y_pred, average=None)
    average_accuracy = np.mean(acc_for_each_class)
    score = metrics.accuracy_score(y_true, y_pred)
    print('classify_report : \n', classify_report)
    print('confusion_matrix : \n', confusion_matrix)
    print('acc_for_each_class : \n', acc_for_each_class)
    print('average_accuracy: {0:f}'.format(average_accuracy))
    print('overall_accuracy: {0:f}'.format(overall_accuracy))
    print('score: {0:f}'.format(score))
    return score
Example 23
def eval(model, test_data, test_label, thresholds=0.5, num_classes=2, pr_figure_path=None, pred_save_path=None):
    print('{0}, val mean acc:{1}'.format(model.__str__(), model.score(test_data, test_label)))
    if num_classes == 2:
        # binary classification
        label_pred_probas = model.predict_proba(test_data)[:, 1]
        label_pred = label_pred_probas > thresholds
        precision, recall, threshold = precision_recall_curve(test_label, label_pred)
        plot_pr(thresholds, precision, recall, figure_path=pr_figure_path)
    else:
        # multi
        label_pred = model.predict(test_data)
        # precision_recall_curve: multiclass format is not supported
        print(classification_report(test_label, label_pred))
    if pred_save_path:
        with open(pred_save_path, 'w', encoding='utf-8') as f:
            for i in label_pred:
                f.write(str(i) + '\n')
    return label_pred
Example 24
def skl_knn(self):
    """k: number of neighbors to use in classification
    test_data: the data/targets used to test the classifier
    stored_data: the data/targets used to classify the test_data
    """
    fifty_x, fifty_y = self.mk_dataset(50000)
    test_img = [self.data[i] for i in self.indx[60000:70000]]
    test_img1 = np.array(test_img)
    test_target = [self.target[i] for i in self.indx[60000:70000]]
    test_target1 = np.array(test_target)
    self.classifier.fit(fifty_x, fifty_y)
    y_pred = self.classifier.predict(test_img1)
    pickle.dump(self.classifier, open('knn.sav', 'wb'))
    print(classification_report(test_target1, y_pred))
    print("KNN Classifier model saved as knn.sav!")
Example 25
def bio_classification_report(y_true, y_pred):
    """
    Classification report for a list of BIOES-encoded sequences.
    It computes token-level metrics and discards 'O' labels.
    :param y_true:
    :param y_pred:
    :return:
    """
    lb = LabelBinarizer()
    y_true_combined = lb.fit_transform(y_true)
    y_pred_combined = lb.transform(y_pred)
    tagset = set(lb.classes_) - {'O'}
    tagset = sorted(tagset, key=lambda tag: tag.split('-', 1)[::-1])
    class_indices = {
        cls: idx for idx, cls in enumerate(lb.classes_)
    }
    return classification_report(
        y_true_combined,
        y_pred_combined,
        labels=[class_indices[cls] for cls in tagset],
        target_names=tagset
    )
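A small usage sketch for Example 25, with toy BIO tags made up for illustration; it assumes bio_classification_report and its imports (LabelBinarizer, classification_report) are in scope and that the tag sequences are already flattened into single lists:

y_true = ['B-PER', 'I-PER', 'O', 'B-LOC', 'O']
y_pred = ['B-PER', 'O', 'O', 'B-LOC', 'O']
print(bio_classification_report(y_true, y_pred))
# the 'O' tag is excluded from the report; rows are sorted by entity type, then prefix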
Example 26
def print_metric(self, y_true, y_pred, mode):
    if mode in ["train", "test"]:
        print(mode)
        if (self.config.data == "dailydialog"):
            print(classification_report(y_true, y_pred, labels=[1, 2, 3, 4, 5, 6], digits=4))
        else:
            print(classification_report(y_true, y_pred, digits=4))
    if (self.config.data == "dailydialog"):
        weighted_fscore = classification_report(y_true, y_pred, labels=[1, 2, 3, 4, 5, 6], output_dict=True, digits=4)["weighted avg"]["f1-score"]
    else:
        weighted_fscore = classification_report(y_true, y_pred, output_dict=True, digits=4)["weighted avg"]["f1-score"]
    return weighted_fscore
Example 27
def report(y_true: List[int], y_pred: List[int], labels: List[int],
           class_name: List[str] = None, report_name: str = "Default Name") -> Tuple:
    print("{} Result Reporting:".format(report_name))
    if class_name is None:
        class_name = ["class {}".format(i) for i in labels]
    assert len(class_name) == len(labels)
    cmatrix = confusion_matrix(y_true, y_pred, labels=labels)
    print("Sklearn Confusion Matrix:")
    print(cmatrix)
    print(classification_report(y_true, y_pred, target_names=class_name, digits=5))
    RkCC = compute_RkCC(cmatrix)
    print('Rk correlation coefficient = %.4f' % RkCC)
    return cmatrix, RkCC
Example 28
def eval(self, test_x, test_y, crf_model):
    tagger = pycrfsuite.Tagger()
    tagger.open(crf_model)
    y_pred = []
    for feat_list in test_x:
        preds = tagger.tag(feat_list)
        y_pred.append(preds)
    lb = LabelBinarizer()
    y_true_all = lb.fit_transform(list(chain.from_iterable(test_y)))
    y_pred_all = lb.transform(list(chain.from_iterable(y_pred)))
    tagset = sorted(set(lb.classes_))
    class_indices = {cls: idx for idx, cls in enumerate(lb.classes_)}
    print(classification_report(
        y_true_all,
        y_pred_all,
        labels=[class_indices[cls] for cls in tagset],
        target_names=tagset,
        digits=5
    ))
Example 29
def _evaluate_final(self, model, xy_test, batch_size, history):
    res = {}
    pred_test = None
    if 'val_acc' in history.history:
        res['val_acc'] = max(history.history['val_acc'])
        rev_ix = -1 - list(reversed(history.history['val_acc'])).index(res['val_acc'])
        res['val_loss'] = history.history['val_loss'][rev_ix]
    res['acc'] = history.history['acc'][-1]
    res['loss'] = history.history['loss'][-1]
    if len(xy_test[0]):
        from sklearn.metrics import classification_report, roc_auc_score
        # evaluate with test data
        x_test, y_test = xy_test
        pred_test = model.predict(x_test, batch_size=batch_size, verbose=0)
        test_loss, test_acc = model.evaluate(x_test, y_test, batch_size=batch_size, verbose=0)
        res['test_loss'] = test_loss
        res['test_acc'] = test_acc
        report = classification_report(y_true=np.argmax(y_test, axis=1),
                                       y_pred=np.argmax(pred_test, axis=1),
                                       target_names=self.labels,
                                       digits=4,
                                       output_dict=True)
        res['auc'] = roc_auc_score(y_test.astype(np.int), pred_test)
        for label in self.labels:
            stats = report[label]
            res[label + "-precision"] = stats['precision']
            res[label + "-recall"] = stats['recall']
            res[label + "-f1"] = stats['f1-score']
    return pred_test, res
Example 30
def perf(y_true, y_pred, y_score):
    """perf."""
    print('Accuracy: %.2f' % accuracy_score(y_true, y_pred))
    print(' AUC ROC: %.2f' % roc_auc_score(y_true, y_score))
    print('  AUC AP: %.2f' % average_precision_score(y_true, y_score))
    print()
    print('Classification Report:')
    print(classification_report(y_true, y_pred))
    print()
    plot_confusion_matrices(y_true, y_pred, size=int(len(set(y_true)) * 2.5))
    print()
    plot_aucs(y_true, y_score, size=10)