Python source code examples: sklearn.metrics.classification_report()
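
classification_report builds a per-class summary of precision, recall, F1-score and support from true and predicted labels, either as a formatted string or, with output_dict=True, as a nested dictionary. Before the collected examples, a minimal self-contained sketch (the toy labels and class names are purely illustrative):

from sklearn.metrics import classification_report

# toy ground truth and predictions, for illustration only
y_true = [0, 1, 2, 2, 1, 0]
y_pred = [0, 2, 2, 2, 1, 0]

# per-class precision, recall, f1-score and support as a formatted string
print(classification_report(y_true, y_pred, target_names=["cat", "dog", "bird"]))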

Example 1
def multi_class_classification(data_X, data_Y):
    '''
    calculate multi-class classification and return related evaluation metrics
    '''

    svc = svm.SVC(C=1, kernel='linear')
    # X_train, X_test, y_train, y_test = train_test_split(data_X, data_Y, test_size=0.4, random_state=0)
    clf = svc.fit(data_X, data_Y)  # svm
    # array = svc.coef_
    # print(array)
    predicted = cross_val_predict(clf, data_X, data_Y, cv=2)
    print("accuracy", metrics.accuracy_score(data_Y, predicted))
    print("f1 score macro", metrics.f1_score(data_Y, predicted, average='macro'))
    print("f1 score micro", metrics.f1_score(data_Y, predicted, average='micro'))
    print("precision score", metrics.precision_score(data_Y, predicted, average='macro'))
    print("recall score", metrics.recall_score(data_Y, predicted, average='macro'))
    print("hamming_loss", metrics.hamming_loss(data_Y, predicted))
    print("classification_report", metrics.classification_report(data_Y, predicted))
    # jaccard_similarity_score was renamed to jaccard_score in newer scikit-learn releases
    print("jaccard_similarity_score", metrics.jaccard_similarity_score(data_Y, predicted))
    # print("log_loss", metrics.log_loss(data_Y, predicted))
    print("zero_one_loss", metrics.zero_one_loss(data_Y, predicted))
    # print("AUC&ROC", metrics.roc_auc_score(data_Y, predicted))
    # print("matthews_corrcoef", metrics.matthews_corrcoef(data_Y, predicted))
Example 2
def evaluation_analysis(true_label, predicted):
    '''
    return all metrics results
    '''
    print("accuracy", metrics.accuracy_score(true_label, predicted))
    print("f1 score macro", metrics.f1_score(true_label, predicted, average='macro'))
    print("f1 score micro", metrics.f1_score(true_label, predicted, average='micro'))
    print("precision score", metrics.precision_score(true_label, predicted, average='macro'))
    print("recall score", metrics.recall_score(true_label, predicted, average='macro'))
    print("hamming_loss", metrics.hamming_loss(true_label, predicted))
    print("classification_report", metrics.classification_report(true_label, predicted))
    print("jaccard_similarity_score", metrics.jaccard_similarity_score(true_label, predicted))
    # note: log_loss and roc_auc_score expect probability scores rather than hard label predictions
    print("log_loss", metrics.log_loss(true_label, predicted))
    print("zero_one_loss", metrics.zero_one_loss(true_label, predicted))
    print("AUC&ROC", metrics.roc_auc_score(true_label, predicted))
    print("matthews_corrcoef", metrics.matthews_corrcoef(true_label, predicted))
Example 3
def train_and_evaluate(clf, X_train, X_test, y_train, y_test):
    clf.fit(X_train, y_train)
    print ("Accuracy on training set:")
    print (clf.score(X_train, y_train))
    print ("Accuracy on testing set:")
    print (clf.score(X_test, y_test))
    y_pred = clf.predict(X_test)
    print ("Classification Report:")
    print (metrics.classification_report(y_test, y_pred))
    print ("Confusion Matrix:")
    print (metrics.confusion_matrix(y_test, y_pred))


# ===============================================================================
# from FaceDetectPredict.py
# =============================================================================== 
Example 4
def eval_batch_col(classifier, val_dataset, batch_size, device):

    val_batch_generator = datasets.generate_batches_col(val_dataset,
                                               batch_size=batch_size,
                                               shuffle=False,
                                               drop_last=True,
                                               device=device)

    y_pred, y_true = [], []
    for batch_idx, batch_dict in enumerate(val_batch_generator):
        y = batch_dict["label"]
        X = batch_dict["data"]

        # Pred
        pred = classifier(X)
        y_pred.extend(pred.cpu().numpy())
        y_true.extend(y.cpu().numpy())

    
    report = classification_report(y_true, np.argmax(y_pred, axis=1), output_dict=True)
    return report

# evaluate and return prediction & true labels of a table batch 
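
Example 4 returns the report produced with output_dict=True, which is a nested dictionary rather than a string. A minimal sketch of how such a dict is typically consumed (the keys shown are the standard ones emitted by classification_report; depending on the labels argument, a 'micro avg' entry can appear instead of 'accuracy'):

# report is assumed to be the dict returned by eval_batch_col above
macro_f1 = report["macro avg"]["f1-score"]
accuracy = report["accuracy"]                      # a plain float, not a nested dict
per_class_recall = {label: stats["recall"]
                    for label, stats in report.items()
                    if label not in ("accuracy", "macro avg", "weighted avg")}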
Example 5
def score_binary_classification(y, y_hat, report=True):
    """
    Create binary classification output
    :param y: true value
    :param y_hat: class 1 probabilities
    :param report:
    :return:
    """
    y_hat_class = [1 if x >= 0.5 else 0 for x in y_hat]  # convert probability to class for classification report

    report_string = "---Binary Classification Score--- \n"
    report_string += classification_report(y, y_hat_class)
    score = roc_auc_score(y, y_hat)
    report_string += "\nAUC = " + str(score)

    if report:
        print(report_string)

    return score, report_string 
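
A hypothetical usage sketch (clf, X_val and y_val are placeholder names, not part of the original source): the y_hat argument is expected to hold class-1 probabilities, so the output of predict_proba is passed rather than hard predictions; the function thresholds at 0.5 for the classification report and computes AUC on the raw scores.

probs = clf.predict_proba(X_val)[:, 1]   # probability of the positive class
auc, text = score_binary_classification(y_val, probs, report=True)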
Example 6
def score_multiclass_classification(y, y_hat, report=True):
    """
    Create multiclass classification score
    :param y:
    :param y_hat:
    :return:
    """
    report_string = "---Multiclass Classification Score--- \n"
    report_string += classification_report(y, y_hat)
    score = accuracy_score(y, y_hat)
    report_string += "\nAccuracy = " + str(score)

    if report:
        print(report_string)

    return score, report_string 
Example 7
def print_evaluation(model,data,ls,log=None):
    features,actual = data
    predictions = predict(model, features, 500).data.numpy().reshape(-1).tolist()

    labels = [ls.idx[i] for i, _ in enumerate(ls.idx)]

    actual = [labels[i] for i in actual]
    predictions = [labels[i] for i in predictions]

    print(accuracy_score(actual, predictions))
    print(classification_report(actual, predictions))
    print(confusion_matrix(actual, predictions))

    data = zip(actual,predictions)
    if log is not None:
        f = open(log, "w+")
        for a,p in data:
            f.write(json.dumps({"actual": a, "predicted": p}) + "\n")
        f.close() 
Example 8
def evaluate(config, model, data_iter, test=False):
    model.eval()
    loss_total = 0
    predict_all = np.array([], dtype=int)
    labels_all = np.array([], dtype=int)
    with torch.no_grad():
        for texts, labels in data_iter:
            outputs = model(texts)
            loss = F.cross_entropy(outputs, labels)
            loss_total += loss
            labels = labels.data.cpu().numpy()
            predic = torch.max(outputs.data, 1)[1].cpu().numpy()
            labels_all = np.append(labels_all, labels)
            predict_all = np.append(predict_all, predic)

    acc = metrics.accuracy_score(labels_all, predict_all)
    if test:
        report = metrics.classification_report(labels_all, predict_all, target_names=config.class_list, digits=4)
        confusion = metrics.confusion_matrix(labels_all, predict_all)
        return acc, loss_total / len(data_iter), report, confusion
    return acc, loss_total / len(data_iter) 
Example 9
def calc_test_result(result, test_label, test_mask):

  true_label=[]
  predicted_label=[]

  for i in range(result.shape[0]):
    for j in range(result.shape[1]):
      if test_mask[i,j]==1:
        true_label.append(np.argmax(test_label[i,j] ))
        predicted_label.append(np.argmax(result[i,j] ))
    
  print("Confusion Matrix :")
  print(confusion_matrix(true_label, predicted_label))
  print("Classification Report :")
  print(classification_report(true_label, predicted_label,digits=4))
  print("Accuracy ", accuracy_score(true_label, predicted_label))
  print("Macro Classification Report :")
  print(precision_recall_fscore_support(true_label, predicted_label,average='macro'))
  print("Weighted Classification Report :")
  print(precision_recall_fscore_support(true_label, predicted_label,average='weighted'))
  # print("Normal Classification Report :")
  # print(precision_recall_fscore_support(true_label, predicted_label))
Example 10
def main():
    args = parse_args()

    features_extractor = FaceFeaturesExtractor()
    embeddings, labels, class_to_idx = load_data(args, features_extractor)
    clf = train(args, embeddings, labels)

    idx_to_class = {v: k for k, v in class_to_idx.items()}

    target_names = map(lambda i: i[1], sorted(idx_to_class.items(), key=lambda i: i[0]))
    print(metrics.classification_report(labels, clf.predict(embeddings), target_names=list(target_names)))

    if not os.path.isdir(MODEL_DIR_PATH):
        os.mkdir(MODEL_DIR_PATH)
    model_path = os.path.join('model', 'face_recogniser.pkl')
    joblib.dump(FaceRecogniser(features_extractor, clf, idx_to_class), model_path) 
Example 11
def learn_structure(self, samples):
        X_train, X_train_label, X_test, X_test_label = \
            self._generate_train_test_sets(samples, 0.75)
        logger.info('Training with ' + str(len(X_train)) +
                    ' samples; testing with ' + str(len(X_test)) + ' samples.')

        svc_detector = self._get_best_detector(X_train, X_train_label)
        Y_test = svc_detector.predict(X_test)

        num_anomalies = Y_test[Y_test == ANOMALY].size
        logger.info('Found ' + str(num_anomalies) +
                    ' anomalies in testing set')

        logger.info('Classification Report: \n{}'.
                    format(classification_report(
                        X_test_label,
                        Y_test,
                        target_names=['no', 'yes'])))
        return svc_detector 
Example 12
def learn_structure(self, samples):
        X_train, X_train_label, X_test, X_test_label = \
            self._generate_train_test_sets(samples, 0.75)
        logger.info('Training with ' + str(len(X_train)) +
                    ' samples; testing with ' + str(len(X_test)) + ' samples.')

        dt_detector = self._get_best_detector(X_train, X_train_label)
        Y_test = dt_detector.predict(X_test)

        num_anomalies = Y_test[Y_test == ANOMALY].size
        logger.info('Found ' + str(num_anomalies) +
                    ' anomalies in testing set')

        logger.info('Classification Report: \n{}'.
                    format(classification_report(
                        X_test_label,
                        Y_test,
                        target_names=['no', 'yes'])))
        return dt_detector 
Example 13
def learn_structure(self, samples):
        X_train, X_train_label, X_test, X_test_label = \
            self._generate_train_test_sets(samples, 0.75)
        logger.info('Training with ' + str(len(X_train)) +
                    ' samples; testing with ' + str(len(X_test)) + ' samples.')

        lr_detector = self._get_best_detector(X_train, X_train_label)
        Y_test = lr_detector.predict(X_test)

        num_anomalies = Y_test[Y_test == ANOMALY].size
        logger.info('Found ' + str(num_anomalies) +
                    ' anomalies in testing set')

        logger.info('Classification Report: \n{}'.
                    format(classification_report(
                        X_test_label,
                        Y_test,
                        target_names=['no', 'yes'])))
        return lr_detector 
Example 14
def learn_structure(self, samples):
        X_train, X_train_label, X_test, X_test_label = \
            self._generate_train_test_sets(samples, 0.75)
        logger.info('Training with ' + str(len(X_train)) +
                    ' samples; testing with ' + str(len(X_test)) + ' samples.')

        rf_detector = self._get_best_detector(X_train, X_train_label)
        Y_test = rf_detector.predict(X_test)

        num_anomalies = Y_test[Y_test == ANOMALY].size
        logger.info('Found ' + str(num_anomalies) +
                    ' anomalies in testing set')

        logger.info('Classification Report: \n{}'.
                    format(classification_report(
                        X_test_label,
                        Y_test,
                        target_names=['no', 'yes'])))
        return rf_detector 
Example 15
def test_classification_report_multiclass():
    # Test performance report
    iris = datasets.load_iris()
    y_true, y_pred, _ = make_prediction(dataset=iris, binary=False)

    # print classification report with class names
    expected_report = """\
              precision    recall  f1-score   support

      setosa       0.83      0.79      0.81        24
  versicolor       0.33      0.10      0.15        31
   virginica       0.42      0.90      0.57        20

    accuracy                           0.53        75
   macro avg       0.53      0.60      0.51        75
weighted avg       0.51      0.53      0.47        75
"""
    report = classification_report(
        y_true, y_pred, labels=np.arange(len(iris.target_names)),
        target_names=iris.target_names)
    assert_equal(report, expected_report) 
Example 16
def test_classification_report_multiclass_balanced():
    y_true, y_pred = [0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]

    expected_report = """\
              precision    recall  f1-score   support

           0       0.33      0.33      0.33         3
           1       0.33      0.33      0.33         3
           2       0.33      0.33      0.33         3

    accuracy                           0.33         9
   macro avg       0.33      0.33      0.33         9
weighted avg       0.33      0.33      0.33         9
"""
    report = classification_report(y_true, y_pred)
    assert_equal(report, expected_report) 
Example 17
def test_classification_report_multiclass_with_label_detection():
    iris = datasets.load_iris()
    y_true, y_pred, _ = make_prediction(dataset=iris, binary=False)

    # print classification report with label detection
    expected_report = """\
              precision    recall  f1-score   support

           0       0.83      0.79      0.81        24
           1       0.33      0.10      0.15        31
           2       0.42      0.90      0.57        20

    accuracy                           0.53        75
   macro avg       0.53      0.60      0.51        75
weighted avg       0.51      0.53      0.47        75
"""
    report = classification_report(y_true, y_pred)
    assert_equal(report, expected_report) 
Example 18
def test_classification_report_multiclass_with_digits():
    # Test performance report with added digits in floating point values
    iris = datasets.load_iris()
    y_true, y_pred, _ = make_prediction(dataset=iris, binary=False)

    # print classification report with class names
    expected_report = """\
              precision    recall  f1-score   support

      setosa    0.82609   0.79167   0.80851        24
  versicolor    0.33333   0.09677   0.15000        31
   virginica    0.41860   0.90000   0.57143        20

    accuracy                        0.53333        75
   macro avg    0.52601   0.59615   0.50998        75
weighted avg    0.51375   0.53333   0.47310        75
"""
    report = classification_report(
        y_true, y_pred, labels=np.arange(len(iris.target_names)),
        target_names=iris.target_names, digits=5)
    assert_equal(report, expected_report) 
Example 19
def test_classification_report_multiclass_with_unicode_label():
    y_true, y_pred, _ = make_prediction(binary=False)

    labels = np.array(["blue\xa2", "green\xa2", "red\xa2"])
    y_true = labels[y_true]
    y_pred = labels[y_pred]

    expected_report = """\
              precision    recall  f1-score   support

       blue\xa2       0.83      0.79      0.81        24
      green\xa2       0.33      0.10      0.15        31
        red\xa2       0.42      0.90      0.57        20

    accuracy                           0.53        75
   macro avg       0.53      0.60      0.51        75
weighted avg       0.51      0.53      0.47        75
"""
    report = classification_report(y_true, y_pred)
    assert_equal(report, expected_report) 
Example 20
def test_classification_report_multiclass_with_long_string_label():
    y_true, y_pred, _ = make_prediction(binary=False)

    labels = np.array(["blue", "green" * 5, "red"])
    y_true = labels[y_true]
    y_pred = labels[y_pred]

    expected_report = """\
                           precision    recall  f1-score   support

                     blue       0.83      0.79      0.81        24
greengreengreengreengreen       0.33      0.10      0.15        31
                      red       0.42      0.90      0.57        20

                 accuracy                           0.53        75
                macro avg       0.53      0.60      0.51        75
             weighted avg       0.51      0.53      0.47        75
"""

    report = classification_report(y_true, y_pred)
    assert_equal(report, expected_report) 
Example 21
def _fit_and_score(clf, domains, labels, train_index, test_index, repetition, data_set_id, fold_count):
    log.debug('Train index: {!s}\nTest index: {!s}'.format(train_index, test_index))

    clf_type = clf.clf_type

    clf.training(domains[train_index], labels[train_index])
    y_true, y_pred = clf.predict(domains[test_index], labels[test_index])
    if fold_count == -1:
        stats = Statistic(set_id=data_set_id,
                          id='logo_cv_{!s}_{!s}'.format(clf_type, data_set_id))
    else:
        stats = Statistic(set_id=data_set_id,
                          id='{!s}fold_cv_{!s}_rep{!s}_{!s}'.format(fold_count, clf_type, repetition, data_set_id))

    stats.add_run(y_true, y_pred, domains[test_index])
    log.verbose('Truth vs. Prediction: \n' + str(list(y_true)) + '\n' + str(list(y_pred)))
    log.debug('\n' + classification_report(y_true, y_pred, target_names=['Benign', 'Malicious']))
    log.debug('\n' + str(confusion_matrix(y_true, y_pred)))
    log.debug('Misclassifications: {!s}'.format(stats.missclassified))

    return stats, data_set_id 
Example 22
def simple_evaluate(y_true, y_pred):
    """
    evaluate precision, recall, f1
    :param y_true:
    :param y_pred:
    :return:score
    """
    assert len(y_true) == len(y_pred), \
        "the number of predicted labels must match the number of true labels"

    classify_report = metrics.classification_report(y_true, y_pred)
    confusion_matrix = metrics.confusion_matrix(y_true, y_pred)
    overall_accuracy = metrics.accuracy_score(y_true, y_pred)
    acc_for_each_class = metrics.precision_score(y_true, y_pred, average=None)
    average_accuracy = np.mean(acc_for_each_class)
    score = metrics.accuracy_score(y_true, y_pred)
    print('classify_report : \n', classify_report)
    print('confusion_matrix : \n', confusion_matrix)
    print('acc_for_each_class : \n', acc_for_each_class)
    print('average_accuracy: {0:f}'.format(average_accuracy))
    print('overall_accuracy: {0:f}'.format(overall_accuracy))
    print('score: {0:f}'.format(score))
    return score 
Example 23
def eval(model, test_data, test_label, thresholds=0.5, num_classes=2, pr_figure_path=None, pred_save_path=None):
    print('{0}, val mean acc:{1}'.format(model.__str__(), model.score(test_data, test_label)))
    if num_classes == 2:
        # binary classification
        label_pred_probas = model.predict_proba(test_data)[:, 1]
        label_pred = label_pred_probas > thresholds
        precision, recall, threshold = precision_recall_curve(test_label, label_pred)
        plot_pr(thresholds, precision, recall, figure_path=pr_figure_path)
    else:
        # multi
        label_pred = model.predict(test_data)
        # precision_recall_curve: multiclass format is not supported
    print(classification_report(test_label, label_pred))
    if pred_save_path:
        with open(pred_save_path, 'w', encoding='utf-8') as f:
            for i in label_pred:
                f.write(str(i) + '\n')
    return label_pred 
Example 24
def skl_knn(self):
        """k: number of neighbors to use in classification
        test_data: the data/targets used to test the classifier
        stored_data: the data/targets used to classify the test_data
        """
        fifty_x, fifty_y = self.mk_dataset(50000)
        test_img = [self.data[i] for i in self.indx[60000:70000]]
        test_img1 = np.array(test_img)
        test_target = [self.target[i] for i in self.indx[60000:70000]]
        test_target1 = np.array(test_target)
        self.classifier.fit(fifty_x, fifty_y)

        y_pred = self.classifier.predict(test_img1)
        pickle.dump(self.classifier, open('knn.sav', 'wb'))
        print(classification_report(test_target1, y_pred))
        print("KNN Classifier model saved as knn.sav!") 
Example 25
def bio_classification_report(y_true, y_pred):
    """
    Classification report for a list of BIOSE-encoded sequences.
    It computes token-level metrics and discards 'O' labels.
    :param y_true:
    :param y_pred:
    :return:
    """
    lb = LabelBinarizer()
    y_true_combined = lb.fit_transform(y_true)
    y_pred_combined = lb.transform(y_pred)

    tagset = set(lb.classes_) - {'O'}  # discard the 'O' (outside) tag, as the docstring states
    tagset = sorted(tagset, key=lambda tag: tag.split('-', 1)[::-1])
    class_indices = {
        cls: idx for idx, cls in enumerate(lb.classes_)
    }

    return classification_report(
        y_true_combined,
        y_pred_combined,
        labels=[class_indices[cls] for cls in tagset],
        target_names=tagset
    ) 
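
A hypothetical usage sketch (the tag lists are illustrative, not from the original source), assuming the tags are passed as flat token-level lists, which is what the LabelBinarizer calls above expect: binarizing the tags and passing labels/target_names lets the standard classification_report produce per-tag precision, recall and F1 while the 'O' (outside) tag stays out of the averages.

y_true_tags = ["B-PER", "I-PER", "O", "B-LOC", "O"]
y_pred_tags = ["B-PER", "O",     "O", "B-LOC", "O"]
print(bio_classification_report(y_true_tags, y_pred_tags))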
Example 26
def print_metric(self, y_true, y_pred, mode):

        if mode in ["train", "test"]:
            print(mode)
            if (self.config.data == "dailydialog"):
                print(classification_report(y_true, y_pred, labels=[1,2,3,4,5,6], digits=4))
            else:
                print(classification_report(y_true, y_pred, digits=4))
        

        if (self.config.data == "dailydialog"):
            weighted_fscore = classification_report(y_true, y_pred, labels=[1,2,3,4,5,6], output_dict=True, digits=4)["weighted avg"]["f1-score"]
        else:
            weighted_fscore = classification_report(y_true, y_pred, output_dict=True, digits=4)["weighted avg"]["f1-score"]

        return weighted_fscore 
Example 27
def report(y_true: List[int], y_pred: List[int], labels: List[int],
           class_name: List[str] = None, report_name: str = "Default Name") -> Tuple:
    print("{} Result Reporting:".format(report_name))

    if class_name is None:
        class_name = ["class {}".format(i) for i in labels]

    assert len(class_name) == len(labels)

    cmatrix = confusion_matrix(y_true, y_pred, labels=labels)

    print("Sklearn Confusion Matrix:")
    print(cmatrix)
    print(classification_report(y_true, y_pred, target_names=class_name, digits=5))

    RkCC = compute_RkCC(cmatrix)
    print('Rk correlation coefficient = %.4f' % RkCC)

    return cmatrix, RkCC 
Example 28
def eval(self, test_x, test_y, crf_model):
        tagger = pycrfsuite.Tagger()
        tagger.open(crf_model)

        y_pred = []
        for feat_list in test_x:
            preds = tagger.tag(feat_list)
            y_pred.append(preds)

        lb = LabelBinarizer()
        y_true_all = lb.fit_transform(list(chain.from_iterable(test_y)))
        y_pred_all = lb.transform(list(chain.from_iterable(y_pred)))

        tagset = sorted(set(lb.classes_))
        class_indices = {cls: idx for idx, cls in enumerate(lb.classes_)}

        print(classification_report(
            y_true_all,
            y_pred_all,
            labels=[class_indices[cls] for cls in tagset],
            target_names=tagset,
            digits=5
        )) 
Example 29
def _evaluate_final(self, model, xy_test, batch_size, history):
        res = {}
        pred_test = None

        if 'val_acc' in history.history:
            res['val_acc'] = max(history.history['val_acc'])
            rev_ix = -1 - list(reversed(history.history['val_acc'])).index(res['val_acc'])
            res['val_loss'] = history.history['val_loss'][rev_ix]

        res['acc'] = history.history['acc'][-1]
        res['loss'] = history.history['loss'][-1]

        if len(xy_test[0]):
            from sklearn.metrics import classification_report, roc_auc_score
            # evaluate with test data
            x_test, y_test = xy_test
            pred_test = model.predict(x_test, batch_size=batch_size, verbose=0)
            test_loss, test_acc = model.evaluate(x_test, y_test, batch_size=batch_size, verbose=0)
            res['test_loss'] = test_loss
            res['test_acc'] = test_acc

            report = classification_report(y_true = np.argmax(y_test, axis=1),
                                           y_pred = np.argmax(pred_test, axis=1),
                                           target_names=self.labels,
                                           digits=4,
                                           output_dict=True)

            res['auc'] = roc_auc_score(y_test.astype(int), pred_test)

            for label in self.labels:
                stats = report[label]
                res[label+"-precision"] = stats['precision']
                res[label+"-recall"] = stats['recall']
                res[label+"-f1"] = stats['f1-score']

        return pred_test, res 
Example 30
def perf(y_true, y_pred, y_score):
    """perf."""
    print('Accuracy: %.2f' % accuracy_score(y_true, y_pred))
    print(' AUC ROC: %.2f' % roc_auc_score(y_true, y_score))
    print('  AUC AP: %.2f' % average_precision_score(y_true, y_score))
    print()
    print('Classification Report:')
    print(classification_report(y_true, y_pred))
    print()
    plot_confusion_matrices(y_true, y_pred, size=int(len(set(y_true)) * 2.5))
    print()
    plot_aucs(y_true, y_score, size=10)