Python source code examples: sklearn.metrics.precision_recall_fscore_support()
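
The examples below illustrate typical uses of this function. As a quick orientation (this sketch is not taken from any of the projects below; the toy labels are made up), precision_recall_fscore_support returns four values: per-class arrays when average=None, and scalar scores (with support set to None) when an averaging mode is given.

import numpy as np
from sklearn.metrics import precision_recall_fscore_support

# Toy labels, for illustration only
y_true = np.array([0, 1, 2, 0, 1, 2])
y_pred = np.array([0, 2, 1, 0, 0, 1])

# average=None: four arrays (precision, recall, F-beta, support), one entry per class
p, r, f, s = precision_recall_fscore_support(y_true, y_pred, average=None)

# With an averaging mode, the first three values are scalars and support is None
p_macro, r_macro, f_macro, _ = precision_recall_fscore_support(y_true, y_pred, average='macro')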

Example 1
def accuracy(y_true, y_pred):
    # Build the confusion matrix from one-hot encoded labels
    y = np.zeros(len(y_true))
    y_ = np.zeros(len(y_true))
    for i in range(len(y_true)):
        y[i] = np.argmax(y_true[i, :])
        y_[i] = np.argmax(y_pred[i, :])
    cnf_mat = confusion_matrix(y, y_)

    # Acc = 1.0*(cnf_mat[1][1]+cnf_mat[0][0])/len(y_true)
    # Sens = 1.0*cnf_mat[1][1]/(cnf_mat[1][1]+cnf_mat[1][0])
    # Spec = 1.0*cnf_mat[0][0]/(cnf_mat[0][0]+cnf_mat[0][1])

    # # Plot the ROC curve
    # fpr, tpr, thresholds = roc_curve(y_true[:, 0], y_pred[:, 0])
    # Auc = auc(fpr, tpr)

    # Compute multi-class evaluation metrics
    Sens = recall_score(y, y_, average='macro')
    Prec = precision_score(y, y_, average='macro')
    F1 = f1_score(y, y_, average='weighted')
    Support = precision_recall_fscore_support(y, y_, beta=0.5, average=None)
    return Sens, Prec, F1, cnf_mat
Example 2
def calc_test_result(result, test_label, test_mask):

  true_label=[]
  predicted_label=[]

  for i in range(result.shape[0]):
    for j in range(result.shape[1]):
      if test_mask[i,j]==1:
        true_label.append(np.argmax(test_label[i,j] ))
        predicted_label.append(np.argmax(result[i,j] ))
    
  print("Confusion Matrix :")
  print(confusion_matrix(true_label, predicted_label))
  print("Classification Report :")
  print(classification_report(true_label, predicted_label,digits=4))
  print("Accuracy ", accuracy_score(true_label, predicted_label))
  print("Macro Classification Report :")
  print(precision_recall_fscore_support(true_label, predicted_label,average='macro'))
  print("Weighted Classification Report :")
  print(precision_recall_fscore_support(true_label, predicted_label,average='weighted'))
  #print "Normal Classification Report :"
  #print precision_recall_fscore_support(true_label, predicted_label) 
Example 3
def test_precision_recall_f1_score_binary_averaged():
    y_true = np.array([0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1])
    y_pred = np.array([1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1])

    # compute scores with default labels introspection
    ps, rs, fs, _ = precision_recall_fscore_support(y_true, y_pred,
                                                    average=None)
    p, r, f, _ = precision_recall_fscore_support(y_true, y_pred,
                                                 average='macro')
    assert_equal(p, np.mean(ps))
    assert_equal(r, np.mean(rs))
    assert_equal(f, np.mean(fs))
    p, r, f, _ = precision_recall_fscore_support(y_true, y_pred,
                                                 average='weighted')
    support = np.bincount(y_true)
    assert_equal(p, np.average(ps, weights=support))
    assert_equal(r, np.average(rs, weights=support))
    assert_equal(f, np.average(fs, weights=support)) 
Example 4
def test_precision_recall_f1_no_labels(beta, average):
    y_true = np.zeros((20, 3))
    y_pred = np.zeros_like(y_true)

    p, r, f, s = assert_warns(UndefinedMetricWarning,
                              precision_recall_fscore_support,
                              y_true, y_pred, average=average,
                              beta=beta)
    assert_almost_equal(p, 0)
    assert_almost_equal(r, 0)
    assert_almost_equal(f, 0)
    assert_equal(s, None)

    fbeta = assert_warns(UndefinedMetricWarning, fbeta_score,
                         y_true, y_pred,
                         beta=beta, average=average)
    assert_almost_equal(fbeta, 0) 
Example 5
def test_precision_recall_f1_no_labels_average_none():
    y_true = np.zeros((20, 3))
    y_pred = np.zeros_like(y_true)

    beta = 1

    # tp = [0, 0, 0]
    # fn = [0, 0, 0]
    # fp = [0, 0, 0]
    # support = [0, 0, 0]
    # |y_hat_i inter y_i | = [0, 0, 0]
    # |y_i| = [0, 0, 0]
    # |y_hat_i| = [0, 0, 0]

    p, r, f, s = assert_warns(UndefinedMetricWarning,
                              precision_recall_fscore_support,
                              y_true, y_pred, average=None, beta=beta)
    assert_array_almost_equal(p, [0, 0, 0], 2)
    assert_array_almost_equal(r, [0, 0, 0], 2)
    assert_array_almost_equal(f, [0, 0, 0], 2)
    assert_array_almost_equal(s, [0, 0, 0], 2)

    fbeta = assert_warns(UndefinedMetricWarning, fbeta_score,
                         y_true, y_pred, beta=beta, average=None)
    assert_array_almost_equal(fbeta, [0, 0, 0], 2) 
Example 6
def test_fbeta_multiclass_with_weighted_average(self, device: str):
        self.predictions = self.predictions.to(device)
        self.targets = self.targets.to(device)

        labels = [0, 1]
        fbeta = FBetaMeasure(average="weighted", labels=labels)
        fbeta(self.predictions, self.targets)
        metric = fbeta.get_metric()
        precisions = metric["precision"]
        recalls = metric["recall"]
        fscores = metric["fscore"]

        weighted_precision, weighted_recall, weighted_fscore, _ = precision_recall_fscore_support(
            self.targets.cpu().numpy(),
            self.predictions.argmax(dim=1).cpu().numpy(),
            labels=labels,
            average="weighted",
        )

        # check value
        assert_allclose(precisions, weighted_precision)
        assert_allclose(recalls, weighted_recall)
        assert_allclose(fscores, weighted_fscore) 
Example 7
def print_result(fold, y, y_predicted, id_class_mapping):
    """ print result matrix """

    n_classes = len(np.unique(y))

    p, r, f, s = precision_recall_fscore_support(y, y_predicted, labels=None, pos_label=1, average=None)
    a = [accuracy_score(y[y == c], y_predicted[y == c]) for c in range(n_classes)]

    # count occurrences of classes
    count = Counter(y)

    print("\n")
    if fold is not None:
        print("Results on fold %d" % fold)
    print("\n")
    print("%30s  |  %s  |  %5s  |  %4s  |  %4s  |   %4s   |" % ("LABEL", "CNT", "ACC ", "PR ", "RE ", "F1 "))
    print('-' * 70)
    for c in range(n_classes):
        print("%30s  |  %03d  |  %0.3f  |  %.2f  |  %.2f  |  %.3f   |" % (id_class_mapping[c], count[c], a[c], p[c], r[c], f[c]))
    print('-' * 70)
    print("%30s  |  %03d  |  %0.3f  |  %.2f  |  %.2f  |  %.3f   |" % ('average', len(y), np.mean(a), np.mean(p), np.mean(r), np.mean(f)))
    print('=' * 70)
    print("Overall Accuracy: %.3f %%" % (100.0 * accuracy_score(y, y_predicted)))
    print('=' * 70) 
Example 8
def _update_onco_metrics(self, y_true, y_pred, prob):
        self.onco_gene_pred = pd.Series(y_pred, self.y.index)
        self.onco_gene_score = pd.Series(prob, self.y.index)

        # compute metrics for classification
        self.onco_gene_count[self.num_pred] = sum(y_pred)
        prec, recall, fscore, support = metrics.precision_recall_fscore_support(y_true, y_pred)
        self.onco_precision[self.num_pred] = prec[self.onco_num]
        self.onco_recall[self.num_pred] = recall[self.onco_num]
        self.onco_f1_score[self.num_pred] = fscore[self.onco_num]
        self.logger.debug('Onco Iter %d: Precision=%s, Recall=%s, f1_score=%s' % (
                          self.num_pred + 1, str(prec), str(recall), str(fscore)))

        # compute ROC curve metrics
        fpr, tpr, thresholds = metrics.roc_curve(y_true, prob)
        self.onco_tpr_array[self.num_pred, :] = interp(self.onco_fpr_array, fpr, tpr)
        #self.onco_mean_tpr[0] = 0.0

        # compute Precision-Recall curve metrics
        p, r, thresh = metrics.precision_recall_curve(y_true, prob)
        p, r, thresh = p[::-1], r[::-1], thresh[::-1]  # reverse order of results
        thresh = np.insert(thresh, 0, 1.0)
        self.onco_precision_array[self.num_pred, :] = interp(self.onco_recall_array, r, p)
        self.onco_threshold_array[self.num_pred, :] = interp(self.onco_recall_array, r, thresh) 
Example 9
def _update_tsg_metrics(self, y_true, y_pred, prob):
        self.tsg_gene_pred = pd.Series(y_pred, self.y.index)
        self.tsg_gene_score = pd.Series(prob, self.y.index)

        # compute metrics for classification
        self.tsg_gene_count[self.num_pred] = sum(y_pred)
        prec, recall, fscore, support = metrics.precision_recall_fscore_support(y_true, y_pred)
        tsg_col = 1  # column for metrics relate to tsg
        self.tsg_precision[self.num_pred] = prec[tsg_col]
        self.tsg_recall[self.num_pred] = recall[tsg_col]
        self.tsg_f1_score[self.num_pred] = fscore[tsg_col]
        self.logger.debug('Tsg Iter %d: Precision=%s, Recall=%s, f1_score=%s' % (
                          self.num_pred + 1, str(prec), str(recall), str(fscore)))

        # compute ROC curve metrics
        fpr, tpr, thresholds = metrics.roc_curve(y_true, prob)
        self.tsg_tpr_array[self.num_pred, :] = interp(self.tsg_fpr_array, fpr, tpr)
        #self.tsg_tpr_array[0] = 0.0

        # compute Precision-Recall curve metrics
        p, r, thresh = metrics.precision_recall_curve(y_true, prob)
        p, r, thresh = p[::-1], r[::-1], thresh[::-1]  # reverse order of results
        self.tsg_precision_array[self.num_pred, :] = interp(self.tsg_recall_array, r, p) 
Example 10
def metrics(y_pred, y_true):
    """ Calculate evaluation metrics for precision, recall, and f1.

    Arguments
    ---------
        y_pred: ndarray, the predicted result list
        y_true: ndarray, the ground truth label list

    Returns
    -------
        precision: float, precision value
        recall: float, recall value
        f1: float, f1 measure value
    """
    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='binary')
    return precision, recall, f1 
Example 11
def validation(classifier, data, y_data, y_target, class_names, title):
    #kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
    #cv = kfold
    t = 'Confusion matrix: '+str(title)
    x = np.transpose(data)
    if classifier is None:
        print("No accuracy to be computed")
    else:
        accuracy = model_selection.cross_val_score(classifier, x, y_target, scoring='accuracy')
        print("Accuracy: "+ str(accuracy))
    #precision = model_selection.cross_val_score(self.classifier, x, target, scoring='precision')
    #precision_score(y_true, y_pred, average='macro')  
    #recall = model_selection.cross_val_score(self.classifier, x, target, scoring='recall')
    precision, recall, fscore, m = precision_recall_fscore_support(y_target, y_data, average='macro')
    cnf_matrix = confusion_matrix(y_target, y_data)
    print("Precision: " +str(precision) +", Recall:" +str(recall) + ", f-score:" +str(fscore))

    np.set_printoptions(precision=2)
    # Plot non-normalized confusion matrix
    plt.figure()
    plot_confusion_matrix(cnf_matrix, classes=class_names, title=t)
    print ("... finishing matrix plot")
    plt.show() 
Example 12
def evaluate(model, data_iterator, num_steps, metric_labels):
    """Evaluate the model on `num_steps` batches."""
    # set model to evaluation mode
    model.eval()

    output_labels = list()
    target_labels = list()

    # compute metrics over the dataset
    for _ in range(num_steps):
        # fetch the next evaluation batch
        batch_data, batch_labels = next(data_iterator)
        
        # compute model output
        batch_output = model(batch_data)  # batch_size x num_labels
        batch_output_labels = torch.max(batch_output, dim=1)[1]
        output_labels.extend(batch_output_labels.data.cpu().numpy().tolist())
        target_labels.extend(batch_labels.data.cpu().numpy().tolist())

    # Calculate precision, recall and F1 for all relation categories
    p_r_f1_s = precision_recall_fscore_support(target_labels, output_labels, labels=metric_labels, average='micro')
    p_r_f1 = {'precision': p_r_f1_s[0] * 100,
              'recall': p_r_f1_s[1] * 100,
              'f1': p_r_f1_s[2] * 100}
    return p_r_f1 
Example 13
def summary_util(self, type):
        if type == "test":
            Y_hat = self.model.predict(self.X_test)
            Y = self.Y_test
        elif type == "train":
            Y_hat = self.model.predict(self.X_train) 
            Y = self.Y_train
        elif type == "val":
            Y_hat = self.model.predict(self.X_val)
            Y = self.Y_val
        elif type == "forecast":
            Y_hat = self.model.predict(self.X_forecast) 
            Y = self.Y_forecast

        Y_pred = Y_hat > 0.5

        precision, recall, F1, _ = precision_recall_fscore_support(Y, Y_pred)
        out = dict()
        out['precision'] = precision[1]
        out['recall'] = recall[1]
        out['F1'] = F1[1]

        return out 
Example 14
def boot_human(i, sample_size=sample_size):
    np.random.seed(seed=i)
    random_pids = np.random.choice(pt_list_unique_sub, size=sample_size, replace=True)
    test = np.array([p_2_id_sub[pid] for pid in random_pids])
    boot_list = []
    for ids in test:
        size = len(ids)
        boot_list.append(np.random.choice(ids))    
    y_pred_sub = human_authored[boot_list, :]
    y_true_sub = subset_y[boot_list, :]
    # evaluate model
#     print('calculating')

    
    output = precision_recall_fscore_support(y_true_sub.flatten(), y_pred_sub.flatten())
    precision = output[0][2]
    recall = output[1][2]
    f1 = output[2][2]

#     print('done')
    return precision, recall, f1 
Example 15
def boot_human_clinic(i, sample_size=sample_size):
    np.random.seed(seed=i)
    random_pids = np.random.choice(pt_list_unique_sub, size=sample_size, replace=True)
    test = np.array([p_2_id_sub[pid] for pid in random_pids])
    boot_list = []
    for ids in test:
        size = len(ids)
        boot_list.append(np.random.choice(ids))   
    y_pred_sub = subset_ClinicNet[boot_list, :]
    y_true_sub = subset_y[boot_list, :]
    auroc = roc_auc_score(y_true_sub, y_pred_sub, average='micro')
    avg_precision = average_precision_score(y_true_sub, y_pred_sub, average='micro')
    y_pred_sub[y_pred_sub<threshold_clinicnet] = 0
    y_pred_sub[y_pred_sub>=threshold_clinicnet] = 1
    # evaluate model
#     print('calculating')
    output = precision_recall_fscore_support(y_true_sub.flatten(), y_pred_sub.flatten())
#     print('done')
    precision = output[0][1]
    recall = output[1][1]
    f1 = output[2][1]
#     print('done')
    return auroc, avg_precision, precision, recall, f1 
Example 16
def boot_human_logistic(i, sample_size=sample_size):
    np.random.seed(seed=i)
    random_pids = np.random.choice(pt_list_unique_sub, size=sample_size, replace=True)
    test = np.array([p_2_id_sub[pid] for pid in random_pids])
    boot_list = []
    for ids in test:
        size = len(ids)
        boot_list.append(np.random.choice(ids))   
    y_pred_sub = subset_log[boot_list, :]
    y_true_sub = subset_y[boot_list, :]
    auroc = roc_auc_score(y_true_sub, y_pred_sub, average='micro')
    avg_precision = average_precision_score(y_true_sub, y_pred_sub, average='micro')
    y_pred_sub[y_pred_sub<threshold_log] = 0
    y_pred_sub[y_pred_sub>=threshold_log] = 1
    # evaluate model
#     print('calculating')
    output = precision_recall_fscore_support(y_true_sub.flatten(), y_pred_sub.flatten())
#     print('done')
    precision = output[0][1]
    recall = output[1][1]
    f1 = output[2][1]
#     print('done')
    return auroc, avg_precision, precision, recall, f1 
Example 17
def eval_epoch(model, sess, eval_set):
    result, labels = [], []
    avg_loss, avg_acc, steps, total_len = 0, 0, 0, 0
    for batch in eval_set.next_batch():
        steps += 1
        predictions, batch_loss, batch_acc = sess.run([model.pred, model.loss, model.accuracy],
                                                      feed_dict={model.dropout_keep_prob: 1.0,
                                                                 model.input_x: batch.texts, model.input_y: batch.labels})
        batch_len = len(batch.texts)
        avg_loss += batch_loss * batch_len
        avg_acc += batch_acc * batch_len
        total_len += batch_len

        result.extend(predictions.tolist())
        labels.extend(batch.labels.tolist())

    avg_loss, avg_acc = avg_loss / total_len, avg_acc / total_len
    precision, recall, fscore, support = precision_recall_fscore_support(labels, result, average='weighted')
    metrics = {'loss': avg_loss, 'accuracy': avg_acc, 'precision': precision, 'recall': recall, 'fscore': fscore }

    return metrics, result 
Example 18
def _get_class_stats(y_true, y_pred, labels):
        """
        Method for getting some basic statistics by class.

        Returns:
            dict: A structured dictionary containing precision, recall, f_beta, and support \
                  vectors (1 x number of classes)
        """
        precision, recall, f_beta, support = score(
            y_true=y_true, y_pred=y_pred, labels=labels
        )

        stats = {
            "precision": precision,
            "recall": recall,
            "f_beta": f_beta,
            "support": support,
        }
        return stats 
Example 19
def get_dice(pred, gt, num_labels):
    if num_labels != 2:
        print('Dice evaluation score is only implemented for 2 labels')
        sys.exit()
    return 1.0 - scipy.spatial.distance.dice(pred.reshape(-1), gt.reshape(-1))

# F1 score at beta = 1 is the same as the Dice score

# recall = (num detected WMH) / (num true WMH)
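
As a quick check of the comment above (F1 at beta = 1 coincides with the Dice score for binary masks), here is a small stand-alone sketch; the array values are made up for illustration and are not part of the original project:

import numpy as np
from scipy.spatial import distance
from sklearn.metrics import f1_score

pred = np.array([0, 1, 1, 0, 1, 0, 1, 1])
gt = np.array([0, 1, 0, 0, 1, 1, 1, 0])

# scipy's dice() returns a dissimilarity, so the Dice coefficient is 1 - dice()
dice_coeff = 1.0 - distance.dice(pred, gt)
f1 = f1_score(gt, pred)
assert np.isclose(dice_coeff, f1)  # both equal 2*TP / (2*TP + FP + FN)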
Example 20
def evaluate(model: DetectModel, loader: DataLoader, config: dict) -> tuple:
    batch_size = loader.batch_size
    device = torch.device("cuda:{}".format(args.cuda) if args.cuda else 'cpu')
    criterion = nn.CrossEntropyLoss(reduction='none')

    total_loss = []
    total_output = []
    total_labels = []

    with torch.no_grad():
        initial_hidden_state = torch.zeros(config['rnn_layers'], batch_size, config['hidden_size'], dtype=torch.float).to(device)
        for step, (seq_inputs, labels) in enumerate(loader):
            seq_inputs = torch.tensor(seq_inputs, dtype=torch.float).to(device)
            labels = torch.tensor(labels, dtype=torch.long).to(device)

            output = model(seq_inputs, initial_hidden_state)
            loss = criterion(output, labels)
            total_loss.append(loss)
            output = F.softmax(output, dim=1).argmax(dim=1)
            total_output.append(output)
            total_labels.append(labels)

        total_loss = torch.cat(total_loss).contiguous().mean()
        total_output = torch.cat(total_output).cpu().numpy()
        total_labels = torch.cat(total_labels).cpu().numpy()

        precision, recall, f1, _ = precision_recall_fscore_support(total_labels, total_output, labels=[0, 1])

    return precision, recall, f1, total_loss 
Example 21
def f1_weighted(y_true, y_pred):
    '''
    This method is used to suppress UndefinedMetricWarning
    in f1_score of scikit-learn.
    "filterwarnings" doesn't work in CV with multiprocess.
    '''
    _, _, f, _ = precision_recall_fscore_support(y_true, y_pred,
                                                 beta=1,
                                                 labels=None,
                                                 pos_label=1,
                                                 average='weighted',
                                                 warn_for=(),
                                                 sample_weight=None)
    return f 
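
A custom metric like this is typically plugged into cross-validation through make_scorer. The sketch below is only an illustration under assumed inputs: it reuses the f1_weighted function defined above, and the dataset is synthetic.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import make_scorer
from sklearn.model_selection import cross_val_score

# Synthetic multi-class data, for illustration only
X, y = make_classification(n_samples=200, n_classes=3, n_informative=4, random_state=0)

# Because f1_weighted passes warn_for=(), no UndefinedMetricWarning is raised
# even when some class is never predicted in a fold.
scores = cross_val_score(LogisticRegression(max_iter=1000), X, y,
                         scoring=make_scorer(f1_weighted), cv=5)
print(scores.mean())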
Example 22
def calc_test_result(self, pred_label, test_label, test_mask):

		true_label=[]
		predicted_label=[]

		for i in range(pred_label.shape[0]):
			for j in range(pred_label.shape[1]):
				if test_mask[i,j]==1:
					true_label.append(np.argmax(test_label[i,j] ))
					predicted_label.append(np.argmax(pred_label[i,j] ))
		print("Confusion Matrix :")
		print(confusion_matrix(true_label, predicted_label))
		print("Classification Report :")
		print(classification_report(true_label, predicted_label, digits=4))
		print('Weighted FScore: \n ', precision_recall_fscore_support(true_label, predicted_label, average='weighted')) 
Example 23
def cnf_roc(y_true, y_pred, classes, isPlot, save_tag=''):
    # Build the confusion matrix from one-hot encoded labels
    y = np.zeros(len(y_true))
    y_ = np.zeros(len(y_true))
    for i in range(len(y_true)):
        y[i] = np.argmax(y_true[i, :])
        y_[i] = np.argmax(y_pred[i, :])
    cnf_mat = confusion_matrix(y, y_)
    print(cnf_mat)

    if isPlot:
        # Plot the confusion matrix
        plot_confusion_matrix(cnf_mat, range(classes), save_tag=save_tag)
        # Plot the ROC curve
        plot_roc_curve(y_true, y_pred, range(classes), save_tag)

    if classes > 2:
        # Compute multi-class evaluation metrics
        Sens = recall_score(y, y_, average='macro')
        Prec = precision_score(y, y_, average='macro')
        F1 = f1_score(y, y_, average='weighted')
        Support = precision_recall_fscore_support(y, y_, beta=0.5, average=None)
        print(Support)
        return Sens, Prec, F1, cnf_mat
    else:
        Acc = 1.0 * (cnf_mat[1][1] + cnf_mat[0][0]) / len(y_true)
        Sens = 1.0 * cnf_mat[1][1] / (cnf_mat[1][1] + cnf_mat[1][0])
        Spec = 1.0 * cnf_mat[0][0] / (cnf_mat[0][0] + cnf_mat[0][1])
        # Compute the AUC
        Auc = roc_auc_score(y_true[:, 1], y_pred[:, 1])
        return Acc, Sens, Spec, Auc
Example 24
def run_evaluate(self, test, report=True):
        """Evaluates performance on test set

        Args:
            test: dataset that yields tuple of (sentences, tags)

        Returns:
            metrics: (dict) metrics["acc"] = 98.4, ...

        """
        accs = []
        labs = []
        labs_pred = []
        for words, labels in minibatches(test, self.config.batch_size):
            labels_pred, document_lengths = self.predict_batch(words)

            for lab, lab_pred, length in zip(labels, labels_pred,
                                             document_lengths):
                lab      = lab[:length]
                lab_pred = lab_pred[:length]
                accs    += [a==b for (a, b) in zip(lab, lab_pred)]

                labs.extend(lab)
                labs_pred.extend(lab_pred)

        labs = [self.idx_to_tag[lab].split('_')[0] for lab in labs]
        labs_pred = [self.idx_to_tag[lab_pred].split('_')[0] for lab_pred in labs_pred]
        _, _, macro_f1, _ = precision_recall_fscore_support(labs, labs_pred, average='macro')
        _, _, micro_f1, _ = precision_recall_fscore_support(labs, labs_pred, average='micro')
        _, _, weighted_f1, _ = precision_recall_fscore_support(labs, labs_pred, average='weighted')
        acc = np.mean(accs)

        if report:
            class_report = classification_report(labs, labs_pred, digits=4)
            print(class_report)
            confusion = confusion_matrix(labs, labs_pred)
            print(confusion)

        return {"acc": 100*acc, "macro-f1": 100*macro_f1, "micro-f1": 100*micro_f1, 
                "weighted-f1": 100*weighted_f1, "classification-report": class_report, 
                "confusion-matrix": confusion} 
Example 25
def test_precision_recall_f1_score_binary():
    # Test Precision Recall and F1 Score for binary classification task
    y_true, y_pred, _ = make_prediction(binary=True)

    # detailed measures for each class
    p, r, f, s = precision_recall_fscore_support(y_true, y_pred, average=None)
    assert_array_almost_equal(p, [0.73, 0.85], 2)
    assert_array_almost_equal(r, [0.88, 0.68], 2)
    assert_array_almost_equal(f, [0.80, 0.76], 2)
    assert_array_equal(s, [25, 25])

    # individual scoring function that can be used for grid search: in the
    # binary class case the score is the value of the measure for the positive
    # class (e.g. label == 1). This is deprecated for average != 'binary'.
    for kwargs, my_assert in [({}, assert_no_warnings),
                              ({'average': 'binary'}, assert_no_warnings)]:
        ps = my_assert(precision_score, y_true, y_pred, **kwargs)
        assert_array_almost_equal(ps, 0.85, 2)

        rs = my_assert(recall_score, y_true, y_pred, **kwargs)
        assert_array_almost_equal(rs, 0.68, 2)

        fs = my_assert(f1_score, y_true, y_pred, **kwargs)
        assert_array_almost_equal(fs, 0.76, 2)

        assert_almost_equal(my_assert(fbeta_score, y_true, y_pred, beta=2,
                                      **kwargs),
                            (1 + 2 ** 2) * ps * rs / (2 ** 2 * ps + rs), 2) 
Example 26
def test_precision_recall_fscore_support_errors():
    y_true, y_pred, _ = make_prediction(binary=True)

    # Bad beta
    assert_raises(ValueError, precision_recall_fscore_support,
                  y_true, y_pred, beta=0.0)

    # Bad pos_label
    assert_raises(ValueError, precision_recall_fscore_support,
                  y_true, y_pred, pos_label=2, average='binary')

    # Bad average option
    assert_raises(ValueError, precision_recall_fscore_support,
                  [0, 1, 2], [1, 2, 0], average='mega') 
Example 27
def test_precision_recall_f_unused_pos_label():
    # Check warning that pos_label unused when set to non-default value
    # but average != 'binary'; even if data is binary.
    assert_warns_message(UserWarning,
                         "Note that pos_label (set to 2) is "
                         "ignored when average != 'binary' (got 'macro'). You "
                         "may use labels=[pos_label] to specify a single "
                         "positive class.", precision_recall_fscore_support,
                         [1, 2, 1], [1, 2, 2], pos_label=2, average='macro') 
Example 28
def calculate_precision_and_recall(outputs, targets, pos_label=1):
    with torch.no_grad():
        _, pred = outputs.topk(1, 1, largest=True, sorted=True)
        precision, recall, _, _ = precision_recall_fscore_support(
            targets.view(-1, 1).cpu().numpy(),
            pred.cpu().numpy())

        return precision[pos_label], recall[pos_label] 
Example 29
def eval_performance(y_true, y_pred):
    '''
    Evaluate the performance of a multiclass classification model.
    :param y_true: the gold-standard labels
    :param y_pred: the predictions
    :return: mean F1
    '''
    pre, rec, f1, support = metrics.precision_recall_fscore_support(y_true, y_pred, average='weighted')
    print('=== Performance ===')
    print('Mean precision:  %.03f%%' % pre)  # (100*sum(pre * support)/sum(support))
    print('Mean recall:     %.03f%%' % rec)  # (100*sum(rec * support)/sum(support))
    print('Mean F1:         %.03f%%' % f1)  # mean_f1
    return pre, rec, f1, support 
Example 30
def eval_performance(y_true, y_pred, tagnames):
    pre, rec, f1, support = metrics.precision_recall_fscore_support(y_true, y_pred)
    print("=== Performance (omitting 'O' class) ===")
    print("Mean precision:  %.02f%%" % (100 * sum(pre[1:] * support[1:]) / sum(support[1:])))
    print("Mean recall:     %.02f%%" % (100 * sum(rec[1:] * support[1:]) / sum(support[1:])))
    print("Mean F1:         %.02f%%" % (100 * sum(f1[1:] * support[1:]) / sum(support[1:])))

