Python source code examples: sklearn.metrics.precision_recall_curve()
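The examples below are usage snippets collected from various open-source projects; most rely on imports and helpers defined elsewhere in their original code bases. As a self-contained starting point, here is a minimal sketch of the function's interface (the toy labels and scores are invented purely for illustration):

import numpy as np
from sklearn.metrics import auc, precision_recall_curve

# Toy data, assumed only for illustration: 1 marks the positive class.
y_true = np.array([0, 0, 1, 1])
y_score = np.array([0.1, 0.4, 0.35, 0.8])

# precision and recall contain one more element than thresholds;
# the final point is (recall=0, precision=1) and has no threshold.
precision, recall, thresholds = precision_recall_curve(y_true, y_score)

# Area under the PR curve via trapezoidal integration, as several
# examples below do with metrics.auc(recall, precision).
print('PR AUC: {0:0.2f}'.format(auc(recall, precision)))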

Example 1
def plot_precision_recall_curve(y_true, y_score, size=None):
    """plot_precision_recall_curve."""
    precision, recall, thresholds = precision_recall_curve(y_true, y_score)
    if size is not None:
        plt.figure(figsize=(size, size))
        plt.axis('equal')
    plt.plot(recall, precision, lw=2, color='navy')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([-0.05, 1.05])
    plt.xlim([-0.05, 1.05])
    plt.grid()
    plt.title('Precision-Recall AUC={0:0.2f}'.format(average_precision_score(
        y_true, y_score))) 
Example 2
def save_precision_recall_curve(eval_labels, pred_labels, average_precision, smell, config, out_folder, dim, method):
    fig = plt.figure()
    precision, recall, _ = precision_recall_curve(eval_labels, pred_labels)

    step_kwargs = ({'step': 'post'}
                   if 'step' in signature(plt.fill_between).parameters
                   else {})
    plt.step(recall, precision, color='b', alpha=0.2,
             where='post')
    plt.fill_between(recall, precision, alpha=0.2, color='b', **step_kwargs)

    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    if isinstance(config, cfg.CNN_config):
        title_str = smell + " (" + method + " - " + dim + ") - L=" + str(config.layers) + ", E=" + str(config.epochs) + ", F=" + str(config.filters) + \
                    ", K=" + str(config.kernel) + ", PW=" + str(config.pooling_window) + ", AP={0:0.2f}".format(average_precision)
    # plt.title(title_str)
    # plt.show()
    file_name = get_plot_file_name(smell, config, out_folder, dim, method, "_prc_")
    fig.savefig(file_name) 
Example 3
def precision_recall_auc(loss_file, reverse, smoothing):
    if not os.path.isdir(loss_file):
        loss_file_list = [loss_file]
    else:
        loss_file_list = os.listdir(loss_file)
        loss_file_list = [os.path.join(loss_file, sub_loss_file) for sub_loss_file in loss_file_list]

    optimal_results = RecordResult()
    for sub_loss_file in loss_file_list:
        dataset, scores, labels = get_scores_labels(sub_loss_file, reverse, smoothing)
        precision, recall, thresholds = metrics.precision_recall_curve(labels, scores, pos_label=0)
        auc = metrics.auc(recall, precision)

        results = RecordResult(recall, precision, auc, dataset, sub_loss_file)

        if optimal_results < results:
            optimal_results = results

        if os.path.isdir(loss_file):
            print(results)
    print('##### optimal result and model PR-AUC = {}'.format(optimal_results))
    return optimal_results 
Example 4
def __call__(self, pos_triples, neg_triples=None):
        triples = pos_triples + neg_triples
        labels = [1 for _ in range(len(pos_triples))] + [0 for _ in range(len(neg_triples))]

        Xr, Xe = [], []
        for (s_idx, p_idx, o_idx), label in zip(triples, labels):
            Xr += [[p_idx]]
            Xe += [[s_idx, o_idx]]

        ascores = self.scoring_function([Xr, Xe])
        ays = np.array(labels)

        if self.rescale_predictions:
            diffs = np.diff(np.sort(ascores))
            min_diff = min(abs(diffs[np.nonzero(diffs)]))

            if min_diff < 1e-8:
                ascores = (ascores * (1e-7 / min_diff)).astype(np.float64)

        aucroc_value = metrics.roc_auc_score(ays, ascores)
        precision, recall, thresholds = metrics.precision_recall_curve(ays, ascores, pos_label=1)
        aucpr_value = metrics.auc(recall, precision)

        return aucroc_value, aucpr_value 
Example 5
def threshold_by_f1(true_vessels, generated, masks, flatten=True, f1_score=False):
    vessels_in_mask, generated_in_mask = pixel_values_in_mask(true_vessels, generated, masks)
    precision, recall, thresholds = precision_recall_curve(vessels_in_mask.flatten(), generated_in_mask.flatten(),  pos_label=1)
    best_f1, best_threshold = best_f1_threshold(precision, recall, thresholds)

    pred_vessels_bin = np.zeros(generated.shape)
    pred_vessels_bin[generated >= best_threshold] = 1
    
    if flatten:
        if f1_score:
            return pred_vessels_bin[masks==1].flatten(), best_f1
        else:
            return pred_vessels_bin[masks==1].flatten()
    else:
        if f1_score:
            return pred_vessels_bin, best_f1
        else:
            return pred_vessels_bin 
Example 6
def _average_precision_slow(y_true, y_score):
    """A second alternative implementation of average precision that closely
    follows the Wikipedia article's definition (see References). This should
    give results identical to `average_precision_score` for all inputs.

    References
    ----------
    .. [1] `Wikipedia entry for the Average precision
       <https://en.wikipedia.org/wiki/Average_precision>`_
    """
    precision, recall, threshold = precision_recall_curve(y_true, y_score)
    precision = list(reversed(precision))
    recall = list(reversed(recall))
    average_precision = 0
    for i in range(1, len(precision)):
        average_precision += precision[i] * (recall[i] - recall[i - 1])
    return average_precision 
Example 7
def test_precision_recall_curve():
    y_true, _, probas_pred = make_prediction(binary=True)
    _test_precision_recall_curve(y_true, probas_pred)

    # Use {-1, 1} for labels; make sure original labels aren't modified
    y_true[np.where(y_true == 0)] = -1
    y_true_copy = y_true.copy()
    _test_precision_recall_curve(y_true, probas_pred)
    assert_array_equal(y_true_copy, y_true)

    labels = [1, 0, 0, 1]
    predict_probas = [1, 2, 3, 4]
    p, r, t = precision_recall_curve(labels, predict_probas)
    assert_array_almost_equal(p, np.array([0.5, 0.33333333, 0.5, 1., 1.]))
    assert_array_almost_equal(r, np.array([1., 0.5, 0.5, 0.5, 0.]))
    assert_array_almost_equal(t, np.array([1, 2, 3, 4]))
    assert_equal(p.size, r.size)
    assert_equal(p.size, t.size + 1) 
Example 8
def _test_precision_recall_curve(y_true, probas_pred):
    # Test Precision-Recall and area under the PR curve
    p, r, thresholds = precision_recall_curve(y_true, probas_pred)
    precision_recall_auc = _average_precision_slow(y_true, probas_pred)
    assert_array_almost_equal(precision_recall_auc, 0.859, 3)
    assert_array_almost_equal(precision_recall_auc,
                              average_precision_score(y_true, probas_pred))
    assert_almost_equal(_average_precision(y_true, probas_pred),
                        precision_recall_auc, decimal=3)
    assert_equal(p.size, r.size)
    assert_equal(p.size, thresholds.size + 1)
    # Smoke test in the case of proba having only one value
    p, r, thresholds = precision_recall_curve(y_true,
                                              np.zeros_like(probas_pred))
    assert_equal(p.size, r.size)
    assert_equal(p.size, thresholds.size + 1) 
Example 9
def eval(model, test_data, test_label, thresholds=0.5, num_classes=2, pr_figure_path=None, pred_save_path=None):
    print('{0}, val mean acc:{1}'.format(model.__str__(), model.score(test_data, test_label)))
    if num_classes == 2:
        # binary classification
        label_pred_probas = model.predict_proba(test_data)[:, 1]
        label_pred = label_pred_probas > thresholds
        # use the predicted probabilities (not the thresholded labels) for the curve
        precision, recall, threshold = precision_recall_curve(test_label, label_pred_probas)
        plot_pr(thresholds, precision, recall, figure_path=pr_figure_path)
    else:
        # multi
        label_pred = model.predict(test_data)
        # precision_recall_curve: multiclass format is not supported
    print(classification_report(test_label, label_pred))
    if pred_save_path:
        with open(pred_save_path, 'w', encoding='utf-8') as f:
            for i in label_pred:
                f.write(str(i) + '\n')
    return label_pred 
Example 10
def compute_fdr(all_targets,all_predictions, fdr_cutoff=0.5):
    fdr_array = []
    for i in range(all_targets.shape[1]):
        try:
            precision, recall, thresholds = metrics.precision_recall_curve(all_targets[:, i], all_predictions[:, i], pos_label=1)
            fdr = 1 - precision
            cutoff_index = next(i for i, x in enumerate(fdr) if x <= fdr_cutoff)
            # recall at the first point where the FDR falls to the cutoff or below
            fdr_at_cutoff = recall[cutoff_index]
            if not math.isnan(fdr_at_cutoff):
                fdr_array.append(numpy.nan_to_num(fdr_at_cutoff))
        except: 
            pass
    
    fdr_array = numpy.array(fdr_array)
    mean_fdr = numpy.mean(fdr_array)
    median_fdr = numpy.median(fdr_array)
    var_fdr = numpy.var(fdr_array)
    return mean_fdr,median_fdr,var_fdr,fdr_array 
Example 11
def compute_aupr(all_targets,all_predictions):
    aupr_array = []
    for i in range(all_targets.shape[1]):
        try:
            precision, recall, thresholds = metrics.precision_recall_curve(all_targets[:, i], all_predictions[:, i], pos_label=1)
            # recall is already monotonically decreasing, so the removed
            # reorder=True argument of metrics.auc is not needed
            auPR = metrics.auc(recall, precision)
            if not math.isnan(auPR):
                aupr_array.append(numpy.nan_to_num(auPR))
        except: 
            pass
    
    aupr_array = numpy.array(aupr_array)
    mean_aupr = numpy.mean(aupr_array)
    median_aupr = numpy.median(aupr_array)
    var_aupr = numpy.var(aupr_array)
    return mean_aupr,median_aupr,var_aupr,aupr_array 
Example 12
def eval_intentPredict(intent_probs, intent_trueLabel):
    ''' Inputs:
            intent_probs: shape = (sample_nb, intent_vocab_size), predicted probs for intent prediction
            intent_trueLabel: shape = (sample_nb, intent_vocab_size), target binary matrix
        Outputs:
            precision, recall, f1_score, frame-level accuracy, and the
            threshold (predict positive when prob >= threshold)
    '''
    # exclude the last element in precision and recall
    # which denotes 0 recall, and 1 precision
    precision, recall, thresholds = precision_recall_curve(
        intent_trueLabel.ravel(), intent_probs.ravel(), pos_label=1)
    f1_score = 2. * precision * recall / (precision + recall)
    f1_score[np.isnan(f1_score)] = 0.
    max_idx = np.argmax(f1_score[:-1])
    indicator = np.zeros_like(intent_probs)
    indicator[intent_probs >= thresholds[max_idx]] = 1
    accuracy_frame = calculate_FrameAccuracy(indicator, intent_trueLabel)
    return (precision[max_idx], recall[max_idx], f1_score[max_idx], accuracy_frame, thresholds[max_idx]) 
Example 13
def _update_onco_metrics(self, y_true, y_pred, prob):
        self.onco_gene_pred = pd.Series(y_pred, self.y.index)
        self.onco_gene_score = pd.Series(prob, self.y.index)

        # compute metrics for classification
        self.onco_gene_count[self.num_pred] = sum(y_pred)
        prec, recall, fscore, support = metrics.precision_recall_fscore_support(y_true, y_pred)
        self.onco_precision[self.num_pred] = prec[self.onco_num]
        self.onco_recall[self.num_pred] = recall[self.onco_num]
        self.onco_f1_score[self.num_pred] = fscore[self.onco_num]
        self.logger.debug('Onco Iter %d: Precision=%s, Recall=%s, f1_score=%s' % (
                          self.num_pred + 1, str(prec), str(recall), str(fscore)))

        # compute ROC curve metrics
        fpr, tpr, thresholds = metrics.roc_curve(y_true, prob)
        self.onco_tpr_array[self.num_pred, :] = interp(self.onco_fpr_array, fpr, tpr)
        #self.onco_mean_tpr[0] = 0.0

        # compute Precision-Recall curve metrics
        p, r, thresh = metrics.precision_recall_curve(y_true, prob)
        p, r, thresh = p[::-1], r[::-1], thresh[::-1]  # reverse order of results
        thresh = np.insert(thresh, 0, 1.0)
        self.onco_precision_array[self.num_pred, :] = interp(self.onco_recall_array, r, p)
        self.onco_threshold_array[self.num_pred, :] = interp(self.onco_recall_array, r, thresh) 
Example 14
def _update_tsg_metrics(self, y_true, y_pred, prob):
        self.tsg_gene_pred = pd.Series(y_pred, self.y.index)
        self.tsg_gene_score = pd.Series(prob, self.y.index)

        # compute metrics for classification
        self.tsg_gene_count[self.num_pred] = sum(y_pred)
        prec, recall, fscore, support = metrics.precision_recall_fscore_support(y_true, y_pred)
        tsg_col = 1  # column for metrics related to tsg
        self.tsg_precision[self.num_pred] = prec[tsg_col]
        self.tsg_recall[self.num_pred] = recall[tsg_col]
        self.tsg_f1_score[self.num_pred] = fscore[tsg_col]
        self.logger.debug('Tsg Iter %d: Precision=%s, Recall=%s, f1_score=%s' % (
                          self.num_pred + 1, str(prec), str(recall), str(fscore)))

        # compute ROC curve metrics
        fpr, tpr, thresholds = metrics.roc_curve(y_true, prob)
        self.tsg_tpr_array[self.num_pred, :] = interp(self.tsg_fpr_array, fpr, tpr)
        #self.tsg_tpr_array[0] = 0.0

        # compute Precision-Recall curve metrics
        p, r, thresh = metrics.precision_recall_curve(y_true, prob)
        p, r, thresh = p[::-1], r[::-1], thresh[::-1]  # reverse order of results
        self.tsg_precision_array[self.num_pred, :] = interp(self.tsg_recall_array, r, p) 
Example 15
def plot_PR_curve(classifier):
    
    precision, recall, thresholds = precision_recall_curve(DataPrep.test_news['Label'], classifier)
    average_precision = average_precision_score(DataPrep.test_news['Label'], classifier)
    
    plt.step(recall, precision, color='b', alpha=0.2,
             where='post')
    plt.fill_between(recall, precision, step='post', alpha=0.2,
                     color='b')
    
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('2-class Random Forest Precision-Recall curve: AP={0:0.2f}'.format(
              average_precision)) 
Example 16
def prc_auc_score(self, y_total, t_total):
        # --- ignore labels if specified ---
        if self.ignore_labels:
            valid_ind = numpy.in1d(t_total, self.ignore_labels, invert=True)
            y_total = y_total[valid_ind]
            t_total = t_total[valid_ind]

        # --- set positive labels to 1, negative labels to 0 ---
        pos_indices = numpy.in1d(t_total, self.pos_labels)
        t_total = numpy.where(pos_indices, 1, 0)

        if len(numpy.unique(t_total)) != 2:
            if self.raise_value_error:
                raise ValueError("Only one class present in y_true. PRC AUC "
                                 "score is not defined in that case.")
            else:
                return numpy.nan

        precision, recall, _ = metrics.precision_recall_curve(t_total, y_total)
        prc_auc = metrics.auc(recall, precision)
        return prc_auc 
Example 17
def evaluate(embs, true_edges, false_edges):
    true_list = list()
    prediction_list = list()
    for edge in true_edges:
        true_list.append(1)
        prediction_list.append(get_score(embs, edge[0], edge[1]))

    for edge in false_edges:
        true_list.append(0)
        prediction_list.append(get_score(embs, edge[0], edge[1]))

    sorted_pred = prediction_list[:]
    sorted_pred.sort()
    threshold = sorted_pred[-len(true_edges)]

    y_pred = np.zeros(len(prediction_list), dtype=np.int32)
    for i in range(len(prediction_list)):
        if prediction_list[i] >= threshold:
            y_pred[i] = 1

    y_true = np.array(true_list)
    y_scores = np.array(prediction_list)
    ps, rs, _ = precision_recall_curve(y_true, y_scores)
    return roc_auc_score(y_true, y_scores), f1_score(y_true, y_pred), auc(rs, ps) 
Example 18
def plot(self, y: np.ndarray, pred: np.ndarray):
        p, r, t = precision_recall_curve(y, pred)
        fig, ax = plt.subplots(figsize=(4.2, 2.7))
        ax2 = ax.twinx()

        t = np.hstack([t, t[-1]])

        ax.plot(r, p)

        ax.set_xlabel('Recall')
        ax.set_ylabel('Precision')
        ax.set_ylim([0.0, 1.05])
        ax.set_xlim([0.0, 1.0])
        ax2.set_ylabel('Threshold')
        ax2.plot(r, t, c='red')
        return figure_to_binary(fig) 
Example 19
def __call__(self, args, env):

        import numpy as np
        import matplotlib.pyplot as plt
        from sklearn.metrics import average_precision_score
        from sklearn.metrics import precision_recall_curve
        from vergeml.plots import load_labels, load_predictions

        try:
            labels = load_labels(env)
        except FileNotFoundError:
            raise VergeMLError("Can't plot PR curve - not supported by model.")

        nclasses = len(labels)
        if args['class'] not in labels:
            raise VergeMLError("Unknown class: " + args['class'])

        try:
            y_test, y_score = load_predictions(env, nclasses)
        except FileNotFoundError:
            raise VergeMLError("Can't plot PR curve - not supported by model.")

        # From:
        # https://scikit-learn.org/stable/auto_examples/model_selection/plot_precision_recall.html#sphx-glr-auto-examples-model-selection-plot-precision-recall-py

        ix = labels.index(args['class'])
        y_test = y_test[:, ix].astype(int)  # np.int was removed from newer NumPy releases
        y_score = y_score[:, ix]

        precision, recall, _ = precision_recall_curve(y_test, y_score)
        average_precision = average_precision_score(y_test, y_score)

        plt.step(recall, precision, color='b', alpha=0.2, where='post')
        plt.fill_between(recall, precision, alpha=0.2, color='b', step='post')

        plt.xlabel('Recall ({})'.format(args['class']))
        plt.ylabel('Precision ({})'.format(args['class']))
        plt.ylim([0.0, 1.05])
        plt.xlim([0.0, 1.0])
        plt.title('Precision-Recall curve for @{0}: AP={1:0.2f}'.format(args['@AI'], average_precision))
        plt.show() 
Example 20
def plot_pr_curve(y, p):
    precision, recall, _ = precision_recall_curve(y, p)

    plt.step(recall, precision, color='b', alpha=0.2, where='post')
    plt.fill_between(recall, precision, step='post', alpha=0.2, color='b')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0]) 
Example 21
def get_threshold(model_id):
    trained_models = pd.read_csv(common.DEFAULT_TRAINED_MODELS_FILE, sep='\t')
    model_config = trained_models[trained_models["model_id"] == model_id]
    if model_config.empty:
        raise ValueError("Can't find the model %s in %s" %
                         (model_id, common.DEFAULT_TRAINED_MODELS_FILE))
    model_config = model_config.to_dict(orient="list")
    model_settings=eval(model_config['dataset_settings'][0])

    Y_test = np.load(common.DATASETS_DIR+'/item_factors_test_%s_%s_%s.npy' % (model_settings['fact'],model_settings['dim'],model_settings['dataset']))
    Y_pred = np.load(common.FACTORS_DIR+'/factors_%s.npy' % model_id)

    good_scores = Y_pred[Y_test==1]
    th = good_scores.mean()
    std = good_scores.std()
    print('Mean th', th)
    print('Std', std)

    p, r, thresholds = precision_recall_curve(Y_test.flatten(), Y_pred.flatten())
    f = np.nan_to_num((2 * (p*r) / (p+r)) * (p>r))
    print(f)
    max_f = np.argmax(f)
    fth = thresholds[max_f]
    print(f[max_f], p[max_f], r[max_f])
    print('F th %.2f' % fth)
    plt.plot(r, p, 
             label='Precision-recall curve of class {0}')

    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Extension of Precision-Recall curve to multi-class')
    plt.savefig("pr_curve.png") 
Example 22
def average_precision_score(y_true, y_score, average="macro",
                            sample_weight=None):
    def _binary_average_precision(y_true, y_score, sample_weight=None):
        precision, recall, thresholds = precision_recall_curve(
            y_true, y_score, sample_weight=sample_weight)
        return auc(recall, precision)

    return _average_binary_score(_binary_average_precision, y_true, y_score,
                                 average, sample_weight=sample_weight) 
Example 23
def calc_metrics(testy, scores):
    precision, recall, _ = precision_recall_curve(testy, scores)
    roc_auc = roc_auc_score(testy, scores)
    prc_auc = auc(recall, precision)

    return roc_auc, prc_auc 
Example 24
def prc_auc_score(y, y_pred):
  """Compute area under precision-recall curve"""
  if y.shape != y_pred.shape:
    y = _ensure_one_hot(y)
  assert y_pred.shape == y.shape
  assert y_pred.shape[1] == 2
  precision, recall, _ = precision_recall_curve(y[:, 1], y_pred[:, 1])
  return auc(recall, precision) 
Example 25
def precision_recall_curve(conditions, prediction_scores, pos_label=None,
                           sample_weight=None):
    # pos_label and sample_weight are keyword-only in recent scikit-learn releases
    return metrics.precision_recall_curve(conditions, prediction_scores,
                                          pos_label=pos_label,
                                          sample_weight=sample_weight)
Example 26
def compute_pr(y_test, probability_predictions):
    """
    Compute Precision-Recall, thresholds and PR AUC.

    Args:
        y_test (list) : true label values corresponding to the predictions. Also length n.
        probability_predictions (list) : predictions coming from an ML algorithm of length n.

    Returns:
        dict: PR AUC, the ideal probability cutoff with its precision and
            recall, and the full precision, recall, and threshold arrays.

    """
    _validate_predictions_and_labels_are_equal_length(probability_predictions, y_test)

    # Calculate PR
    precisions, recalls, pr_thresholds = skmetrics.precision_recall_curve(y_test, probability_predictions)
    pr_auc = skmetrics.average_precision_score(y_test, probability_predictions)

    # get ideal cutoffs for suggestions (upper right or 1,1)
    pr_distances = (precisions - 1) ** 2 + (recalls - 1) ** 2

    # To prevent the case where there are two points with the same minimum distance, return only the first
    # np.where returns a tuple (we want the first element in the first array)
    pr_index = np.where(pr_distances == np.min(pr_distances))[0][0]
    best_precision = precisions[pr_index]
    best_recall = recalls[pr_index]
    ideal_pr_cutoff = pr_thresholds[pr_index]

    return {'pr_auc': pr_auc,
            'best_pr_cutoff': ideal_pr_cutoff,
            'best_precision': best_precision,
            'best_recall': best_recall,
            'precisions': precisions,
            'recalls': recalls,
            'pr_thresholds': pr_thresholds} 
Example 27
def prc_auc_score(y, y_pred):
  """Compute area under precision-recall curve"""
  assert y_pred.shape == y.shape
  assert y_pred.shape[1] == 2
  precision, recall, _ = precision_recall_curve(y[:, 1], y_pred[:, 1])
  return auc(recall, precision) 
Example 28
def __call__(self, y, scores):
        scores = self._preprocess_scores(scores)
        precision, recall, thresholds = metrics.precision_recall_curve(y, scores, pos_label=self.pos_label)
        ans = metrics.auc(recall, precision)
        return ans 
Example 29
def AUC_PR(true_vessel_img, pred_vessel_img, save_fname):
    """
    Precision-recall curve
    """
    precision, recall, _ = precision_recall_curve(true_vessel_img.flatten(), pred_vessel_img.flatten(),  pos_label=1)
    save_obj({"precision":precision, "recall":recall}, save_fname)
    AUC_prec_rec = auc(recall, precision)
    return AUC_prec_rec