Python source code examples: sklearn.metrics.roc_curve()
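Before the project-specific examples below, here is a minimal self-contained sketch of the basic call. The labels and scores are made-up toy values for illustration only; roc_curve() returns false positive rates, true positive rates, and the decision thresholds at which they were computed, and auc() integrates the resulting curve.

import numpy as np
from sklearn.metrics import roc_curve, auc

# Toy data for illustration only: binary ground-truth labels and classifier scores.
y_true = np.array([0, 0, 1, 1])
y_score = np.array([0.1, 0.4, 0.35, 0.8])

# roc_curve returns the FPR, TPR and the thresholds at which they were computed.
fpr, tpr, thresholds = roc_curve(y_true, y_score, pos_label=1)
roc_auc = auc(fpr, tpr)
print(fpr, tpr, thresholds, roc_auc)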
Example 1
def fit_model(self, data, cross_val_data, cross_val_labels):
    eval_metrics = []
    for i in range(self.n_ensemble):
        train_sm = np.concatenate(cross_val_data[:i] +
                                  cross_val_data[(i + 1):])
        test_sm = cross_val_data[i]
        train_labels = np.concatenate(cross_val_labels[:i] +
                                      cross_val_labels[(i + 1):])
        test_labels = cross_val_labels[i]
        fp_train = get_fp(train_sm)
        fp_test = get_fp(test_sm)
        self.model[i].fit(fp_train, train_labels.ravel())
        predicted = self.model[i].predict(fp_test)
        if self.model_type == 'classifier':
            fpr, tpr, thresholds = metrics.roc_curve(test_labels, predicted)
            eval_metrics.append(metrics.auc(fpr, tpr))
            metrics_type = 'AUC'
        elif self.model_type == 'regressor':
            r2 = metrics.r2_score(test_labels, predicted)
            eval_metrics.append(r2)
            metrics_type = 'R^2 score'
    return eval_metrics, metrics_type
Example 2
def plot_roc_curve(y_true, y_score, size=None):
    """plot_roc_curve."""
    false_positive_rate, true_positive_rate, thresholds = roc_curve(
        y_true, y_score)
    if size is not None:
        plt.figure(figsize=(size, size))
    plt.axis('equal')
    plt.plot(false_positive_rate, true_positive_rate, lw=2, color='navy')
    plt.plot([0, 1], [0, 1], color='gray', lw=1, linestyle='--')
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.ylim([-0.05, 1.05])
    plt.xlim([-0.05, 1.05])
    plt.grid()
    plt.title('Receiver operating characteristic AUC={0:0.2f}'.format(
        roc_auc_score(y_true, y_score)))
Example 3
def compute_roc(y_true, y_pred, plot=False):
    """
    Compute the ROC curve and its AUC.
    :param y_true: ground truth
    :param y_pred: predictions
    :param plot: whether to plot the ROC curve
    :return: fpr, tpr and the AUC score
    """
    fpr, tpr, _ = roc_curve(y_true, y_pred)
    auc_score = auc(fpr, tpr)
    if plot:
        plt.figure(figsize=(7, 6))
        plt.plot(fpr, tpr, color='blue',
                 label='ROC (AUC = %0.4f)' % auc_score)
        plt.legend(loc='lower right')
        plt.title("ROC Curve")
        plt.xlabel("FPR")
        plt.ylabel("TPR")
        plt.show()
    return fpr, tpr, auc_score
Example 4
def compute_roc_rfeinman(probs_neg, probs_pos, plot=False):
    """
    Compute the ROC curve and its AUC from negative and positive scores.
    :param probs_neg: scores for the negative class
    :param probs_pos: scores for the positive class
    :param plot: whether to plot the ROC curve
    :return: fpr, tpr and the AUC score
    """
    probs = np.concatenate((probs_neg, probs_pos))
    labels = np.concatenate((np.zeros_like(probs_neg), np.ones_like(probs_pos)))
    fpr, tpr, _ = roc_curve(labels, probs)
    auc_score = auc(fpr, tpr)
    if plot:
        plt.figure(figsize=(7, 6))
        plt.plot(fpr, tpr, color='blue',
                 label='ROC (AUC = %0.4f)' % auc_score)
        plt.legend(loc='lower right')
        plt.title("ROC Curve")
        plt.xlabel("FPR")
        plt.ylabel("TPR")
        plt.show()
    return fpr, tpr, auc_score
Example 5
def computeFROC(FROCGTList, FROCProbList, totalNumberOfImages, excludeList):
    # Remove excluded candidates
    FROCGTList_local = []
    FROCProbList_local = []
    for i in range(len(excludeList)):
        if excludeList[i] == False:
            FROCGTList_local.append(FROCGTList[i])
            FROCProbList_local.append(FROCProbList[i])

    numberOfDetectedLesions = sum(FROCGTList_local)
    totalNumberOfLesions = sum(FROCGTList)
    totalNumberOfCandidates = len(FROCProbList_local)
    fpr, tpr, thresholds = skl_metrics.roc_curve(FROCGTList_local, FROCProbList_local)
    if sum(FROCGTList) == len(FROCGTList):  # Handle border case when there are no false positives and ROC analysis gives nan values.
        print("WARNING, this system has no false positives..")
        fps = np.zeros(len(fpr))
    else:
        fps = fpr * (totalNumberOfCandidates - numberOfDetectedLesions) / totalNumberOfImages
    sens = (tpr * numberOfDetectedLesions) / totalNumberOfLesions
    return fps, sens, thresholds
Example 6
def print_roc(self, y_true, y_scores, filename):
    '''
    Prints the ROC for this model.
    '''
    fpr, tpr, thresholds = metrics.roc_curve(y_true, y_scores)
    plt.figure()
    plt.plot(fpr, tpr, color='darkorange', label='ROC curve (area = %0.2f)' % self.roc_auc)
    plt.plot([0, 1], [0, 1], color='navy', linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic')
    plt.legend(loc="lower right")
    plt.savefig(filename)
    plt.close()
Example 7
def compute_auc(y_true, y_pred, label_index):
    """Compute Area Under the Curve (AUC) metric.
    Args:
        y_true: true class
        y_pred: probabilities for a class
        label_index:
            label_index == 1 => laughter (class1) vs. others (class0)
            label_index == 2 => filler (class1) vs. others (class0)
    Returns:
        auc_val: AUC metric accuracy
    """
    for i in range(y_true.shape[0]):
        y_true[i] = 0 if y_true[i] != label_index else 1
    y_true = np.reshape(y_true, (-1,))
    y_pred = np.reshape(y_pred[:, label_index], (-1,))
    try:
        fpr, tpr, _ = roc_curve(y_true, y_pred, pos_label=1)
    except UndefinedMetricWarning:
        pass
    auc_val = auc(fpr, tpr)
    return auc_val
Example 8
def roc(self, data, model, tt, name):
    scores = self.get_predictions_loss(data, model, tt)[0]
    labels = [prot["label"][:, 2] for prot in data[tt]]
    fprs = []
    tprs = []
    roc_aucs = []
    for s, l in zip(scores, labels):
        fpr, tpr, _ = roc_curve(l, s)
        roc_auc = auc(fpr, tpr)
        fprs.append(fpr)
        tprs.append(tpr)
        roc_aucs.append(roc_auc)
    auc_prot_med = np.median(roc_aucs)
    auc_prot_ave = np.mean(roc_aucs)
    printt("{} average protein auc: {:0.3f}".format(name, auc_prot_ave))
    printt("{} median protein auc: {:0.3f}".format(name, auc_prot_med))
    return ["auc_prot_ave_" + tt, "auc_prot_med_" + tt], [auc_prot_ave, auc_prot_med]
Example 9
def get_all_metrics(model, eval_data, eval_labels, pred_labels):
    fpr, tpr, thresholds_keras = roc_curve(eval_labels, pred_labels)
    auc_ = auc(fpr, tpr)
    print("auc_keras:" + str(auc_))

    score = model.evaluate(eval_data, eval_labels, verbose=0)
    print("Test accuracy: " + str(score[1]))

    precision = precision_score(eval_labels, pred_labels)
    print('Precision score: {0:0.2f}'.format(precision))

    recall = recall_score(eval_labels, pred_labels)
    print('Recall score: {0:0.2f}'.format(recall))

    f1 = f1_score(eval_labels, pred_labels)
    print('F1 score: {0:0.2f}'.format(f1))

    average_precision = average_precision_score(eval_labels, pred_labels)
    print('Average precision-recall score: {0:0.2f}'.format(average_precision))

    return auc_, score[1], precision, recall, f1, average_precision, fpr, tpr
Example 10
def get_all_metrics_(eval_labels, pred_labels):
    fpr, tpr, thresholds_keras = roc_curve(eval_labels, pred_labels)
    auc_ = auc(fpr, tpr)
    print("auc_keras:" + str(auc_))

    precision = precision_score(eval_labels, pred_labels)
    print('Precision score: {0:0.2f}'.format(precision))

    recall = recall_score(eval_labels, pred_labels)
    print('Recall score: {0:0.2f}'.format(recall))

    f1 = f1_score(eval_labels, pred_labels)
    print('F1 score: {0:0.2f}'.format(f1))

    average_precision = average_precision_score(eval_labels, pred_labels)
    print('Average precision-recall score: {0:0.2f}'.format(average_precision))

    return auc_, precision, recall, f1, average_precision, fpr, tpr
Example 11
def accuracy(y_true, y_pred):
    # Compute the confusion matrix
    y = np.zeros(len(y_true))
    y_ = np.zeros(len(y_true))
    for i in range(len(y_true)):
        y[i] = np.argmax(y_true[i, :])
        y_[i] = np.argmax(y_pred[i, :])
    cnf_mat = confusion_matrix(y, y_)

    # Acc = 1.0*(cnf_mat[1][1]+cnf_mat[0][0])/len(y_true)
    # Sens = 1.0*cnf_mat[1][1]/(cnf_mat[1][1]+cnf_mat[1][0])
    # Spec = 1.0*cnf_mat[0][0]/(cnf_mat[0][0]+cnf_mat[0][1])

    # # Plot the ROC curve
    # fpr, tpr, thresholds = roc_curve(y_true[:,0], y_pred[:,0])
    # Auc = auc(fpr, tpr)

    # Compute multi-class evaluation metrics
    Sens = recall_score(y, y_, average='macro')
    Prec = precision_score(y, y_, average='macro')
    F1 = f1_score(y, y_, average='weighted')
    Support = precision_recall_fscore_support(y, y_, beta=0.5, average=None)
    return Sens, Prec, F1, cnf_mat
Example 12
def compute_eer(loss_file, reverse, smoothing):
    if not os.path.isdir(loss_file):
        loss_file_list = [loss_file]
    else:
        loss_file_list = os.listdir(loss_file)
        loss_file_list = [os.path.join(loss_file, sub_loss_file) for sub_loss_file in loss_file_list]

    optimal_results = RecordResult(auc=np.inf)
    for sub_loss_file in loss_file_list:
        dataset, scores, labels = get_scores_labels(sub_loss_file, reverse, smoothing)
        fpr, tpr, thresholds = metrics.roc_curve(labels, scores, pos_label=0)
        eer = cal_eer(fpr, tpr)

        results = RecordResult(fpr, tpr, eer, dataset, sub_loss_file)

        if optimal_results > results:
            optimal_results = results

        if os.path.isdir(loss_file):
            print(results)

    print('##### optimal result and model EER = {}'.format(optimal_results))
    return optimal_results
Example 13
def test_roc_returns_consistency():
    # Test whether the returned threshold matches up with tpr
    # make small toy dataset
    y_true, _, probas_pred = make_prediction(binary=True)
    fpr, tpr, thresholds = roc_curve(y_true, probas_pred)

    # use the given thresholds to determine the tpr
    tpr_correct = []
    for t in thresholds:
        tp = np.sum((probas_pred >= t) & y_true)
        p = np.sum(y_true)
        tpr_correct.append(1.0 * tp / p)

    # compare tpr and tpr_correct to see if the thresholds' order was correct
    assert_array_almost_equal(tpr, tpr_correct, decimal=2)
    assert_equal(fpr.shape, tpr.shape)
    assert_equal(fpr.shape, thresholds.shape)
Example 14
def test_roc_curve_one_label():
    y_true = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    y_pred = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1]
    # assert there are warnings
    w = UndefinedMetricWarning
    fpr, tpr, thresholds = assert_warns(w, roc_curve, y_true, y_pred)
    # all true labels, all fpr should be nan
    assert_array_equal(fpr, np.full(len(thresholds), np.nan))
    assert_equal(fpr.shape, tpr.shape)
    assert_equal(fpr.shape, thresholds.shape)

    # assert there are warnings
    fpr, tpr, thresholds = assert_warns(w, roc_curve,
                                        [1 - x for x in y_true],
                                        y_pred)
    # all negative labels, all tpr should be nan
    assert_array_equal(tpr, np.full(len(thresholds), np.nan))
    assert_equal(fpr.shape, tpr.shape)
    assert_equal(fpr.shape, thresholds.shape)
Example 15
def test_auc_gold_labels_behaviour(self, device: str):
    # Check that it works with different pos_label
    auc = Auc(positive_label=4)

    predictions = torch.randn(8, device=device)
    labels = torch.randint(3, 5, (8,), dtype=torch.long, device=device)
    # We make sure that the positive label is always present.
    labels[0] = 4
    auc(predictions, labels)

    computed_auc_value = auc.get_metric(reset=True)

    false_positive_rates, true_positive_rates, _ = metrics.roc_curve(
        labels.cpu().numpy(), predictions.cpu().numpy(), pos_label=4
    )
    real_auc_value = metrics.auc(false_positive_rates, true_positive_rates)
    assert_allclose(real_auc_value, computed_auc_value)

    # Check that it errs on getting more than 2 labels.
    with pytest.raises(ConfigurationError) as _:
        labels = torch.tensor([3, 4, 5, 6, 7, 8, 9, 10], device=device)
        auc(predictions, labels)
Example 16
def roc(self, labels, pred_scores):
    if self.eval_type == consts.BINARY:
        fpr, tpr, thresholds = roc_curve(np.array(labels), np.array(pred_scores), drop_intermediate=1)
        fpr, tpr, thresholds = list(map(float, fpr)), list(map(float, tpr)), list(map(float, thresholds))
        filt_thresholds, cuts = self.__filt_threshold(thresholds=thresholds, step=0.01)
        new_thresholds = []
        new_tpr = []
        new_fpr = []
        for threshold in filt_thresholds:
            index = thresholds.index(threshold)
            new_tpr.append(tpr[index])
            new_fpr.append(fpr[index])
            new_thresholds.append(threshold)
        fpr = new_fpr
        tpr = new_tpr
        thresholds = new_thresholds
        return fpr, tpr, thresholds, cuts
    else:
        logging.warning("roc_curve only supports binary classification! Returning None as results")
        fpr, tpr, thresholds, cuts = None, None, None, None
        return fpr, tpr, thresholds, cuts
Example 17
def add_graph_for_best(self, func_name):
    """Adds a graph to the report that gives the performance of the best Trial

    Parameters
    ----------
    func_name : str
        Name of a function that can be run on a Trial that returns a
        figure. For example 'roc_curve' or 'prec_recall_curve'
    """
    if self.__exp is None:
        raise ReportError('No experiment provided for this report. '
                          'Cannot add graph for best trial.')
    best_trial = max(
        self.__exp.trials,
        key=lambda trial: trial.average_score())
    fig = getattr(best_trial, func_name)()
    self.add_fig(fig)
    self.add_text('Best trial is trial {} ({})'.format(
        self.__back_indices[best_trial],
        best_trial))
    plt.close()
Example 18
def plot_roc_curve(y, p):
    fpr, tpr, _ = roc_curve(y, p)
    plt.plot(fpr, tpr)
    plt.plot([0, 1], [0, 1], color='navy', linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
Example 19
def print_metrics(test_word_arrayLabel, result_type):
    true_positives = 0
    false_negatives = 0
    false_positives = 0
    true_negatives = 0
    num_examples = len(test_word_arrayLabel)
    for example_num in range(0, num_examples):
        predicted_label = result_type[example_num]
        if test_word_arrayLabel[example_num] == 1:
            if predicted_label == 1:
                true_positives += 1
            elif predicted_label == 2:
                false_negatives += 1
        elif test_word_arrayLabel[example_num] == 2:
            if predicted_label == 1:
                false_positives += 1
            elif predicted_label == 2:
                true_negatives += 1
    TPR = true_positives / (true_positives + false_negatives)
    FPR = false_positives / (true_negatives + false_positives)
    return TPR, FPR

# def plotROCCurve(ROC_value):
#     fpr = dict()
#     tpr = dict()
#     roc_auc = dict()
#     for i in range(2):
#         fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
#         roc_auc[i] = auc(fpr[i], tpr[i])
#
#     # Compute micro-average ROC curve and ROC area
#     fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
#     roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
Example 20
def compute_eer(y_true, y_pred):
    fpr, tpr, _ = roc_curve(y_true, y_pred, pos_label=1)
    eer = brentq(lambda x: 1. - x - interp1d(fpr, tpr)(x), 0., 1.)
    return 100. * eer
Example 21
def roc(labels, scores, saveto=None):
    """Compute ROC curve and ROC area for each class"""
    fpr = dict()
    tpr = dict()
    roc_auc = dict()

    labels = labels.cpu()
    scores = scores.cpu()

    # True/False Positive Rates.
    fpr, tpr, _ = roc_curve(labels, scores)
    roc_auc = auc(fpr, tpr)

    # Equal Error Rate
    eer = brentq(lambda x: 1. - x - interp1d(fpr, tpr)(x), 0., 1.)

    if saveto:
        plt.figure()
        lw = 2
        plt.plot(fpr, tpr, color='darkorange', lw=lw, label='(AUC = %0.2f, EER = %0.2f)' % (roc_auc, eer))
        plt.plot([eer], [1 - eer], marker='o', markersize=5, color="navy")
        plt.plot([0, 1], [1, 0], color='navy', lw=1, linestyle=':')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver operating characteristic')
        plt.legend(loc="lower right")
        plt.savefig(os.path.join(saveto, "ROC.pdf"))
        plt.close()

    return roc_auc
Example 22
def compute_roc(probs_neg, probs_pos):
    probs = np.concatenate((probs_neg, probs_pos))
    labels = np.concatenate((np.zeros_like(probs_neg), np.ones_like(probs_pos)))
    fpr, tpr, _ = roc_curve(labels, probs)
    auc_score = auc(fpr, tpr)
    return fpr, tpr, auc_score
Example 23
def fit_model(self, data):
    eval_metrics = []
    if self.feature_type == 'fingerprints':
        fps = get_fp(data.smiles)
    elif self.feature_type == 'descriptors':
        fps, _, _ = get_desc(data.smiles, self.calc)
    if self.model_type == 'classifier':
        cross_val_data, cross_val_labels = \
            cross_validation_split(fps, data.binary_labels)
    elif self.model_type == 'regressor':
        cross_val_data, cross_val_labels = \
            cross_validation_split(fps, data.property)
    for i in range(self.n_ensemble):
        train_sm = np.concatenate(cross_val_data[:i] + cross_val_data[(i + 1):])
        test_sm = cross_val_data[i]
        train_labels = np.concatenate(cross_val_labels[:i] +
                                      cross_val_labels[(i + 1):])
        test_labels = cross_val_labels[i]
        if self.feature_type == 'descriptors':
            train_sm, desc_mean = normalize_desc(train_sm)
            self.desc_mean[i] = desc_mean
            test_sm, _ = normalize_desc(test_sm, desc_mean)
        self.model[i].fit(train_sm, train_labels.ravel())
        predicted = self.model[i].predict(test_sm)
        if self.model_type == 'classifier':
            fpr, tpr, thresholds = metrics.roc_curve(test_labels, predicted)
            eval_metrics.append(metrics.auc(fpr, tpr))
            metrics_type = 'AUC'
        elif self.model_type == 'regressor':
            r2 = metrics.r2_score(test_labels, predicted)
            eval_metrics.append(r2)
            metrics_type = 'R^2 score'
    return eval_metrics, metrics_type
Example 24
def plot_roc(true, pred):
    '''
    plot the ROC curve
    '''
    fpr, tpr, thresholds = metrics.roc_curve(true, pred, pos_label=1)
    plt.plot(fpr, tpr, c="blue", markersize=2, label='edge2vec')
    plt.show()
Example 25
def get_metrics(predictions, targets):
    # Calculate metrics
    # Accuracy
    acc = np.mean(np.equal(np.argmax(predictions, 1), np.argmax(targets, 1)))
    # Confusion matrix
    conf = confusion_matrix(np.argmax(targets, 1), np.argmax(predictions, 1))
    # Class weighted accuracy
    wacc = conf.diagonal() / conf.sum(axis=1)
    # AUC
    fpr = {}
    tpr = {}
    roc_auc = np.zeros([numClasses])
    for i in range(numClasses):
        fpr[i], tpr[i], _ = roc_curve(targets[:, i], predictions[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
    # F1 score
    f1 = f1_score(np.argmax(predictions, 1), np.argmax(targets, 1), average='weighted')
    # Print
    print("Accuracy:", acc)
    print("F1-Score:", f1)
    print("WACC:", wacc)
    print("Mean WACC:", np.mean(wacc))
    print("AUC:", roc_auc)
    print("Mean AUC:", np.mean(roc_auc))
    return acc, f1, wacc, roc_auc

# If it's the actual evaluation, evaluate each CV set independently and show results both for each CV set and for all of them together
Example 26
def evalEnsemble(currComb, eval_auc=False):
    currWacc = np.zeros([cvSize])
    currAUC = np.zeros([cvSize])
    for i in range(cvSize):
        if evaluate_method == 'vote':
            pred_argmax = np.argmax(accum_preds[i][currComb, :, :], 2)
            pred_eval = np.zeros([pred_argmax.shape[1], numClasses])
            for j in range(pred_eval.shape[0]):
                pred_eval[j, :] = np.bincount(pred_argmax[:, j], minlength=numClasses)
        else:
            pred_eval = np.mean(accum_preds[i][currComb, :, :], 0)
        # Confusion matrix
        conf = confusion_matrix(np.argmax(final_targets[i], 1), np.argmax(pred_eval, 1))
        # Class weighted accuracy
        currWacc[i] = np.mean(conf.diagonal() / conf.sum(axis=1))
        if eval_auc:
            currAUC_ = np.zeros([numClasses])
            for j in range(numClasses):
                fpr, tpr, _ = roc_curve(final_targets[i][:, j], pred_eval[:, j])
                currAUC_[j] = auc(fpr, tpr)
            currAUC[i] = np.mean(currAUC_)
    if eval_auc:
        currAUCstd = np.std(currAUC)
        currAUC = np.mean(currAUC)
    else:
        currAUCstd = currAUC
    currWaccStd = np.std(currWacc)
    currWacc = np.mean(currWacc)
    if eval_auc:
        return currWacc, currWaccStd, currAUC, currAUCstd
    else:
        return currWacc
Example 27
def roc_curve(conditions, prediction_scores, pos_label=None,
              sample_weight=None):
    return metrics.roc_curve(conditions, prediction_scores, pos_label,
                             sample_weight)
Example 28
def fold_roc(X, y, folds=8, random_state=44):
    """Compute ROC for a single value, sans model."""
    aurocs = []
    fpr_folds = []
    tpr_folds = []
    fpr_mean = np.linspace(0, 1, 256)
    tpr_mean = []
    # preds_full = np.zeros(y.shape)

    kf = KFold(n_splits=folds, shuffle=True, random_state=random_state)
    for train_index, test_index in kf.split(X):
        # predict test set (as is)
        preds = X[test_index, :]

        # save
        # preds_full[test_index] = preds.squeeze()

        # compute ROC curve
        fpr, tpr, _ = roc_curve(y[test_index], preds)
        fpr_folds.append(fpr)
        tpr_folds.append(tpr)

        interp_tpr = np.interp(fpr_mean, fpr, tpr)
        interp_tpr[0] = 0.0
        tpr_mean.append(interp_tpr)

        # compute AUROC
        aurocs.append(roc_auc_score(y[test_index], preds))

    # fpr_full, tpr_full, _ = roc_curve(y, preds_full)
    tpr_mean = np.array(tpr_mean).mean(axis=0)
    return np.array(aurocs), np.array(fpr_folds), np.array(tpr_folds), fpr_mean, tpr_mean
Example 29
def compute_roc(y_test, probability_predictions):
    """
    Compute TPRs, FPRs, best cutoff, ROC auc, and raw thresholds.

    Args:
        y_test (list) : true label values corresponding to the predictions. Also length n.
        probability_predictions (list) : predictions coming from an ML algorithm of length n.

    Returns:
        dict:
    """
    _validate_predictions_and_labels_are_equal_length(probability_predictions, y_test)

    # Calculate ROC
    false_positive_rates, true_positive_rates, roc_thresholds = skmetrics.roc_curve(y_test, probability_predictions)
    roc_auc = skmetrics.roc_auc_score(y_test, probability_predictions)

    # get ROC ideal cutoffs (upper left, or 0,1)
    roc_distances = (false_positive_rates - 0) ** 2 + (true_positive_rates - 1) ** 2

    # To prevent the case where there are two points with the same minimum distance, return only the first
    # np.where returns a tuple (we want the first element in the first array)
    roc_index = np.where(roc_distances == np.min(roc_distances))[0][0]

    best_tpr = true_positive_rates[roc_index]
    best_fpr = false_positive_rates[roc_index]
    ideal_roc_cutoff = roc_thresholds[roc_index]

    return {'roc_auc': roc_auc,
            'best_roc_cutoff': ideal_roc_cutoff,
            'best_true_positive_rate': best_tpr,
            'best_false_positive_rate': best_fpr,
            'true_positive_rates': true_positive_rates,
            'false_positive_rates': false_positive_rates,
            'roc_thresholds': roc_thresholds}