Python源码示例:sklearn.preprocessing.label_binarize()
示例1
def test_precision_recall_f_ignored_labels():
    """Check that recall can be computed for a requested subset of labels.

    The same expectations are asserted for plain multiclass targets and for
    the indicator matrices produced by ``label_binarize``.
    """
    raw_true = [1, 1, 2, 3]
    raw_pred = [1, 3, 3, 3]
    all_classes = np.arange(5)
    cases = [
        (raw_true, raw_pred),
        (label_binarize(raw_true, classes=all_classes),
         label_binarize(raw_pred, classes=all_classes)),
    ]

    for truth, guess in cases:
        recall_13 = partial(recall_score, truth, guess, labels=[1, 3])
        recall_all = partial(recall_score, truth, guess, labels=None)

        assert_array_almost_equal([.5, 1.], recall_13(average=None))
        assert_almost_equal((.5 + 1.) / 2, recall_13(average='macro'))
        assert_almost_equal((.5 * 2 + 1. * 1) / 3,
                            recall_13(average='weighted'))
        assert_almost_equal(2. / 3, recall_13(average='micro'))

        # Sanity check: restricting the labels must change the averaged
        # scores, otherwise the assertions above would prove nothing.
        for average in ('macro', 'weighted', 'micro'):
            assert_not_equal(recall_13(average=average),
                             recall_all(average=average))
示例2
def score(self,
          actual: np.array,
          predicted: np.array,
          sample_weight: typing.Optional[np.array] = None,
          labels: typing.Optional[np.array] = None,
          **kwargs) -> float:
    """Evaluate ``self.protected_metric`` at the threshold that maximises the optimizer column.

    :param actual: ground-truth targets, forwarded to ``prep_actual_predicted``.
    :param predicted: predictions; ``predicted.shape[1]`` is read in the
        multiclass branch, so a 2-D array is expected there.
    :param sample_weight: optional per-sample weights; flattened before use.
    :param labels: optional class labels, forwarded to ``prep_actual_predicted``
        and used as the classes for binarization.
    :param kwargs: accepted for interface compatibility; not used here.
    :return: mean of the metric over the selected confusion-matrix row(s).
    """
    if sample_weight is not None:
        sample_weight = sample_weight.ravel()
    # NOTE(review): prep_actual_predicted is defined elsewhere; presumably it
    # returns encoded targets, 2-D predictions and the resolved labels.
    enc_actual, enc_predicted, labels = prep_actual_predicted(actual, predicted, labels)
    cm_weights = sample_weight

    # multiclass: flatten into a single one-vs-rest binary problem
    if enc_predicted.shape[1] > 1:
        enc_predicted = enc_predicted.ravel()
        # ``classes`` must be passed by keyword: it is keyword-only in
        # current scikit-learn releases.
        enc_actual = label_binarize(enc_actual, classes=labels).ravel()
        if cm_weights is not None:
            # one weight per (sample, class) cell of the flattened matrix
            cm_weights = np.repeat(cm_weights, predicted.shape[1]).ravel()
        assert enc_predicted.shape == enc_actual.shape
        assert cm_weights is None or enc_predicted.shape == cm_weights.shape

    cms = daicx.confusion_matrices(enc_actual.ravel(), enc_predicted.ravel(), sample_weight=cm_weights)
    # keep the row(s) where the threshold-optimizer column is maximal
    best_rows = cms[[self.__class__._threshold_optimizer]].idxmax()
    cms = cms.loc[best_rows]
    cms['metric'] = cms[['tp', 'fp', 'tn', 'fn']].apply(lambda x: self.protected_metric(*x), axis=1, raw=True)
    return cms['metric'].mean()  # in case of ties
示例3
def score(self,
          actual: np.array,
          predicted: np.array,
          sample_weight: typing.Optional[np.array] = None,
          labels: typing.Optional[np.array] = None,
          **kwargs) -> float:
    """Return ``self.protected_metric`` computed at the optimal threshold.

    Confusion matrices come from ``daicx.confusion_matrices``. The row(s)
    maximising the class-level ``_threshold_optimizer`` column are selected
    and the metric is averaged over them.

    :param actual: ground-truth targets.
    :param predicted: predictions (2-D when there is more than one class).
    :param sample_weight: optional per-sample weights; flattened before use.
    :param labels: optional class labels used to binarize the targets.
    :param kwargs: ignored; present for signature compatibility.
    :return: the averaged metric value.
    """
    if sample_weight is not None:
        sample_weight = sample_weight.ravel()
    # NOTE(review): helper defined elsewhere; assumed to return encoded
    # targets, 2-D predictions and the final label list - confirm.
    enc_actual, enc_predicted, labels = prep_actual_predicted(actual, predicted, labels)
    cm_weights = sample_weight

    is_multiclass = enc_predicted.shape[1] > 1
    if is_multiclass:
        enc_predicted = enc_predicted.ravel()
        # Pass ``classes`` by keyword; scikit-learn made it keyword-only.
        enc_actual = label_binarize(enc_actual, classes=labels).ravel()
        if cm_weights is not None:
            # repeat each sample weight once per predicted column
            cm_weights = np.repeat(cm_weights, predicted.shape[1]).ravel()
        assert enc_predicted.shape == enc_actual.shape
        assert cm_weights is None or enc_predicted.shape == cm_weights.shape

    cms = daicx.confusion_matrices(enc_actual.ravel(), enc_predicted.ravel(), sample_weight=cm_weights)
    # get row(s) for the optimal value of the optimizer column
    optimal_index = cms[[self.__class__._threshold_optimizer]].idxmax()
    cms = cms.loc[optimal_index]
    cms['metric'] = cms[['tp', 'fp', 'tn', 'fn']].apply(lambda x: self.protected_metric(*x), axis=1, raw=True)
    return cms['metric'].mean()  # in case of ties
示例4
def score(self,
          actual: np.array,
          predicted: np.array,
          sample_weight: typing.Optional[np.array] = None,
          labels: typing.Optional[np.array] = None,
          **kwargs) -> float:
    """Compute the scorer's metric from the best confusion-matrix row.

    For multi-column predictions the problem is flattened to one binary
    problem (targets are one-hot encoded and raveled alongside the
    predictions) before confusion matrices are built.

    :param actual: ground-truth targets.
    :param predicted: predictions; its second dimension is read when weights
        are expanded in the multiclass branch.
    :param sample_weight: optional per-sample weights; flattened before use.
    :param labels: optional class labels.
    :param kwargs: unused.
    :return: mean metric over the row(s) selected by ``_threshold_optimizer``.
    """
    if sample_weight is not None:
        sample_weight = sample_weight.ravel()
    # NOTE(review): semantics of prep_actual_predicted are not visible here.
    enc_actual, enc_predicted, labels = prep_actual_predicted(actual, predicted, labels)
    cm_weights = sample_weight

    # multiclass
    if enc_predicted.shape[1] > 1:
        enc_predicted = enc_predicted.ravel()
        # keyword argument required by current scikit-learn (keyword-only)
        enc_actual = label_binarize(enc_actual, classes=labels).ravel()
        if cm_weights is not None:
            cm_weights = np.repeat(cm_weights, predicted.shape[1]).ravel()
        assert enc_predicted.shape == enc_actual.shape
        assert cm_weights is None or enc_predicted.shape == cm_weights.shape

    cms = daicx.confusion_matrices(enc_actual.ravel(), enc_predicted.ravel(), sample_weight=cm_weights)
    # select the row(s) with the highest value of the optimizer column
    cms = cms.loc[cms[[self.__class__._threshold_optimizer]].idxmax()]
    cms['metric'] = cms[['tp', 'fp', 'tn', 'fn']].apply(lambda x: self.protected_metric(*x), axis=1, raw=True)
    return cms['metric'].mean()  # in case of ties
示例5
def weight_dict_fc(trainLabel, para):
    """Build a ``{class_id: weight}`` dict from inverse class frequencies.

    Labels from every chunk ``trainLabel[0] .. trainLabel[len - 1]`` are
    pooled, counted over the 40 fixed classes, and each class receives
    ``normalized_inverse_frequency * para.weight_scaler + 1``.
    """
    from sklearn.preprocessing import label_binarize

    # Chunks are fetched by position so containers keyed 0..n-1 keep working.
    pooled_labels = []
    for chunk_idx in range(len(trainLabel)):
        pooled_labels.extend(trainLabel[chunk_idx])

    one_hot = label_binarize(pooled_labels, classes=list(range(40)))
    counts = np.sum(one_hot, axis=0).astype(float)
    frequency = counts / np.sum(counts)
    inverse_frequency = 1 / frequency
    normalized_inverse = inverse_frequency / np.sum(inverse_frequency)
    weights = normalized_inverse * para.weight_scaler + 1
    return dict(enumerate(weights))
示例6
def weight_dict_fc(trainLabel, para):
    """Compute per-class weights from the inverse class distribution.

    Arguments:
        trainLabel: container of label chunks, indexed ``0 .. len - 1``.
        para: parameter object; ``outputClassN`` gives the number of classes
            and ``weight_scaler`` scales the normalized inverse frequency.

    Returns:
        dict mapping each class id to
        ``normalized_inverse_frequency * weight_scaler + 1``.

    NOTE: a class with no samples has frequency 0, so its inverse is ``inf``
    and the normalization yields NaN for it. Every class is expected to be
    present in the training labels.
    """
    from sklearn.preprocessing import label_binarize

    # Flatten all chunks into one label list (explicit loop, no side-effect
    # comprehension).
    train_labels = []
    for chunk_idx in range(len(trainLabel)):
        train_labels.extend(trainLabel[chunk_idx])

    one_hot = label_binarize(train_labels, classes=list(range(para.outputClassN)))
    class_counts = np.sum(one_hot, axis=0).astype(float)
    class_distribution = class_counts / np.sum(class_counts)
    inverse_dist = 1 / class_distribution
    norm_inv_dist = inverse_dist / np.sum(inverse_dist)
    weights = norm_inv_dist * para.weight_scaler + 1
    return dict(enumerate(weights))
示例7
def __init__(self, feats_path, class_nums, n_classes, n_frames_per_video, batch_size, n_feat_maps, feat_map_side_dim, n_threads=10):
    """Store the loader configuration, one-hot encode labels and create the worker pool.

    :param feats_path: feature path(s), stored as given.
    :param class_nums: class numbers; binarized against ``1 .. n_classes``.
    :param n_classes: number of classes.
    :param n_frames_per_video: stored as given.
    :param batch_size: stored as given.
    :param n_feat_maps: stored as given.
    :param feat_map_side_dim: stored as given.
    :param n_threads: size passed to ``Pool``.
    """
    # Fixed seeds; note that this reseeds the process-wide RNGs.
    random.seed(101)
    np.random.seed(101)

    self.__feats_pathes = feats_path
    self.__class_nums = class_nums
    self.__n_frames_per_video = n_frames_per_video
    self.__n_feat_maps = n_feat_maps
    self.__feat_map_side_dim = feat_map_side_dim
    self.__batch_size = batch_size

    # binarize the labels (class ids start at 1); ``classes`` is passed by
    # keyword because it is keyword-only in current scikit-learn
    classes = list(range(1, n_classes + 1))
    self.__y = label_binarize(self.__class_nums, classes=classes)

    self.__is_busy = False
    self.__batch_features = None
    self.__batch_y = None

    self.__n_threads_in_pool = n_threads
    self.__pool = Pool(self.__n_threads_in_pool)
示例8
def cross_val_roc_auc_score(self, cv=10, **kwargs):
    """
    Cross-validated ROC-AUC scoring of the data.

    Per the original author's notes (not verifiable from this method alone):
    the method is wrapped by the decorator
    entry_wrapper(support=(EMLFitType.E_FIT_CLF,)), i.e. it supports
    supervised classification. Scoring goes through cross_val_score. When y
    has more than two labels, they are binarized with label_binarize, roc_auc
    is computed for each binarized column in turn, and the best score is
    returned.

    :param cv: forwarded to cross_val_score, default 10
    :param kwargs: callers may pass x and y; they are resolved with
                    x = kwargs.pop('x', self.x)
                    y = kwargs.pop('y', self.y)
                   and handed to self._do_cross_val_score. The decorator's
                   fiter_type can also be given here, e.g.
                   ttn_abu.cross_val_roc_auc_score(fiter_type=ml.EMLFitType.E_FIT_REG)
    :return: the score sequence returned by cross_val_score,
             eg: array([ 1. , 0.9 , 1. , 0.9 , 1. , 0.9 , 1. , 0.9 , 0.95, 1. ])
    """
    # Use the instance's data unless the caller overrides it.
    features = kwargs.pop('x', self.x)
    targets = kwargs.pop('y', self.y)
    scoring = _EMLScoreType.E_SCORE_ROC_AUC.value
    return self._do_cross_val_score(features, targets, cv, scoring)
示例9
def test_precision_recall_f_ignored_labels():
    # Recall restricted to labels {1, 3} must match the hand-computed values,
    # for multiclass targets as well as their binarized indicator form.
    y_true = [1, 1, 2, 3]
    y_pred = [1, 3, 3, 3]
    y_true_bin = label_binarize(y_true, classes=np.arange(5))
    y_pred_bin = label_binarize(y_pred, classes=np.arange(5))

    expected_by_average = [
        ('macro', (.5 + 1.) / 2),
        ('weighted', (.5 * 2 + 1. * 1) / 3),
        ('micro', 2. / 3),
    ]

    for targets, predictions in ((y_true, y_pred), (y_true_bin, y_pred_bin)):
        recall_13 = partial(recall_score, targets, predictions, labels=[1, 3])
        recall_all = partial(recall_score, targets, predictions, labels=None)

        assert_array_almost_equal([.5, 1.], recall_13(average=None))
        for average, expected in expected_by_average:
            assert_almost_equal(expected, recall_13(average=average))

        # ensure the above were meaningful tests: the label subset has to
        # give a different score than using every label
        for average, _ in expected_by_average:
            assert_not_equal(recall_13(average=average),
                             recall_all(average=average))
示例10
def test_matthews_corrcoef():
    """Exercise matthews_corrcoef on identical, inverted, constant and weighted inputs."""
    rng = np.random.RandomState(0)
    y_true = ["a" if i == 0 else "b" for i in rng.randint(0, 2, size=20)]

    # corrcoef of same vectors must be 1
    assert_almost_equal(matthews_corrcoef(y_true, y_true), 1.0)

    # corrcoef, when the two vectors are opposites of each other, should be -1
    y_true_inv = ["b" if i == "a" else "a" for i in y_true]
    assert_almost_equal(matthews_corrcoef(y_true, y_true_inv), -1)

    # Same inversion built via label_binarize: the binary indicator is 1 for
    # "b", and np.where maps 1 -> 'a', 0 -> 'b'. ``classes`` is passed by
    # keyword because it is keyword-only in current scikit-learn.
    y_true_inv2 = label_binarize(y_true, classes=["a", "b"])
    y_true_inv2 = np.where(y_true_inv2, 'a', 'b')
    assert_almost_equal(matthews_corrcoef(y_true, y_true_inv2), -1)

    # For the zero vector case, the corrcoef cannot be calculated and should
    # result in a RuntimeWarning
    mcc = assert_warns_div0(matthews_corrcoef, [0, 0, 0, 0], [0, 0, 0, 0])

    # But will output 0
    assert_almost_equal(mcc, 0.)

    # And also for any other vector with 0 variance
    mcc = assert_warns_div0(matthews_corrcoef, y_true, ['a'] * len(y_true))

    # But will output 0
    assert_almost_equal(mcc, 0.)

    # These two vectors have 0 correlation and hence mcc should be 0
    y_1 = [1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1]
    y_2 = [1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1]
    assert_almost_equal(matthews_corrcoef(y_1, y_2), 0.)

    # Check that sample weight is able to selectively exclude
    mask = [1] * 10 + [0] * 10
    # Now the first half of the vector elements are alone given a weight of 1
    # and hence the mcc will not be a perfect 0 as in the previous case
    assert_raises(AssertionError, assert_almost_equal,
                  matthews_corrcoef(y_1, y_2, sample_weight=mask), 0.)
示例11
def evaluateOneEpoch(inputCoor, inputGraph, inputLabel, para, sess, trainOperaion):
    """Evaluate the model on every test chunk, batch by batch.

    Arguments:
        inputCoor, inputGraph, inputLabel: containers indexed ``0 .. len - 1``
            holding coordinates, a sparse graph (must support ``tocsr``) and
            labels for each chunk.
        para: parameters; ``outputClassN`` and ``testBatchSize`` are read.
        sess: session whose ``run`` evaluates the fetches.
        trainOperaion: dict of placeholders / ops keyed by name.

    Returns:
        (mean loss, mean accuracy, list of per-batch predictions).

    Only full batches are evaluated; trailing samples that do not fill a
    batch are skipped.
    """
    test_loss = []
    test_acc = []
    test_predict = []
    for i in range(len(inputCoor)):
        xTest, graphTest, labelTest = inputCoor[i], inputGraph[i], inputLabel[i]
        graphTest = graphTest.tocsr()
        labelBinarize = label_binarize(labelTest, classes=list(range(para.outputClassN)))
        test_batch_size = para.testBatchSize
        # Floor division: ``/`` yields a float on Python 3 and range() would
        # raise TypeError. ``//`` gives the same int on Python 2 and 3.
        for testBatchID in range(len(labelTest) // test_batch_size):
            start = testBatchID * test_batch_size
            end = start + test_batch_size
            batchCoor, batchGraph, batchLabel = get_mini_batch(xTest, graphTest, labelBinarize, start, end)
            batchWeight = uniform_weight(batchLabel)
            batchGraph = batchGraph.todense()

            # keep probabilities of 1.0 are fed during evaluation
            feed_dict = {trainOperaion['inputPC']: batchCoor, trainOperaion['inputGraph']: batchGraph,
                         trainOperaion['outputLabel']: batchLabel, trainOperaion['weights']: batchWeight,
                         trainOperaion['keep_prob_1']: 1.0, trainOperaion['keep_prob_2']: 1.0}

            predict, loss_test, acc_test = sess.run(
                [trainOperaion['predictLabels'], trainOperaion['loss'], trainOperaion['acc']], feed_dict=feed_dict)
            test_loss.append(loss_test)
            test_acc.append(acc_test)
            test_predict.append(predict)

    test_average_loss = np.mean(test_loss)
    test_average_acc = np.mean(test_acc)
    return test_average_loss, test_average_acc, test_predict
示例12
def _compute_roc_stats(y_test, y_test_probas, num_class):
    """Compute per-class and micro-averaged ROC curves and AUC scores.

    Arguments:
        y_test: [int]
            list of test class labels as integer indices
        y_test_probas: np.ndarray, float
            array of predicted probabilities with shape
            (num_sample, num_class)
        num_class: int
            number of classes

    Returns:
        roc_auc_dict: {int or "micro": float}
            dictionary mapping each class index, plus the key "micro", to
            its ROC AUC score
        fpr_dict: {int or "micro": np.ndarray}
            dictionary mapping each class index, plus the key "micro", to
            the array of increasing false positive rates
        tpr_dict: {int or "micro": np.ndarray}
            dictionary mapping each class index, plus the key "micro", to
            the array of increasing true positive rates
    """
    import numpy as np

    y_test = label_binarize(y_test, classes=range(0, num_class))
    if num_class == 2 and y_test.shape[1] == 1:
        # label_binarize collapses a two-class problem into a single column
        # flagging the second class. Rebuild the two-column indicator so the
        # per-class indexing below does not raise IndexError.
        y_test = np.hstack((1 - y_test, y_test))

    fpr_dict, tpr_dict, roc_auc_dict = {}, {}, {}
    for i in range(num_class):
        fpr_dict[i], tpr_dict[i], _ = roc_curve(
            y_test[:, i], y_test_probas[:, i])
        roc_auc_dict[i] = auc(fpr_dict[i], tpr_dict[i])

    # Compute micro-average ROC curve and ROC area: every (sample, class)
    # cell is treated as one binary decision.
    fpr_dict["micro"], tpr_dict["micro"], _ = roc_curve(
        y_test.ravel(), y_test_probas.ravel())
    roc_auc_dict["micro"] = auc(fpr_dict["micro"], tpr_dict["micro"])

    return roc_auc_dict, fpr_dict, tpr_dict
示例13
def roc_graph_example():
    """
    Plot an example ROC graph of an SVM model predictions over the Iris
    dataset.

    Based on sklearn examples (as was seen on April 2018):
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html

    Returns whatever ``roc_graph`` returns for the held-out half of the data.
    """
    # Iris features with one-hot encoded targets
    iris = datasets.load_iris()
    features = iris.data
    targets = label_binarize(iris.target, classes=[0, 1, 2])

    # Append 200 noise columns per original feature
    rng = np.random.RandomState(4)
    n_samples, n_features = features.shape
    noise = rng.randn(n_samples, 200 * n_features)
    features = np.c_[features, noise]

    # 50/50 split, then fit a one-vs-rest linear SVM
    X_train, X_test, y_train, y_test = train_test_split(features, targets, test_size=.5, random_state=0)
    model = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True, random_state=0))
    model.fit(X_train, y_train)

    # Class probabilities for the held-out samples
    y_score = model.predict_proba(X_test)

    return roc_graph(y_test, y_score, class_names=iris.target_names)
示例14
def average_precision(prob_np, target_np):
    """Return the per-class average precision (no averaging across classes).

    :param prob_np: score array; ``prob_np.shape[1]`` is taken as the number
        of classes.
    :param target_np: class indices, one-hot encoded against
        ``0 .. num_class - 1`` before scoring.
    :return: result of ``average_precision_score`` with ``average=None``.
    """
    num_class = prob_np.shape[1]
    label = label_binarize(target_np, classes=list(range(num_class)))
    # Suppress numpy divide/invalid warnings raised while scoring.
    with np.errstate(divide='ignore', invalid='ignore'):
        # ``average`` is keyword-only in current scikit-learn; passing it
        # positionally raises TypeError there.
        return average_precision_score(label, prob_np, average=None)
示例15
def _marg_rounded(self, x, y):
    """Build indicator marginals for the labelling ``y`` of document ``x``.

    Returns a tuple ``(Y_node, Y_link, compat, second_order)``: the binarized
    node and link labels, the compatibility tensor combining source type,
    target type and link label, and an array of second-order indicators (one
    per enabled factor type and entry of ``x.second_order``).
    """
    y_node = y.nodes
    y_link = y.links
    # ``classes`` must be given by keyword (keyword-only in current
    # scikit-learn).
    Y_node = label_binarize(y_node, classes=self.prop_encoder_.classes_)
    Y_link = label_binarize(y_link, classes=self.link_encoder_.classes_)

    # XXX can this be avoided?
    # NOTE(review): _binary_2d is defined elsewhere; presumably it expands
    # the single-column binary output to two columns - confirm.
    Y_node, Y_link = map(_binary_2d, (Y_node, Y_link))

    # indicator rows of the source / target proposition of every link
    src_type = Y_node[x.link_to_prop[:, 0]]
    trg_type = Y_node[x.link_to_prop[:, 1]]

    if self.compat_features:
        pw = np.einsum('...j,...k,...l->...jkl',
                       src_type, trg_type, Y_link)
        compat = np.tensordot(x.X_compat.T, pw, axes=[1, 0])
    else:
        # equivalent to compat_features == np.ones(n_links)
        compat = np.einsum('ij,ik,il->jkl', src_type, trg_type, Y_link)

    second_order = []

    if self.coparents_ or self.grandparents_ or self.siblings_:
        # map (source, target) proposition pair -> link index
        link = {(a, b): k for k, (a, b) in enumerate(x.link_to_prop)}
        if self.coparents_:
            second_order.extend(y_link[link[a, b]] & y_link[link[c, b]]
                                for a, b, c in x.second_order)
        if self.grandparents_:
            second_order.extend(y_link[link[a, b]] & y_link[link[b, c]]
                                for a, b, c in x.second_order)
        if self.siblings_:
            second_order.extend(y_link[link[b, a]] & y_link[link[b, c]]
                                for a, b, c in x.second_order)
    second_order = np.array(second_order)

    return Y_node, Y_link, compat, second_order
示例16
def __call__(self, y_true, y_pred, **kwargs):
    """
    Compute auroc

    Parameters
    ----------
    y_true: np.ndarray
        ground truth data with shape (N)
    y_pred: np.ndarray
        predictions of network in numpy format with shape (N, nclasses)
        (or shape (N) for a single binary output unit)
    kwargs:
        variable number of keyword arguments passed to roc_auc_score

    Returns
    -------
    float
        computes auc score; ``None`` is returned implicitly when fewer than
        two classes are configured

    Raises
    ------
    ValueError
        if two classes are given and the predictions contain more than two
        classes

    """
    # binary classification
    if len(self.classes) == 2:
        # The class axis of an (N, nclasses) array is axis 1. Indexing
        # ``shape[2]`` raised IndexError for every 2-D prediction array.
        # single output unit (e.g. sigmoid)
        if len(y_pred.shape) == 1 or y_pred.shape[1] == 1:
            return roc_auc_score(y_true, y_pred, **kwargs)
        # output of two units (e.g. softmax)
        elif y_pred.shape[1] == 2:
            return roc_auc_score(y_true, y_pred[:, 1], **kwargs)
        else:
            raise ValueError("Can not compute auroc metric for binary "
                             "classes with {} predicted "
                             "classes.".format(y_pred.shape[1]))

    # classification with multiple classes
    if len(self.classes) > 2:
        # ``classes`` is keyword-only in current scikit-learn
        y_true_bin = label_binarize(y_true, classes=self.classes)
        return roc_auc_score(y_true_bin, y_pred, **kwargs, **self.kwargs)
示例17
def make_roc(gt,cpl,cl):
    """Print the one-vs-rest ROC AUC for each of the six states.

    ``gt`` supplies the reference labels and ``cpl[:, i]`` the score of
    state ``i``. ``cl`` is printed and binarized, but only the column count
    of its binarized form is used. Nothing is returned.
    """
    from sklearn.preprocessing import label_binarize
    state_ids = [0, 1, 2, 3, 4, 5]
    truth_onehot = label_binarize(gt, classes=state_ids)
    print('c=',cl)
    cl_onehot = label_binarize(cl, classes=state_ids)
    n_states = cl_onehot.shape[1]

    fpr, tpr, roc_auc = {}, {}, {}
    from sklearn.metrics import roc_curve, auc
    for state in range(n_states):
        # thresholds returned by roc_curve are not needed
        fpr[state], tpr[state], _ = roc_curve(truth_onehot[:, state], cpl[:, state])
        roc_auc[state] = auc(fpr[state], tpr[state])
        print('state=, {}, auc=,{}'.format(state,roc_auc[state]))
示例18
def performance_report(labels, predictions):
    """Assemble a per-class report of six columns.

    Row ``c`` holds, in order: precision, recall, F1, ROC AUC, PR AUC and
    support for class ``c``. ``labels`` and ``predictions`` are 2-D arrays
    with one column per class.
    """
    from sklearn.preprocessing import label_binarize
    from sklearn.metrics import precision_recall_fscore_support

    n_classes = labels.shape[1]
    classes = list(range(n_classes))

    roc_aucs, pr_aucs = [], []
    if n_classes == 2:
        # Two classes: the AUCs of column 0 are reported for both rows.
        roc_aucs = [auROC(labels[:, 0], predictions[:, 0])[2]] * 2
        pr_aucs = [auPR(labels[:, 0], predictions[:, 0])[2]] * 2
        labels = label_binarize(np.argmax(labels, axis=1), classes=classes)
    else:
        for cls in classes:
            roc_aucs.append(auROC(labels[:, cls], predictions[:, cls])[2])
            pr_aucs.append(auPR(labels[:, cls], predictions[:, cls])[2])

    rows_sum_to_one = np.isclose(np.sum(predictions, axis=1), 1).all()
    if rows_sum_to_one:
        y_pred = label_binarize(np.argmax(predictions, axis=1), classes=classes)
    else:
        # multi-label classification: threshold each score at 0.5 and
        # reinterpret the boolean buffer as uint8
        y_pred = (predictions > 0.5).view(np.uint8)

    precision, recall, f1, support = precision_recall_fscore_support(labels, y_pred)

    report = np.empty((n_classes, 6))
    for cls in classes:
        report[cls, :] = [precision[cls], recall[cls],
                          f1[cls], roc_aucs[cls],
                          pr_aucs[cls], support[cls]]
    return report
示例19
def test_precision_recall_f_extra_labels():
    """Recall must cope with explicitly requested labels absent from the data."""
    y_true = [1, 3, 3, 2]
    y_pred = [1, 1, 3, 2]
    y_true_bin = label_binarize(y_true, classes=np.arange(5))
    y_pred_bin = label_binarize(y_pred, classes=np.arange(5))
    cases = [(y_true, y_pred),
             (y_true_bin, y_pred_bin)]

    for case_idx, (truth, guess) in enumerate(cases):
        is_multiclass_case = case_idx == 0

        # Without averaging, the extra labels show up as zeros
        per_class = recall_score(truth, guess, labels=[0, 1, 2, 3, 4],
                                 average=None)
        assert_array_almost_equal([0., 1., 1., .5, 0.], per_class)

        # The macro average is pulled down by those zeros
        macro = recall_score(truth, guess, labels=[0, 1, 2, 3, 4],
                             average='macro')
        assert_array_almost_equal(np.mean([0., 1., 1., .5, 0.]), macro)

        # Other averages are unaffected; 'samples' is skipped for the
        # multiclass case
        for average in ['micro', 'weighted', 'samples']:
            if is_multiclass_case and average == 'samples':
                continue
            with_extra = recall_score(truth, guess,
                                      labels=[0, 1, 2, 3, 4],
                                      average=average)
            without_extra = recall_score(truth, guess, labels=None,
                                         average=average)
            assert_almost_equal(with_extra, without_extra)

    # Error when introducing invalid label in multilabel case
    # (although it would only affect performance if average='macro'/None)
    for average in [None, 'macro', 'micro', 'samples']:
        for bad_labels in (np.arange(6), np.arange(-1, 4)):
            assert_raises(ValueError, recall_score, y_true_bin, y_pred_bin,
                          labels=bad_labels, average=average)
示例20
def test_matthews_corrcoef():
    """Check matthews_corrcoef for perfect, inverted, degenerate and weighted inputs."""
    rng = np.random.RandomState(0)
    y_true = ["a" if i == 0 else "b" for i in rng.randint(0, 2, size=20)]

    # corrcoef of same vectors must be 1
    assert_almost_equal(matthews_corrcoef(y_true, y_true), 1.0)

    # corrcoef, when the two vectors are opposites of each other, should be -1
    y_true_inv = ["b" if i == "a" else "a" for i in y_true]
    assert_almost_equal(matthews_corrcoef(y_true, y_true_inv), -1)

    # Inverted labels rebuilt from the binary indicator (1 marks "b", which
    # np.where maps to 'a'). ``classes`` is passed by keyword: it is
    # keyword-only in current scikit-learn.
    y_true_inv2 = label_binarize(y_true, classes=["a", "b"])
    y_true_inv2 = np.where(y_true_inv2, 'a', 'b')
    assert_almost_equal(matthews_corrcoef(y_true, y_true_inv2), -1)

    # For the zero vector case, the corrcoef cannot be calculated and should
    # result in a RuntimeWarning
    mcc = assert_warns_message(RuntimeWarning, 'invalid value encountered',
                               matthews_corrcoef, [0, 0, 0, 0], [0, 0, 0, 0])

    # But will output 0
    assert_almost_equal(mcc, 0.)

    # And also for any other vector with 0 variance
    mcc = assert_warns_message(RuntimeWarning, 'invalid value encountered',
                               matthews_corrcoef, y_true, ['a'] * len(y_true))

    # But will output 0
    assert_almost_equal(mcc, 0.)

    # These two vectors have 0 correlation and hence mcc should be 0
    y_1 = [1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1]
    y_2 = [1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1]
    assert_almost_equal(matthews_corrcoef(y_1, y_2), 0.)

    # Check that sample weight is able to selectively exclude
    mask = [1] * 10 + [0] * 10
    # Now the first half of the vector elements are alone given a weight of 1
    # and hence the mcc will not be a perfect 0 as in the previous case
    assert_raises(AssertionError, assert_almost_equal,
                  matthews_corrcoef(y_1, y_2, sample_weight=mask), 0.)
示例21
def test_precision_recall_f_extra_labels():
    # Explicit additional labels (not present in the input) passed to the
    # precision/recall/F-score functions must be handled.
    y_true = [1, 3, 3, 2]
    y_pred = [1, 1, 3, 2]
    y_true_bin = label_binarize(y_true, classes=np.arange(5))
    y_pred_bin = label_binarize(y_pred, classes=np.arange(5))

    requested = [0, 1, 2, 3, 4]
    expected_per_class = [0., 1., 1., .5, 0.]

    for idx, (targets, predictions) in enumerate([(y_true, y_pred),
                                                  (y_true_bin, y_pred_bin)]):
        # No average: labels missing from the data score zero
        assert_array_almost_equal(
            expected_per_class,
            recall_score(targets, predictions, labels=[0, 1, 2, 3, 4],
                         average=None))

        # Macro average changes because of the zero entries
        assert_array_almost_equal(
            np.mean([0., 1., 1., .5, 0.]),
            recall_score(targets, predictions, labels=[0, 1, 2, 3, 4],
                         average='macro'))

        # No effect otherwise ('samples' is skipped for the first,
        # multiclass, pair)
        for average in ['micro', 'weighted', 'samples']:
            if idx == 0 and average == 'samples':
                continue
            assert_almost_equal(recall_score(targets, predictions,
                                             labels=[0, 1, 2, 3, 4],
                                             average=average),
                                recall_score(targets, predictions, labels=None,
                                             average=average))

    # Error when introducing invalid label in multilabel case
    # (although it would only affect performance if average='macro'/None)
    for average in [None, 'macro', 'micro', 'samples']:
        assert_raises(ValueError, recall_score, y_true_bin, y_pred_bin,
                      labels=np.arange(6), average=average)
        assert_raises(ValueError, recall_score, y_true_bin, y_pred_bin,
                      labels=np.arange(-1, 4), average=average)

    # tests non-regression on issue #10307
    indicator_true = np.array([[0, 1, 1], [1, 0, 0]])
    indicator_pred = np.array([[1, 1, 1], [1, 0, 1]])
    p, r, f, _ = precision_recall_fscore_support(indicator_true,
                                                 indicator_pred,
                                                 average='samples',
                                                 labels=[0, 1])
    assert_almost_equal(np.array([p, r, f]), np.array([3 / 4, 1, 5 / 6]))
示例22
def evaluateOneEpoch(inputCoor, inputGraph, inputLabel, para, sess, trainOperaion):
    # Description: Performance on the test set data
    # Input: (1) inputCoor: input coordinates (B, N, 3) (2) inputGraph: input graph (B, N*N) (3) inputLabel: labels (B, 1)
    #        (4) para: global Parameters (5) sess: Session (6) trainOperaion: placeholder dictionary
    # Return: average loss, average acc, and the list of per-batch predictions for the test set
    # Note: only full batches are evaluated; trailing samples that do not fill a batch are skipped
    test_loss = []
    test_acc = []
    test_predict = []
    for i in range(len(inputCoor)):
        xTest, graphTest, labelTest = inputCoor[i], inputGraph[i], inputLabel[i]
        graphTest = graphTest.tocsr()
        # one-hot encode against the 40 fixed classes
        labelBinarize = label_binarize(labelTest, classes=list(range(40)))
        test_batch_size = para.testBatchSize
        # Floor division: ``/`` yields a float on Python 3 and range() would
        # raise TypeError. ``//`` gives the same int on Python 2 and 3.
        for testBatchID in range(len(labelTest) // test_batch_size):
            start = testBatchID * test_batch_size
            end = start + test_batch_size
            batchCoor, batchGraph, batchLabel = get_mini_batch(xTest, graphTest, labelBinarize, start, end)
            batchWeight = uniform_weight(batchLabel)
            batchGraph = batchGraph.todense()

            # sample centroids and build the graph over them for the second layer
            batchIndexL1, centroid_coordinates = farthest_sampling_new(batchCoor, M=para.clusterNumberL1,
                                                                       k=para.nearestNeighborL1, batch_size=test_batch_size,
                                                                       nodes_n=para.pointNumber)
            batchMiddleGraph = middle_graph_generation(centroid_coordinates, batch_size = test_batch_size, M = para.clusterNumberL1)

            # keep probabilities of 1.0 are fed during evaluation
            feed_dict = {trainOperaion['inputPC']: batchCoor, trainOperaion['inputGraph']: batchGraph,
                         trainOperaion['outputLabel']: batchLabel, trainOperaion['weights']: batchWeight,
                         trainOperaion['keep_prob_1']: 1.0, trainOperaion['keep_prob_2']: 1.0,
                         trainOperaion['batch_index_l1']: batchIndexL1,
                         trainOperaion['l2Graph']: batchMiddleGraph, trainOperaion['batch_size']: test_batch_size
                         }

            predict, loss_test, acc_test = sess.run(
                [trainOperaion['predictLabels'], trainOperaion['loss'], trainOperaion['acc']], feed_dict=feed_dict)
            test_loss.append(loss_test)
            test_acc.append(acc_test)
            test_predict.append(predict)

    test_average_loss = np.mean(test_loss)
    test_average_acc = np.mean(test_acc)
    return test_average_loss, test_average_acc, test_predict
示例23
def trainOneEpoch(inputCoor, inputGraph, inputLabel, para, sess, trainOperaion, weight_dict, learningRate):
    """Train for one epoch over every data chunk and report averaged statistics.

    Arguments:
        inputCoor, inputGraph, inputLabel: containers indexed ``0 .. len - 1``
            holding coordinates, a sparse graph (must support ``tocsr``) and
            labels for each chunk.
        para: parameters; reads ``outputClassN``, ``batchSize``,
            ``weighting_scheme``, ``keep_prob_1`` and ``keep_prob_2``.
        sess: session whose ``run`` executes the training step.
        trainOperaion: dict of placeholders / ops keyed by name.
        weight_dict: per-class weights, used when the scheme is 'weighted'.
        learningRate: value fed to the ``lr`` placeholder.

    Returns:
        (mean total loss, mean accuracy, mean regularization loss), each
        averaged first over the batches of a chunk, then over chunks.

    Raises:
        ValueError: if ``para.weighting_scheme`` is neither 'uniform' nor
            'weighted'.

    Only full batches are used; trailing samples of a chunk are skipped.
    """
    dataChunkLoss = []
    dataChunkAcc = []
    dataChunkRegLoss = []
    for i in range(len(inputCoor)):
        xTrain_1, graphTrain_1, labelTrain_1 = inputCoor[i], inputGraph[i], inputLabel[i]
        graphTrain_1 = graphTrain_1.tocsr()
        labelBinarize = label_binarize(labelTrain_1, classes=list(range(para.outputClassN)))
        xTrain, graphTrain, labelTrain = shuffle(xTrain_1, graphTrain_1, labelBinarize)

        batch_loss = []
        batch_acc = []
        batch_reg = []
        batchSize = para.batchSize
        # Floor division: ``/`` yields a float on Python 3 and range() would
        # raise TypeError. ``//`` gives the same int on Python 2 and 3.
        for batchID in range(len(labelBinarize) // batchSize):
            start = batchID * batchSize
            end = start + batchSize
            batchCoor, batchGraph, batchLabel = get_mini_batch(xTrain, graphTrain, labelTrain, start, end)
            batchGraph = batchGraph.todense()
            batchCoor = add_noise(batchCoor, sigma=0.008, clip=0.02)
            if para.weighting_scheme == 'uniform':
                batchWeight = uniform_weight(batchLabel)
            elif para.weighting_scheme == 'weighted':
                batchWeight = weights_calculation(batchLabel, weight_dict)
            else:
                # Previously a Python 2 ``print`` statement; execution then
                # failed on the unbound ``batchWeight``. Fail explicitly.
                raise ValueError('please enter the valid weighting scheme')

            feed_dict = {trainOperaion['inputPC']: batchCoor, trainOperaion['inputGraph']: batchGraph,
                         trainOperaion['outputLabel']: batchLabel, trainOperaion['lr']: learningRate,
                         trainOperaion['weights']: batchWeight,
                         trainOperaion['keep_prob_1']: para.keep_prob_1, trainOperaion['keep_prob_2']: para.keep_prob_2}

            opt, loss_train, acc_train, loss_reg_train = sess.run(
                [trainOperaion['train'], trainOperaion['loss_total'], trainOperaion['acc'], trainOperaion['loss_reg']],
                feed_dict=feed_dict)

            batch_loss.append(loss_train)
            batch_acc.append(acc_train)
            batch_reg.append(loss_reg_train)

        dataChunkLoss.append(np.mean(batch_loss))
        dataChunkAcc.append(np.mean(batch_acc))
        dataChunkRegLoss.append(np.mean(batch_reg))

    train_average_loss = np.mean(dataChunkLoss)
    train_average_acc = np.mean(dataChunkAcc)
    loss_reg_average = np.mean(dataChunkRegLoss)
    return train_average_loss, train_average_acc, loss_reg_average
示例24
def roc(y_true, y_score, ax=None):
    """
    Plot ROC curve.

    Parameters
    ----------
    y_true : array-like, shape = [n_samples]
        Correct target values (ground truth).
    y_score : array-like, shape = [n_samples] or [n_samples, 2] for binary
              classification or [n_samples, n_classes] for multiclass
        Target scores (estimator predictions).
    ax: matplotlib Axes
        Axes object to draw the plot onto, otherwise uses current Axes

    Notes
    -----
    It is assumed that the y_score parameter columns are in order. For example,
    if ``y_true = [2, 2, 1, 0, 0, 1, 2]``, then the first column in y_score
    must contain the scores for class 0, second column for class 1 and so on.

    Returns
    -------
    ax: matplotlib Axes
        Axes containing the plot

    Raises
    ------
    ValueError
        If ``y_true`` or ``y_score`` is None.

    Examples
    --------
    .. plot:: ../../examples/roc.py

    """
    if y_true is None or y_score is None:
        raise ValueError("y_true and y_score are needed to plot ROC")

    if ax is None:
        ax = plt.gca()

    # The class count is inferred from the shape of y_score: a plain vector
    # of scores means a binary problem.
    y_score_is_vector = is_column_vector(y_score) or is_row_vector(y_score)
    if y_score_is_vector:
        n_classes = 2
    else:
        _, n_classes = y_score.shape

    # check data shape?

    if n_classes > 2:
        # Multiclass: one-hot encode the targets, draw the aggregate curve,
        # then one curve per class.
        y_true_bin = label_binarize(y_true, classes=np.unique(y_true))
        _roc_multi(y_true_bin, y_score, ax=ax)
        for class_idx in range(n_classes):
            _roc(y_true_bin[:, class_idx], y_score[:, class_idx], ax=ax)
        return ax

    # Binary: use the scores directly, or the second column of a 2-D array.
    # raise error if n_classes = 1?
    if y_score_is_vector:
        _roc(y_true, y_score, ax)
    else:
        _roc(y_true, y_score[:, 1], ax)
    return ax