Python source code examples: sklearn.metrics.cohen_kappa_score()
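All of the examples below call sklearn.metrics.cohen_kappa_score, which takes two equal-length label sequences and returns a float: 1.0 means perfect agreement, 0.0 means chance-level agreement, and negative values mean worse-than-chance agreement. Before the examples, a minimal sketch with toy labels invented for illustration; the optional weights parameter ('linear' or 'quadratic') is intended for ordinal labels:

from sklearn.metrics import cohen_kappa_score

# Two raters' labels for the same five items (toy data).
rater_a = [0, 1, 1, 2, 2]
rater_b = [0, 1, 2, 2, 2]

# Unweighted kappa treats all disagreements equally.
print(cohen_kappa_score(rater_a, rater_b))
# Quadratic weights penalize disagreements by squared distance,
# which suits ordinal scales (ratings, severity grades, ...).
print(cohen_kappa_score(rater_a, rater_b, weights='quadratic'))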
Example 1
def inference_validation(self, test_X, test_y, model_save_dest, n_class=5, folds=5):
    print(test_X.shape, test_y.shape)
    pred = np.zeros(test_X.shape[0])
    for k in range(1, folds + 1):
        print(f'running inference on fold: {k}')
        model = keras.models.load_model(model_save_dest[k])
        pred = pred + model.predict(test_X)[:, 0]
    print(pred.shape)
    print(pred)
    # Average the per-fold predictions, then round to the nearest class.
    pred = pred / float(folds)
    pred_class = np.round(pred)
    pred_class = np.array(pred_class, dtype=int)
    # Clamp the rounded predictions into the valid class range [0, 4].
    pred_class = list(map(lambda x: 4 if x > 4 else x, pred_class))
    pred_class = list(map(lambda x: 0 if x < 0 else x, pred_class))
    act_class = test_y
    accuracy = np.sum(pred_class == act_class) * 1.0 / len(test_X)
    kappa = cohen_kappa_score(pred_class, act_class, weights='quadratic')
    return pred_class, accuracy, kappa
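A side note on the two map calls above: np.clip bounds the rounded predictions to [0, 4] in a single step. A sketch with made-up values:

import numpy as np

pred = np.array([-0.6, 1.2, 3.7, 4.9])
# Round to the nearest class, then clamp into the valid range [0, 4].
pred_class = np.clip(np.round(pred), 0, 4).astype(int)
print(pred_class)  # [0 1 4 4]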
Example 2
def class_wise_kappa(true, pred, n_classes=None, ignore_zero=True):
    from sklearn.metrics import cohen_kappa_score
    if n_classes is None:
        classes = np.unique(true)
    else:
        classes = np.arange(max(2, n_classes))

    # Ignore background class?
    if ignore_zero:
        classes = classes[np.where(classes != 0)]

    # Calculate kappa for all targets
    kappa_scores = np.empty(shape=classes.shape, dtype=np.float32)
    kappa_scores.fill(np.nan)
    for idx, _class in enumerate(classes):
        s1 = true == _class
        s2 = pred == _class
        if np.any(s1) or np.any(s2):
            kappa_scores[idx] = cohen_kappa_score(s1, s2)
    return kappa_scores
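A hypothetical call, assuming numpy is imported and the class_wise_kappa above is in scope; each entry is a one-vs-rest kappa for one class, with class 0 skipped by default:

import numpy as np

true = np.array([0, 1, 1, 2, 2, 2])
pred = np.array([0, 1, 2, 2, 2, 1])
# Returns one kappa per non-zero class: [kappa_for_1, kappa_for_2].
print(class_wise_kappa(true, pred))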
Example 3
def toy_cohens_kappa():
    # rater1 = [1, 1, 1, 0]
    # rater2 = [1, 1, 0, 0]
    # rater3 = [0, 1, 1]
    rater1 = ['s', 's', 's', 'g', 'u']
    rater2 = ['s', 's', 'g', 'g', 's']

    taskdata = [[0, str(i), str(rater1[i])] for i in range(0, len(rater1))] + [
        [1, str(i), str(rater2[i])] for i in range(0, len(rater2))]  # + [
    #     [2, str(i), str(rater3[i])] for i in range(0, len(rater3))]
    print(taskdata)
    ratingtask = agreement.AnnotationTask(data=taskdata)
    print("kappa " + str(ratingtask.kappa()))
    print("fleiss " + str(ratingtask.multi_kappa()))
    print("alpha " + str(ratingtask.alpha()))
    print("scotts " + str(ratingtask.pi()))
    print("sklearn kappa " + str(cohen_kappa_score(rater1, rater2)))
Example 4
def predict(self):
    """
    Predicts the model output, and computes precision, recall, and F1 score.
    INPUT
        model: Model trained in Keras
    OUTPUT
        Precision, Recall, and F1 score
    """
    predictions = self.model.predict(self.X_test)
    predictions = np.argmax(predictions, axis=1)
    # predictions[predictions >= 1] = 1  # Remove when non-binary classifier
    self.y_test = np.argmax(self.y_test, axis=1)
    precision = precision_score(self.y_test, predictions, average="micro")
    recall = recall_score(self.y_test, predictions, average="micro")
    f1 = f1_score(self.y_test, predictions, average="micro")
    cohen_kappa = cohen_kappa_score(self.y_test, predictions)
    quad_kappa = kappa(self.y_test, predictions, weights='quadratic')
    return precision, recall, f1, cohen_kappa, quad_kappa
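Note that quad_kappa above relies on a separate kappa helper from the surrounding project; for integer class labels, sklearn alone should yield the same quadratic-weighted value. A standalone sketch with toy data:

from sklearn.metrics import cohen_kappa_score

y_test = [0, 1, 2, 2]
preds = [0, 2, 2, 1]
# Built-in quadratic weighting; no external kappa helper needed.
print(cohen_kappa_score(y_test, preds, weights='quadratic'))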
Example 5
def evaluate(source, source_batch):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    y_true = []  # true labels
    y_pred = []  # predicted labels
    hidden = model.init_hidden(args.bsz)
    for i in range(len(source_batch)):
        data, targets = get_batch(source, source_batch, i)
        output, hidden = model(data, hidden)
        total_loss += len(targets) * criterion(output[-1], targets).data
        _, predicted = torch.max(output[-1], 1)
        y_true.extend(targets.tolist())
        y_pred.extend(predicted.tolist())
        hidden = repackage_hidden(hidden)
    val_loss = total_loss.item() / np.size(source_batch)
    # Make report for the classifier
    report = classification_report(y_true, y_pred, target_names=classes)
    kappa = cohen_kappa_score(y_true, y_pred)
    return val_loss, kappa, report
Example 6
def evaluate(source, source_batch):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    y_true = []  # true labels
    y_pred = []  # predicted labels
    for i in range(len(source_batch)):
        data, targets = get_batch(source, source_batch, i)
        outputs = model(data)
        total_loss += len(targets) * criterion(outputs, targets).data
        _, predicted = torch.max(outputs, 1)
        y_true.extend(targets.tolist())
        y_pred.extend(predicted.tolist())
    val_loss = total_loss.item() / np.size(source_batch)
    # Make report for the classifier
    report = classification_report(y_true, y_pred, target_names=classes)
    kappa = cohen_kappa_score(y_true, y_pred)
    return val_loss, kappa, report
Example 7
def quadratic_weighted_kappa(y_pred, y_true):
    if torch.is_tensor(y_pred):
        y_pred = y_pred.data.cpu().numpy()
    if torch.is_tensor(y_true):
        y_true = y_true.data.cpu().numpy()
    if y_pred.shape[1] == 1:
        y_pred = y_pred[:, 0]
    else:
        y_pred = np.argmax(y_pred, axis=1)
    return metrics.cohen_kappa_score(y_pred, y_true, weights='quadratic')
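A usage sketch for the wrapper above, assuming `from sklearn import metrics`, numpy, and the function definition are in scope; the (2, 3) tensor stands in for per-class logits:

import torch

logits = torch.tensor([[2.0, 0.1, 0.3],
                       [0.2, 1.5, 0.1]])  # two samples, three classes
labels = torch.tensor([0, 1])
# argmax over columns gives [0, 1], which matches the labels -> kappa = 1.0
print(quadratic_weighted_kappa(logits, labels))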
Example 8
def kappa_score(self):
    return metrics.cohen_kappa_score(self.conditions, self.predictions)
Example 9
def test_cohen_kappa():
    # These label vectors reproduce the contingency matrix from Artstein and
    # Poesio (2008), Table 1: np.array([[20, 20], [10, 50]]).
    y1 = np.array([0] * 40 + [1] * 60)
    y2 = np.array([0] * 20 + [1] * 20 + [0] * 10 + [1] * 50)
    kappa = cohen_kappa_score(y1, y2)
    assert_almost_equal(kappa, .348, decimal=3)
    assert_equal(kappa, cohen_kappa_score(y2, y1))

    # Add spurious labels and ignore them.
    y1 = np.append(y1, [2] * 4)
    y2 = np.append(y2, [2] * 4)
    assert_equal(cohen_kappa_score(y1, y2, labels=[0, 1]), kappa)

    assert_almost_equal(cohen_kappa_score(y1, y1), 1.)

    # Multiclass example: Artstein and Poesio, Table 4.
    y1 = np.array([0] * 46 + [1] * 44 + [2] * 10)
    y2 = np.array([0] * 52 + [1] * 32 + [2] * 16)
    assert_almost_equal(cohen_kappa_score(y1, y2), .8013, decimal=4)

    # Weighting example: none, linear, quadratic.
    y1 = np.array([0] * 46 + [1] * 44 + [2] * 10)
    y2 = np.array([0] * 50 + [1] * 40 + [2] * 10)
    assert_almost_equal(cohen_kappa_score(y1, y2), .9315, decimal=4)
    assert_almost_equal(cohen_kappa_score(y1, y2, weights="linear"), .9412, decimal=4)
    assert_almost_equal(cohen_kappa_score(y1, y2, weights="quadratic"), .9541, decimal=4)
Example 10
def inference_validation(self, test_X, test_y, model_save_dest, n_class=5, folds=5):
    pred = np.zeros((len(test_X), n_class))
    for k in range(1, folds + 1):
        model = keras.models.load_model(model_save_dest[k])
        pred = pred + model.predict(test_X)
    pred = pred / (1.0 * folds)
    pred_class = np.argmax(pred, axis=1)
    act_class = np.argmax(test_y, axis=1)
    accuracy = np.sum(pred_class == act_class) * 1.0 / len(test_X)
    kappa = cohen_kappa_score(pred_class, act_class, weights='quadratic')
    return pred_class, accuracy, kappa
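Unlike Example 1, this variant assumes one-hot encoded labels and per-class probability vectors; np.argmax along axis 1 recovers the class index. A toy illustration:

import numpy as np

test_y = np.array([[1, 0, 0],
                   [0, 0, 1],
                   [0, 1, 0]])    # one-hot labels
print(np.argmax(test_y, axis=1))  # [0 2 1]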
Example 11
def main(self):
    start_time = time.time()
    print('Data Processing..')
    self.num_class = len(self.class_folders)
    model_to_store_path, class_dict = self.train_model(self.train_dir, self.val_dir, n_fold=self.folds,
                                                       batch_size=self.batch_size, epochs=self.epochs,
                                                       dim=self.dim, lr=self.lr, model=self.model)
    print("Model saved to dest:", model_to_store_path)
    # Evaluate results on the validation set
    folder_path = Path(f'{self.val_dir}')
    val_results_df = self.inference(model_to_store_path, folder_path, class_dict, self.dim)
    val_results_path = f'{self.outdir}/val_results.csv'
    val_results_df.to_csv(val_results_path, index=False)
    print(f'Validation results saved at : {val_results_path}')
    pred_class_index = np.array(val_results_df['pred_class_index'].values)
    actual_class_index = np.array(val_results_df['actual_class_index'].values)
    print(pred_class_index)
    print(actual_class_index)
    accuracy = np.mean(actual_class_index == pred_class_index)
    kappa = cohen_kappa_score(pred_class_index, actual_class_index, weights='quadratic')
    print(f'Validation Accuracy: {accuracy}')
    print(f'Validation Quadratic Kappa Score: {kappa}')
    print("Processing Time", time.time() - start_time, ' secs')
Example 12
def reports(y_pred, y_test):
    classification = classification_report(y_test, y_pred)
    oa = accuracy_score(y_test, y_pred)
    confusion = confusion_matrix(y_test, y_pred)
    each_acc, aa = AA_andEachClassAccuracy(confusion)
    kappa = cohen_kappa_score(y_test, y_pred)
    return classification, confusion, np.array([oa, aa, kappa] + list(each_acc)) * 100
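AA_andEachClassAccuracy is a project-specific helper not shown here; a plausible sketch (assuming confusion-matrix rows are true classes), where AA is the mean of the per-class accuracies:

import numpy as np

def AA_andEachClassAccuracy(confusion):
    # Per-class accuracy: diagonal hits over the true-class row totals.
    each_acc = np.diag(confusion) / confusion.sum(axis=1)
    return each_acc, np.mean(each_acc)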
Example 13
def calculate_metrics(val_results_dict, y_pred, y_val, suffix=""):
    tmp_kappa_list = []
    tmp_accur_list = []
    tmp_f1_list = []
    tmp_cm_list = []
    y_val = utils.to_categorical(y_val)[:, -1]
    thresholds = np.linspace(0.1, 0.9, 17)
    for each_threshold in thresholds:
        tmp_pred = [1 if _ >= each_threshold else 0 for _ in y_pred]
        tmp_kappa_list.append(cohen_kappa_score(tmp_pred, y_val))
        tmp_accur_list.append(accuracy_score(tmp_pred, y_val))
        tmp_f1_list.append(f1_score(tmp_pred, y_val))
        tmp_cm_list.append(competitionMetric(tmp_pred, y_val))
    auroc = round(roc_auc_score(y_val, y_pred), 3)
    kappa = round(np.max(tmp_kappa_list), 3)
    accur = round(np.max(tmp_accur_list), 3)
    cm = round(np.max(tmp_cm_list), 3)
    f1 = round(np.max(tmp_f1_list), 3)
    val_results_dict["auc{}".format(suffix)].append(auroc)
    val_results_dict["kap{}".format(suffix)].append(kappa)
    val_results_dict["acc{}".format(suffix)].append(accur)
    val_results_dict["f1{}".format(suffix)].append(f1)
    val_results_dict["cm{}".format(suffix)].append(cm)
    # Thresholds that achieved the best value for each metric.
    kappa_threshold = thresholds[tmp_kappa_list.index(np.max(tmp_kappa_list))]
    accur_threshold = thresholds[tmp_accur_list.index(np.max(tmp_accur_list))]
    f1_threshold = thresholds[tmp_f1_list.index(np.max(tmp_f1_list))]
    cm_threshold = thresholds[tmp_cm_list.index(np.max(tmp_cm_list))]
    val_results_dict["threshold_kap{}".format(suffix)].append(round(kappa_threshold, 2))
    val_results_dict["threshold_acc{}".format(suffix)].append(round(accur_threshold, 2))
    val_results_dict["threshold_f1{}".format(suffix)].append(round(f1_threshold, 2))
    val_results_dict["threshold_cm{}".format(suffix)].append(round(cm_threshold, 2))
    return val_results_dict
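This function appends to lists keyed by metric name, so val_results_dict is easiest to supply as a defaultdict; a minimal setup sketch (utils.to_categorical and competitionMetric come from the surrounding project):

from collections import defaultdict

# Each metric key ('auc', 'kap', 'acc', 'f1', 'cm', 'threshold_*')
# accumulates one value per call to calculate_metrics.
val_results_dict = defaultdict(list)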
Example 14
def _kappa_score(self):
    png_file = self.scalars(
        {'kappa_score': cohen_kappa_score(self.targets, self.predicts, weights='quadratic')}, 'kappa_score'
    )
    if png_file:
        self.update_sheet('kappa_score', {'raw': png_file, 'processor': 'upload_image'})
Example 15
def print_metrics_regression(y_true, predictions, verbose=1):
    predictions = np.array(predictions)
    predictions = np.maximum(predictions, 0).flatten()
    y_true = np.array(y_true)
    y_true_bins = [get_bin_custom(x, CustomBins.nbins) for x in y_true]
    prediction_bins = [get_bin_custom(x, CustomBins.nbins) for x in predictions]
    cf = metrics.confusion_matrix(y_true_bins, prediction_bins)
    if verbose:
        print("Custom bins confusion matrix:")
        print(cf)
    kappa = metrics.cohen_kappa_score(y_true_bins, prediction_bins,
                                      weights='linear')
    mad = metrics.mean_absolute_error(y_true, predictions)
    mse = metrics.mean_squared_error(y_true, predictions)
    mape = mean_absolute_percentage_error(y_true, predictions)
    if verbose:
        print("Mean absolute deviation (MAD) = {}".format(mad))
        print("Mean squared error (MSE) = {}".format(mse))
        print("Mean absolute percentage error (MAPE) = {}".format(mape))
        print("Cohen kappa score = {}".format(kappa))
    return {"mad": mad,
            "mse": mse,
            "mape": mape,
            "kappa": kappa}
Example 16
def cohens_kappa():
    data_folder = '/Users/fpena/UCC/Thesis/datasets/context/manuallyLabeledReviews/'
    business_type = Constants.ITEM_TYPE
    file_name = data_folder + '%s_%s_reviews.json'
    labelers = [
        # 'francisco',
        'diego',
        'mesut',
        'rohit',
    ]
    all_records = [
        load_data(file_name % (labeler, business_type)) for labeler in labelers
    ]
    rater1 = [record['review_type'] for record in all_records[0]]
    rater2 = [record['review_type'] for record in all_records[1]]
    rater3 = [record['review_type'] for record in all_records[2]]

    taskdata = [[0, str(i), str(rater1[i])] for i in range(0, len(rater1))] + [
        [1, str(i), str(rater2[i])] for i in range(0, len(rater2))] + [
        [2, str(i), str(rater3[i])] for i in range(0, len(rater3))]
    print(taskdata)
    ratingtask = agreement.AnnotationTask(data=taskdata)
    print("Observed agreement " + str(ratingtask.avg_Ao()))
    print("kappa " + str(ratingtask.kappa()))
    print("fleiss " + str(ratingtask.multi_kappa()))
    print("alpha " + str(ratingtask.alpha()))
    print("scotts " + str(ratingtask.pi()))
    print("sklearn kappa " + str(cohen_kappa_score(rater1, rater2)))
    print("sklearn kappa " + str(cohen_kappa_score(rater1, rater3)))
    print("sklearn kappa " + str(cohen_kappa_score(rater2, rater3)))
Example 17
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    y_true = []  # true labels
    y_pred = []  # predicted labels
    start_time = time.time()
    hidden = model.init_hidden(args.bsz)
    for batch, i in enumerate(range(len(train_batch))):
        data, targets = get_batch(train_data, train_batch, i)
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output[-1], targets)
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()
        total_loss += loss.data
        _, predicted = torch.max(output[-1], 1)
        y_true.extend(targets.tolist())
        y_pred.extend(predicted.tolist())
        if (batch + 1) % args.log_interval == 0:
            cur_loss = total_loss.item() / (batch + 1)
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.6f} | ms/batch {:5.2f} | '
                  'loss {:5.2f}'.format(
                      epoch, batch + 1, len(train_batch), lr,
                      elapsed * 1000 / args.log_interval, cur_loss))
            start_time = time.time()
    # compute Cohen's Kappa
    kappa = cohen_kappa_score(y_true, y_pred)
    return total_loss.item() / (batch + 1), kappa
Example 18
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    y_true = []  # true labels
    y_pred = []  # predicted labels
    start_time = time.time()
    for batch, i in enumerate(range(len(train_batch))):
        data, targets = get_batch(train_data, train_batch, i)
        model.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        total_loss += loss.data
        _, predicted = torch.max(outputs, 1)
        y_true.extend(targets.tolist())
        y_pred.extend(predicted.tolist())
        if (batch + 1) % args.log_interval == 0:
            cur_loss = total_loss.item() / (batch + 1)
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.6f} | ms/batch {:5.2f} | '
                  'loss {:5.2f}'.format(
                      epoch, batch + 1, len(train_batch), lr,
                      elapsed * 1000 / args.log_interval, cur_loss))
            start_time = time.time()
    # compute Cohen's Kappa
    kappa = cohen_kappa_score(y_true, y_pred)
    return total_loss.item() / (batch + 1), kappa
Example 19
def conf_matrix(Y_gt, Y_pred, num_classes=9):
    total_pixels = 0
    kappa_sum = 0
    sudo_confusion_matrix = np.zeros((num_classes, num_classes))
    n = len(Y_pred)
    for i in range(n):
        y_pred = Y_pred[i]
        y_gt = Y_gt[i]
        # Flatten the (H, W, C) one-hot maps to (H*W, C), then to class indices.
        pred = np.reshape(y_pred, (y_pred.shape[0] * y_pred.shape[1], y_pred.shape[2]))
        gt = np.reshape(y_gt, (y_gt.shape[0] * y_gt.shape[1], y_gt.shape[2]))
        pred = to_class_no(list(pred))
        gt = to_class_no(list(gt))
        gt = np.asarray(gt, dtype='int32')
        pred = np.asarray(pred, dtype='int32')
        conf_matrix = confusion_matrix(gt, pred, labels=[0, 1, 2, 3, 4, 5, 6, 7, 8])
        # Note: kappa is computed over labels 0-7 only, excluding class 8.
        kappa = cohen_kappa_score(gt, pred, labels=[0, 1, 2, 3, 4, 5, 6, 7])
        pixels = len(pred)
        total_pixels = total_pixels + pixels
        sudo_confusion_matrix = sudo_confusion_matrix + conf_matrix
        kappa_sum = kappa_sum + kappa
    final_confusion_matrix = sudo_confusion_matrix
    # Average the per-image kappa scores.
    final_kappa = kappa_sum / n
    return final_confusion_matrix, final_kappa
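Note the mismatch above: the confusion matrix is built over labels 0-8 while kappa uses labels 0-7, so pairs involving class 8 are excluded from kappa (possibly intentional for a background class). The labels argument controls this, as a toy shows:

from sklearn.metrics import cohen_kappa_score

gt   = [0, 1, 2, 2, 8]
pred = [0, 1, 2, 8, 8]
print(cohen_kappa_score(gt, pred))                    # all classes counted
print(cohen_kappa_score(gt, pred, labels=[0, 1, 2]))  # pairs involving 8 dropped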