Python source code examples: sklearn.metrics.r2_score()
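For orientation before the project examples below: r2_score(y_true, y_pred) returns the coefficient of determination R^2 = 1 - SS_res / SS_tot. A minimal call (values are illustrative):

from sklearn.metrics import r2_score

y_true = [3, -0.5, 2, 7]
y_pred = [2.5, 0.0, 2, 8]
print(r2_score(y_true, y_pred))  # ~0.9486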
Example 1
def fit_model(self, data, cross_val_data, cross_val_labels):
    eval_metrics = []
    for i in range(self.n_ensemble):
        train_sm = np.concatenate(cross_val_data[:i] +
                                  cross_val_data[(i + 1):])
        test_sm = cross_val_data[i]
        train_labels = np.concatenate(cross_val_labels[:i] +
                                      cross_val_labels[(i + 1):])
        test_labels = cross_val_labels[i]
        fp_train = get_fp(train_sm)
        fp_test = get_fp(test_sm)
        self.model[i].fit(fp_train, train_labels.ravel())
        predicted = self.model[i].predict(fp_test)
        if self.model_type == 'classifier':
            fpr, tpr, thresholds = metrics.roc_curve(test_labels, predicted)
            eval_metrics.append(metrics.auc(fpr, tpr))
            metrics_type = 'AUC'
        elif self.model_type == 'regressor':
            r2 = metrics.r2_score(test_labels, predicted)
            eval_metrics.append(r2)
            metrics_type = 'R^2 score'
    return eval_metrics, metrics_type
Example 2
def crossValidation(X, y, cvFolds, estimator):
    r2 = np.zeros((cvFolds, 1))
    # KFold's modern API (sklearn >= 0.18): pass n_splits and iterate over .split(X)
    kf = KFold(n_splits=cvFolds, shuffle=True, random_state=30)
    cv_j = 0
    for train_index, test_index in kf.split(X):
        train_X = X[train_index, :]
        test_X = X[test_index, :]
        train_y = y[train_index]
        test_y = y[test_index]
        estimator.fit(train_X, train_y)  # was `est.fit(...)`, an undefined name
        y_true, y_pred = test_y, estimator.predict(test_X)
        r2[cv_j] = r2_score(y_true, y_pred)
        cv_j = cv_j + 1
    return r2
# parameters: 'X' the predictors, 'y' the target, 'cvFolds' the number of folds,
#             'estimator' the machine-learning algorithm
# returns: the R squared for each fold
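A hedged usage sketch for the helper above; Ridge and the synthetic data are stand-ins, not part of the original project:

import numpy as np
from sklearn.linear_model import Ridge

rng = np.random.RandomState(0)
X = rng.rand(100, 5)
y = X @ np.arange(1.0, 6.0) + rng.rand(100) * 0.1
fold_r2 = crossValidation(X, y, cvFolds=5, estimator=Ridge(alpha=1.0))
print(fold_r2.ravel())  # one R^2 per fold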
Example 3
def r2(self, log=False, pseudocount=1, clip=None):
    """ Compute target R2 vector. """
    r2_vec = np.zeros(self.num_targets)
    for ti in range(self.num_targets):
        if self.targets_na is not None:
            preds_ti = self.preds[~self.targets_na, ti].astype('float64')
            targets_ti = self.targets[~self.targets_na, ti].astype('float64')
        else:
            preds_ti = self.preds[:, :, ti].flatten().astype('float64')
            targets_ti = self.targets[:, :, ti].flatten().astype('float64')
        if clip is not None:
            preds_ti = np.clip(preds_ti, 0, clip)
            targets_ti = np.clip(targets_ti, 0, clip)
        if log:
            preds_ti = np.log2(preds_ti + pseudocount)
            targets_ti = np.log2(targets_ti + pseudocount)
        r2_vec[ti] = metrics.r2_score(targets_ti, preds_ti)
    return r2_vec
Example 4
def score_regression(y, y_hat, report=True):
    """
    Create regression score
    :param y: ground-truth target values
    :param y_hat: predicted values
    :param report: if True, print the score report
    :return: the MAE and the formatted report string
    """
    r2 = r2_score(y, y_hat)
    rmse = sqrt(mean_squared_error(y, y_hat))
    mae = mean_absolute_error(y, y_hat)
    report_string = "---Regression Score--- \n"
    report_string += "R2 = " + str(r2) + "\n"
    report_string += "RMSE = " + str(rmse) + "\n"
    report_string += "MAE = " + str(mae) + "\n"
    if report:
        print(report_string)
    return mae, report_string
Example 5
def r2_score_vec(y_true, y_pred):
    """ Returns a non-aggregate version of the r2 score.
    Based on the r2_score() function from sklearn (http://sklearn.org).
    """
    numerator = (y_true - y_pred) ** 2
    denominator = (y_true - np.average(y_true)) ** 2
    nonzero_denominator = denominator != 0
    nonzero_numerator = numerator != 0
    valid_score = nonzero_denominator & nonzero_numerator
    output_scores = np.ones([y_true.shape[0]])
    output_scores[valid_score] = 1 - (numerator[valid_score] /
                                      denominator[valid_score])
    # arbitrarily set to zero to avoid -inf scores; having a constant
    # y_true is not interesting for scoring a regression anyway
    output_scores[nonzero_numerator & ~nonzero_denominator] = 0.
    return output_scores
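A quick check of the convention above (one score per element; an exactly predicted, mean-valued entry keeps the default score of 1):

import numpy as np

y_true = np.array([3.0, 2.0, 1.0])   # mean is 2.0
y_pred = np.array([2.5, 2.0, 2.0])
print(r2_score_vec(y_true, y_pred))  # [0.75, 1.0, 0.0]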
Example 6
def test_few_fit_shapes():
    """test_few.py: fit and predict return correct shapes """
    np.random.seed(202)
    # load example data
    boston = load_boston()
    d = pd.DataFrame(data=boston.data)
    print("feature shape:", boston.data.shape)
    learner = FEW(generations=1, population_size=5,
                  mutation_rate=0.2, crossover_rate=0.8,
                  ml=LassoLarsCV(), min_depth=1, max_depth=3,
                  sel='epsilon_lexicase', tourn_size=2,
                  random_state=0, verbosity=0,
                  disable_update_check=False, fit_choice='mse')
    score = learner.fit(boston.data[:300], boston.target[:300])
    print("learner:", learner._best_estimator)
    yhat_test = learner.predict(boston.data[300:])
    test_score = learner.score(boston.data[300:], boston.target[300:])
    print("train score:", score, "test score:", test_score,
          "test r2:", r2_score(boston.target[300:], yhat_test))
    assert yhat_test.shape == boston.target[300:].shape
Example 7
def test_metrics_from_list():
    """
    Check getting functions from a list of metric names
    """
    default = ModelBuilder.metrics_from_list()
    assert default == [
        metrics.explained_variance_score,
        metrics.r2_score,
        metrics.mean_squared_error,
        metrics.mean_absolute_error,
    ]
    specifics = ModelBuilder.metrics_from_list(
        ["sklearn.metrics.adjusted_mutual_info_score", "sklearn.metrics.r2_score"]
    )
    assert specifics == [metrics.adjusted_mutual_info_score, metrics.r2_score]
Example 8
def test_boston_dataset(max_bins):
    boston = load_boston()
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=42)
    mapper = _BinMapper(max_bins=max_bins, random_state=42)
    X_train_binned = mapper.fit_transform(X_train)
    # Init gradients and hessians to that of least squares loss
    gradients = -y_train.astype(G_H_DTYPE)
    hessians = np.ones(1, dtype=G_H_DTYPE)
    min_samples_leaf = 8
    max_leaf_nodes = 31
    grower = TreeGrower(X_train_binned, gradients, hessians,
                        min_samples_leaf=min_samples_leaf,
                        max_leaf_nodes=max_leaf_nodes, max_bins=max_bins,
                        actual_n_bins=mapper.actual_n_bins_)
    grower.grow()
    predictor = grower.make_predictor(bin_thresholds=mapper.bin_thresholds_)
    assert r2_score(y_train, predictor.predict(X_train)) > 0.85
    assert r2_score(y_test, predictor.predict(X_test)) > 0.70
Example 9
def test_multioutput_regression():
    y_true = np.array([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]])
    y_pred = np.array([[0, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 1]])
    error = mean_squared_error(y_true, y_pred)
    assert_almost_equal(error, (1. / 3 + 2. / 3 + 2. / 3) / 4.)
    error = mean_squared_log_error(y_true, y_pred)
    assert_almost_equal(error, 0.200, decimal=2)
    # mean_absolute_error and mean_squared_error are equal because
    # it is a binary problem.
    error = mean_absolute_error(y_true, y_pred)
    assert_almost_equal(error, (1. / 3 + 2. / 3 + 2. / 3) / 4.)
    error = r2_score(y_true, y_pred, multioutput='variance_weighted')
    assert_almost_equal(error, 1. - 5. / 2)
    error = r2_score(y_true, y_pred, multioutput='uniform_average')
    assert_almost_equal(error, -.875)
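The two aggregate modes asserted above relate to per-output scores as follows; a short, self-contained check:

import numpy as np
from sklearn.metrics import r2_score

y_true = np.array([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]])
y_pred = np.array([[0, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 1]])
raw = r2_score(y_true, y_pred, multioutput='raw_values')
print(raw)         # [-3.5 -2.   1.   1. ]
print(raw.mean())  # -0.875, the 'uniform_average' result; 'variance_weighted'
                   # instead weights each output by the variance of its true values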
Example 10
def test_regression_metrics_at_limits():
    assert_almost_equal(mean_squared_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_squared_log_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(median_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(max_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(explained_variance_score([0.], [0.]), 1.00, 2)
    assert_almost_equal(r2_score([0., 1], [0., 1]), 1.00, 2)
    assert_raises_regex(ValueError, "Mean Squared Logarithmic Error cannot be "
                        "used when targets contain negative values.",
                        mean_squared_log_error, [-1.], [-1.])
    assert_raises_regex(ValueError, "Mean Squared Logarithmic Error cannot be "
                        "used when targets contain negative values.",
                        mean_squared_log_error, [1., 2., 3.], [1., -2., 3.])
    assert_raises_regex(ValueError, "Mean Squared Logarithmic Error cannot be "
                        "used when targets contain negative values.",
                        mean_squared_log_error, [1., -2., 3.], [1., 2., 3.])
Example 11
def compute_perf_metrics(self, per_task=False):
    """Returns the R-squared metrics for each task or averaged over tasks,
    based on the accumulated values.

    Args:
        per_task (bool): True if calculating per-task metrics, False otherwise.

    Returns:
        A tuple (r2_score, std):
            r2_score (np.array): An array of scores for each task, if per_task is True.
                Otherwise, a float containing the average R^2 score over tasks.
            std: Always None for this class.
    """
    r2_scores = self.perf_metrics[0]
    if per_task or self.num_tasks == 1:
        return (r2_scores, None)
    else:
        return (r2_scores.mean(), None)
Example 12
def score(self, X, y):
    """Returns the coefficient of determination R^2 of the fitted linear
    regression model, computed on the given features matrix and labels.

    Parameters
    ----------
    X : `np.ndarray` or `scipy.sparse.csr_matrix`, shape=(n_samples, n_features)
        Features matrix.

    y : `np.ndarray`, shape=(n_samples,)
        Labels vector.

    Returns
    -------
    score : `float`
        R^2 of self.predict(X) against y.
    """
    from sklearn.metrics import r2_score
    return r2_score(y, self.predict(X))
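The docstring's definition can be checked directly against the formula R^2 = 1 - SS_res / SS_tot (values are illustrative):

import numpy as np
from sklearn.metrics import r2_score

y = np.array([3.0, -0.5, 2.0, 7.0])
y_hat = np.array([2.5, 0.0, 2.0, 8.0])
ss_res = ((y - y_hat) ** 2).sum()
ss_tot = ((y - y.mean()) ** 2).sum()
assert np.isclose(r2_score(y, y_hat), 1 - ss_res / ss_tot)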
Example 13
def neural_regression(X_train, Y_train, X_val, Y_val, X_test, Y_test, nb_epoch=200, batch_size=10, seed=7):
    reg = neural_network(X_train.shape[1])
    reg.fit(X_train, Y_train,
            nb_epoch=nb_epoch,
            batch_size=batch_size,
            shuffle=True,
            validation_data=(X_val, Y_val),
            callbacks=[
                ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.01),
                EarlyStopping(monitor='val_loss', min_delta=1e-5, patience=5, verbose=0, mode='auto'),
            ])
    pred = reg.predict(X_test)
    pred = np.reshape(pred, pred.shape[0])
    r2 = r2_score(Y_test, pred)
    return r2
Example 14
def nestedCrossValidation(X, y, cvFolds, estimator):
    # KFold's modern API (sklearn >= 0.18): pass n_splits and iterate over .split(X)
    kf = KFold(n_splits=cvFolds, shuffle=True, random_state=30)
    cv_j = 0
    param_grid = {'alpha': [1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1, 10,
                            100, 1000, 1e4, 1e5, 1e6, 1e7, 1e9]}
    r2 = np.zeros((cvFolds, 1))
    for train_index, test_index in kf.split(X):
        train_X = X[train_index, :]
        test_X = X[test_index, :]
        train_y = y[train_index]
        test_y = y[test_index]
        # 'mean_squared_error' was renamed 'neg_mean_squared_error' in sklearn 0.18
        grid = GridSearchCV(estimator, param_grid=param_grid, verbose=0,
                            cv=cvFolds, scoring='neg_mean_squared_error')
        grid.fit(train_X, train_y)
        y_true, y_pred = test_y, grid.best_estimator_.predict(test_X)
        r2[cv_j] = r2_score(y_true, y_pred)
        cv_j = cv_j + 1
    return r2
Example 15
def __init__(
        self,
        wrapped: BaseStep = None,
        test_size: float = 0.2,
        scoring_function=r2_score,
        run_validation_split_in_test_mode=True,
        cache_folder_when_no_handle=None
):
    """
    :param wrapped: wrapped step
    :param test_size: ratio for test size between 0 and 1
    :param scoring_function: scoring function with two arguments (y_true, y_pred)
    """
    BaseCrossValidationWrapper.__init__(self, wrapped=wrapped, cache_folder_when_no_handle=cache_folder_when_no_handle)
    self.run_validation_split_in_test_mode = run_validation_split_in_test_mode
    self.test_size = test_size
    self.scoring_function = scoring_function
Example 16
def __init__(
        self,
        scoring_function=r2_score,
        k_fold=3,
        joiner=NumpyConcatenateOuterBatch(),
        cache_folder_when_no_handle=None,
        split_data_container_during_fit=True,
        predict_after_fit=True
):
    self.k_fold = k_fold
    BaseCrossValidationWrapper.__init__(
        self,
        scoring_function=scoring_function,
        joiner=joiner,
        cache_folder_when_no_handle=cache_folder_when_no_handle,
        split_data_container_during_fit=split_data_container_during_fit,
        predict_after_fit=predict_after_fit
    )
Example 17
def eva_regress(y_true, y_pred):
    """Evaluation

    Evaluate the predicted result.

    # Arguments
        y_true: List/ndarray, true data.
        y_pred: List/ndarray, predicted data.
    """
    mape = MAPE(y_true, y_pred)
    vs = metrics.explained_variance_score(y_true, y_pred)
    mae = metrics.mean_absolute_error(y_true, y_pred)
    mse = metrics.mean_squared_error(y_true, y_pred)
    r2 = metrics.r2_score(y_true, y_pred)
    print('explained_variance_score:%f' % vs)
    print('mape:%f%%' % mape)
    print('mae:%f' % mae)
    print('mse:%f' % mse)
    print('rmse:%f' % math.sqrt(mse))
    print('r2:%f' % r2)
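MAPE is not defined in this snippet; a minimal sketch of a typical implementation (the percentage scaling and the no-zero-targets assumption are not from the original project):

import numpy as np

def MAPE(y_true, y_pred):
    # mean absolute percentage error, in percent; assumes no zero targets
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100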
Example 18
def test_ensemble_model():
    X = np.vstack((np.arange(30, 10, -2, dtype='float64'),
                   np.arange(100, 90, -1, dtype='float64'))).T
    Y = np.arange(10, dtype='float64')
    rf = regressors.randomforest(random_state=42)
    nn = regressors.neuralnetwork(solver='lbfgs', random_state=42)
    ensemble = ensemble_model((rf, nn))
    # we do not need to fit the underlying models; they change when we fit the ensemble
    ensemble.fit(X, Y)
    pred = ensemble.predict(X)
    mean_pred = np.vstack((rf.predict(X), nn.predict(X))).mean(axis=0)
    assert_array_almost_equal(pred, mean_pred)
    assert_almost_equal(ensemble.score(X, Y), r2_score(Y, pred))
    # an ensemble of a single model should behave exactly like that model
    nn = neuralnetwork(solver='lbfgs', random_state=42)
    ensemble = ensemble_model((nn,))
    ensemble.fit(X, Y)
    assert_array_almost_equal(ensemble.predict(X), nn.predict(X))
    assert_almost_equal(ensemble.score(X, Y), nn.score(X, Y))
Example 19
def print_evaluation_metrics(trained_model, trained_model_name, X_test, y_test):
    print('--------- For Model: ', trained_model_name, ' ---------\n')
    predicted_values = trained_model.predict(X_test)
    print("Mean absolute error: ",
          metrics.mean_absolute_error(y_test, predicted_values))
    print("Median absolute error: ",
          metrics.median_absolute_error(y_test, predicted_values))
    print("Mean squared error: ", metrics.mean_squared_error(
        y_test, predicted_values))
    print("R2: ", metrics.r2_score(y_test, predicted_values))
    plt.scatter(y_test, predicted_values, color='black')
    # plt.plot(x, y_pred, color='blue', linewidth=3)
    plt.title(trained_model_name)
    plt.xlabel('$y_{test}$')
    plt.ylabel('$y_{predicted}/y_{test}$')
    plt.savefig('%s.png' % trained_model_name, bbox_inches='tight')
    print("---------------------------------------\n")
Example 20
def plot(results, ax):
    df = results[["size", "time"]]
    df.plot.scatter(x='size', y='time', c='LightBlue', ax=ax, marker=".")
    # linear regression
    x = df["size"].values.reshape((-1, 1))
    y = df["time"].values
    linear = LinearRegression().fit(x, y)
    y_pred = linear.predict(x)
    mse = mean_squared_error(y, y_pred)  # was named `mqe`, though the title reports mse
    r2 = r2_score(y, y_pred)
    ax.plot(x, y_pred, color='DarkBlue', linewidth=2)
    ax.set_title(
        "Linear regression between size and time "
        f"\n$mse={mse:.3f}$ - $R^2={r2:.3f}$")
    ax.set_xlabel("Size")
    ax.set_ylabel("Seconds")
    return ax
Example 21
def fit_model(self, data):
    eval_metrics = []
    if self.feature_type == 'fingerprints':
        fps = get_fp(data.smiles)
    elif self.feature_type == 'descriptors':
        fps, _, _ = get_desc(data.smiles, self.calc)
    if self.model_type == 'classifier':
        cross_val_data, cross_val_labels = \
            cross_validation_split(fps, data.binary_labels)
    elif self.model_type == 'regressor':
        cross_val_data, cross_val_labels = \
            cross_validation_split(fps, data.property)
    for i in range(self.n_ensemble):
        train_sm = np.concatenate(cross_val_data[:i] + cross_val_data[(i + 1):])
        test_sm = cross_val_data[i]
        train_labels = np.concatenate(cross_val_labels[:i] +
                                      cross_val_labels[(i + 1):])
        test_labels = cross_val_labels[i]
        if self.feature_type == 'descriptors':
            train_sm, desc_mean = normalize_desc(train_sm)
            self.desc_mean[i] = desc_mean
            test_sm, _ = normalize_desc(test_sm, desc_mean)
        self.model[i].fit(train_sm, train_labels.ravel())
        predicted = self.model[i].predict(test_sm)
        if self.model_type == 'classifier':
            fpr, tpr, thresholds = metrics.roc_curve(test_labels, predicted)
            eval_metrics.append(metrics.auc(fpr, tpr))
            metrics_type = 'AUC'
        elif self.model_type == 'regressor':
            r2 = metrics.r2_score(test_labels, predicted)
            eval_metrics.append(r2)
            metrics_type = 'R^2 score'
    return eval_metrics, metrics_type
Example 22
def test_boston_dataset(max_bins):
    boston = load_boston()
    X_train, X_test, y_train, y_test = train_test_split(
        boston.data, boston.target, random_state=42)
    mapper = BinMapper(max_bins=max_bins, random_state=42)
    X_train_binned = mapper.fit_transform(X_train)
    X_test_binned = mapper.transform(X_test)
    # Init gradients and hessians to that of least squares loss
    gradients = -y_train.astype(np.float32)
    hessians = np.ones(1, dtype=np.float32)
    min_samples_leaf = 8
    max_leaf_nodes = 31
    grower = TreeGrower(X_train_binned, gradients, hessians,
                        min_samples_leaf=min_samples_leaf,
                        max_leaf_nodes=max_leaf_nodes, max_bins=max_bins,
                        n_bins_per_feature=mapper.n_bins_per_feature_)
    grower.grow()
    predictor = grower.make_predictor(
        numerical_thresholds=mapper.numerical_thresholds_)
    assert r2_score(y_train, predictor.predict_binned(X_train_binned)) > 0.85
    assert r2_score(y_test, predictor.predict_binned(X_test_binned)) > 0.70
    assert_allclose(predictor.predict(X_train),
                    predictor.predict_binned(X_train_binned))
    assert_allclose(predictor.predict(X_test),
                    predictor.predict_binned(X_test_binned))
    assert r2_score(y_train, predictor.predict(X_train)) > 0.85
    assert r2_score(y_test, predictor.predict(X_test)) > 0.70
Example 23
def oob_regression_r2_score(rf, X_train, y_train):
    """
    Compute out-of-bag (OOB) R^2 for a scikit-learn random forest
    regressor. We learned the guts of scikit's RF from the BSD licensed
    code:
    https://github.com/scikit-learn/scikit-learn/blob/a24c8b46/sklearn/ensemble/forest.py#L702
    """
    X = X_train.values if isinstance(X_train, pd.DataFrame) else X_train
    y = y_train.values if isinstance(y_train, pd.Series) else y_train
    n_samples = len(X)
    predictions = np.zeros(n_samples)
    n_predictions = np.zeros(n_samples)
    for tree in rf.estimators_:
        unsampled_indices = _generate_unsampled_indices(tree.random_state, n_samples)
        tree_preds = tree.predict(X[unsampled_indices, :])
        predictions[unsampled_indices] += tree_preds
        n_predictions[unsampled_indices] += 1
    if (n_predictions == 0).any():
        warnings.warn("Too few trees; some variables do not have OOB scores.")
        n_predictions[n_predictions == 0] = 1
    predictions /= n_predictions
    oob_score = r2_score(y, predictions)
    return oob_score
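A hedged usage sketch. Note that _generate_unsampled_indices is a private sklearn helper whose signature changed in later releases (it gained an n_samples_bootstrap argument), so the snippet above targets older versions:

from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor

X, y = make_regression(n_samples=200, n_features=5, random_state=0)
rf = RandomForestRegressor(n_estimators=100, bootstrap=True,
                           oob_score=True, random_state=0).fit(X, y)
# should roughly agree with sklearn's own OOB estimate:
print(oob_regression_r2_score(rf, X, y), rf.oob_score_)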
Example 24
def main():
    """Run the demo."""
    # Training batches
    Xr, Yr, Xs, Ys = get_data()
    N = Yr.shape[0]
    my_feature_columns = []
    for key in Xr.keys():
        my_feature_columns.append(tf.feature_column.numeric_column(key=key))
    # Build 2 hidden layer DNN with 10, 10 units respectively.
    estimator = tf.estimator.Estimator(
        model_fn=my_model,
        model_dir="./sarcos/",
        params={"N": N,
                "feature_columns": my_feature_columns})
    input_fn = train_input_fn(Xr, Yr)
    eval_fn = test_input_fn(Xs, Ys)
    predict_fn = predict_input_fn(Xs)
    steps = EPOCHS_PER_EVAL * (N // BATCH_SIZE)
    for i in range(NEPOCHS // EPOCHS_PER_EVAL):
        # Train the Model.
        estimator.train(input_fn=input_fn, steps=steps)
        # Evaluate the model.
        eval_result = estimator.evaluate(input_fn=eval_fn)
        # Use the predict interface to do alternative R2 calculation
        d = estimator.predict(input_fn=predict_fn,
                              yield_single_examples=False)
        Yt = []
        print("\n\nEval Result: {}".format(eval_result))
        for di in d:
            Yt.append(di["predictions"])
        Yt = np.concatenate(Yt, axis=0)
        print("R2: {}\n\n".format(r2_score(Ys, Yt)))
Example 25
def get_diversity(self, X):
    """compute mean diversity of individual outputs"""
    # diversity in terms of cosine distances between features
    feature_correlations = np.zeros(X.shape[0] - 1)
    for i in np.arange(1, X.shape[0] - 1):
        feature_correlations[i] = max(0.0, r2_score(X[0], X[i]))
    # pdb.set_trace()
    self.diversity.append(1 - np.mean(feature_correlations))
Example 26
def deterministic_crowding(self, parents, offspring, X_parents, X_offspring):
    """Deterministic crowding implementation (for non-steady state).
    Offspring compete against the parent they are most similar to, here defined as
    the parent they are most correlated with.
    The offspring only replace their parent if they are more fit.
    """
    # get children locations produced from crossover
    cross_children = [i for i, o in enumerate(offspring) if len(o.parentid) > 1]
    # order offspring so that they are lined up with their most similar parent
    for c1, c2 in zip(cross_children[::2], cross_children[1::2]):
        # get parent locations
        p_loc = [j for j, p in enumerate(parents) if p.id in offspring[c1].parentid]
        if len(p_loc) != 2:
            continue
        # if each child is more correlated with its non-root parent
        if (r2_score(X_parents[p_loc[0]], X_offspring[c1]) +
                r2_score(X_parents[p_loc[1]], X_offspring[c2]) <
                r2_score(X_parents[p_loc[0]], X_offspring[c2]) +
                r2_score(X_parents[p_loc[1]], X_offspring[c1])):
            # swap offspring
            offspring[c1], offspring[c2] = offspring[c2], offspring[c1]
    survivors = []
    survivor_index = []
    for i, (p, o) in enumerate(zip(parents, offspring)):
        if p.fitness >= o.fitness:
            survivors.append(copy.deepcopy(p))
            survivor_index.append(i)
        else:
            survivors.append(copy.deepcopy(o))
            survivor_index.append(i + len(parents))
    # return survivors along with their indices
    return survivors, survivor_index
Example 27
def test_model_builder_metrics_list(metrics_: Optional[List[str]]):
    model_config = {
        "sklearn.multioutput.MultiOutputRegressor": {
            "estimator": "sklearn.linear_model.LinearRegression"
        }
    }
    data_config = get_random_data()
    evaluation_config: Dict[str, Any] = {"cv_mode": "full_build"}
    if metrics_:
        evaluation_config.update({"metrics": metrics_})
    machine = Machine(
        name="model-name",
        dataset=data_config,
        model=model_config,
        evaluation=evaluation_config,
        project_name="test",
    )
    _model, machine = ModelBuilder(machine).build()
    expected_metrics = metrics_ or [
        "sklearn.metrics.explained_variance_score",
        "sklearn.metrics.r2_score",
        "sklearn.metrics.mean_squared_error",
        "sklearn.metrics.mean_absolute_error",
    ]
    assert all(
        metric.split(".")[-1].replace("_", "-")
        in machine.metadata.build_metadata.model.cross_validation.scores
        for metric in expected_metrics
    )
Example 28
def metrics_from_list(metric_list: Optional[List[str]] = None) -> List[Callable]:
    """
    Given a list of metric function paths, e.g. ``sklearn.metrics.r2_score``, or
    simple function names which are expected to be in the ``sklearn.metrics`` module,
    return a list of those loaded functions.

    Parameters
    ----------
    metric_list: Optional[List[str]]
        List of function paths to use as metrics for the model. Defaults to
        those specified in :class:`gordo.workflow.config_components.NormalizedConfig`:
        sklearn.metrics.explained_variance_score,
        sklearn.metrics.r2_score,
        sklearn.metrics.mean_squared_error,
        sklearn.metrics.mean_absolute_error

    Returns
    -------
    List[Callable]
        A list of the loaded functions.

    Raises
    ------
    AttributeError:
        If a function cannot be loaded.
    """
    defaults = NormalizedConfig.DEFAULT_CONFIG_GLOBALS["evaluation"]["metrics"]
    funcs = list()
    for func_path in metric_list or defaults:
        func = pydoc.locate(func_path)
        if func is None:
            # Final attempt: load the function from the sklearn.metrics module.
            funcs.append(getattr(metrics, func_path))
        else:
            funcs.append(func)
    return funcs
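A hedged usage sketch of the fallback described in the docstring (expected result shown as a comment):

funcs = metrics_from_list(["sklearn.metrics.r2_score", "mean_absolute_error"])
# -> [sklearn.metrics.r2_score, sklearn.metrics.mean_absolute_error]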
Example 29
def main(transpose_inputs=False):
    train, test = get_train_test(random_state=12)
    if transpose_inputs:
        train = train.T
        test = test.T
    predicted = predict(train)
    r2 = metrics.r2_score(test[test > 0], predicted[test > 0])
    print('R2 score (binary {} neighbours): {:.1%}'.format(
        'movie' if transpose_inputs else 'user', r2))
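Boolean masks like test > 0 flatten the matrices to 1-D vectors, so r2_score is evaluated only on the observed (nonzero) entries; a tiny illustration with made-up ratings:

import numpy as np
from sklearn import metrics

test = np.array([[5.0, 0.0], [0.0, 3.0]])       # 0 marks an unobserved rating
predicted = np.array([[4.5, 1.0], [2.0, 2.5]])  # dense predictions
mask = test > 0
print(metrics.r2_score(test[mask], predicted[mask]))  # 0.75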