Python源码示例:sklearn.metrics.mean_absolute_error()
示例1
def proxy_a_distance(source_X, target_X):
    """
    Estimate the Proxy-A-Distance between two sets of representations.

    Source rows are labelled 0 and target rows 1, a linear SVM is fitted to
    separate them, and the classifier's error on that same data is mapped
    to ``2 * (1 - 2 * error)``.

    :param source_X: array-like whose first axis indexes the source samples.
    :param target_X: array-like whose first axis indexes the target samples.
    :return: the distance derived from the domain-classifier error.
    """
    n_src = np.shape(source_X)[0]
    n_tgt = np.shape(target_X)[0]

    features = np.vstack((source_X, target_X))
    # Domain labels: 0 for every source row, 1 for every target row.
    domain_labels = np.concatenate(
        [np.zeros(n_src, dtype=int), np.ones(n_tgt, dtype=int)]
    )

    domain_clf = svm.LinearSVC(random_state=0)
    domain_clf.fit(features, domain_labels)

    # The error is measured on the very data the classifier was fitted on.
    predicted = domain_clf.predict(features)
    err = metrics.mean_absolute_error(domain_labels, predicted)
    return 2 * (1 - 2 * err)
示例2
def calculate_regression_metrics(trained_sklearn_estimator, x_test, y_test):
    """
    Score an already-fitted estimator on a test set.

    Args:
        trained_sklearn_estimator (sklearn.base.BaseEstimator): a scikit-learn estimator that has been `.fit()`
        x_test (numpy.ndarray): A 2d numpy array of the x_test set (features)
        y_test (numpy.ndarray): A 1d numpy array of the y_test set (targets the predictions are compared to)

    Returns:
        dict: metric values keyed by ``'mean_squared_error'`` and ``'mean_absolute_error'``
    """
    y_hat = trained_sklearn_estimator.predict(x_test)
    return {
        'mean_squared_error': skmetrics.mean_squared_error(y_test, y_hat),
        'mean_absolute_error': skmetrics.mean_absolute_error(y_test, y_hat),
    }
示例3
def score_regression(y, y_hat, report=True):
    """
    Compute R2, RMSE and MAE for a set of predictions and format a report.

    :param y: reference values, passed first to each metric.
    :param y_hat: predicted values, passed second to each metric.
    :param report: when truthy, the report text is printed.
    :return: tuple ``(mae, report_string)``.
    """
    r2 = r2_score(y, y_hat)
    rmse = sqrt(mean_squared_error(y, y_hat))
    mae = mean_absolute_error(y, y_hat)

    report_lines = [
        "---Regression Score--- ",
        "R2 = " + str(r2),
        "RMSE = " + str(rmse),
        "MAE = " + str(mae),
    ]
    # Every line, including the last, is newline-terminated.
    report_string = "\n".join(report_lines) + "\n"

    if report:
        print(report_string)
    return mae, report_string
示例4
def test_metrics_from_list():
    """
    Check that ``ModelBuilder.metrics_from_list`` turns metric names into functions.
    """
    # Called without arguments: the four default metrics, in this order.
    expected_defaults = [
        metrics.explained_variance_score,
        metrics.r2_score,
        metrics.mean_squared_error,
        metrics.mean_absolute_error,
    ]
    assert ModelBuilder.metrics_from_list() == expected_defaults

    # Called with dotted paths: exactly those functions, in the given order.
    requested = [
        "sklearn.metrics.adjusted_mutual_info_score",
        "sklearn.metrics.r2_score",
    ]
    resolved = ModelBuilder.metrics_from_list(requested)
    assert resolved == [metrics.adjusted_mutual_info_score, metrics.r2_score]
示例5
def test_experiment_cat_custom_eval(tmpdir_name):
    """Run a 'cat' experiment with MAE supplied as the evaluation function."""
    features, target = make_regression_df(
        n_samples=1024,
        n_num_features=10,
        n_cat_features=2,
        random_state=0,
        id_column='user_id',
    )
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.5, random_state=0
    )

    experiment_params = dict(max_depth=8, num_boost_round=100, eval_metric='MAE')
    result = run_experiment(
        experiment_params,
        X_train,
        y_train,
        X_test,
        tmpdir_name,
        algorithm_type='cat',
        eval_func=mean_absolute_error,
    )

    # The last reported metric must equal the MAE of the out-of-fold predictions.
    oof_mae = mean_absolute_error(y_train, result.oof_prediction)
    assert oof_mae == result.metrics[-1]
    _check_file_exists(tmpdir_name)
示例6
def test_multioutput_regression():
    """Check the regression metrics on a small 3x4 binary multi-output example."""
    truth = np.array([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]])
    guess = np.array([[0, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 1]])

    # Expected value used for both the MSE and the MAE assertions.
    expected_mean_error = (1. / 3 + 2. / 3 + 2. / 3) / 4.

    mse = mean_squared_error(truth, guess)
    assert_almost_equal(mse, expected_mean_error)

    msle = mean_squared_log_error(truth, guess)
    assert_almost_equal(msle, 0.200, decimal=2)

    # mean_absolute_error and mean_squared_error are equal because
    # it is a binary problem.
    mae = mean_absolute_error(truth, guess)
    assert_almost_equal(mae, expected_mean_error)

    r2_weighted = r2_score(truth, guess, multioutput='variance_weighted')
    assert_almost_equal(r2_weighted, 1. - 5. / 2)

    r2_uniform = r2_score(truth, guess, multioutput='uniform_average')
    assert_almost_equal(r2_uniform, -.875)
示例7
def test_regression_metrics_at_limits():
    """Check metric values on trivial inputs and that MSLE rejects negatives."""
    # Error-style metrics: a single exact prediction scores 0.
    zero_error_metrics = (
        mean_squared_error,
        mean_squared_log_error,
        mean_absolute_error,
        median_absolute_error,
        max_error,
    )
    for error_metric in zero_error_metrics:
        assert_almost_equal(error_metric([0.], [0.]), 0.00, 2)

    # Score-style metrics: exact predictions score 1.
    assert_almost_equal(explained_variance_score([0.], [0.]), 1.00, 2)
    assert_almost_equal(r2_score([0., 1], [0., 1]), 1.00, 2)

    # A negative value in either argument must raise ValueError.
    negative_msg = ("Mean Squared Logarithmic Error cannot be "
                    "used when targets contain negative values.")
    negative_cases = (
        ([-1.], [-1.]),
        ([1., 2., 3.], [1., -2., 3.]),
        ([1., -2., 3.], [1., 2., 3.]),
    )
    for y_true, y_pred in negative_cases:
        assert_raises_regex(ValueError, negative_msg,
                            mean_squared_log_error, y_true, y_pred)
示例8
def test_regression_custom_weights():
    """Check the metrics when ``multioutput`` is an explicit list of weights."""
    y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
    y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]

    # (metric, expected value, decimals compared)
    weighted_cases = (
        (mean_squared_error, 0.39, 2),
        (mean_absolute_error, 0.475, 3),
        (r2_score, 0.94, 2),
        (explained_variance_score, 0.94, 2),
    )
    for metric, expected, places in weighted_cases:
        weighted = metric(y_true, y_pred, multioutput=[0.4, 0.6])
        assert_almost_equal(weighted, expected, decimal=places)

    # Handling msle separately as it does not accept negative inputs.
    y_true = np.array([[0.5, 1], [1, 2], [7, 6]])
    y_pred = np.array([[0.5, 2], [1, 2.5], [8, 8]])
    msle = mean_squared_log_error(y_true, y_pred, multioutput=[0.3, 0.7])
    mse_of_logs = mean_squared_error(
        np.log(1 + y_true), np.log(1 + y_pred), multioutput=[0.3, 0.7]
    )
    assert_almost_equal(msle, mse_of_logs, decimal=2)
示例9
def test_trigonometric():
    """Check that adding trig functions to the function set changes the score"""
    def holdout_mae(**extra):
        # Fit on the first 400 rows, score MAE on the remaining rows.
        model = SymbolicRegressor(population_size=100, generations=2,
                                  random_state=0, **extra)
        model.fit(boston.data[:400, :], boston.target[:400])
        return mean_absolute_error(model.predict(boston.data[400:, :]),
                                   boston.target[400:])

    default_score = holdout_mae()
    trig_score = holdout_mae(function_set=['add', 'sub', 'mul', 'div',
                                           'sin', 'cos', 'tan'])
    assert(abs(default_score - trig_score) > 0.01)
示例10
def test_subsample():
    """Check that changing ``max_samples`` changes the score"""
    def holdout_mae(**extra):
        # Fit on the first 400 rows, score MAE on the remaining rows.
        model = SymbolicRegressor(population_size=100, generations=2,
                                  random_state=0, **extra)
        model.fit(boston.data[:400, :], boston.target[:400])
        return mean_absolute_error(model.predict(boston.data[400:, :]),
                                   boston.target[400:])

    full_sample_score = holdout_mae(max_samples=1.0)
    half_sample_score = holdout_mae(max_samples=0.5)
    assert(abs(full_sample_score - half_sample_score) > 0.01)
示例11
def test_parsimony_coefficient():
    """Check that changing ``parsimony_coefficient`` changes the score"""
    def holdout_mae(**extra):
        # Fit on the first 400 rows, score MAE on the remaining rows.
        model = SymbolicRegressor(population_size=100, generations=2,
                                  random_state=0, **extra)
        model.fit(boston.data[:400, :], boston.target[:400])
        return mean_absolute_error(model.predict(boston.data[400:, :]),
                                   boston.target[400:])

    fixed_score = holdout_mae(parsimony_coefficient=0.001)
    auto_score = holdout_mae(parsimony_coefficient='auto')
    assert(abs(fixed_score - auto_score) > 0.01)
示例12
def pla(data, period=15):
    """
    Score a piecewise approximation of ``data`` derived from a spline fit.

    A spline is fitted through ``data`` with ``splrep(..., s=0)``. Its value
    and first two derivatives at the unique knots feed a
    ``PiecewisePolynomial`` (``orders=1``), which is sampled at
    ``int(len(data) / period)`` evenly spaced positions over
    ``[0, len(data)]``. The samples and the original x positions are then
    handed to the helper ``interp1`` (presumably resampling back onto the
    original positions -- confirm against its definition), and the result
    is compared with ``data``.

    :param data: sequence of values, indexed 0..len(data)-1.
    :param period: divisor that sets the number of sample points.
    :return: the square root of the mean absolute error between the
        reconstructed series and ``data``.
    """
    n_points = int(len(data) / period)
    orig_x = range(0, len(data))
    tck = splrep(orig_x, data, s=0)
    test_xs = np.linspace(0, len(data), n_points)

    knots = np.unique(tck[0])
    # ``range`` rather than ``xrange``: the latter exists only on Python 2
    # and raises NameError on Python 3; ``range`` works on both.
    knot_derivs = [[splev(x, tck, der=j) for j in range(3)] for x in knots]

    # NOTE(review): PiecewisePolynomial is absent from newer SciPy releases
    # (BPoly.from_derivatives is the documented successor) -- confirm the
    # SciPy version this code is pinned to before upgrading.
    P = interpolate.PiecewisePolynomial(knots, knot_derivs, orders=1)
    test_ys = P(test_xs)

    inter_y = interp1(test_xs, test_ys, orig_x)
    # The returned value is sqrt(MAE), not the plain MAE.
    return sqrt(mean_absolute_error(inter_y, data))
#def paa(data, period=15):
示例13
def paa(data, period=15):
    """
    Score a segment-mean (piecewise aggregate) approximation of ``data``.

    ``data`` is cut down to ``int(len(data) / period)`` whole segments, each
    segment is replaced by its mean, and the resulting step series is
    compared against the truncated input.

    :param data: 1-D sliceable sequence of numbers.
    :param period: segment length.
    :return: the square root of the mean absolute error between the step
        series and the truncated ``data``.
    :raises ZeroDivisionError: when ``len(data) < period`` (no full segment).
    """
    n_segments = int(len(data) / period)
    # Drop the trailing samples that do not fill a whole segment.
    data = data[:n_segments * period]
    seg_len = int(len(data) / n_segments)

    segments = np.reshape(data, (n_segments, seg_len))
    # Vectorised per-segment mean. The previous ``map(...)`` form yields a
    # lazy iterator on Python 3, which NumPy cannot tile into an array.
    seg_means = segments.mean(axis=1)
    # Repeat each mean ``seg_len`` times in place: same layout as tiling the
    # means row-wise and flattening column-major.
    approx = np.repeat(seg_means, seg_len)

    # The returned value is sqrt(MAE), not the plain MAE.
    return sqrt(mean_absolute_error(approx, data))
示例14
def pla(data, period=15):
    """
    Score a piecewise approximation of ``data`` derived from a spline fit.

    A spline is fitted through ``data`` with ``splrep(..., s=0)``. Its value
    and first two derivatives at the unique knots feed a
    ``PiecewisePolynomial`` (``orders=1``), which is sampled at
    ``int(len(data) / period)`` evenly spaced positions over
    ``[0, len(data)]``. The samples and the original x positions are then
    handed to the helper ``interp1`` (presumably resampling back onto the
    original positions -- confirm against its definition), and the result
    is compared with ``data``.

    :param data: sequence of values, indexed 0..len(data)-1.
    :param period: divisor that sets the number of sample points.
    :return: the square root of the mean absolute error between the
        reconstructed series and ``data``.
    """
    n_points = int(len(data) / period)
    orig_x = range(0, len(data))
    tck = splrep(orig_x, data, s=0)
    test_xs = np.linspace(0, len(data), n_points)

    knots = np.unique(tck[0])
    # ``range`` rather than ``xrange``: the latter exists only on Python 2
    # and raises NameError on Python 3; ``range`` works on both.
    knot_derivs = [[splev(x, tck, der=j) for j in range(3)] for x in knots]

    # NOTE(review): PiecewisePolynomial is absent from newer SciPy releases
    # (BPoly.from_derivatives is the documented successor) -- confirm the
    # SciPy version this code is pinned to before upgrading.
    P = interpolate.PiecewisePolynomial(knots, knot_derivs, orders=1)
    test_ys = P(test_xs)

    inter_y = interp1(test_xs, test_ys, orig_x)
    # The returned value is sqrt(MAE), not the plain MAE.
    return sqrt(mean_absolute_error(inter_y, data))
#def paa(data, period=15):
示例15
def paa(data, period=15):
    """
    Score a segment-mean (piecewise aggregate) approximation of ``data``.

    ``data`` is cut down to ``int(len(data) / period)`` whole segments, each
    segment is replaced by its mean, and the resulting step series is
    compared against the truncated input.

    :param data: 1-D sliceable sequence of numbers.
    :param period: segment length.
    :return: the square root of the mean absolute error between the step
        series and the truncated ``data``.
    :raises ZeroDivisionError: when ``len(data) < period`` (no full segment).
    """
    n_segments = int(len(data) / period)
    # Drop the trailing samples that do not fill a whole segment.
    data = data[:n_segments * period]
    seg_len = int(len(data) / n_segments)

    segments = np.reshape(data, (n_segments, seg_len))
    # Vectorised per-segment mean. The previous ``map(...)`` form yields a
    # lazy iterator on Python 3, which NumPy cannot tile into an array.
    seg_means = segments.mean(axis=1)
    # Repeat each mean ``seg_len`` times in place: same layout as tiling the
    # means row-wise and flattening column-major.
    approx = np.repeat(seg_means, seg_len)

    # The returned value is sqrt(MAE), not the plain MAE.
    return sqrt(mean_absolute_error(approx, data))
示例16
def eva_regress(y_true, y_pred):
    """Evaluation
    Print a set of regression metrics for the predicted result.

    # Arguments
        y_true: List/ndarray, true data.
        y_pred: List/ndarray, predicted data.
    """
    mape = MAPE(y_true, y_pred)
    vs = metrics.explained_variance_score(y_true, y_pred)
    mae = metrics.mean_absolute_error(y_true, y_pred)
    mse = metrics.mean_squared_error(y_true, y_pred)
    r2 = metrics.r2_score(y_true, y_pred)

    # (format, value) pairs in the order they are printed.
    report = (
        ('explained_variance_score:%f', vs),
        ('mape:%f%%', mape),
        ('mae:%f', mae),
        ('mse:%f', mse),
        ('rmse:%f', math.sqrt(mse)),
        ('r2:%f', r2),
    )
    for template, value in report:
        print(template % value)
示例17
def macro_mae(y_test, y_pred, classes):
    """
    Average the per-class mean absolute error over the classes in ``y_test``.

    Both label sequences are first translated through the inverse of
    ``get_labels_to_categories_map(classes)`` (values become keys, keys are
    cast to ``int``). For every distinct translated true label, the MAE is
    computed over the samples carrying that true label; the mean of those
    per-class values is returned. The distribution of translated predictions
    is printed along the way.

    :param y_test: true labels.
    :param y_pred: predicted labels.
    :param classes: argument forwarded to ``get_labels_to_categories_map``.
    :return: mean of the per-class MAE values.
    """
    cat_to_class_mapping = {
        category: int(label)
        for label, category in get_labels_to_categories_map(classes).items()
    }
    true_ids = [cat_to_class_mapping[y] for y in y_test]
    pred_ids = [cat_to_class_mapping[y] for y in y_pred]

    # Show how often each class was predicted.
    print(Counter(pred_ids))

    per_class_mae = {}
    for cls in set(true_ids):
        # Keep only the (true, predicted) pairs whose true label is ``cls``.
        pairs = [(t, p) for t, p in zip(true_ids, pred_ids) if t == cls]
        cls_true = [pair[0] for pair in pairs]
        cls_pred = [pair[1] for pair in pairs]
        per_class_mae[cls] = mean_absolute_error(cls_true, cls_pred)

    return numpy.mean(list(per_class_mae.values()))
示例18
def test_automl():
    """Check that tuned AutoLGB and AutoXGB models beat a random baseline on MAE."""
    X, y = make_regression(n_samples=N_OBS,
                           n_features=N_FEATURE,
                           n_informative=N_IMP_FEATURE,
                           random_state=RANDOM_SEED)

    X = pd.DataFrame(X, columns=['x{}'.format(i) for i in range(X.shape[1])])
    y = pd.Series(y)
    # logging treats the first argument as a %-format string and the rest as
    # its arguments; passing the two shapes directly makes message formatting
    # fail once INFO records are emitted, so give an explicit format.
    logging.info('%s %s', X.shape, y.shape)

    X_trn, X_tst, y_trn, y_tst = train_test_split(X, y, test_size=.2, random_state=RANDOM_SEED)

    model = AutoLGB(objective='regression', metric='l1')
    model.tune(X_trn, y_trn)
    model.fit(X_trn, y_trn)
    p = model.predict(X_tst)
    # Baseline: uniform random values spanning the training-target range.
    r = (np.random.rand(X_tst.shape[0]) * (y_trn.max() - y_trn.min()) + y_trn.min())
    logging.info('MAE (LGB): {:.4f}'.format(mae(y_tst, p)))
    assert mae(y_tst, p) < mae(y_tst, r)

    model = AutoXGB(objective='reg:linear', metric='rmse')
    model.tune(X_trn, y_trn)
    model.fit(X_trn, y_trn)
    p = model.predict(X_tst)
    r = (np.random.rand(X_tst.shape[0]) * (y_trn.max() - y_trn.min()) + y_trn.min())
    logging.info('MAE (XGB): {:.4f}'.format(mae(y_tst, p)))
    assert mae(y_tst, p) < mae(y_tst, r)
示例19
def proxy_a_distance(source_X, target_X):
    """
    Estimate the Proxy-A-Distance between two sets of representations.

    Source rows are labelled 0 and target rows 1, a linear SVM is fitted to
    separate them, and the classifier's error on that same data is mapped
    to ``2 * (1 - 2 * error)``.

    :param source_X: array-like whose first axis indexes the source samples.
    :param target_X: array-like whose first axis indexes the target samples.
    :return: the distance derived from the domain-classifier error.
    """
    n_src = np.shape(source_X)[0]
    n_tgt = np.shape(target_X)[0]

    features = np.vstack((source_X, target_X))
    # Domain labels: 0 for every source row, 1 for every target row.
    domain_labels = np.concatenate(
        [np.zeros(n_src, dtype=int), np.ones(n_tgt, dtype=int)]
    )

    domain_clf = svm.LinearSVC(random_state=0)
    domain_clf.fit(features, domain_labels)

    # The error is measured on the very data the classifier was fitted on.
    predicted = domain_clf.predict(features)
    err = metrics.mean_absolute_error(domain_labels, predicted)
    return 2 * (1 - 2 * err)
示例20
def mae(self, labels, x, x_lens, y, y_lens):
    """ Mean absolute error (MAE) between ``labels`` and the model output.

    The inputs are turned into examples with ``self.prepare_examples`` and
    scored with ``self.model.predict_proba``.
    """
    batch = self.prepare_examples(x, x_lens, y, y_lens)
    return mean_absolute_error(labels, self.model.predict_proba(batch))
示例21
def mae_score(y_true, y_pred):
    """Return the mean absolute error between ``y_true`` and ``y_pred``."""
    score = mean_absolute_error(y_true, y_pred)
    return score
示例22
def mae_score(y_true, y_pred):
    """Return the mean absolute error between ``y_true`` and ``y_pred``."""
    score = mean_absolute_error(y_true, y_pred)
    return score
示例23
def evaluate(self, data, label):
    """Score the model on ``data`` against ``label``.

    Uses the first element of ``self.model.predict(data, verbose=1)``.
    Returns the MAE alone when the externally defined flag ``do_reg`` is
    truthy, otherwise the tuple ``(accuracy, mae)`` with the accuracy taken
    over the argmax along axis 1 of labels and predictions.
    """
    ypred = self.model.predict(data, verbose=1)[0]
    if do_reg:
        return mean_absolute_error(label, ypred)

    acc = accuracy_score(label.argmax(1), ypred.argmax(1))
    mae = mean_absolute_error(label, ypred)
    return (acc, mae)
#%%
示例24
def test_apply():
    """Check that an applied pipeline can be executed and validated."""
    executed = pipeline.apply(lambda x: np.mean(x, axis=0)).execute()
    n_test_rows = dataset.X_test.shape[0]
    assert executed.shape[0] == n_test_rows

    applied = pipeline.apply(lambda x: np.mean(x, axis=0))
    fold_results = applied.validate(scorer=mean_absolute_error, k=10)
    assert len(fold_results) == 10
示例25
def test_optimizer():
    """Check the length and the sum of ``Optimizer.minimize('SLSQP')`` output."""
    optimizer = Optimizer([model, model_2], scorer=mean_absolute_error)
    solution = optimizer.minimize('SLSQP')

    # One entry per model, and the entries add up to 1.
    assert solution.shape[0] == 2
    assert_almost_equal(solution.sum(), 1.0, decimal=5)
示例26
def test_report_score():
    """Smoke-test ``report_score`` with a metric function and with ``None``."""
    for metric in (mean_absolute_error, None):
        report_score(np.array([1, 2, 3]), metric)
示例27
def cross_val_score(estimator, y, exogenous=None, scoring=None, cv=None,
                    verbose=0, error_score=np.nan):
    """Cross-validate an estimator and return only its test scores

    All arguments are forwarded unchanged to ``cross_validate``.

    Parameters
    ----------
    estimator : estimator
        Estimator object exposing a ``fit`` method.

    y : array-like or iterable, shape=(n_samples,)
        The time-series array.

    exogenous : array-like, shape=[n_obs, n_vars], optional (default=None)
        Optional 2-d array of exogenous variables.

    scoring : str or callable, optional (default=None)
        Scoring metric. A callable must follow the signature
        ``metric(true, predicted)``. Accepted strings include:

            - 'smape'
            - 'mean_absolute_error'
            - 'mean_squared_error'

    cv : BaseTSCrossValidator or None, optional (default=None)
        Cross-validation instance. When None, a RollingForecastCV is used.

    verbose : integer, optional
        The verbosity level.

    error_score : 'raise' or numeric
        Score assigned when fitting the estimator fails. With 'raise' the
        error is raised; with a numeric value a ModelFitWarning is raised.
        This does not affect the refit step, which always raises the error.

    Returns
    -------
    The ``'test_score'`` entry of the ``cross_validate`` result.
    """
    results = cross_validate(
        estimator=estimator,
        y=y,
        exogenous=exogenous,
        scoring=scoring,
        cv=cv,
        verbose=verbose,
        error_score=error_score,
    )
    return results['test_score']
示例28
def test_isoup_tree_mean(test_path):
    """Check iSOUPTreeRegressor (mean leaves) against stored predictions."""
    stream = RegressionGenerator(n_samples=2000, n_features=20,
                                 n_informative=15, random_state=1,
                                 n_targets=3)
    learner = iSOUPTreeRegressor(leaf_prediction='mean')

    max_samples = 2000
    wait_samples = 200
    n_checkpoints = max_samples // wait_samples
    y_pred = np.zeros((n_checkpoints, 3))
    y_true = np.zeros((n_checkpoints, 3))

    for cnt in range(max_samples):
        X, y = stream.next_sample()
        # Record a prediction every ``wait_samples`` samples, skipping the
        # very first one, before the learner sees that sample.
        if cnt != 0 and cnt % wait_samples == 0:
            row = cnt // wait_samples
            y_pred[row, :] = learner.predict(X)
            y_true[row, :] = y
        learner.partial_fit(X, y)

    test_file = os.path.join(test_path,
                             'expected_preds_multi_target_regression_mean.npy')
    expected_predictions = np.load(test_file)
    assert np.allclose(y_pred, expected_predictions)

    error = mean_absolute_error(y_true, y_pred)
    expected_error = 191.2823924547882
    assert np.isclose(error, expected_error)

    expected_info = (
        "iSOUPTreeRegressor(binary_split=False, grace_period=200, leaf_prediction='mean', "
        "learning_ratio_const=True, learning_ratio_decay=0.001, learning_ratio_perceptron=0.02, "
        "max_byte_size=33554432, memory_estimate_period=1000000, nb_threshold=0, no_preprune=False, "
        "nominal_attributes=None, random_state=None, remove_poor_atts=False, split_confidence=1e-07, "
        "stop_mem_management=False, tie_threshold=0.05)"
    )
    # Collapse every run of whitespace in the description to one space.
    info = " ".join(learner.get_info().split())
    assert info == expected_info

    assert type(learner.predict(X)) == np.ndarray
示例29
def test_model_builder_metrics_list(metrics_: Optional[List[str]]):
    """Check that the built machine reports a score for every expected metric.

    When ``metrics_`` is falsy the four default metric paths are expected.
    """
    model_config = {
        "sklearn.multioutput.MultiOutputRegressor": {
            "estimator": "sklearn.linear_model.LinearRegression"
        }
    }
    data_config = get_random_data()

    evaluation_config: Dict[str, Any] = {"cv_mode": "full_build"}
    if metrics_:
        evaluation_config["metrics"] = metrics_

    machine = Machine(
        name="model-name",
        dataset=data_config,
        model=model_config,
        evaluation=evaluation_config,
        project_name="test",
    )
    _model, machine = ModelBuilder(machine).build()

    default_metrics = [
        "sklearn.metrics.explained_variance_score",
        "sklearn.metrics.r2_score",
        "sklearn.metrics.mean_squared_error",
        "sklearn.metrics.mean_absolute_error",
    ]
    expected_metrics = metrics_ or default_metrics

    scores = machine.metadata.build_metadata.model.cross_validation.scores
    for metric_path in expected_metrics:
        # Score keys use the function name with dashes instead of underscores.
        score_key = metric_path.split(".")[-1].replace("_", "-")
        assert score_key in scores
示例30
def test_regression_metrics(n_samples=50):
    """Check the metrics when every prediction is exactly one above its target."""
    y_true = np.arange(n_samples)
    y_pred = y_true + 1

    assert_almost_equal(mean_squared_error(y_true, y_pred), 1.)

    # MSLE must match MSE computed on log(1 + value).
    mse_of_logs = mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred))
    assert_almost_equal(mean_squared_log_error(y_true, y_pred), mse_of_logs)

    # A constant offset of 1 gives an error of exactly 1 for each of these.
    for unit_error_metric in (mean_absolute_error, median_absolute_error,
                              max_error):
        assert_almost_equal(unit_error_metric(y_true, y_pred), 1.)

    assert_almost_equal(r2_score(y_true, y_pred), 0.995, 2)
    assert_almost_equal(explained_variance_score(y_true, y_pred), 1.)