Python source code examples: sklearn.metrics.mean_squared_error()
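For context, here is a minimal, self-contained sketch of the basic call pattern before the collected examples. The data values are illustrative only and do not come from any of the projects excerpted below.

# Minimal usage sketch (illustrative values only)
import numpy as np
from sklearn.metrics import mean_squared_error

y_true = np.array([3.0, -0.5, 2.0, 7.0])   # ground-truth targets
y_pred = np.array([2.5, 0.0, 2.0, 8.0])    # model predictions

mse_value = mean_squared_error(y_true, y_pred)   # average of the squared residuals
rmse_value = np.sqrt(mse_value)                  # RMSE, as several examples below compute it
print(mse_value, rmse_value)                     # 0.375 0.6123724356957945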
Example 1
# Assumed imports for this excerpt (not shown in the original snippet):
# xgboost as xgb, and sklearn's mean_squared_error aliased as mse.
import xgboost as xgb
from sklearn.metrics import mean_squared_error as mse

def Train(data, modelcount, censhu, yanzhgdata):
    model = xgb.XGBRegressor(max_depth=censhu, learning_rate=0.1, n_estimators=modelcount, silent=True, objective='reg:gamma')
    model.fit(data[:, :-1], data[:, -1])
    # Predictions on the training data
    train_out = model.predict(data[:, :-1])
    # Compute the training MSE
    train_mse = mse(data[:, -1], train_out)
    # Predictions on the validation data
    add_yan = model.predict(yanzhgdata[:, :-1])
    # Compute the validation MSE
    add_mse = mse(yanzhgdata[:, -1], add_yan)
    print(train_mse, add_mse)
    return train_mse, add_mse

# Function that determines the final combination
Example 2
def test_regression_small():
    X, y = make_regression(n_samples=2000,
                           n_features=10,
                           n_informative=5,
                           noise=30.0,
                           random_state=0)
    X = pd.DataFrame(X)
    y = pd.Series(y)
    cls = MALSS('regression').fit(X, y, 'test_regression_small')
    cls.generate_module_sample()

    from sklearn.metrics import mean_squared_error
    pred = cls.predict(X)
    print(mean_squared_error(y, pred))

    assert len(cls.algorithms) == 4
    assert cls.algorithms[0].best_score is not None
Example 3
def test_regression_medium():
    X, y = make_regression(n_samples=20000,
                           n_features=10,
                           n_informative=5,
                           noise=30.0,
                           random_state=0)
    X = pd.DataFrame(X)
    y = pd.Series(y)
    cls = MALSS('regression').fit(X, y, 'test_regression_medium')
    cls.generate_module_sample()

    from sklearn.metrics import mean_squared_error
    pred = cls.predict(X)
    print(mean_squared_error(y, pred))

    assert len(cls.algorithms) == 2
    assert cls.algorithms[0].best_score is not None
Example 4
def test_regression_big():
    X, y = make_regression(n_samples=200000,
                           n_features=10,
                           n_informative=5,
                           noise=30.0,
                           random_state=0)
    X = pd.DataFrame(X)
    y = pd.Series(y)
    cls = MALSS('regression').fit(X, y, 'test_regression_big')
    cls.generate_module_sample()

    from sklearn.metrics import mean_squared_error
    pred = cls.predict(X)
    print(mean_squared_error(y, pred))

    assert len(cls.algorithms) == 1
    assert cls.algorithms[0].best_score is not None
Example 5
def calculate_regression_metrics(trained_sklearn_estimator, x_test, y_test):
    """
    Given a trained estimator, calculate metrics.

    Args:
        trained_sklearn_estimator (sklearn.base.BaseEstimator): a scikit-learn estimator that has been `.fit()`
        x_test (numpy.ndarray): A 2d numpy array of the x_test set (features)
        y_test (numpy.ndarray): A 1d numpy array of the y_test set (true target values)

    Returns:
        dict: A dictionary of metrics objects
    """
    # Get predictions
    predictions = trained_sklearn_estimator.predict(x_test)

    # Calculate individual metrics
    mean_squared_error = skmetrics.mean_squared_error(y_test, predictions)
    mean_absolute_error = skmetrics.mean_absolute_error(y_test, predictions)

    result = {'mean_squared_error': mean_squared_error, 'mean_absolute_error': mean_absolute_error}
    return result
Example 6
def score_regression(y, y_hat, report=True):
    """
    Create regression scores (R2, RMSE, MAE).
    :param y: true target values
    :param y_hat: predicted values
    :param report: if True, print the report string
    :return: MAE and the formatted report string
    """
    r2 = r2_score(y, y_hat)
    rmse = sqrt(mean_squared_error(y, y_hat))
    mae = mean_absolute_error(y, y_hat)

    report_string = "---Regression Score--- \n"
    report_string += "R2 = " + str(r2) + "\n"
    report_string += "RMSE = " + str(rmse) + "\n"
    report_string += "MAE = " + str(mae) + "\n"

    if report:
        print(report_string)

    return mae, report_string
Example 7
def mean_squared_error_scorer(
    golds: ndarray,
    probs: ndarray,
    preds: Optional[ndarray],
    uids: Optional[List[str]] = None,
) -> Dict[str, float]:
    """Mean squared error regression loss.

    Args:
        golds: Ground truth values.
        probs: Predicted probabilities.
        preds: Predicted values.
        uids: Unique ids, defaults to None.

    Returns:
        Mean squared error regression loss.
    """
    return {"mean_squared_error": float(mean_squared_error(golds, probs))}
Example 8
# Assumed imports for this excerpt (not shown in the original snippet):
# RF is taken to be RandomForestRegressor, and mse to be mean_squared_error.
from sklearn.ensemble import RandomForestRegressor as RF
from sklearn.metrics import mean_squared_error as mse

def Train(data, treecount, tezh, yanzhgdata):
    model = RF(n_estimators=treecount, max_features=tezh)
    model.fit(data[:, :-1], data[:, -1])
    # Predictions on the training data
    train_out = model.predict(data[:, :-1])
    # Compute the training MSE
    train_mse = mse(data[:, -1], train_out)
    # Predictions on the validation data
    add_yan = model.predict(yanzhgdata[:, :-1])
    # Compute the validation MSE
    add_mse = mse(yanzhgdata[:, -1], add_yan)
    print(train_mse, add_mse)
    return train_mse, add_mse

# Function that determines the final combination
Example 9
# Assumed imports for this excerpt (not shown in the original snippet).
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error as mse

def Train(data, modelcount, censhu, yanzhgdata):
    model = AdaBoostRegressor(DecisionTreeRegressor(max_depth=censhu),
                              n_estimators=modelcount, learning_rate=0.8)
    model.fit(data[:, :-1], data[:, -1])
    # Predictions on the training data
    train_out = model.predict(data[:, :-1])
    # Compute the training MSE
    train_mse = mse(data[:, -1], train_out)
    # Predictions on the validation data
    add_yan = model.predict(yanzhgdata[:, :-1])
    # Compute the validation MSE
    add_mse = mse(yanzhgdata[:, -1], add_yan)
    print(train_mse, add_mse)
    return train_mse, add_mse

# Function that determines the final combination
Example 10
# Assumed imports for this excerpt (not shown in the original snippet):
# lightgbm as lgbm, and sklearn's mean_squared_error aliased as mse.
import lightgbm as lgbm
from sklearn.metrics import mean_squared_error as mse

def Train(data, modelcount, censhu, yanzhgdata):
    model = lgbm.LGBMRegressor(boosting_type='gbdt', objective='regression', num_leaves=1200,
                               learning_rate=0.17, n_estimators=modelcount, max_depth=censhu,
                               metric='rmse', bagging_fraction=0.8, feature_fraction=0.8, reg_lambda=0.9)
    model.fit(data[:, :-1], data[:, -1])
    # Predictions on the training data
    train_out = model.predict(data[:, :-1])
    # Compute the training MSE
    train_mse = mse(data[:, -1], train_out)
    # Predictions on the validation data
    add_yan = model.predict(yanzhgdata[:, :-1])
    # Compute the validation MSE
    add_mse = mse(yanzhgdata[:, -1], add_yan)
    print(train_mse, add_mse)
    return train_mse, add_mse

# Function that determines the final combination
Example 11
def test_metrics_wrapper():
    # make the features in y be in different scales
    y = np.array([[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]) * [1, 100]

    # With no scaler provided it is relevant which of the two series gets an 80% error
    metric_func_noscaler = model_utils.metric_wrapper(mean_squared_error)

    mse_feature_one_wrong = metric_func_noscaler(y, y * [0.8, 1])
    mse_feature_two_wrong = metric_func_noscaler(y, y * [1, 0.8])

    assert not np.isclose(mse_feature_one_wrong, mse_feature_two_wrong)

    # With a scaler provided it is not relevant which of the two series gets an 80% error
    scaler = MinMaxScaler().fit(y)
    metric_func_scaler = model_utils.metric_wrapper(mean_squared_error, scaler=scaler)

    mse_feature_one_wrong = metric_func_scaler(y, y * [0.8, 1])
    mse_feature_two_wrong = metric_func_scaler(y, y * [1, 0.8])

    assert np.isclose(mse_feature_one_wrong, mse_feature_two_wrong)
Example 12
def test_get_metrics_dict_scaler(scaler, mock):
    mock_model = mock
    metrics_list = [sklearn.metrics.mean_squared_error]

    # make the features in y be in different scales
    y = pd.DataFrame(
        np.array([[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]) * [1, 100],
        columns=["Tag 1", "Tag 2"],
    )

    metrics_dict = ModelBuilder.build_metrics_dict(metrics_list, y, scaler=scaler)
    metric_func = metrics_dict["mean-squared-error"]

    mock_model.predict = lambda _y: _y * [0.8, 1]
    mse_feature_one_wrong = metric_func(mock_model, y, y)

    mock_model.predict = lambda _y: _y * [1, 0.8]
    mse_feature_two_wrong = metric_func(mock_model, y, y)

    if scaler:
        assert np.isclose(mse_feature_one_wrong, mse_feature_two_wrong)
    else:
        assert not np.isclose(mse_feature_one_wrong, mse_feature_two_wrong)
Example 13
def test_metrics_from_list():
    """
    Check getting functions from a list of metric names
    """
    default = ModelBuilder.metrics_from_list()
    assert default == [
        metrics.explained_variance_score,
        metrics.r2_score,
        metrics.mean_squared_error,
        metrics.mean_absolute_error,
    ]

    specifics = ModelBuilder.metrics_from_list(
        ["sklearn.metrics.adjusted_mutual_info_score", "sklearn.metrics.r2_score"]
    )
    assert specifics == [metrics.adjusted_mutual_info_score, metrics.r2_score]
Example 14
def test_averaging_opt_minimize():
    X, y = make_regression_df(n_samples=1024)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    oof, test = _make_1st_stage_preds(X_train, y_train, X_test)

    best_single_model = min(mean_squared_error(y_train, oof[0]),
                            mean_squared_error(y_train, oof[1]),
                            mean_squared_error(y_train, oof[2]))

    result = averaging_opt(test, oof, y_train, mean_squared_error, higher_is_better=False)

    assert result.score <= best_single_model

    result_simple_avg = averaging(test, oof, y_train, eval_func=mean_squared_error)
    assert result.score <= result_simple_avg.score
Example 15
def test_experiment_lgb_regressor(tmpdir_name):
    X, y = make_regression_df(n_samples=1024, n_num_features=10, n_cat_features=2,
                              random_state=0, id_column='user_id')
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    params = {
        'objective': 'regression',
        'max_depth': 8
    }

    result = run_experiment(params, X_train, y_train, X_test, tmpdir_name)

    assert len(np.unique(result.oof_prediction)) > 5  # making sure prediction is not binarized
    assert len(np.unique(result.test_prediction)) > 5
    assert mean_squared_error(y_train, result.oof_prediction) == result.metrics[-1]

    _check_file_exists(tmpdir_name)
Example 16
def test_boston():
    # Check consistency on the Boston house-prices dataset.
    for (name, Tree), criterion in product(REG_TREES.items(), REG_CRITERIONS):
        reg = Tree(criterion=criterion, random_state=0)
        reg.fit(boston.data, boston.target)
        score = mean_squared_error(boston.target, reg.predict(boston.data))
        assert_less(score, 1,
                    "Failed with {0}, criterion = {1} and score = {2}"
                    "".format(name, criterion, score))

        # using fewer features reduces the learning ability of this tree,
        # but reduces training time.
        reg = Tree(criterion=criterion, max_features=6, random_state=0)
        reg.fit(boston.data, boston.target)
        score = mean_squared_error(boston.target, reg.predict(boston.data))
        assert_less(score, 2,
                    "Failed with {0}, criterion = {1} and score = {2}"
                    "".format(name, criterion, score))
Example 17
def test_regression_metrics_at_limits():
    assert_almost_equal(mean_squared_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_squared_log_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(mean_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(median_absolute_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(max_error([0.], [0.]), 0.00, 2)
    assert_almost_equal(explained_variance_score([0.], [0.]), 1.00, 2)
    assert_almost_equal(r2_score([0., 1], [0., 1]), 1.00, 2)
    assert_raises_regex(ValueError, "Mean Squared Logarithmic Error cannot be "
                        "used when targets contain negative values.",
                        mean_squared_log_error, [-1.], [-1.])
    assert_raises_regex(ValueError, "Mean Squared Logarithmic Error cannot be "
                        "used when targets contain negative values.",
                        mean_squared_log_error, [1., 2., 3.], [1., -2., 3.])
    assert_raises_regex(ValueError, "Mean Squared Logarithmic Error cannot be "
                        "used when targets contain negative values.",
                        mean_squared_log_error, [1., -2., 3.], [1., 2., 3.])
Example 18
def test_regression_custom_weights():
    y_true = [[1, 2], [2.5, -1], [4.5, 3], [5, 7]]
    y_pred = [[1, 1], [2, -1], [5, 4], [5, 6.5]]

    msew = mean_squared_error(y_true, y_pred, multioutput=[0.4, 0.6])
    maew = mean_absolute_error(y_true, y_pred, multioutput=[0.4, 0.6])
    rw = r2_score(y_true, y_pred, multioutput=[0.4, 0.6])
    evsw = explained_variance_score(y_true, y_pred, multioutput=[0.4, 0.6])

    assert_almost_equal(msew, 0.39, decimal=2)
    assert_almost_equal(maew, 0.475, decimal=3)
    assert_almost_equal(rw, 0.94, decimal=2)
    assert_almost_equal(evsw, 0.94, decimal=2)

    # Handling msle separately as it does not accept negative inputs.
    y_true = np.array([[0.5, 1], [1, 2], [7, 6]])
    y_pred = np.array([[0.5, 2], [1, 2.5], [8, 8]])
    msle = mean_squared_log_error(y_true, y_pred, multioutput=[0.3, 0.7])
    msle2 = mean_squared_error(np.log(1 + y_true), np.log(1 + y_pred),
                               multioutput=[0.3, 0.7])
    assert_almost_equal(msle, msle2, decimal=2)
Example 19
def test_base_chain_crossval_fit_and_predict():
    # Fit chain with cross_val_predict and verify predict performance
    X, Y = generate_multilabel_dataset_with_correlations()

    for chain in [ClassifierChain(LogisticRegression()),
                  RegressorChain(Ridge())]:
        chain.fit(X, Y)
        chain_cv = clone(chain).set_params(cv=3)
        chain_cv.fit(X, Y)
        Y_pred_cv = chain_cv.predict(X)
        Y_pred = chain.predict(X)

        assert Y_pred_cv.shape == Y_pred.shape
        assert not np.all(Y_pred == Y_pred_cv)
        if isinstance(chain, ClassifierChain):
            assert jaccard_score(Y, Y_pred_cv, average='samples') > .4
        else:
            assert mean_squared_error(Y, Y_pred_cv) < .25
Example 20
def __init__(self, transportAlgorithm="MappingTransport",
             scaler=False, params=None, feedback=True):
    try:
        from sklearn.metrics import mean_squared_error
        from itertools import product
        from sklearn.metrics import (
            f1_score, cohen_kappa_score, accuracy_score)
    except BaseException:
        raise ImportError('Please install itertools and scikit-learn')

    self.transportAlgorithm = transportAlgorithm
    self.feedback = feedback
    self.params_ = params

    if scaler:
        from sklearn.preprocessing import MinMaxScaler
        self.scaler = MinMaxScaler(feature_range=(-1, 1))
        self.scalerTarget = MinMaxScaler(feature_range=(-1, 1))
    else:
        self.scaler = scaler
Example 21
def mse_cv(self, cv):
    """
    This method performs cross-validation over mean squared error.

    Parameters
    ----------
    * cv : integer
        The number of cross-validation folds to perform

    Returns
    -------
    Returns the scores of the k-fold mean squared error.
    """
    mse = metrics.make_scorer(metrics.mean_squared_error)
    result = cross_validate(self.reg, self.X,
                            self.y, cv=cv,
                            scoring=(mse))
    return self.get_test_score(result)
Examples 22-25
The same two-line RMSE helper appears verbatim in four different projects:
def rmse(a, b):
    return sqrt(mean_squared_error(a, b))
Example 26
def rmse(y_pred, y_true):
    y_pred = y_pred.reshape(-1)
    y_true = y_true.reshape(-1)
    return np.sqrt(mean_squared_error(y_pred, y_true))
Example 27
def rmse(y, p):
    """Root Mean Squared Error (RMSE).

    Args:
        y (numpy.array): target
        p (numpy.array): prediction

    Returns:
        e (numpy.float64): RMSE
    """
    # check that target and prediction have the same shape
    assert y.shape == p.shape
    return np.sqrt(mse(y, p))
Example 28
def plot(results, ax):
    df = results[["size", "time"]]
    df.plot.scatter(x='size', y='time', c='LightBlue', ax=ax, marker=".")

    # linear regression
    x = df["size"].values.reshape((-1, 1))
    y = df["time"].values

    linear = LinearRegression().fit(x, y)
    y_pred = linear.predict(x)

    mqe = mean_squared_error(y, y_pred)
    r2 = r2_score(y, y_pred)

    ax.plot(x, y_pred, color='DarkBlue', linewidth=2)
    ax.set_title(
        "Linear regression between size and time "
        f"\n$mse={mqe:.3f}$ - $R^2={r2:.3f}$")
    ax.set_xlabel("Size")
    ax.set_ylabel("Seconds")

    return ax


# =============================================================================
# CLI MAIN
# =============================================================================
Example 29
def rmse(y_true, y_pred):
    assert y_true.shape == y_pred.shape
    return np.sqrt(mean_squared_error(y_true, y_pred))
Example 30
def mse(preds, dtrain):
    """Compute mean squared error.

    For more information see: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_error.html

    :param preds: Prediction values
    :param dtrain: Training data with labels
    :return: Metric name, mean squared error
    """
    labels = dtrain.get_label()
    return 'mse', mean_squared_error(labels, preds)