Python source code examples: sklearn.metrics.check_scoring()
Example 1
def check_scoring(estimator, scoring=None, **kwargs):
    res = sklearn_check_scoring(estimator, scoring=scoring, **kwargs)
    if callable(scoring):
        # Heuristic to ensure user has not passed a metric
        module = getattr(scoring, "__module__", None)
        if (
            hasattr(module, "startswith")
            and module.startswith("dask_ml.metrics.")
            and not module.startswith("dask_ml.metrics.scorer")
            and not module.startswith("dask_ml.metrics.tests.")
        ):
            raise ValueError(
                "scoring value %r looks like it is a metric "
                "function rather than a scorer. A scorer should "
                "require an estimator as its first parameter. "
                "Please use `make_scorer` to convert a metric "
                "to a scorer." % scoring
            )
    if scoring in SCORERS.keys():
        func, kwargs = SCORERS[scoring]
        return make_scorer(func, **kwargs)
    return res
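For context, the distinction this heuristic enforces can be reproduced with plain scikit-learn: a metric takes (y_true, y_pred), while a scorer takes (estimator, X, y). A minimal sketch (the dataset, estimator, and scoring name below are chosen only for illustration):

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, check_scoring, make_scorer

X, y = make_classification(random_state=0)
clf = LogisticRegression(max_iter=1000).fit(X, y)

# A metric takes (y_true, y_pred); make_scorer wraps it into a scorer
# that takes (estimator, X, y), which is what check_scoring expects.
metric_as_scorer = make_scorer(accuracy_score)
named_scorer = check_scoring(clf, scoring="accuracy")
print(metric_as_scorer(clf, X, y), named_scorer(clf, X, y))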
Example 2
def _validate_parameters(self, X, y):
    if (self.max_iter is not None) and self.max_iter < 1:
        raise ValueError(
            "Received max_iter={}. max_iter < 1 is not supported".format(
                self.max_iter
            )
        )
    # Make sure dask arrays are passed so error on unknown chunk size is raised
    if isinstance(X, dd.DataFrame):
        X = X.to_dask_array()
    if isinstance(y, (dd.DataFrame, dd.Series)):
        y = y.to_dask_array()
    kwargs = dict(accept_unknown_chunks=False, accept_dask_dataframe=False)
    X = self._check_array(X, **kwargs)
    y = self._check_array(y, ensure_2d=False, **kwargs)
    scorer = check_scoring(self.estimator, scoring=self.scoring)
    return X, y, scorer
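A small sketch of the fallback behaviour relied on here: when scoring is None, scikit-learn's check_scoring returns a scorer that defers to the estimator's own score method (estimator and data below are illustrative only):

from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge
from sklearn.metrics import check_scoring

X, y = make_regression(random_state=0)
est = Ridge().fit(X, y)

# With scoring=None the returned scorer calls est.score(X, y) under the hood.
scorer = check_scoring(est, scoring=None)
print(scorer(est, X, y), est.score(X, y))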
Example 3
def _fit_one_fold(fit_set, models, X, y, scoring, fit_params):
    """
    Fits the given estimator on one fold of training data.
    Scores the fitted estimator against the test fold.
    """
    train = fit_set[0][0]
    test = fit_set[0][1]
    estimator_ = _clone(models[fit_set[1]["model_index"]][1])
    parameters = fit_set[1]["param_set"]
    X_train, y_train = _safe_split(estimator_, X, y, train)
    X_test, y_test = _safe_split(estimator_, X, y, test, train)
    if parameters is not None:
        estimator_.set_params(**parameters)
    estimator_.fit(X_train, y_train, **fit_params)
    scorer = check_scoring(estimator_, scoring=scoring)
    is_multimetric = not callable(scorer)
    out_dct = fit_set[1]
    out_dct["score"] = _score(
        estimator_, X_test, y_test,
        scorer, is_multimetric
    )
    return out_dct
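A rough public-API analogue of this per-fold helper, assuming an ordinary scikit-learn workflow (KFold, clone, and a scorer obtained from check_scoring); the parameter value set per fold is arbitrary:

from sklearn.base import clone
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import check_scoring
from sklearn.model_selection import KFold

X, y = make_classification(random_state=0)
base = LogisticRegression(max_iter=1000)
scorer = check_scoring(base, scoring="accuracy")

for train, test in KFold(n_splits=3).split(X):
    est = clone(base).set_params(C=0.5)    # per-fold parameter set
    est.fit(X[train], y[train])            # fit on the training fold
    print(scorer(est, X[test], y[test]))   # score against the test fold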
Example 4
def _scoring(self, net, X_test, y_test):
    """Resolve scoring and apply it to data. Use cached prediction
    instead of running inference again, if available."""
    scorer = check_scoring(net, self.scoring_)
    return scorer(net, X_test, y_test)
Example 5
def __init__(self, trained_model, validation_df, features, target,
             scoring, n_jobs=None):
    self.trained_model = trained_model
    self.df = validation_df.copy()
    self.features = features
    self.target = target
    self.n_jobs = n_jobs
    self.scorer = check_scoring(estimator=self.trained_model, scoring=scoring)

    # FLOFO defaults
    self.num_bins = 10
    self.shuffle_func = np.random.permutation
    self.feature_group_len = 2
    self.num_sampling = 10

    min_data_needed = 10 * (self.num_bins ** self.feature_group_len)
    if self.df.shape[0] < min_data_needed:
        raise Exception("Small validation set (<{})".format(min_data_needed))
    if len(self.features) <= self.feature_group_len:
        raise Exception("FLOFO needs more than {} features".format(self.feature_group_len))

    if self.n_jobs is not None and self.n_jobs > 1:
        warning_str = ("Warning: If your model is multithreaded, please initialise the number "
                       "of jobs of LOFO to be equal to 1, otherwise you may experience performance issues.")
        warnings.warn(warning_str)

    self._bin_features()
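The scorer stored here is later used in a leave-one-feature-out / permutation style importance loop. A minimal, generic sketch of that idea (not the library's actual implementation; data and model are invented for illustration):

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import check_scoring

X, y = make_classification(n_features=5, random_state=0)
model = LogisticRegression(max_iter=1000).fit(X, y)
scorer = check_scoring(model, scoring="accuracy")

baseline = scorer(model, X, y)
rng = np.random.RandomState(0)
for j in range(X.shape[1]):
    X_perm = X.copy()
    X_perm[:, j] = rng.permutation(X_perm[:, j])   # scramble one feature
    # Importance ~ how much the score drops when feature j is scrambled.
    print(j, baseline - scorer(model, X_perm, y))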
Example 6
def _estimate_performances(self, X, y):
    performances = np.zeros(self.n_classifiers_)
    for idx, clf in enumerate(self.pool_classifiers_):
        scorer = check_scoring(clf, self.scoring)
        performances[idx] = scorer(clf, X, y)
    return performances
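The same pattern can be exercised outside the ensemble class; a minimal sketch with a hand-built, hypothetical pool of two fitted classifiers:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import check_scoring
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(random_state=0)
pool = [LogisticRegression(max_iter=1000).fit(X, y),
        DecisionTreeClassifier(max_depth=3, random_state=0).fit(X, y)]

# One scorer per classifier, mirroring _estimate_performances above.
performances = np.array([check_scoring(clf, scoring="accuracy")(clf, X, y)
                         for clf in pool])
print(performances)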
Example 7
def score_estimator(scoring, estimator, coordinates, data, weights=None):
    """
    Score the given gridder against the given data using the given metric.

    If the data and predictions have more than 1 component, the scores of each
    component will be averaged.

    Parameters
    ----------
    scoring : str or callable
        A scoring specification known to scikit-learn. See
        :func:`sklearn.metrics.check_scoring`.
    estimator : a Verde gridder
        The gridder to score. Usually derived from
        :class:`verde.base.BaseGridder`.
    coordinates : tuple of arrays
        Arrays with the coordinates of each data point. Should be in the
        following order: (easting, northing, vertical, ...).
        For the specific definition of coordinate systems and what these
        names mean, see the class docstring.
    data : array or tuple of arrays
        The data values of each data point. If the data has more than one
        component, *data* must be a tuple of arrays (one for each
        component).
    weights : None or array or tuple of arrays
        If not None, then the weights assigned to each data point. If more
        than one data component is provided, you must provide a weights
        array for each data component (if not None).

    Returns
    -------
    score : float
        The score.

    """
    coordinates, data, weights = check_fit_input(
        coordinates, data, weights, unpack=False
    )
    predicted = check_data(estimator.predict(coordinates))
    scorer = check_scoring(DummyEstimator, scoring=scoring)
    result = np.mean(
        [
            scorer(
                DummyEstimator(pred.ravel()),
                coordinates,
                data[i].ravel(),
                sample_weight=weights[i],
            )
            for i, pred in enumerate(predicted)
        ]
    )
    return result
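One detail worth noting here is that scorers returned by check_scoring accept a sample_weight keyword, which is how per-point weights can reach the underlying metric. A standalone sketch (regression data and weights invented for illustration):

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import check_scoring

X, y = make_regression(noise=1.0, random_state=0)
weights = np.random.RandomState(0).uniform(0.5, 1.5, size=len(y))

est = LinearRegression().fit(X, y)
scorer = check_scoring(est, scoring="r2")

# sample_weight is forwarded to the metric (r2_score here).
print(scorer(est, X, y, sample_weight=weights))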
Example 8
def _check_multimetric_scoring(estimator, scoring=None):
    """Check the scoring parameter in cases when multiple metrics are allowed."""
    if callable(scoring) or scoring is None or isinstance(scoring, str):
        scorers = {"score": check_scoring(estimator, scoring=scoring)}
        return scorers, False
    else:
        err_msg_generic = ("scoring should either be a single string or "
                           "callable for single metric evaluation or a "
                           "list/tuple of strings or a dict of scorer name "
                           "mapped to the callable for multiple metric "
                           "evaluation. Got %s of type %s"
                           % (repr(scoring), type(scoring)))
        if isinstance(scoring, (list, tuple, set)):
            err_msg = ("The list/tuple elements must be unique "
                       "strings of predefined scorers. ")
            invalid = False
            try:
                keys = set(scoring)
            except TypeError:
                invalid = True
            if invalid:
                raise ValueError(err_msg)

            if len(keys) != len(scoring):
                raise ValueError(err_msg + "Duplicate elements were found in"
                                 " the given list. %r" % repr(scoring))
            elif len(keys) > 0:
                if not all(isinstance(k, str) for k in keys):
                    if any(callable(k) for k in keys):
                        raise ValueError(err_msg +
                                         "One or more of the elements were "
                                         "callables. Use a dict of score name "
                                         "mapped to the scorer callable. "
                                         "Got %r" % repr(scoring))
                    else:
                        raise ValueError(err_msg +
                                         "Non-string types were found in "
                                         "the given list. Got %r"
                                         % repr(scoring))
                scorers = {scorer: check_scoring(estimator, scoring=scorer)
                           for scorer in scoring}
            else:
                raise ValueError(err_msg +
                                 "Empty list was given. %r" % repr(scoring))
        elif isinstance(scoring, dict):
            keys = set(scoring)
            if not all(isinstance(k, str) for k in keys):
                raise ValueError("Non-string types were found in the keys of "
                                 "the given dict. scoring=%r" % repr(scoring))
            if len(keys) == 0:
                raise ValueError("An empty dict was passed. %r"
                                 % repr(scoring))
            scorers = {key: check_scoring(estimator, scoring=scorer)
                       for key, scorer in scoring.items()}
        else:
            raise ValueError(err_msg_generic)
        return scorers, True
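For a list of metric names, the helper above effectively reduces to building a name-to-scorer dict with check_scoring. A minimal sketch (metric names and estimator are only examples):

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import check_scoring

X, y = make_classification(random_state=0)
clf = LogisticRegression(max_iter=1000).fit(X, y)

scoring = ["accuracy", "roc_auc"]
scorers = {name: check_scoring(clf, scoring=name) for name in scoring}
print({name: scorer(clf, X, y) for name, scorer in scorers.items()})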
Example 9
def fit(self, X, y):
    """Fit the static selection model by selecting an ensemble of classifiers
    containing the base classifiers with highest accuracy in the given
    dataset.

    Parameters
    ----------
    X : array of shape (n_samples, n_features)
        Data used to fit the model.

    y : array of shape (n_samples)
        class labels of each example in X.

    Returns
    -------
    self : object
        Returns self.
    """
    self._validate_parameters()

    X, y = check_X_y(X, y)

    super(StaticSelection, self).fit(X, y)

    self.n_classifiers_ensemble_ = int(
        self.n_classifiers_ * self.pct_classifiers)

    performances = np.zeros(self.n_classifiers_)

    if not self.base_already_encoded_:
        y_encoded = y
    else:
        y_encoded = self.enc_.transform(y)

    for clf_idx, clf in enumerate(self.pool_classifiers_):
        scorer = check_scoring(clf, self.scoring)
        performances[clf_idx] = scorer(clf, X, y_encoded)

    self.clf_indices_ = np.argsort(performances)[::-1][
        0:self.n_classifiers_ensemble_]
    self.ensemble_ = [self.pool_classifiers_[clf_idx] for clf_idx in
                      self.clf_indices_]

    return self