Python source code examples: sklearn.preprocessing.QuantileTransformer()
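Before the project-specific examples, here is a minimal self-contained sketch of how QuantileTransformer is typically used; the data and parameter values below are illustrative only, not taken from any of the examples.

import numpy as np
from sklearn.preprocessing import QuantileTransformer

X = np.random.default_rng(0).lognormal(size=(1000, 3))   # skewed toy data
qt = QuantileTransformer(n_quantiles=100, output_distribution='normal', random_state=0)
X_gauss = qt.fit_transform(X)           # each column mapped to an approximately normal distribution
X_back = qt.inverse_transform(X_gauss)  # mapped back to the original scale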
Example 1
def fit(self, X, y=None):
    """Compute the lower and upper quantile cutoffs, the columns to transform, and each column's quantiles.

    Parameters
    ----------
    X : array-like, shape [n_samples, n_features]
        The data array to transform. Must be numeric, non-sparse, and two-dimensional.

    Returns
    -------
    self : QuantileExtremeValueTransformer
    """
    super().fit(X)
    X = check_array(X)
    self.quantile_transformer_ = QuantileTransformer(random_state=0, copy=True)
    self.quantile_transformer_.fit(X)
    return self
Example 2
def do_feature_engineering(list_text):
    df = pd.DataFrame(list_text, columns=["col2"])

    feature_list = []
    feature = get_length_related_features_col2(df)
    feature_list.append(feature)
    feature = get_col2_re_features(df)
    feature_list.append(feature)

    # all feature frames must share the same index before concatenation
    index = feature_list[0].index
    for feature_dataset in feature_list[1:]:
        pd.testing.assert_index_equal(index, feature_dataset.index)
    data = pd.concat(feature_list, axis=1)

    # map each feature column to a uniform [0, 1] distribution (the transformer is refit per column)
    qt = QuantileTransformer(random_state=2019)
    for col in data.columns:
        data[col] = qt.fit_transform(data[[col]])
    return data
Example 3
def fit(self, X):
    """Fit the quantile mapping model.

    Parameters
    ----------
    X : array-like, shape [n_samples, n_features]
        Training data.
    """
    X = ensure_samples_features(X)
    qt_kws = self.qt_kwargs.copy()
    if "n_quantiles" not in qt_kws:
        qt_kws["n_quantiles"] = len(X)

    # maybe detrend the input datasets
    if self.detrend:
        x_to_cdf = LinearTrendTransformer(**self.lt_kwargs).fit_transform(X)
    else:
        x_to_cdf = X

    # calculate the cdfs for X
    # TODO: replace this transformer with something that uses robust
    # empirical cdf plotting positions
    self.x_cdf_fit_ = QuantileTransformer(**qt_kws).fit(x_to_cdf)
    return self
Example 4
def _transform_function(self, x, idx=None):
    """Apply the single-column quantile transform from ``sklearn.preprocessing.QuantileTransformer``.

    Uses the ``quantile_transformer_.quantiles_`` computed during ``fit`` when an index is given;
    otherwise the quantiles are calculated from the input ``x``.
    """
    if idx is not None:  # check against None so that column index 0 also uses the fitted quantiles
        return self.quantile_transformer_._transform_col(  # pylint: disable=protected-access
            x, self.quantile_transformer_.quantiles_[:, idx], False
        )
    return quantile_transform_nonrandom(x)
Example 5
def __init__(self, n_quantiles=1000, output_distribution='uniform', ignore_implicit_zeros=False, subsample=100000, random_state=None, copy=True):
    self._hyperparams = {
        'n_quantiles': n_quantiles,
        'output_distribution': output_distribution,
        'ignore_implicit_zeros': ignore_implicit_zeros,
        'subsample': subsample,
        'random_state': random_state,
        'copy': copy}
    self._wrapped_model = SKLModel(**self._hyperparams)
Example 6
def __init__(self, options):
    self.handle_options(options)
    out_params = convert_params(
        options.get('params', {}),
        bools=['copy'],
        ints=['n_quantiles'],
        strs=['output_distribution']
    )
    self.estimator = _QuantileTransformer(**out_params)
    self.columns = None
Example 7
def quantile_transform(train_targets, non_train_targets):
    transformer = QuantileTransformer(output_distribution="normal", n_quantiles=100)
    train_targets[train_targets.columns] = transformer.fit_transform(train_targets.values)
    non_train_targets[train_targets.columns] = transformer.transform(non_train_targets.values)
    return train_targets, non_train_targets, transformer
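Example 7 returns the fitted transformer so that values produced in the transformed (normal) space can later be mapped back to the original target scale. A hedged sketch of that inversion step follows; the variable names are illustrative, not taken from the original project.

import numpy as np
import pandas as pd
from sklearn.preprocessing import QuantileTransformer

train_targets = pd.DataFrame({"y": np.random.default_rng(0).lognormal(size=500)})
transformer = QuantileTransformer(output_distribution="normal", n_quantiles=100)
train_targets[train_targets.columns] = transformer.fit_transform(train_targets.values)

predictions_normal = train_targets.values                                 # stand-in for real model predictions
predictions_original = transformer.inverse_transform(predictions_normal)  # back on the original target scale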
Example 8
def bad_quantile_transform(train_targets, non_train_targets):
    transformer = QuantileTransformer(output_distribution="normal", n_quantiles=100)
    train_targets[train_targets.columns] = transformer.fit_transform(train_targets.values)
    non_train_targets[train_targets.columns] = transformer.transform(non_train_targets.values)
    return train_targets, non_train_targets, "i am the wrong type for an inversion result"
Example 9
def my_quantile_transform(train_targets, non_train_targets):
    transformer = QuantileTransformer(output_distribution="uniform")
    train_targets[train_targets.columns] = transformer.fit_transform(train_targets.values)
    non_train_targets[train_targets.columns] = transformer.transform(non_train_targets.values)
    return train_targets, non_train_targets
Example 10
def test_basic(self, output_distribution):
    rs = da.random.RandomState(0)
    a = dpp.QuantileTransformer(output_distribution=output_distribution)
    b = spp.QuantileTransformer(output_distribution=output_distribution)

    X = rs.uniform(size=(1000, 3), chunks=50)
    a.fit(X)
    b.fit(X)
    assert_estimator_equal(a, b, atol=0.02)

    # set the quantiles, so that from here out, we're exact
    a.quantiles_ = b.quantiles_
    assert_eq_ar(a.transform(X), b.transform(X), atol=1e-7)
    assert_eq_ar(X, a.inverse_transform(a.transform(X)))
Example 11
def test_types(self, type_, kwargs):
    X = np.random.uniform(size=(1000, 3))
    dX = type_(X, **kwargs)
    qt = spp.QuantileTransformer()
    qt.fit(X)
    dqt = dpp.QuantileTransformer()
    dqt.fit(dX)
Example 12
def test_fit_transform_frame(self):
    df = pd.DataFrame(np.random.randn(1000, 3))
    ddf = dd.from_pandas(df, 2)

    a = spp.QuantileTransformer()
    b = dpp.QuantileTransformer()

    expected = a.fit_transform(df)
    result = b.fit_transform(ddf)
    assert_eq_ar(result, expected, rtol=1e-3, atol=1e-3)
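The three test examples above compare a dask QuantileTransformer (dpp) against the scikit-learn one (spp); the first test's comment about setting quantiles_ to become "exact" reflects that the dask version estimates quantiles approximately. Below is a standalone sketch, assuming dpp refers to dask_ml.preprocessing.

import dask.array as da
from dask_ml.preprocessing import QuantileTransformer

X = da.random.uniform(size=(1000, 3), chunks=100)
qt = QuantileTransformer(output_distribution="uniform")
Xt = qt.fit_transform(X)   # a lazy dask array; call Xt.compute() to materialize the result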
Example 13
def preprocess_train(self, X, feature_types, vect_max_features):
    transformer_list = []
    if len(feature_types['language']) > 0:
        pipeline = Pipeline(steps=[
            ("preparator", NlpDataPreprocessor(nlp_cols=feature_types['language'])),
            ("vectorizer", TfidfVectorizer(ngram_range=self.params['proc.ngram_range'], sublinear_tf=True,
                                           max_features=vect_max_features, tokenizer=self.tokenize))
        ])
        transformer_list.append(('vect', pipeline))
    if len(feature_types['onehot']) > 0:
        pipeline = Pipeline(steps=[
            ('generator', OheFeaturesGenerator(cats_cols=feature_types['onehot'])),
        ])
        transformer_list.append(('cats', pipeline))
    if len(feature_types['continuous']) > 0:
        pipeline = Pipeline(steps=[
            ('generator', NumericDataPreprocessor(cont_cols=feature_types['continuous'])),
            ('imputer', SimpleImputer(strategy=self.params['proc.impute_strategy'])),
            ('scaler', StandardScaler())
        ])
        transformer_list.append(('cont', pipeline))
    if len(feature_types['skewed']) > 0:
        pipeline = Pipeline(steps=[
            ('generator', NumericDataPreprocessor(cont_cols=feature_types['skewed'])),
            ('imputer', SimpleImputer(strategy=self.params['proc.impute_strategy'])),
            ('quantile', QuantileTransformer(output_distribution='normal')),  # or output_distribution='uniform'
        ])
        transformer_list.append(('skew', pipeline))

    self.pipeline = FeatureUnion(transformer_list=transformer_list)
    self.pipeline.fit(X)