Python源码示例:sklearn.preprocessing.FunctionTransformer()
示例1
def __init__(self, pos_features, pipeline_obj_path):
    """Load a pickled preprocessing bundle and build the sign-log transformer.

    Args:
        pos_features: list of positional features to use
        pipeline_obj_path: path to the serialized (pickled) pipeline object

    Raises:
        AssertionError: if the feature list stored in the pickle is not
            equal to ``pos_features``.
    """
    self.pos_features = pos_features
    self.pipeline_obj_path = pipeline_obj_path

    # SECURITY NOTE(review): pickle.load can execute arbitrary code embedded
    # in the file. Only point pipeline_obj_path at artifacts you trust.
    with open(self.pipeline_obj_path, "rb") as f:
        pipeline_obj = pickle.load(f)

    # The pickled object is read positionally: index 0 is the feature list
    # it was built with, indices 1 and 2 are kept as minmax_scaler and imp.
    self.POS_FEATURES = pipeline_obj[0]
    self.minmax_scaler = pipeline_obj[1]
    self.imp = pipeline_obj[2]
    self.funct_transform = FunctionTransformer(
        func=sign_log_func,
        inverse_func=sign_log_func_inverse,
    )

    # For simplicity, the current pos_features must be the same as the ones
    # stored in the pickle. Raised explicitly (instead of a bare ``assert``)
    # so the check is not stripped when Python runs with -O.
    if not (self.POS_FEATURES == self.pos_features):
        raise AssertionError(
            "pos_features does not match the feature list stored in "
            "the pickled pipeline object"
        )
示例2
def __init__(
    self,
    alpha=1.0,
    threshold=0.1,
    degree=3,
    operators=None,
    dt=1.0,
    n_jobs=1,
    derivative=None,
    feature_names=None,
    kw=None,
):
    """Store the constructor arguments on the instance.

    Parameters
    ----------
    alpha, threshold, degree, operators, n_jobs, feature_names
        Stored unchanged under attributes of the same name.
    dt
        Only forwarded (as ``kw_args={"dt": dt}``) to the default derivative
        transformer; it is not stored on the instance by this method.
    derivative
        Object used as ``self.derivative``. When falsy, a
        ``FunctionTransformer`` wrapping ``_derivative`` is created instead.
    kw
        Extra keyword options stored as ``self.kw``. ``None`` (the default)
        yields a fresh empty dict for each instance.
    """
    self.alpha = alpha
    self.threshold = threshold
    self.degree = degree
    self.operators = operators
    self.n_jobs = n_jobs
    self.derivative = derivative or FunctionTransformer(
        func=_derivative, kw_args={"dt": dt}
    )
    self.feature_names = feature_names
    # A ``{}`` literal as the default would be one dict shared by every
    # instance built without ``kw``; create a new one per instance instead.
    self.kw = {} if kw is None else kw
示例3
def test_different_implementations():
    """RandomIntervalFeatureExtractor must match segmenting then row-wise mean."""
    seed = 1233
    X_train, y_train = make_classification_problem()

    # Reference: chain the segmenter with a row-wise np.mean transformer.
    segmenter = RandomIntervalSegmenter(n_intervals='sqrt', random_state=seed)
    row_mean = RowTransformer(FunctionTransformer(func=np.mean, validate=False))
    segments = segmenter.fit_transform(X_train)
    chained = row_mean.fit_transform(segments)

    # Same computation through the single combined transformer.
    extractor = RandomIntervalFeatureExtractor(
        n_intervals='sqrt',
        features=[np.mean],
        random_state=seed,
    )
    combined = extractor.fit_transform(X_train)

    np.testing.assert_array_equal(chained, combined)
    # NOTE(review): the original announced a further comparison against a
    # transformer pipeline using TSFeatureUnion here, but no code follows it
    # in this excerpt.
示例4
def test_different_pipelines():
    """A segment + FeatureUnion pipeline must match RandomIntervalFeatureExtractor."""
    seed = 1233
    X_train, y_train = make_classification_problem()

    segment_step = (
        'segment',
        RandomIntervalSegmenter(n_intervals='sqrt', random_state=seed),
    )
    # One row-wise transformer per summary feature, concatenated by the union.
    union = FeatureUnion([
        ('mean', RowTransformer(
            FunctionTransformer(func=np.mean, validate=False))),
        ('std', RowTransformer(
            FunctionTransformer(func=np.std, validate=False))),
        ('slope', RowTransformer(
            FunctionTransformer(func=time_series_slope, validate=False))),
    ])
    pipe = Pipeline([segment_step, ('transform', union)])
    from_pipeline = pipe.fit_transform(X_train)

    extractor = RandomIntervalFeatureExtractor(
        n_intervals='sqrt',
        features=[np.mean, np.std, time_series_slope],
        random_state=seed,
    )
    from_extractor = extractor.fit_transform(X_train)

    np.testing.assert_array_equal(from_pipeline, from_extractor)
    # Both routes must also have drawn the same intervals.
    fitted_segmenter = pipe.steps[0][1]
    np.testing.assert_array_equal(fitted_segmenter.intervals_,
                                  extractor.intervals_)
示例5
def test_ColumnTransformer_pipeline():
    """Fit and predict with a ColumnTransformer -> Tabularizer -> forest pipeline."""
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, y_test = load_basic_motions(split="test", return_X_y=True)

    def identity(X):
        # Identity function transformer: hands the input back untouched.
        return X

    extract = ColumnTransformer([
        ('id0', FunctionTransformer(func=identity, validate=False), ['dim_0']),
        ('id1', FunctionTransformer(func=identity, validate=False), ['dim_1']),
    ])
    model = Pipeline(steps=[
        ('extract', extract),
        ('tabularise', Tabularizer()),
        ('classify', RandomForestClassifier(n_estimators=2, random_state=1)),
    ])

    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    # One prediction per test row, covering exactly the labels in y_test.
    assert predictions.shape[0] == y_test.shape[0]
    np.testing.assert_array_equal(np.unique(predictions), np.unique(y_test))
示例6
def test_FeatureUnion_pipeline():
    """Pipeline with segmentation plus multiple feature extraction steps.

    Reads X_train, y_train, X_test and y_test from the enclosing scope; they
    are not defined inside this function.
    """
    mean_rows = RowTransformer(FunctionTransformer(func=np.mean, validate=False))
    std_rows = RowTransformer(FunctionTransformer(func=np.std, validate=False))
    features = FeatureUnion([('mean', mean_rows), ('std', std_rows)])

    clf = Pipeline([
        ('segment', RandomIntervalSegmenter(n_intervals=3)),
        ('transform', features),
        ('clf', DecisionTreeClassifier()),
    ])
    clf.fit(X_train, y_train)
    predictions = clf.predict(X_test)

    assert predictions.shape[0] == y_test.shape[0]
    np.testing.assert_array_equal(np.unique(predictions), np.unique(y_test))
示例7
def test_objectmapper(self):
    """Each name on ``df.preprocessing`` must be the very same object as on ``pp``."""
    df = pdml.ModelFrame([])
    # Checked in this order; the first mismatch fails the test.
    expected_names = (
        'Binarizer',
        'FunctionTransformer',
        'Imputer',
        'KernelCenterer',
        'LabelBinarizer',
        'LabelEncoder',
        'MultiLabelBinarizer',
        'MaxAbsScaler',
        'MinMaxScaler',
        'Normalizer',
        'OneHotEncoder',
        'PolynomialFeatures',
        'RobustScaler',
        'StandardScaler',
    )
    for name in expected_names:
        self.assertIs(getattr(df.preprocessing, name), getattr(pp, name))
示例8
def _get_transformations_one_to_many_greater(self, feature_names):
    """Build a ColumnTransformer that yields more columns than it receives.

    Every feature except the last is passed through unchanged under a step
    named after its position ("0", "1", ...). The last feature goes through
    a transformer that tiles its column three times side by side.
    """
    names = list(feature_names)
    steps = [
        (str(position), "passthrough", [name])
        for position, name in enumerate(names[:-1])
    ]

    def triple_columns(x):
        # Repeat the input three times along the column axis.
        return np.tile(x, (1, 3))

    tiler = FunctionTransformer(triple_columns)
    steps.append(("copy_transformer", tiler, [names[-1]]))
    return ColumnTransformer(steps)
示例9
def test_multiply_by_function_transformer(self):
    """``multiply_by`` validates with its ``factor`` kwarg and raises TypeError without it."""
    from gordo.machine.model.transformer_funcs.general import multiply_by

    # With the required argument supplied through kw_args.
    with_factor = FunctionTransformer(func=multiply_by, kw_args={"factor": 2})
    self._validate_transformer(with_factor)

    # Without the required argument, validation is expected to fail.
    without_factor = FunctionTransformer(func=multiply_by)
    with self.assertRaises(TypeError):
        self._validate_transformer(without_factor)
示例10
def test_transform_target_regressor_1d_transformer(X, y):
    """Check shape and value consistency of TransformedTargetRegressor.

    The transformer is a FunctionTransformer built with validate=False, so it
    is applied to ``y`` as given; the test is meant to run with both 1D and
    2D ``y``.
    """
    shift = FunctionTransformer(
        func=lambda x: x + 1,
        inverse_func=lambda x: x - 1,
        validate=False,
    )
    regr = TransformedTargetRegressor(
        regressor=LinearRegression(),
        transformer=shift,
    )
    regr.fit(X, y)
    y_pred = regr.predict(X)
    assert y.shape == y_pred.shape

    # Forward transform consistency.
    y_tran = regr.transformer_.transform(y)
    _check_shifted_by_one(y, y_tran)
    # NOTE(review): this repeats the y_pred shape check from above; possibly
    # y_tran was intended - confirm before changing.
    assert y.shape == y_pred.shape

    # Inverse transform consistency.
    y_back = regr.transformer_.inverse_transform(y_tran)
    assert_allclose(y, y_back.squeeze())

    # The wrapped regressor must agree with a manual transform-then-fit.
    manual_lr = LinearRegression()
    shift_clone = clone(shift)
    manual_lr.fit(X, shift_clone.fit_transform(y))
    manual_pred = manual_lr.predict(X)
    assert_allclose(y_pred, shift_clone.inverse_transform(manual_pred))
    assert_allclose(regr.regressor_.coef_, manual_lr.coef_)
示例11
def test_np_log():
    """FunctionTransformer(np.log1p).transform must equal np.log1p applied directly."""
    X = np.arange(10).reshape((5, 2))
    # Test that the numpy.log example still works.
    transformed = FunctionTransformer(np.log1p).transform(X)
    assert_array_equal(transformed, np.log1p(X))
示例12
def test_kw_arg():
    """``kw_args`` given at construction are forwarded to the wrapped function."""
    X = np.linspace(0, 1, num=10).reshape((5, 2))
    rounder = FunctionTransformer(np.around, kw_args={"decimals": 3})
    # Rounding through the transformer must match a direct np.around call.
    expected = np.around(X, decimals=3)
    assert_array_equal(rounder.transform(X), expected)
示例13
def test_kw_arg_update():
    """Mutating ``kw_args`` after construction affects later transform calls."""
    X = np.linspace(0, 1, num=10).reshape((5, 2))
    rounder = FunctionTransformer(np.around, kw_args={"decimals": 3})
    rounder.kw_args['decimals'] = 1
    # The updated number of decimals is the one that must be applied.
    expected = np.around(X, decimals=1)
    assert_array_equal(rounder.transform(X), expected)
示例14
def test_inverse_transform():
    """``inverse_transform`` applies ``inverse_func`` with ``inv_kw_args``."""
    X = np.array([1, 4, 9, 16]).reshape((2, 2))
    transformer = FunctionTransformer(
        func=np.sqrt,
        inverse_func=np.around,
        inv_kw_args={"decimals": 3},
    )
    # Forward is sqrt and "inverse" is rounding, so the round trip equals
    # the rounded square root.
    round_trip = transformer.inverse_transform(transformer.transform(X))
    expected = np.around(np.sqrt(X), decimals=3)
    assert_array_equal(round_trip, expected)
示例15
def test_check_inverse():
    """Exercise ``check_inverse=True`` on dense, CSR and CSC input."""
    X_dense = np.array([1, 4, 9, 16], dtype=np.float64).reshape((2, 2))
    candidates = [
        X_dense,
        sparse.csr_matrix(X_dense),
        sparse.csc_matrix(X_dense),
    ]

    for X in candidates:
        # Sparse input is only accepted when explicitly allowed.
        accept_sparse = sparse.issparse(X)

        # sqrt / around are not inverses of each other: fit must warn.
        mismatched = FunctionTransformer(
            func=np.sqrt,
            inverse_func=np.around,
            accept_sparse=accept_sparse,
            check_inverse=True,
            validate=True,
        )
        assert_warns_message(
            UserWarning,
            "The provided functions are not strictly"
            " inverse of each other. If you are sure you"
            " want to proceed regardless, set"
            " 'check_inverse=False'.",
            mismatched.fit,
            X,
        )

        # expm1 / log1p are inverses: no warning, and the round trip holds.
        matched = FunctionTransformer(
            func=np.expm1,
            inverse_func=np.log1p,
            accept_sparse=accept_sparse,
            check_inverse=True,
            validate=True,
        )
        Xt = assert_no_warnings(matched.fit_transform, X)
        assert_allclose_dense_sparse(X, matched.inverse_transform(Xt))

    # check that we don't check inverse when one of the func or inverse is not
    # provided.
    only_forward = FunctionTransformer(
        func=np.expm1, inverse_func=None, check_inverse=True, validate=True
    )
    assert_no_warnings(only_forward.fit, X_dense)

    only_inverse = FunctionTransformer(
        func=None, inverse_func=np.expm1, check_inverse=True, validate=True
    )
    assert_no_warnings(only_inverse.fit, X_dense)
示例16
def test_function_transformer_future_warning(validate, expected_warning):
    """fit_transform with the given ``validate`` must emit ``expected_warning``.

    When ``expected_warning`` is None, no warning at all may be recorded.
    """
    # FIXME: to be removed in 0.22
    data = np.random.randn(100, 10)
    ft = FunctionTransformer(validate=validate)
    with pytest.warns(expected_warning) as recorded:
        ft.fit_transform(data)
    if expected_warning is None:
        assert len(recorded) == 0
示例17
def test_function_transformer_frame():
    """With validate=False a DataFrame input must come back with a ``loc`` attribute."""
    pd = pytest.importorskip('pandas')
    frame = pd.DataFrame(np.random.randn(100, 10))
    passthrough = FunctionTransformer(validate=False)
    result = passthrough.fit_transform(frame)
    assert hasattr(result, 'loc')
示例18
def get_params(self, deep=True):
    """Get the parameters (if any) of the given feature function.

    The keyword defaults of the function behind ``self.func`` are collected
    and then overridden by any entries in ``self.params``.

    Parameters
    ----------
    deep : bool (default: True)
        If True, the method will get the parameters of the transformer.
        (See :class:`~sklearn.preprocessing.FunctionTransformer`).
        The value is not read by this implementation.

    Returns
    -------
    dict
        Mapping of parameter name to value. Empty when the function declares
        no default values; ``self.params`` is not consulted in that case.
    """
    func_to_inspect = _get_python_func(self.func)

    # Get code object from the function ('func_code' is the Python 2 name).
    if hasattr(func_to_inspect, 'func_code'):
        func_code = func_to_inspect.func_code
    else:
        func_code = func_to_inspect.__code__
    args, _, _ = getargs(func_code)

    # Get defaults from the function. The probe must test the attribute that
    # is actually read: the previous code checked 'defaults' but then read
    # 'func_defaults', so the branch could never be taken safely.
    if hasattr(func_to_inspect, 'func_defaults'):
        defaults = func_to_inspect.func_defaults
    else:
        defaults = func_to_inspect.__defaults__

    if defaults is None:
        return {}

    # Defaults belong to the trailing positional arguments.
    n_defaults = len(defaults)
    func_params = dict(zip(args[-n_defaults:], defaults))
    if self.params is not None:
        func_params.update(self.params)
    return func_params
示例19
def __init__(self, *args, **kwargs):
    """Create the wrapped FunctionTransformer from the given arguments.

    All positional and keyword arguments are forwarded unchanged; the result
    is kept as ``self.ft``.
    """
    wrapped = FunctionTransformer(*args, **kwargs)
    self.ft = wrapped
示例20
def on_field(f: str, *vec) -> Pipeline:
    """Build a pipeline that first selects item ``f`` and then applies ``vec``.

    The selection step is ``itemgetter(f)`` wrapped in a FunctionTransformer
    with validation turned off; the remaining steps are appended in order.
    """
    select_field = FunctionTransformer(itemgetter(f), validate=False)
    return make_pipeline(select_field, *vec)
示例21
def main():
    """Vectorize the training data, fit four models in threads, print validation RMSLE.

    Reads '../input/train.tsv', keeps rows with a positive price, takes the
    first of 20 shuffled KFold splits as train/validation, and averages the
    predictions returned by ``fit_predict`` over four input variants.
    """
    # Raw strings keep the regex '\w+' free of invalid-escape warnings; the
    # resulting pattern text is unchanged.
    vectorizer = make_union(
        on_field('name', Tfidf(max_features=100000, token_pattern=r'\w+')),
        on_field('text', Tfidf(max_features=100000, token_pattern=r'\w+', ngram_range=(1, 2))),
        on_field(['shipping', 'item_condition_id'],
                 FunctionTransformer(to_records, validate=False), DictVectorizer()),
        n_jobs=4)
    y_scaler = StandardScaler()
    with timer('process train'):
        train = pd.read_table('../input/train.tsv')
        train = train[train['price'] > 0].reset_index(drop=True)
        cv = KFold(n_splits=20, shuffle=True, random_state=42)
        train_ids, valid_ids = next(cv.split(train))
        train, valid = train.iloc[train_ids], train.iloc[valid_ids]
        # The target is log1p(price), standardized by y_scaler.
        y_train = y_scaler.fit_transform(np.log1p(train['price'].values.reshape(-1, 1)))
        X_train = vectorizer.fit_transform(preprocess(train)).astype(np.float32)
        print(f'X_train: {X_train.shape} of {X_train.dtype}')
        del train
    with timer('process valid'):
        X_valid = vectorizer.transform(preprocess(valid)).astype(np.float32)
    with ThreadPool(processes=4) as pool:
        # Binarized copies of the features. The builtin ``bool`` replaces the
        # ``np.bool`` alias, which no longer exists in current NumPy.
        Xb_train, Xb_valid = [x.astype(bool).astype(np.float32) for x in [X_train, X_valid]]
        # Two binarized and two raw runs, averaged element-wise.
        xs = [[Xb_train, Xb_valid], [X_train, X_valid]] * 2
        y_pred = np.mean(pool.map(partial(fit_predict, y_train=y_train), xs), axis=0)
    # Undo the scaling and the log1p to get back to price units.
    y_pred = np.expm1(y_scaler.inverse_transform(y_pred.reshape(-1, 1))[:, 0])
    print('Valid RMSLE: {:.4f}'.format(np.sqrt(mean_squared_log_error(valid['price'], y_pred))))
示例22
def get_estimator():
    """Assemble the merge -> column transformer -> random forest pipeline.

    'Arrival' and 'Departure' are one-hot encoded (unknown categories
    ignored), 'DateOfDeparture' is dropped, and every other column is passed
    through unchanged.
    """
    one_hot_columns = ['Arrival', 'Departure']
    dropped_columns = ['DateOfDeparture']

    merge_step = FunctionTransformer(_merge_external_data, validate=False)
    column_step = make_column_transformer(
        (OneHotEncoder(handle_unknown='ignore'), one_hot_columns),
        ('drop', dropped_columns),
        remainder='passthrough',
    )
    forest = RandomForestRegressor(
        n_estimators=10, max_depth=10, max_features=10
    )

    return Pipeline(steps=[
        ('merge', merge_step),
        ('transfomer', column_step),
        ('regressor', forest),
    ])
示例23
def test_row_transformer_function_transformer_series_to_primitives():
    """Row-wise np.mean keeps the frame shape and yields float cells."""
    X, y = load_gunpoint(return_X_y=True)
    row_mean = RowTransformer(FunctionTransformer(func=np.mean, validate=False))
    Xt = row_mean.fit_transform(X, y)
    assert Xt.shape == X.shape
    # check series-to-primitive transforms
    first_cell = Xt.iloc[0, 0]
    assert isinstance(first_cell, float)
示例24
def test_row_transformer_function_transformer_series_to_series():
    """A row-wise series-to-series function keeps the frame shape and series cells."""
    X, y = load_gunpoint(return_X_y=True)

    def power_spectrum(x):
        # Squared magnitude of the FFT, keeping only the first half.
        spectrum = np.fft.fft(x)
        power = spectrum.real * spectrum.real + spectrum.imag * spectrum.imag
        half = power.shape[0] // 2
        return power[:half]

    row_ps = RowTransformer(
        FunctionTransformer(func=power_spectrum, validate=False)
    )
    Xt = row_ps.fit_transform(X, y)
    assert Xt.shape == X.shape
    # check series-to-series transforms
    first_cell = Xt.iloc[0, 0]
    assert isinstance(first_cell, (pd.Series, np.ndarray))
示例25
def test_FeatureUnion():
    """A FeatureUnion of row transformers widens the output by the number of members."""
    X, y = load_gunpoint(return_X_y=True)
    mean_rows = RowTransformer(FunctionTransformer(func=np.mean, validate=False))
    std_rows = RowTransformer(FunctionTransformer(func=np.std, validate=False))
    union = FeatureUnion([('mean', mean_rows), ('std', std_rows)])

    Xt = union.fit_transform(X, y)

    n_rows, n_cols = X.shape[0], X.shape[1]
    assert Xt.shape == (n_rows, n_cols * len(union.transformer_list))
示例26
def test_predict_transform(self):
    """predict_transform returns the prediction followed by its transformed values."""
    identity_and_log10 = FeatureUnion([
        ("identity", FunctionTransformer(None)),
        ("log10", FunctionTransformer(numpy.log10)),
    ])
    pipeline = PMMLPipeline(
        [("estimator", DummyRegressor())],
        predict_transformer=identity_and_log10,
    )
    X = DataFrame([[1, 0], [2, 0], [3, 0]], columns=["X1", "X2"])
    y = Series([0.5, 1.0, 1.5], name="y")
    pipeline.fit(X, y)

    expected_pred = [1.0, 1.0, 1.0]
    # Per row: prediction, identity output, log10 output.
    expected_row = [1.0, 1.0, numpy.log10(1.0)]
    self.assertEqual(expected_pred, pipeline.predict(X).tolist())
    self.assertEqual([expected_row] * 3, pipeline.predict_transform(X).tolist())
示例27
def test_predict_proba_transform(self):
    """predict_proba_transform appends the log of each class probability."""
    log_transformer = FunctionTransformer(numpy.log)
    pipeline = PMMLPipeline(
        [("estimator", DummyClassifier(strategy="prior"))],
        predict_proba_transformer=log_transformer,
    )
    X = DataFrame([1.0, 1.0, 1.0, 1.0, 1.0, 1.0], columns=["x"])
    y = Series(["green", "red", "yellow", "green", "red", "green"], name="y")
    pipeline.fit(X, y)

    fitted_classes = pipeline._final_estimator.classes_.tolist()
    self.assertEqual(["green", "red", "yellow"], fitted_classes)

    # Class frequencies in y: 3 green, 2 red, 1 yellow out of 6.
    proba_row = [3 / 6.0, 2 / 6.0, 1 / 6.0]
    log_row = [numpy.log(p) for p in proba_row]
    self.assertEqual([proba_row] * 6, pipeline.predict_proba(X).tolist())
    self.assertEqual([proba_row + log_row] * 6,
                     pipeline.predict_proba_transform(X).tolist())
示例28
def sklearn_custom_transformer_model(sklearn_knn_model):
    """Wrap the given KNN model in a pipeline that starts with a custom transformer.

    Returns a ModelWithData holding the pipeline and the first two columns
    of the iris data as inference data.
    """
    def add_one(vec):
        print("Invoking custom transformer!")
        return vec + 1

    custom_step = SKFunctionTransformer(add_one, validate=True)
    pipeline = SKPipeline([
        ("custom_transformer", custom_step),
        ("knn", sklearn_knn_model.model),
    ])
    iris_two_columns = datasets.load_iris().data[:, :2]
    return ModelWithData(pipeline, inference_data=iris_two_columns)
示例29
def test_FunctionTransformer(self):
    """Transforming a ModelFrame with ``x + 1`` must add one to its data."""
    frame = pdml.ModelFrame(datasets.load_iris())
    add_one = frame.pp.FunctionTransformer(func=lambda x: x + 1)

    frame.fit(add_one)
    transformed = frame.transform(add_one)

    # Expected frame: a copy of the input whose data part is shifted by one.
    expected = frame.copy()
    expected.data = expected.data + 1

    self.assertIsInstance(transformed, pdml.ModelFrame)
    tm.assert_frame_equal(transformed, expected)
示例30
def test_np_log():
    """Transforming with np.log1p must give the same array as calling np.log1p."""
    X = np.arange(10).reshape((5, 2))
    # Test that the numpy.log example still works.
    log_transformer = FunctionTransformer(np.log1p)
    expected = np.log1p(X)
    assert_array_equal(log_transformer.transform(X), expected)