Python源码示例:sklearn.preprocessing.KBinsDiscretizer()
示例1
def test_encode_options():
    """Check that the three ``encode`` modes of KBinsDiscretizer agree.

    The ordinal output is one-hot encoded with ``OneHotEncoder`` and compared
    with the discretizer's own ``'onehot-dense'`` and ``'onehot'`` outputs.
    The dense mode must not return a sparse matrix; the plain mode must.
    """
    def discretize(encode):
        # A fresh estimator per mode, fitted on and applied to the same data.
        binner = KBinsDiscretizer(n_bins=[2, 3, 3, 3], encode=encode)
        return binner.fit(X).transform(X)

    def reference_onehot(codes, sparse):
        # One-hot encode ordinal bin codes with explicit per-feature
        # categories 0..n_bins-1.
        # NOTE(review): newer scikit-learn releases rename ``sparse`` to
        # ``sparse_output`` -- confirm against the version this file targets.
        encoder = OneHotEncoder(
            categories=[np.arange(n) for n in [2, 3, 3, 3]],
            sparse=sparse)
        return encoder.fit_transform(codes)

    ordinal_codes = discretize('ordinal')

    dense_onehot = discretize('onehot-dense')
    assert not sp.issparse(dense_onehot)
    assert_array_equal(reference_onehot(ordinal_codes, False), dense_onehot)

    sparse_onehot = discretize('onehot')
    assert sp.issparse(sparse_onehot)
    assert_array_equal(reference_onehot(ordinal_codes, True).toarray(),
                       sparse_onehot.toarray())
示例2
def test_nonuniform_strategies(
        strategy, expected_2bins, expected_3bins, expected_5bins):
    """Check ordinal bin codes for 2, 3 and 5 bins on unevenly spaced data.

    ``strategy`` and the three expected code sequences are supplied by the
    caller (presumably a pytest parametrization that is not visible here).
    """
    samples = np.array([0, 0.5, 2, 3, 9, 10]).reshape(-1, 1)

    # Each entry pairs a bin count with the codes expected for it.
    cases = (
        (2, expected_2bins),
        (3, expected_3bins),
        (5, expected_5bins),
    )
    for bin_count, expected in cases:
        binner = KBinsDiscretizer(
            n_bins=bin_count, strategy=strategy, encode='ordinal')
        codes = binner.fit_transform(samples)
        assert_array_equal(expected, codes.ravel())
示例3
def test_model_k_bins_discretiser_ordinal_uniform(self):
    # Convert a KBinsDiscretizer (3 bins per feature, ordinal encoding,
    # uniform strategy) fitted on a 3x4 float matrix to ONNX, then pass the
    # data, the fitted model and the converted model to dump_data_and_model.
    X = np.array([
        [1.2, 3.2, 1.3, -5.6],
        [4.3, -3.2, 5.7, 1.0],
        [0, 3.2, 4.7, -8.9],
    ])
    model = KBinsDiscretizer(
        n_bins=3, encode="ordinal", strategy="uniform").fit(X)
    # Declared ONNX input: float tensor, first dimension None, one column
    # per feature of X.
    initial_types = [("input", FloatTensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(
        model,
        "scikit-learn KBinsDiscretiser",
        initial_types,
        target_opset=TARGET_OPSET,
    )
    # assertIsNotNone is the dedicated unittest check and yields a clearer
    # failure message than assertTrue(x is not None).
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(
        X.astype(np.float32),
        model,
        model_onnx,
        basename="SklearnKBinsDiscretiserOrdinalUniform",
        # Condition string handed verbatim to the helper; presumably it is
        # evaluated there to tolerate failures on old onnxruntime -- confirm.
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')",
    )
示例4
def test_model_k_bins_discretiser_onehot_dense_uniform(self):
    # Convert a KBinsDiscretizer (per-feature bin counts [3, 2, 3, 4],
    # dense one-hot encoding, uniform strategy) fitted on a 3x4 float matrix
    # to ONNX, then pass data and both models to dump_data_and_model.
    X = np.array([
        [1.2, 3.2, 1.3, -5.6],
        [4.3, -3.2, 5.7, 1.0],
        [0, 3.2, 4.7, -8.9],
    ])
    model = KBinsDiscretizer(
        n_bins=[3, 2, 3, 4], encode="onehot-dense", strategy="uniform").fit(X)
    # Declared ONNX input: float tensor, first dimension None, one column
    # per feature of X.
    initial_types = [("input", FloatTensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(
        model,
        "scikit-learn KBinsDiscretiser",
        initial_types,
        target_opset=TARGET_OPSET,
    )
    # assertIsNotNone is the dedicated unittest check and yields a clearer
    # failure message than assertTrue(x is not None).
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(
        X.astype(np.float32),
        model,
        model_onnx,
        basename="SklearnKBinsDiscretiserOneHotDenseUniform",
        # Condition string handed verbatim to the helper; presumably it is
        # evaluated there to tolerate failures on old onnxruntime -- confirm.
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')",
    )
示例5
def test_model_k_bins_discretiser_ordinal_uniform_int(self):
    # Convert a KBinsDiscretizer (3 bins per feature, ordinal encoding,
    # uniform strategy) fitted on a 3x4 integer matrix to ONNX with an int64
    # input declaration, then pass data and both models to
    # dump_data_and_model.
    X = np.array([
        [1, 3, 3, -6],
        [3, -2, 5, 0],
        [0, 2, 7, -9],
    ])
    model = KBinsDiscretizer(
        n_bins=3, encode="ordinal", strategy="uniform").fit(X)
    # Declared ONNX input: int64 tensor, first dimension None, one column
    # per feature of X.
    initial_types = [("input", Int64TensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(
        model,
        "scikit-learn KBinsDiscretiser",
        initial_types,
        target_opset=TARGET_OPSET,
    )
    # assertIsNotNone is the dedicated unittest check and yields a clearer
    # failure message than assertTrue(x is not None).
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(
        X.astype(np.int64),
        model,
        model_onnx,
        basename="SklearnKBinsDiscretiserOrdinalUniformInt",
        # Condition string handed verbatim to the helper; presumably it is
        # evaluated there to tolerate failures on old onnxruntime -- confirm.
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')",
    )
示例6
def test_model_k_bins_discretiser_ordinal_quantile_int(self):
    # Convert a KBinsDiscretizer (per-feature bin counts [3, 2, 3, 4],
    # ordinal encoding, quantile strategy) fitted on a 9x4 integer matrix to
    # ONNX with an int64 input declaration, then pass data and both models
    # to dump_data_and_model.
    X = np.array([
        [1, 3, 3, -6],
        [3, -2, 5, 0],
        [0, 2, 7, -9],
        [-1, 0, 1, -16],
        [31, -5, 15, 10],
        [12, -2, 8, -19],
        [12, 13, 31, -16],
        [0, -21, 15, 30],
        [10, 22, 71, -91],
    ])
    model = KBinsDiscretizer(
        n_bins=[3, 2, 3, 4], encode="ordinal", strategy="quantile").fit(X)
    # Declared ONNX input: int64 tensor, first dimension None, one column
    # per feature of X.
    initial_types = [("input", Int64TensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(
        model,
        "scikit-learn KBinsDiscretiser",
        initial_types,
        target_opset=TARGET_OPSET,
    )
    # assertIsNotNone is the dedicated unittest check and yields a clearer
    # failure message than assertTrue(x is not None).
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(
        X.astype(np.int64),
        model,
        model_onnx,
        basename="SklearnKBinsDiscretiserOrdinalQuantileInt",
        # Condition string handed verbatim to the helper; presumably it is
        # evaluated there to tolerate failures on old onnxruntime -- confirm.
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')",
    )
示例7
def test_model_k_bins_discretiser_ordinal_kmeans_int(self):
    # Convert a KBinsDiscretizer (3 bins per feature, ordinal encoding,
    # kmeans strategy) fitted on a 6x4 integer matrix to ONNX with an int64
    # input declaration, then pass data and both models to
    # dump_data_and_model.
    X = np.array([
        [1, 3, 3, -6],
        [3, -2, 5, 0],
        [0, 2, 7, -9],
        [-1, 0, 1, -16],
        [31, -5, 15, 10],
        [12, -2, 8, -19],
    ])
    model = KBinsDiscretizer(
        n_bins=3, encode="ordinal", strategy="kmeans").fit(X)
    # Declared ONNX input: int64 tensor, first dimension None, one column
    # per feature of X.
    initial_types = [("input", Int64TensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(
        model,
        "scikit-learn KBinsDiscretiser",
        initial_types,
        target_opset=TARGET_OPSET,
    )
    # assertIsNotNone is the dedicated unittest check and yields a clearer
    # failure message than assertTrue(x is not None).
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(
        X.astype(np.int64),
        model,
        model_onnx,
        basename="SklearnKBinsDiscretiserOrdinalKMeansInt",
        # Condition string handed verbatim to the helper; presumably it is
        # evaluated there to tolerate failures on old onnxruntime -- confirm.
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')",
    )
示例8
def test_model_k_bins_discretiser_onehot_dense_uniform_int(self):
    # Convert a KBinsDiscretizer (per-feature bin counts [3, 2, 3, 4],
    # dense one-hot encoding, uniform strategy) fitted on a 3x4 integer
    # matrix to ONNX with an int64 input declaration, then pass data and
    # both models to dump_data_and_model.
    X = np.array([
        [1, 3, 3, -6],
        [3, -2, 5, 0],
        [0, 2, 7, -9],
    ])
    model = KBinsDiscretizer(
        n_bins=[3, 2, 3, 4], encode="onehot-dense", strategy="uniform").fit(X)
    # Declared ONNX input: int64 tensor, first dimension None, one column
    # per feature of X.
    initial_types = [("input", Int64TensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(
        model,
        "scikit-learn KBinsDiscretiser",
        initial_types,
        target_opset=TARGET_OPSET,
    )
    # assertIsNotNone is the dedicated unittest check and yields a clearer
    # failure message than assertTrue(x is not None).
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(
        X.astype(np.int64),
        model,
        model_onnx,
        basename="SklearnKBinsDiscretiserOneHotDenseUniformInt",
        # Condition string handed verbatim to the helper; presumably it is
        # evaluated there to tolerate failures on old onnxruntime -- confirm.
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')",
    )
示例9
def test_model_k_bins_discretiser_onehot_dense_quantile_int(self):
    # Convert a KBinsDiscretizer (per-feature bin counts [3, 2, 3, 4],
    # dense one-hot encoding, quantile strategy) fitted on a 3x4 integer
    # matrix to ONNX with an int64 input declaration, then pass data and
    # both models to dump_data_and_model.
    X = np.array([
        [1, 3, 3, -6],
        [3, -2, 5, 0],
        [0, 2, 7, -9],
    ])
    model = KBinsDiscretizer(
        n_bins=[3, 2, 3, 4], encode="onehot-dense", strategy="quantile").fit(X)
    # Declared ONNX input: int64 tensor, first dimension None, one column
    # per feature of X.
    initial_types = [("input", Int64TensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(
        model,
        "scikit-learn KBinsDiscretiser",
        initial_types,
        target_opset=TARGET_OPSET,
    )
    # assertIsNotNone is the dedicated unittest check and yields a clearer
    # failure message than assertTrue(x is not None).
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(
        X.astype(np.int64),
        model,
        model_onnx,
        basename="SklearnKBinsDiscretiserOneHotDenseQuantileInt",
        # Condition string handed verbatim to the helper; presumably it is
        # evaluated there to tolerate failures on old onnxruntime -- confirm.
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')",
    )
示例10
def test_fit_transform(strategy, expected):
    """Fit a 3-bin ordinal discretizer on ``X`` and compare its codes.

    ``strategy`` and ``expected`` are supplied by the caller; ``X`` is the
    data set defined at module level outside this block.
    """
    binner = KBinsDiscretizer(n_bins=3, encode='ordinal', strategy=strategy)
    binner.fit(X)
    codes = binner.transform(X)
    assert_array_equal(expected, codes)
示例11
def test_valid_n_bins():
    """Check that integer ``n_bins`` values are accepted and stored as ints.

    Both a Python ``int`` and a NumPy integer scalar must be usable as
    ``n_bins``, and the fitted ``n_bins_`` array must have the platform's
    default integer dtype.
    """
    KBinsDiscretizer(n_bins=2).fit_transform(X)
    # Indexing the array yields a NumPy integer scalar rather than a
    # Python int.
    KBinsDiscretizer(n_bins=np.array([2])[0]).fit_transform(X)
    # ``np.int`` was only a deprecated alias of the builtin ``int`` and has
    # been removed from NumPy; ``np.dtype(int)`` denotes the same dtype and
    # works on every NumPy version.
    assert KBinsDiscretizer(n_bins=2).fit(X).n_bins_.dtype == np.dtype(int)
示例12
def test_invalid_n_bins():
    """Check the errors raised for a scalar ``n_bins`` that is invalid.

    A value below 2 and a non-integer value must each raise ``ValueError``
    with its own message when ``fit_transform`` is called.
    """
    too_few = KBinsDiscretizer(n_bins=1)
    too_few_message = ("KBinsDiscretizer received an invalid "
                       "number of bins. Received 1, expected at least 2.")
    assert_raise_message(
        ValueError, too_few_message, too_few.fit_transform, X)

    not_integer = KBinsDiscretizer(n_bins=1.1)
    not_integer_message = ("KBinsDiscretizer received an invalid "
                           "n_bins type. Received float, expected int.")
    assert_raise_message(
        ValueError, not_integer_message, not_integer.fit_transform, X)
示例13
def test_invalid_n_bins_array():
    """Check the errors raised for an array-like ``n_bins`` that is invalid.

    Each case pairs an ``n_bins`` value with the ``ValueError`` message that
    ``fit_transform`` on ``X`` is expected to produce.
    """
    shape_message = ("n_bins must be a scalar or array of shape "
                     "(n_features,).")
    cases = (
        # Bad shape
        (np.full((2, 4), 2.), shape_message),
        # Incorrect number of features
        ([1, 2, 2], shape_message),
        # Bad bin values
        ([1, 2, 2, 1],
         "KBinsDiscretizer received an invalid number of bins "
         "at indices 0, 3. Number of bins must be at least 2, "
         "and must be an int."),
        # Float bin values
        ([2.1, 2, 2.1, 2],
         "KBinsDiscretizer received an invalid number of bins "
         "at indices 0, 2. Number of bins must be at least 2, "
         "and must be an int."),
    )
    for bad_n_bins, message in cases:
        binner = KBinsDiscretizer(n_bins=bad_n_bins)
        assert_raise_message(ValueError, message, binner.fit_transform, X)
示例14
def test_fit_transform_n_bins_array(strategy, expected):
    """Check codes and ``bin_edges_`` shapes with per-feature bin counts.

    The discretizer is fitted with ``n_bins=[2, 3, 3, 3]``; every feature
    must end up with one more bin edge than it has bins.
    """
    binner = KBinsDiscretizer(
        n_bins=[2, 3, 3, 3], encode='ordinal', strategy=strategy)
    binner.fit(X)
    assert_array_equal(expected, binner.transform(X))

    # bin_edges_ holds one entry per feature.
    feature_count = np.array(X).shape[1]
    assert binner.bin_edges_.shape == (feature_count, )

    # Each entry has n_bins + 1 edges for its feature.
    for edges, bin_count in zip(binner.bin_edges_, binner.n_bins_):
        assert edges.shape == (bin_count + 1, )
示例15
def test_same_min_max(strategy):
    """Check handling of a feature whose values are all identical.

    Fitting must emit a ``UserWarning`` for the constant first column, give
    it a single bin, and transform it to zeros.
    """
    # Scope the "always" filter to this test. A bare simplefilter() call
    # changes the process-wide warning filters and would otherwise stay in
    # effect for whatever runs afterwards.
    with warnings.catch_warnings():
        warnings.simplefilter("always")
        X = np.array([[1, -2],
                      [1, -1],
                      [1, 0],
                      [1, 1]])
        est = KBinsDiscretizer(strategy=strategy, n_bins=3, encode='ordinal')
        assert_warns_message(UserWarning,
                             "Feature 0 is constant and will be replaced "
                             "with 0.", est.fit, X)
        assert est.n_bins_[0] == 1
        # The constant feature is replaced with zeros.
        Xt = est.transform(X)
        assert_array_equal(Xt[:, 0], np.zeros(X.shape[0]))
示例16
def test_transform_1d_behavior():
    """Check that 1-D input is rejected by both ``fit`` and ``transform``."""
    flat = np.arange(4)
    column = flat.reshape(-1, 1)

    # Fitting directly on the 1-D array must fail.
    unfitted = KBinsDiscretizer(n_bins=2)
    assert_raises(ValueError, unfitted.fit, flat)

    # After fitting on the 2-D column, transforming the 1-D array must
    # still fail.
    fitted = KBinsDiscretizer(n_bins=2)
    fitted.fit(column)
    assert_raises(ValueError, fitted.transform, flat)
示例17
def test_numeric_stability():
    """Check that the bin codes do not change when the data is scaled down.

    The same five values are divided by 10**1 through 10**8 and must always
    be split into the same two ordinal bins.
    """
    base = np.array([2., 4., 6., 8., 10.]).reshape(-1, 1)
    expected_codes = np.array([0, 0, 1, 1, 1]).reshape(-1, 1)

    for exponent in range(1, 9):
        scaled = base / 10**exponent
        binner = KBinsDiscretizer(n_bins=2, encode='ordinal')
        codes = binner.fit_transform(scaled)
        assert_array_equal(expected_codes, codes)
示例18
def test_invalid_encode_option():
    """Check that an unknown ``encode`` value is rejected when fitting."""
    binner = KBinsDiscretizer(n_bins=[2, 3, 3, 3], encode='invalid-encode')
    expected_message = ("Valid options for 'encode' are "
                        "('onehot', 'onehot-dense', 'ordinal'). "
                        "Got encode='invalid-encode' instead.")
    assert_raise_message(ValueError, expected_message, binner.fit, X)
示例19
def test_inverse_transform(strategy, encode, expected_inv):
    """Check ``inverse_transform`` on the discretizer's own output.

    ``X`` is discretized into 3 bins with the given strategy and encoding;
    mapping the result back must approximately equal ``expected_inv``.
    """
    binner = KBinsDiscretizer(n_bins=3, strategy=strategy, encode=encode)
    encoded = binner.fit_transform(X)
    restored = binner.inverse_transform(encoded)
    assert_array_almost_equal(expected_inv, restored)
示例20
def test_transform_outside_fit_range(strategy):
    """Check that values outside the fitted range land in the end bins.

    A value below the fitted minimum must get code 0 and a value above the
    fitted maximum must get the last bin's code.
    """
    train = np.array([0, 1, 2, 3]).reshape(-1, 1)
    binner = KBinsDiscretizer(n_bins=4, strategy=strategy, encode='ordinal')
    binner.fit(train)

    # One value below and one above everything seen during fit.
    outside = np.array([-2, 5]).reshape(-1, 1)
    outside_codes = binner.transform(outside)

    highest_code = outside_codes.max(axis=0)
    lowest_code = outside_codes.min(axis=0)
    assert_array_equal(highest_code + 1, binner.n_bins_)
    assert_array_equal(lowest_code, [0])
示例21
def test_overwrite():
    """Check that neither transform direction modifies its input array."""
    samples = np.array([0, 1, 2, 3]).reshape(-1, 1)
    samples_snapshot = samples.copy()

    binner = KBinsDiscretizer(n_bins=3, encode="ordinal")
    codes = binner.fit_transform(samples)
    # fit_transform must leave the input data untouched.
    assert_array_equal(samples, samples_snapshot)

    codes_snapshot = codes.copy()
    restored = binner.inverse_transform(codes)
    # inverse_transform must leave the codes untouched.
    assert_array_equal(codes, codes_snapshot)

    expected_restored = np.array([[0.5], [1.5], [2.5], [2.5]])
    assert_array_equal(restored, expected_restored)
示例22
def test_redundant_bins(strategy, expected_bin_edges):
    """Check the warning and resulting edges when bins are too narrow.

    The data holds only two distinct values but 3 bins are requested, so
    fitting must warn and leave the edges given by ``expected_bin_edges``.
    """
    samples = [[0], [0], [0], [0], [3], [3]]
    binner = KBinsDiscretizer(n_bins=3, strategy=strategy)
    expected_warning = (
        "Bins whose width are too small (i.e., <= 1e-8) in feature 0 "
        "are removed. Consider decreasing the number of bins.")
    assert_warns_message(UserWarning, expected_warning, binner.fit, samples)
    first_feature_edges = binner.bin_edges_[0]
    assert_array_almost_equal(first_feature_edges, expected_bin_edges)
示例23
def fit(self, data, categorical_columns=tuple(), ordinal_columns=tuple()):
    """Record column metadata and fit a discretizer on continuous columns.

    Stores the result of ``self.get_metadata`` in ``self.meta``, the
    positions of the columns whose metadata type is ``CONTINUOUS`` in
    ``self.column_index``, and a uniform, ordinal-encoding
    ``KBinsDiscretizer`` with ``self.n_bins`` bins in ``self.discretizer``.
    The discretizer is fitted only when at least one continuous column
    exists.

    Args:
        data: Two-dimensional array; it is indexed as ``data[:, columns]``.
        categorical_columns: Forwarded unchanged to ``self.get_metadata``.
        ordinal_columns: Forwarded unchanged to ``self.get_metadata``.
    """
    self.meta = self.get_metadata(data, categorical_columns, ordinal_columns)

    continuous_positions = []
    for position, column_info in enumerate(self.meta):
        if column_info['type'] == CONTINUOUS:
            continuous_positions.append(position)
    self.column_index = continuous_positions

    self.discretizer = KBinsDiscretizer(
        n_bins=self.n_bins, encode='ordinal', strategy='uniform')

    # With no continuous columns the discretizer is left unfitted.
    if self.column_index:
        self.discretizer.fit(data[:, self.column_index])
示例24
def test_model_k_bins_discretiser_ordinal_quantile(self):
    # Convert a KBinsDiscretizer (per-feature bin counts [3, 2, 3, 4],
    # ordinal encoding, quantile strategy) fitted on a 10x4 float matrix to
    # ONNX, then pass data and both models to dump_data_and_model.
    X = np.array([
        [1.2, 3.2, 1.3, -5.6],
        [4.3, -3.2, 5.7, 1.0],
        [0, 3.2, 4.7, -8.9],
        [0.2, 1.3, 0.6, -9.4],
        [0.8, 4.2, -14.7, -28.9],
        [8.2, 1.9, 2.6, -5.4],
        [4.8, -9.2, 33.7, 3.9],
        [81.2, 1., 0.6, 12.4],
        [6.8, 11.2, -1.7, -2.9],
        [11.2, 12.9, 4.3, -1.4],
    ])
    model = KBinsDiscretizer(
        n_bins=[3, 2, 3, 4], encode="ordinal", strategy="quantile").fit(X)
    # Declared ONNX input: float tensor, first dimension None, one column
    # per feature of X.
    initial_types = [("input", FloatTensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(
        model,
        "scikit-learn KBinsDiscretiser",
        initial_types,
        target_opset=TARGET_OPSET,
    )
    # assertIsNotNone is the dedicated unittest check and yields a clearer
    # failure message than assertTrue(x is not None).
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(
        X.astype(np.float32),
        model,
        model_onnx,
        basename="SklearnKBinsDiscretiserOrdinalQuantile",
        # Condition string handed verbatim to the helper; presumably it is
        # evaluated there to tolerate failures on old onnxruntime -- confirm.
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')",
    )
示例25
def test_model_k_bins_discretiser_ordinal_kmeans(self):
    # Convert a KBinsDiscretizer (3 bins per feature, ordinal encoding,
    # kmeans strategy) fitted on a 10x4 float matrix to ONNX, then pass data
    # and both models to dump_data_and_model.
    X = np.array([
        [1.2, 3.2, 1.3, -5.6],
        [4.3, -3.2, 5.7, 1.0],
        [0, 3.2, 4.7, -8.9],
        [0.2, 1.3, 0.6, -9.4],
        [0.8, 4.2, -14.7, -28.9],
        [8.2, 1.9, 2.6, -5.4],
        [4.8, -9.2, 33.7, 3.9],
        [81.2, 1., 0.6, 12.4],
        [6.8, 11.2, -1.7, -2.9],
        [11.2, 12.9, 4.3, -1.4],
    ])
    model = KBinsDiscretizer(
        n_bins=3, encode="ordinal", strategy="kmeans").fit(X)
    # Declared ONNX input: float tensor, first dimension None, one column
    # per feature of X.
    initial_types = [("input", FloatTensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(
        model,
        "scikit-learn KBinsDiscretiser",
        initial_types,
        target_opset=TARGET_OPSET,
    )
    # assertIsNotNone is the dedicated unittest check and yields a clearer
    # failure message than assertTrue(x is not None).
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(
        X.astype(np.float32),
        model,
        model_onnx,
        basename="SklearnKBinsDiscretiserOrdinalKMeans",
        # Condition string handed verbatim to the helper; presumably it is
        # evaluated there to tolerate failures on old onnxruntime -- confirm.
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')",
    )
示例26
def test_model_k_bins_discretiser_onehot_dense_quantile(self):
    # Convert a KBinsDiscretizer (per-feature bin counts [3, 2, 3, 4],
    # dense one-hot encoding, quantile strategy) fitted on a 10x4 float
    # matrix to ONNX, then pass data and both models to dump_data_and_model.
    X = np.array([
        [1.2, 3.2, 1.3, -5.6],
        [4.3, -3.2, 5.7, 1.0],
        [0, 3.2, 4.7, -8.9],
        [0.2, 1.3, 0.6, -9.4],
        [0.8, 4.2, -14.7, -28.9],
        [8.2, 1.9, 2.6, -5.4],
        [4.8, -9.2, 33.7, 3.9],
        [81.2, 1., 0.6, 12.4],
        [6.8, 11.2, -1.7, -2.9],
        [11.2, 12.9, 4.3, -1.4],
    ])
    model = KBinsDiscretizer(
        n_bins=[3, 2, 3, 4], encode="onehot-dense", strategy="quantile").fit(X)
    # Declared ONNX input: float tensor, first dimension None, one column
    # per feature of X.
    initial_types = [("input", FloatTensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(
        model,
        "scikit-learn KBinsDiscretiser",
        initial_types,
        target_opset=TARGET_OPSET,
    )
    # assertIsNotNone is the dedicated unittest check and yields a clearer
    # failure message than assertTrue(x is not None).
    self.assertIsNotNone(model_onnx)
    dump_data_and_model(
        X.astype(np.float32),
        model,
        model_onnx,
        basename="SklearnKBinsDiscretiserOneHotDenseQuantile",
        # Condition string handed verbatim to the helper; presumably it is
        # evaluated there to tolerate failures on old onnxruntime -- confirm.
        allow_failure="StrictVersion(onnxruntime.__version__)"
                      "<= StrictVersion('0.2.1')",
    )