Python source code examples: sklearn.decomposition.KernelPCA()
Example 1
def test_kernel_pca_sparse():
    rng = np.random.RandomState(0)
    X_fit = sp.csr_matrix(rng.random_sample((5, 4)))
    X_pred = sp.csr_matrix(rng.random_sample((2, 4)))

    for eigen_solver in ("auto", "arpack"):
        for kernel in ("linear", "rbf", "poly"):
            # transform fit data
            kpca = KernelPCA(4, kernel=kernel, eigen_solver=eigen_solver,
                             fit_inverse_transform=False)
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            assert_array_almost_equal(np.abs(X_fit_transformed),
                                      np.abs(X_fit_transformed2))

            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert_equal(X_pred_transformed.shape[1],
                         X_fit_transformed.shape[1])

            # inverse transform
            # X_pred2 = kpca.inverse_transform(X_pred_transformed)
            # assert_equal(X_pred2.shape, X_pred.shape)
Example 2
def test_leave_zero_eig():
    """This test checks that fit().transform() returns the same result as
    fit_transform() in case of non-removed zero eigenvalue.
    Non-regression test for issue #12141 (PR #12143)"""
    X_fit = np.array([[1, 1], [0, 0]])

    # Assert that even with all np warnings on, there is no div by zero warning
    with pytest.warns(None) as record:
        with np.errstate(all='warn'):
            k = KernelPCA(n_components=2, remove_zero_eig=False,
                          eigen_solver="dense")
            # Fit, then transform
            A = k.fit(X_fit).transform(X_fit)
            # Do both at once
            B = k.fit_transform(X_fit)
            # Compare
            assert_array_almost_equal(np.abs(A), np.abs(B))

    for w in record:
        # There might be warnings about the kernel being badly conditioned,
        # but there should not be warnings about division by zero.
        # (NumPy's division-by-zero warning can have many message variants,
        # but we know it is a RuntimeWarning, so let's check only that.)
        assert not issubclass(w.category, RuntimeWarning)
Example 3
def test_kernel_pca_precomputed():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    for eigen_solver in ("dense", "arpack"):
        X_kpca = KernelPCA(4, eigen_solver=eigen_solver).\
            fit(X_fit).transform(X_pred)
        X_kpca2 = KernelPCA(
            4, eigen_solver=eigen_solver, kernel='precomputed').fit(
                np.dot(X_fit, X_fit.T)).transform(np.dot(X_pred, X_fit.T))

        X_kpca_train = KernelPCA(
            4, eigen_solver=eigen_solver,
            kernel='precomputed').fit_transform(np.dot(X_fit, X_fit.T))
        X_kpca_train2 = KernelPCA(
            4, eigen_solver=eigen_solver, kernel='precomputed').fit(
                np.dot(X_fit, X_fit.T)).transform(np.dot(X_fit, X_fit.T))

        assert_array_almost_equal(np.abs(X_kpca),
                                  np.abs(X_kpca2))

        assert_array_almost_equal(np.abs(X_kpca_train),
                                  np.abs(X_kpca_train2))
Example 4
def test_gridsearch_pipeline_precomputed():
    # Test if we can do a grid-search to find parameters to separate
    # circles with a perceptron model using a precomputed kernel.
    X, y = make_circles(n_samples=400, factor=.3, noise=.05,
                        random_state=0)
    kpca = KernelPCA(kernel="precomputed", n_components=2)
    pipeline = Pipeline([("kernel_pca", kpca),
                         ("Perceptron", Perceptron(max_iter=5))])
    param_grid = dict(Perceptron__max_iter=np.arange(1, 5))
    grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid)
    X_kernel = rbf_kernel(X, gamma=2.)
    grid_search.fit(X_kernel, y)
    assert_equal(grid_search.best_score_, 1)
Example 5
def test_nested_circles():
    # Test the linear separability of the first 2D KPCA transform
    X, y = make_circles(n_samples=400, factor=.3, noise=.05,
                        random_state=0)

    # 2D nested circles are not linearly separable
    train_score = Perceptron(max_iter=5).fit(X, y).score(X, y)
    assert_less(train_score, 0.8)

    # Project the circles data into the first 2 components of a RBF Kernel
    # PCA model.
    # Note that the gamma value is data dependent. If this test breaks
    # and the gamma value has to be updated, the Kernel PCA example will
    # have to be updated too.
    kpca = KernelPCA(kernel="rbf", n_components=2,
                     fit_inverse_transform=True, gamma=2.)
    X_kpca = kpca.fit_transform(X)

    # The data is perfectly linearly separable in that space
    train_score = Perceptron(max_iter=5).fit(X_kpca, y).score(X_kpca, y)
    assert_equal(train_score, 1.0)
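Since the model above is fit with fit_inverse_transform=True, it also learns an approximate pre-image map back to the input space; a short follow-on sketch reusing the test's variables:

# map the embedded points back to (an approximation of) the input space
X_back = kpca.inverse_transform(X_kpca)
assert X_back.shape == X.shape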
Example 6
def dim_reduction_method(self):
    """
    Select the dimensionality reduction method.
    """
    if self.dim_reduction == 'pca':
        return PCA()
    elif self.dim_reduction == 'factor-analysis':
        return FactorAnalysis()
    elif self.dim_reduction == 'fast-ica':
        return FastICA()
    elif self.dim_reduction == 'kernel-pca':
        return KernelPCA()
    elif self.dim_reduction == 'sparse-pca':
        return SparsePCA()
    elif self.dim_reduction == 'truncated-svd':
        return TruncatedSVD()
    elif self.dim_reduction is not None:
        raise ValueError('%s is not a supported dimensionality reduction '
                         'method. Valid inputs are: "pca", "factor-analysis", '
                         '"fast-ica", "kernel-pca", "sparse-pca", '
                         '"truncated-svd".' % self.dim_reduction)
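A minimal usage sketch for a selector like this; the Reducer wrapper class below is hypothetical (not part of the original project) and condenses the same dispatch into a lookup table:

from sklearn.datasets import load_iris
from sklearn.decomposition import FactorAnalysis, FastICA, KernelPCA, PCA

class Reducer:
    """Hypothetical holder for the `dim_reduction` name."""
    def __init__(self, dim_reduction=None):
        self.dim_reduction = dim_reduction

    def dim_reduction_method(self):
        # same dispatch as above, condensed into a dict
        methods = {'pca': PCA, 'factor-analysis': FactorAnalysis,
                   'fast-ica': FastICA, 'kernel-pca': KernelPCA}
        if self.dim_reduction in methods:
            return methods[self.dim_reduction]()
        raise ValueError('%s is not a supported method' % self.dim_reduction)

X = load_iris().data
model = Reducer('kernel-pca').dim_reduction_method()  # -> KernelPCA()
X_reduced = model.fit_transform(X)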
Example 7
def __init__(self, options):
    self.handle_options(options)
    out_params = convert_params(
        options.get('params', {}),
        ints=['k', 'degree', 'alpha', 'max_iteration'],
        floats=['gamma', 'tolerance'],
        aliases={'k': 'n_components', 'tolerance': 'tol',
                 'max_iteration': 'max_iter'},
    )
    out_params['kernel'] = 'rbf'

    if 'n_components' not in out_params:
        out_params['n_components'] = min(2, len(options['feature_variables']))
    elif out_params['n_components'] == 0:
        raise RuntimeError('k needs to be greater than zero.')

    self.estimator = _KPCA(**out_params)

# sklearn's KernelPCA.transform tries to form a complete kernel
# matrix of its input and the original data the model was fit
# on. Unfortunately, this might consume a colossal amount of
# memory for large inputs. We chunk the input to cut down on this.
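The chunking code the trailing comment refers to is truncated in this snippet. A minimal sketch of the idea (the helper name and chunk size are assumptions, not the project's actual implementation):

import numpy as np

def chunked_transform(estimator, X, chunk_size=1000):
    # Transform X in row chunks so the kernel matrix KernelPCA builds
    # between the input and the training data stays small.
    outputs = [estimator.transform(X[start:start + chunk_size])
               for start in range(0, X.shape[0], chunk_size)]
    return np.concatenate(outputs, axis=0)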
Example 8
def test_objectmapper(self):
    df = pdml.ModelFrame([])
    self.assertIs(df.decomposition.PCA, decomposition.PCA)
    self.assertIs(df.decomposition.IncrementalPCA,
                  decomposition.IncrementalPCA)
    self.assertIs(df.decomposition.KernelPCA, decomposition.KernelPCA)
    self.assertIs(df.decomposition.FactorAnalysis,
                  decomposition.FactorAnalysis)
    self.assertIs(df.decomposition.FastICA, decomposition.FastICA)
    self.assertIs(df.decomposition.TruncatedSVD, decomposition.TruncatedSVD)
    self.assertIs(df.decomposition.NMF, decomposition.NMF)
    self.assertIs(df.decomposition.SparsePCA, decomposition.SparsePCA)
    self.assertIs(df.decomposition.MiniBatchSparsePCA,
                  decomposition.MiniBatchSparsePCA)
    self.assertIs(df.decomposition.SparseCoder, decomposition.SparseCoder)
    self.assertIs(df.decomposition.DictionaryLearning,
                  decomposition.DictionaryLearning)
    self.assertIs(df.decomposition.MiniBatchDictionaryLearning,
                  decomposition.MiniBatchDictionaryLearning)
    self.assertIs(df.decomposition.LatentDirichletAllocation,
                  decomposition.LatentDirichletAllocation)
Example 9
def test_Decompositions_KernelPCA(self):
    iris = datasets.load_iris()
    df = pdml.ModelFrame(iris)

    mod1 = df.decomposition.KernelPCA()
    mod2 = decomposition.KernelPCA()

    df.fit(mod1)
    mod2.fit(iris.data, iris.target)

    result = df.transform(mod1)
    expected = mod2.transform(iris.data)

    self.assertIsInstance(result, pdml.ModelFrame)
    tm.assert_series_equal(df.target, result.target)
    self.assert_numpy_array_almost_equal(result.data.values[:, :40],
                                         expected[:, :40])
Example 10
def to_uts(mts, transformer):
    """PCA dimension reduction: convert an MTS to a UTS.

    Args:
        mts (ndarray): MTS
        transformer (class): PCA, KernelPCA, TSNE

    Returns:
        ndarray: UTS
    """
    model = PCA(n_components=1)
    if transformer == KernelPCA:
        model = KernelPCA(n_components=1, kernel="rbf")
    elif transformer == TSNE:
        model = TSNE(n_components=1, perplexity=40, n_iter=300)
    uts = model.fit_transform(mts)
    uts = uts.reshape(-1)
    return uts
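For illustration, the helper can be called on a toy multivariate series like this (the data below is made up):

import numpy as np
from sklearn.decomposition import PCA, KernelPCA
from sklearn.manifold import TSNE

rng = np.random.RandomState(0)
mts = rng.rand(100, 3)          # 100 time steps, 3 variables
uts = to_uts(mts, KernelPCA)    # univariate series of shape (100,)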
Example 11
def reduce_KernelPCA(x, **kwd_params):
    '''
    Reduce the dimensions using Principal Component
    Analysis with different kernels
    '''
    # create the PCA object
    pca = dc.KernelPCA(**kwd_params)

    # learn the principal components from all the features
    return pca.fit(x)
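A usage sketch: the keyword arguments are forwarded verbatim to sklearn.decomposition.KernelPCA, and the fitted estimator is returned, so transform can be called on it (the toy data below is made up):

import numpy as np

rng = np.random.RandomState(0)
x = rng.rand(50, 6)

kpca = reduce_KernelPCA(x, n_components=3, kernel='rbf', gamma=0.1)
x_reduced = kpca.transform(x)   # shape (50, 3)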
Example 12
def test_kernel_pca():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    def histogram(x, y, **kwargs):
        # Histogram kernel implemented as a callable.
        assert_equal(kwargs, {})  # no kernel_params that we didn't ask for
        return np.minimum(x, y).sum()

    for eigen_solver in ("auto", "dense", "arpack"):
        for kernel in ("linear", "rbf", "poly", histogram):
            # histogram kernel produces singular matrix inside linalg.solve
            # XXX use a least-squares approximation?
            inv = not callable(kernel)

            # transform fit data
            kpca = KernelPCA(4, kernel=kernel, eigen_solver=eigen_solver,
                             fit_inverse_transform=inv)
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            assert_array_almost_equal(np.abs(X_fit_transformed),
                                      np.abs(X_fit_transformed2))

            # non-regression test: previously, gamma would be 0 by default,
            # forcing all eigenvalues to 0 under the poly kernel
            assert_not_equal(X_fit_transformed.size, 0)

            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert_equal(X_pred_transformed.shape[1],
                         X_fit_transformed.shape[1])

            # inverse transform
            if inv:
                X_pred2 = kpca.inverse_transform(X_pred_transformed)
                assert_equal(X_pred2.shape, X_pred.shape)
Example 13
def test_kernel_pca_invalid_parameters():
    assert_raises(ValueError, KernelPCA, 10, fit_inverse_transform=True,
                  kernel='precomputed')
Example 14
def test_kernel_pca_consistent_transform():
    # X_fit_ needs to retain the old, unmodified copy of X
    state = np.random.RandomState(0)
    X = state.rand(10, 10)
    kpca = KernelPCA(random_state=state).fit(X)
    transformed1 = kpca.transform(X)

    X_copy = X.copy()
    X[:, 0] = 666
    transformed2 = kpca.transform(X_copy)
    assert_array_almost_equal(transformed1, transformed2)
Example 15
def test_kernel_pca_deterministic_output():
    rng = np.random.RandomState(0)
    X = rng.rand(10, 10)
    eigen_solver = ('arpack', 'dense')

    for solver in eigen_solver:
        transformed_X = np.zeros((20, 2))
        for i in range(20):
            kpca = KernelPCA(n_components=2, eigen_solver=solver,
                             random_state=rng)
            transformed_X[i, :] = kpca.fit_transform(X)[0]
        assert_allclose(
            transformed_X, np.tile(transformed_X[0, :], 20).reshape(20, 2))
Example 16
def test_kernel_pca_n_components():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    for eigen_solver in ("dense", "arpack"):
        for c in [1, 2, 4]:
            kpca = KernelPCA(n_components=c, eigen_solver=eigen_solver)
            shape = kpca.fit(X_fit).transform(X_pred).shape
            assert_equal(shape, (2, c))
Example 17
def test_remove_zero_eig():
    X = np.array([[1 - 1e-30, 1], [1, 1], [1, 1 - 1e-20]])

    # n_components=None (default) => remove_zero_eig is True
    kpca = KernelPCA()
    Xt = kpca.fit_transform(X)
    assert_equal(Xt.shape, (3, 0))

    kpca = KernelPCA(n_components=2)
    Xt = kpca.fit_transform(X)
    assert_equal(Xt.shape, (3, 2))

    kpca = KernelPCA(n_components=2, remove_zero_eig=True)
    Xt = kpca.fit_transform(X)
    assert_equal(Xt.shape, (3, 0))
Example 18
def test_kernel_pca_invalid_kernel():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((2, 4))
    kpca = KernelPCA(kernel="tototiti")
    assert_raises(ValueError, kpca.fit, X_fit)
Example 19
def trainKernelPCA(data):
    """
    Reduce the dimensionality of the data using kernel PCA.
    """
    model = KernelPCA(n_components=2, kernel="rbf", gamma=25)
    model.fit(data)
    return model
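A short usage sketch for the trainer above (the input array is made up; any 2-D float array works):

import numpy as np

rng = np.random.RandomState(0)
data = rng.rand(100, 5)

model = trainKernelPCA(data)
projected = model.transform(data)   # shape (100, 2)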
Example 20
def test_fit_transform_KernelPCA(self):
    iris = datasets.load_iris()
    df = pdml.ModelFrame(iris)

    mod1 = df.decomposition.KernelPCA()
    mod2 = decomposition.KernelPCA()

    result = df.fit_transform(mod1)
    expected = mod2.fit_transform(iris.data, iris.target)

    self.assertIsInstance(result, pdml.ModelFrame)
    tm.assert_series_equal(df.target, result.target)
    self.assert_numpy_array_almost_equal(result.data.values[:, :40],
                                         expected[:, :40])
Example 21
def test_kernel_pca_linear_kernel():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    # for a linear kernel, kernel PCA should find the same projection as PCA
    # modulo the sign (direction)
    # fit only the first four components: fifth is near zero eigenvalue, so
    # can be trimmed due to roundoff error
    assert_array_almost_equal(
        np.abs(KernelPCA(4).fit(X_fit).transform(X_pred)),
        np.abs(PCA(4).fit(X_fit).transform(X_pred)))