Python源码示例:sklearn.decomposition.PCA
示例1
def gen_instance(self, max_length, dimension, test_mode=True, seed=0):
    """Draw one random point set and return it after PCA and rescaling.

    :param max_length: number of points (rows) to generate.
    :param dimension: number of integer coordinates per point; also used
        as the number of PCA components.
    :param test_mode: when it compares equal to ``True`` the raw integer
        coordinates are returned together with the transformed ones.
    :param seed: any value other than 0 re-seeds NumPy's global RNG first.
    :return: ``(input_, seq)`` in test mode, otherwise ``input_`` alone.
    """
    # A seed of 0 means "leave the global random state untouched".
    if seed != 0:
        np.random.seed(seed)

    # Integer coordinates, one row per point.
    seq = np.random.randint(100, size=(max_length, dimension))

    # Express the points in their principal-component basis, keeping all
    # `dimension` components.
    rotated = PCA(n_components=dimension).fit_transform(seq)

    # Rescale by the same constant used as the sampling bound above.
    input_ = rotated / 100

    # Equality test (not truthiness) is deliberate: it mirrors the
    # established behaviour for non-bool arguments.
    if test_mode == True:
        return input_, seq
    return input_
# Generate random batch for training procedure
示例2
def get_rot_rad(init_coorx, coory, z=50, coorW=1024, coorH=512, floorW=1024, floorH=512, tol=5):
    """Estimate a rotation from the dominant direction of each point group.

    Points are built from ``(column index, coory)`` pairs, mapped through
    ``np_coor2xy`` and split into groups by ``get_gpid``.  A one-component
    PCA is fitted per group and its direction is converted into a rotation
    suggestion by ``_get_rot_rad``.  The result is the mean of the longest
    run of sorted suggestions whose neighbouring gaps do not exceed ``tol``.

    :param init_coorx: sequence whose length gives the number of groups;
        also passed to ``get_gpid``.
    :param coory: 1-D array with one value per column, length ``coorW``.
    :param z, coorH, floorW, floorH: forwarded unchanged to ``np_coor2xy``.
    :param coorW: number of columns; also forwarded to the helpers.
    :param tol: largest gap between neighbouring sorted suggestions that
        still counts as the same run.
    :return: ``(dx, rot_rad)`` where ``dx`` is ``rot_rad`` rescaled by
        ``1024 / 360`` and rounded to an int.
    """
    gpid = get_gpid(init_coorx, coorW)
    coor = np.hstack([np.arange(coorW)[:, None], coory[:, None]])
    xy = np_coor2xy(coor, z, coorW, coorH, floorW, floorH)

    # One suggestion per group, taken from the first principal axis.
    rot_rad_suggestions = []
    for j in range(len(init_coorx)):
        pca = PCA(n_components=1)
        pca.fit(xy[gpid == j])
        rot_rad_suggestions.append(_get_rot_rad(*pca.components_[0]))

    # 1e9 is appended as a sentinel; it is presumed to sort last (assumes
    # real suggestions are far smaller) and is excluded by the [:-1] below.
    rot_rad_suggestions = np.sort(rot_rad_suggestions + [1e9])

    # Fallback: mean of all real suggestions.
    rot_rad = np.mean(rot_rad_suggestions[:-1])
    best_rot_rad_sz = -1
    last_j = 0
    for j in range(1, len(rot_rad_suggestions)):
        if rot_rad_suggestions[j] - rot_rad_suggestions[j-1] > tol:
            # Gap too large: a new run starts at j.
            last_j = j
        elif j - last_j > best_rot_rad_sz:
            # Strictly longer run than the best so far: adopt its mean.
            rot_rad = rot_rad_suggestions[last_j:j+1].mean()
            best_rot_rad_sz = j - last_j

    # NOTE(review): 1024 is hard-coded here rather than using coorW, and the
    # division by 360 suggests rot_rad is in degrees -- confirm both.
    dx = int(round(rot_rad * 1024 / 360))
    return dx, rot_rad
示例3
def pca(self, **kwargs):
    """Fit a scaler-then-PCA pipeline on this object's matrix.

    Recognised keyword arguments:
        n_components: forwarded to ``PCA``; defaults to 0.995.
        dates: if present, passed to ``self.to_matrix``.
        file: if present, the fitted pipeline is handed to ``tofile``
            together with this value.

    Returns the fitted pipeline, which is also kept on ``self._pipeline``.
    """
    n_components = kwargs.get('n_components', 0.995)
    mat = self.to_matrix(kwargs['dates']) if 'dates' in kwargs else self.to_matrix()

    steps = [('scaler', StandardScaler()), ('pca', PCA(n_components=n_components))]
    self._pipeline = Pipeline(steps)
    self._pipeline.fit(mat)

    if 'file' in kwargs:
        tofile(kwargs['file'], self._pipeline)
    return self._pipeline
示例4
def __init__(self,
             weighter=LengthNormalizer(),
             normalizer=StandardScaler(),
             selector=AssociationCompactor(1000, RankDifference),
             projector=PCA(2)):
    '''
    Store the four processing stages on the instance.

    :param weighter: instance of an sklearn class with fit_transform, used to weight the term X category corpus.
    :param normalizer: instance of an sklearn class with fit_transform, used to normalize the term X category corpus.
    :param selector: instance of a compactor class; if None, no compaction will be done.
    :param projector: instance of an sklearn class with fit_transform.
    '''
    # NOTE(review): the default objects are created once, when the function
    # is defined, so every instance built with defaults shares the same
    # weighter/normalizer/selector/projector objects.
    self.weighter_, self.normalizer_ = weighter, normalizer
    self.selector_, self.projector_ = selector, projector
示例5
def __init__(self, doc2vec_builder=None, projector=PCA(2)):
    '''
    :param doc2vec_builder: Doc2VecBuilder, optional
        If None, a default gensim Doc2Vec model is wrapped in a Doc2VecBuilder.
    :param projector: object
        Has fit_transform method
    :raises Exception: if no builder is given and gensim cannot be imported.
    '''
    if doc2vec_builder is None:
        try:
            import gensim
        except ImportError:
            # Only a failed import is translated; any other error raised
            # while importing gensim propagates unchanged.
            raise Exception("Please install gensim before using Doc2VecCategoryProjector/")
        self.doc2vec_builder = Doc2VecBuilder(
            gensim.models.Doc2Vec(vector_size=100, window=5, min_count=5, workers=6, alpha=0.025,
                                  min_alpha=0.025, epochs=50)
        )
    else:
        # isinstance also admits subclasses of Doc2VecBuilder.
        assert isinstance(doc2vec_builder, Doc2VecBuilder)
        self.doc2vec_builder = doc2vec_builder
    # NOTE(review): the default projector is created once at definition
    # time and is therefore shared by all instances that rely on it.
    self.projector = projector
示例6
def parse_args():
    """Build the command-line parser and return the parsed arguments."""
    parser = argparse.ArgumentParser(description='Feature extraction for RCC algorithm')

    # (flag, add_argument keyword arguments), registered in this order.
    option_specs = (
        ('--dataset', dict(default=None, type=str,
                           help='The entered dataset file must be in the Data folder')),
        ('--prep', dict(dest='prep', default='none', type=str,
                        help='preprocessing of data: scale,minmax,normalization,none')),
        ('--algo', dict(dest='algo', default='mknn', type=str,
                        help='Algorithm to use: knn,mknn')),
        ('--k', dict(dest='k', default=10, type=int,
                     help='Number of nearest neighbor to consider')),
        ('--pca', dict(dest='pca', default=None, type=int,
                       help='Dimension of PCA processing before kNN graph construction')),
        ('--samples', dict(dest='nsamples', default=0, type=int,
                           help='total samples to consider')),
        ('--format', dict(choices=['mat', 'pkl', 'h5'], default='mat', help='Dataset format')),
    )
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)

    return parser.parse_args()
示例7
def pca(features, n_components=2):
    """
    Embed feature vectors with PCA.

    Parameters
    ----------
    features: numpy.ndarray
        the input feature vectors.
    n_components: int
        number of components to keep in the embedding.

    Returns
    -------
    embedding: numpy.ndarray
        the result of fitting PCA on ``features`` and transforming them.
    """
    reducer = PCA(n_components=n_components)
    return reducer.fit_transform(features)
########################################################################################################################
示例8
def create_writer(self,
                  image_out_port: None) -> PcaTaskWriter:
    """
    Build the PcaTaskWriter for this object.

    Parameters
    ----------
    image_out_port : None
        Output port, not used.

    Returns
    -------
    pynpoint.util.multipca.PcaTaskWriter
        Writer constructed from the result queue, the four output ports,
        the data mutex and the requirements stored on ``self``.
    """
    writer = PcaTaskWriter(
        self.m_result_queue,
        self.m_mean_out_port,
        self.m_median_out_port,
        self.m_weighted_out_port,
        self.m_clip_out_port,
        self.m_data_mutex,
        self.m_requirements,
    )
    return writer
示例9
def init_creator(self,
                 image_in_port: None) -> PcaTaskCreator:
    """
    Build the PcaTaskCreator for this object.

    Parameters
    ----------
    image_in_port : None
        Input port, not used.

    Returns
    -------
    pynpoint.util.multipca.PcaTaskCreator
        Creator constructed from the task queue, the process count and the
        PCA numbers stored on ``self``.
    """
    creator = PcaTaskCreator(
        self.m_tasks_queue,
        self.m_num_proc,
        self.m_pca_numbers,
    )
    return creator
示例10
def vis(embed, vis_alg='PCA', pool_alg='REDUCE_MEAN'):
    """Scatter-plot each 2-D embedding in ``embed`` on a 2 x 6 subplot grid.

    Point colours come from the module-level ``subset_label``; the colour
    bar uses the module-level ``num_label``.  ``vis_alg`` and ``pool_alg``
    only appear in the figure title.  The figure is shown, nothing is
    returned.
    """
    plt.close()
    fig = plt.figure()
    # NOTE(review): this is set after the figure has been created -- confirm
    # it is meant to affect this figure rather than later ones.
    plt.rcParams['figure.figsize'] = [21, 7]

    palette = ListedColormap(["blue", "green", "yellow", "red"])
    for panel_no, points in enumerate(embed, start=1):
        ax = plt.subplot(2, 6, panel_no)
        plt.scatter(points[:, 0], points[:, 1], c=subset_label, cmap=palette, marker='.',
                    alpha=0.7, s=2)
        ax.set_title('pool_layer=-%d' % panel_no)

    plt.tight_layout()
    plt.subplots_adjust(bottom=0.1, right=0.95, top=0.9)

    # Dedicated axes on the right edge for the colour bar.
    cax = plt.axes([0.96, 0.1, 0.01, 0.3])
    cbar = plt.colorbar(cax=cax, ticks=range(num_label))
    cbar.ax.get_yaxis().set_ticks([])
    class_names = ['ent.', 'bus.', 'sci.', 'heal.']
    for slot, name in enumerate(class_names):
        # Labels are placed at 1/8, 3/8, 5/8 and 7/8 along the bar.
        cbar.ax.text(.5, (2 * slot + 1) / 8.0, name, ha='center', va='center', rotation=270)

    title = '%s visualization of BERT layers using "bert-as-service" (-pool_strategy=%s)' % (vis_alg, pool_alg)
    fig.suptitle(title, fontsize=14)
    plt.show()
示例11
def load_wemb(params, vocab):
    """Build the word-embedding matrix ``W`` and store it in ``params``.

    The pickled mapping at ``prm.wordemb_path`` (word -> vector) seeds a
    ``prm.n_words`` x ``dim`` matrix that starts as small Gaussian noise;
    rows of words present in both ``vocab`` and the mapping are overwritten
    with the stored vectors.  If ``prm.dim_emb`` is smaller than the stored
    dimensionality, the matrix is reduced with a whitening PCA.

    :param params: dict-like container; receives the key ``'W'``.
    :param vocab: mapping from word to row index in ``W``.
    :return: the same ``params`` object.
    """
    # NOTE: unpickling can execute arbitrary code; the path must point at a
    # trusted file.  The context manager closes the handle after loading.
    with open(prm.wordemb_path, 'rb') as emb_file:
        wemb = pkl.load(emb_file)

    # Take the dimensionality from any one stored vector; dict views are
    # not indexable on Python 3, so iterate instead of subscripting.
    dim_emb_orig = next(iter(wemb.values())).shape[0]

    W = 0.01 * np.random.randn(prm.n_words, dim_emb_orig).astype(config.floatX)
    for word, pos in vocab.items():
        if word in wemb:
            W[pos, :] = wemb[word]

    if prm.dim_emb < dim_emb_orig:
        pca = PCA(n_components=prm.dim_emb, copy=False, whiten=True)
        W = pca.fit_transform(W)

    params['W'] = W
    return params
示例12
def PCA(data, num_components=None):
    """Project ``data`` onto its leading principal components.

    :param data: array-like of shape (n_samples, n_features).  It is not
        modified; integer input is accepted.
    :param num_components: number of leading eigenvectors to keep; ``None``
        keeps all of them.
    :return: tuple ``(projected, eigenvalues, eigenvectors)`` where
        ``projected`` has one column per kept component, ``eigenvalues``
        holds all eigenvalues in decreasing order and ``eigenvectors`` holds
        the kept eigenvectors as columns.
    """
    # Mean-centre into a new array so the caller's data is left untouched
    # (this also promotes integer input to float).
    data = np.asarray(data)
    data = data - data.mean(axis=0)
    # Covariance matrix of the features; np.cov collapses the one-feature
    # case to a scalar, so restore the 2-D shape eigh requires.
    R = np.atleast_2d(np.cov(data, rowvar=False))
    # 'eigh' rather than 'eig' since R is symmetric.
    V, E = np.linalg.eigh(R)
    # Order eigenvalues, and their eigenvectors, by decreasing eigenvalue.
    idx = np.argsort(V)[::-1]
    V = V[idx]
    E = E[:, idx]
    # Keep only the first num_components eigenvectors.
    E = E[:, :num_components]
    # Transform the centred data with the kept eigenvectors.
    return np.dot(E.T, data.T).T, V, E
示例13
def Transform(self, data_container, store_folder='', store_key=''):
    """Apply the already-fitted PCA model to a data container.

    :param data_container: container exposing ``GetArray``; it is deep-copied
        and the copy receives the transformed array and new feature names.
    :param store_folder: if non-empty, the transformed container is passed
        to ``SaveDataContainer`` together with this folder.
    :param store_key: forwarded to ``SaveDataContainer``.
    :return: the new container holding the transformed features.
    """
    data = data_container.GetArray()
    model = self.GetModel()
    # A column-count mismatch is only reported here; the transform call
    # below is still attempted.
    if data.shape[1] != model.components_.shape[1]:
        print('Data can not be transformed by existed PCA')
    sub_data = model.transform(data)

    # Feature names are numbered from 1.
    sub_feature_name = ['PCA_feature_' + str(index) for index in
                        range(1, super(DimensionReductionByPCA, self).GetRemainedNumber() + 1)]

    new_data_container = deepcopy(data_container)
    new_data_container.SetArray(sub_data)
    new_data_container.SetFeatureName(sub_feature_name)
    new_data_container.UpdateFrameByData()

    if store_folder:
        # Persist the transformed container (the one being returned), not
        # the untransformed input.
        self.SaveDataContainer(new_data_container, store_folder, store_key)
    return new_data_container
示例14
def __init__(
    self,
    features: ndarray,
    algorithm: str = 'kmeans',
    pca_k: int = None,
    random_state: int = 12345
):
    """
    Store the clustering configuration, optionally reducing the features.

    :param features: the embedding matrix created by bert parent
    :param algorithm: which clustering algorithm to use
    :param pca_k: if truthy, the features are replaced by their PCA
        projection onto this many components
    :param random_state: random state
    """
    # A falsy pca_k (None or 0) keeps the features exactly as given.
    self.features = PCA(n_components=pca_k).fit_transform(features) if pca_k else features
    self.algorithm, self.pca_k, self.random_state = algorithm, pca_k, random_state
示例15
def fit(self, x):
    """ Fit PCA on ``x`` and keep the projection and variances.

    Parameters
    ----------
    x : ndarray, shape(n_samples, n_feat)
        Input matrix.

    Returns
    -------
    self : object
        The estimator itself, with ``maps_`` set to the transformed input
        and ``lambdas_`` set to the model's ``explained_variance_``.
    """
    decomposer = PCA(n_components=self.n_components, random_state=self.random_state)
    self.maps_ = decomposer.fit_transform(x)
    self.lambdas_ = decomposer.explained_variance_
    return self
示例16
def kmean_pca_batch(data, batch, k=10):
    """Score each row of ``batch`` against ``data`` in a 2-D PCA space.

    For every batch row, the row is appended to ``data``, a fresh
    2-component PCA is fitted on the combined array, and ``mle_single`` is
    called with the projected data rows, the projected batch row and ``k``.

    :return: 1-D array with one score per batch row.
    """
    data = np.asarray(data, dtype=np.float32)
    batch = np.asarray(batch, dtype=np.float32)

    scores = np.zeros(batch.shape[0])
    for row_no, row in enumerate(batch):
        combined = np.concatenate((data, [row]))
        projected = PCA(n_components=2).fit_transform(combined)
        # The last projected row is the batch sample; the rest are `data`.
        scores[row_no] = mle_single(projected[:-1], projected[-1], k=k)
    return scores
示例17
def getGFKDim(Xs, Xt):
    """Search subspace sizes for the first one meeting the angle criterion.

    PCA bases are fitted on ``Xs``, on ``Xt`` and on their row-wise stack.
    For d = 1 .. round(n_features / 2) the leading d directions of each are
    compared through ``getAngle``; the two results are averaged into ``D``
    and each ``D[1, dd]`` is checked for rounding to 1.00 (two decimals).

    NOTE(review): the indentation of this function was lost in the source
    listing.  This reading returns, at the first d with any hit, the index
    of the first hit inside that d, and falls through (implicit ``None``)
    when no d qualifies -- confirm against the upstream file.
    """
    source_basis = PCA().fit(Xs).components_.T
    target_basis = PCA().fit(Xt).components_.T
    joint_basis = PCA().fit(np.vstack((Xs, Xt))).components_.T

    max_dim = round(Xs.shape[1]*0.5)
    for d in range(1, max_dim+1):
        joint_d = joint_basis[:, :d]
        angle_source = getAngle(source_basis[:, :d], joint_d, d)
        angle_target = getAngle(target_basis[:, :d], joint_d, d)
        D = (angle_source + angle_target) * 0.5
        hits = [dd for dd in range(d) if round(D[1, dd]*100) == 100]
        if hits:
            return hits[0]
示例18
def PCA_map(Xs, Xt):
    """Project source and target rows with one PCA fitted on both.

    The number of leading columns kept is whatever ``getGFKDim`` returns
    for the two inputs.

    :return: ``(Xs_new, Xt_new)``, the projected rows of ``Xs`` and ``Xt``.
    """
    n_source = Xs.shape[0]
    dim = getGFKDim(Xs, Xt)
    stacked = np.vstack((Xs, Xt))
    projected = PCA().fit_transform(stacked)[:, :dim]
    # The first n_source rows came from Xs, the remainder from Xt.
    return projected[:n_source, :], projected[n_source:, :]
示例19
def classic(D, n_components=2, random_state=None):
    """Fast CMDS using random SVD

    Parameters
    ----------
    D : array-like, shape=[n_samples, n_samples]
        pairwise distances
    n_components : int, optional (default: 2)
        number of dimensions in which to embed `D`
    random_state : int, RandomState or None, optional (default: None)
        numpy random state

    Returns
    -------
    Y : array-like, embedded data [n_sample, ndim]
    """
    _logger.debug(
        "Performing classic MDS on {} of shape {}...".format(type(D).__name__, D.shape)
    )
    # Square the distances, then remove column means followed by row means.
    squared = D ** 2
    col_centered = squared - squared.mean(axis=0)[None, :]
    centered = col_centered - col_centered.mean(axis=1)[:, None]

    embedder = PCA(
        n_components=n_components, svd_solver="randomized", random_state=random_state
    )
    return embedder.fit_transform(centered)
示例20
def pca_feature(X, d):
    """Flatten each sample of ``X``, scale by 1/255 and reduce to ``d`` PCA components.

    :param X: array whose first axis indexes samples.
    :param d: number of PCA components.
    :return: the PCA-transformed, flattened samples.
    """
    scaled = X/255.
    from sklearn.decomposition import PCA

    # One row per sample; all remaining axes are merged into one.
    n_samples = scaled.shape[0]
    flat = np.reshape(scaled, (n_samples, np.prod(scaled.shape[1:])))
    return PCA(n_components=d).fit_transform(flat)
示例21
def pca_fit_and_filter_pixel_list(candidate_data, reference_data, parameters):
    ''' Fits a PCA on the valid pixels of a single band and filters them
    with the threshold taken from ``parameters``.

    :param list candidate_data: A list of valid candidate data
    :param list reference_data: A list of coincident valid reference data
    :param pca_options parameters: Method specific parameters. Currently:
        threshold (float): Representing the width of the PCA filter

    :returns: The result of ``_pca_filter_single_band`` for the fitted PCA
    '''
    threshold = parameters.threshold
    fitted_pca = _pca_fit_single_band(candidate_data, reference_data)
    pif_mask = _pca_filter_single_band(
        fitted_pca, candidate_data, reference_data, threshold)
    return pif_mask
示例22
def _pca_fit_single_band(cand_valid, ref_valid):
    ''' Fits a two-component scikit-learn PCA on the combined candidate and
    reference values and returns the fitted model.
    '''
    # Combine the two inputs into the single array the PCA is fitted on.
    points = _numpy_array_from_2arrays(cand_valid, ref_valid)

    model = PCA(n_components=2)
    model.fit(points)
    return model
示例23
def _pca_filter_single_band(pca, cand_valid, ref_valid, threshold):
    ''' Transforms the data with the fitted PCA and flags the values that
    lie within ``[-threshold, threshold]`` (bounds included).
    '''
    major_pca_values = _pca_transform_get_only_major_values(
        pca, cand_valid, ref_valid)

    lower_bound = threshold * -1
    above_lower = major_pca_values >= lower_bound
    below_upper = major_pca_values <= threshold
    return numpy.logical_and(above_lower, below_upper)
示例24
def PCA_tramsform_img(img=None, n_principle=3):
    """
    Transform an HSI with PCA along its last (spectral) axis.

    The PCA is fitted on all pixels at once, turning an image of size
    length * width * dim into one of size length * width * n_principle.

    Parameters:
    img:            initial, unregularized HSI.
    n_principle:    number of principal components to keep.

    Return:
    reg_img:        transformed image after ``scale_to_unit_interval``.
    energy_dist:    the PCA's ``explained_variance_ratio_``.
    residual:       1 minus the sum of ``energy_dist``.

    WARNING: RELATIVE ENERGY BETWEEN PRINCIPAL COMPONENTS IS CHANGED BY THIS
    IMPLEMENTATION. YOU MAY NEED TO ADD PENALTY MULTIPLIERS IN THE HIGHER
    NETWORKS TO COMPENSATE FOR IT.
    """
    length, width, dim = img.shape[0], img.shape[1], img.shape[2]

    # One row per pixel, spectral axis untouched.
    pixels = numpy.asarray(img.reshape(length*width, dim),
                           dtype=theano.config.floatX)

    pca = PCA(n_components=n_principle)
    pca_img = pca.fit_transform(pixels)

    # Rescale the projected values, then restore the image layout.
    scaled = scale_to_unit_interval(ndar=pca_img, eps=1e-8)
    reg_img = numpy.asarray(scaled.reshape(length, width, n_principle),
                            dtype=theano.config.floatX)

    energy_dist = pca.explained_variance_ratio_
    residual = 1 - numpy.sum(energy_dist[0: n_principle])
    return reg_img, energy_dist, residual
示例25
def retrieval(ref_descriptors, query_descriptors, max_num_nn, pca_dim=0):
    """Return, for each query descriptor, the indices of its nearest references.

    :param ref_descriptors: reference descriptors, one per row.
    :param query_descriptors: query descriptors, one per row.
    :param max_num_nn: number of neighbours requested from the KD-tree.
    :param pca_dim: if non-zero, both sets are normalized, projected with a
        PCA fitted on the references, and normalized again before the search.
    :return: the index array produced by the KD-tree query.
    """
    if pca_dim != 0:
        reducer = PCA(n_components=pca_dim)
        # The PCA is fitted on the references only and reused for the queries.
        ref_descriptors = normalize(reducer.fit_transform(normalize(ref_descriptors)))
        query_descriptors = normalize(reducer.transform(normalize(query_descriptors)))

    _, indices = cKDTree(ref_descriptors).query(query_descriptors, k=max_num_nn)
    return indices
示例26
def tsne_on_pca(arr, is_PCA=True):
    """
    Embed data in 2-D with t-SNE, optionally after a 50-component PCA.

    :param arr: (nr_examples, nr_features)
    :param is_PCA: when truthy, reduce ``arr`` with PCA before t-SNE
    :return: the 2-component t-SNE embedding
    """
    if is_PCA:
        arr = PCA(n_components=50).fit_transform(arr)
    return TSNE(n_components=2).fit_transform(arr)
示例27
def test_non_serializable_parameters(self):
    """Hyperparameters reported for a PCA + random-forest pipeline must be valid JSON."""
    steps = [('pca', PCA()), ('rf', RandomForestClassifier())]
    pipeline = Pipeline(steps)

    # Only the second element of the returned pair is checked.
    _performance, hyperparameters = functions.verify_estimator_class(
        pipeline,
        'predict_proba',
        {'Accuracy': self.source},
        self.dataset_properties,
    )

    assert functions.is_valid_json(hyperparameters)
示例28
def pca_components(X, dim):
    """Return normalized PCA directions of ``X`` as columns.

    ``X`` is reshaped to ``(len(X), dim)`` and a PCA with ``dim`` components
    is fitted on it.  The component matrix is transposed, passed through
    ``normalize(..., axis=0)`` and truncated to the first ``args.num_comp``
    columns (``args`` is read from the enclosing module).
    """
    n_samples = len(X)
    flat = X.reshape((n_samples, dim))

    model = PCA(n_components=dim)
    model.fit(flat)

    directions = normalize(model.components_.T, axis=0)
    return directions[:, :args.num_comp]
示例29
def pca_components(X, dim):
    """Return normalized PCA directions of ``X`` as columns.

    ``X`` is reshaped to ``(len(X), dim)`` and a PCA with ``dim`` components
    is fitted on it.  The component matrix is transposed, passed through
    ``normalize(..., axis=0)`` and truncated to the first ``args.num_comp``
    columns (``args`` is read from the enclosing module).
    """
    n_samples = len(X)
    flat = X.reshape((n_samples, dim))

    model = PCA(n_components=dim)
    model.fit(flat)

    directions = normalize(model.components_.T, axis=0)
    return directions[:, :args.num_comp]
示例30
def _pca(data, n_pcs):
    """Fit a PCA with ``n_pcs`` components on ``data`` and return ``data`` transformed by it."""
    from sklearn.decomposition import PCA

    model = PCA(n_components=n_pcs)
    model.fit(data)
    return model.transform(data)