Python source code examples: sklearn.datasets.make_circles()
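sklearn.datasets.make_circles() generates a 2D toy dataset: points on a large outer circle (label 0) and a smaller inner circle (label 1), optionally perturbed by Gaussian noise. Because the two classes are not linearly separable, it is a standard benchmark for kernel methods, clustering, and embeddings, which is how the project snippets below use it. Each snippet is excerpted from a larger codebase and assumes that codebase's imports (e.g. numpy as np and the relevant scikit-learn modules). As a minimal self-contained sketch of the basic call (the parameter values here are illustrative, not taken from any example below):

# Minimal usage sketch; parameter values are illustrative only.
from sklearn.datasets import make_circles

# X: (n_samples, 2) coordinates; y: 0 for the outer circle, 1 for the inner.
# `factor` is the inner/outer radius ratio, `noise` the Gaussian std dev.
X, y = make_circles(n_samples=100, factor=0.5, noise=0.05, random_state=0)
print(X.shape, y.shape)  # (100, 2) (100,)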
Example 1
def load_mini(N=1000):
    X, y = make_moons(N, noise=0.035, random_state=20)
    x_, y_ = make_circles(N, noise=0.02, random_state=20)
    x_[:, 1] += 2.0
    y_ += 2
    X = np.concatenate([X, x_], axis=0)
    y = np.concatenate([y, y_])
    X -= X.mean(0, keepdims=True)
    X /= X.max(0, keepdims=True)
    X = X.astype("float32")
    y = y.astype("int32")
    dict_init = [
        ("datum_shape", (2,)),
        ("n_classes", 4),
        ("name", "mini"),
        ("classes", [str(u) for u in range(4)]),
    ]
    # `Dataset` is a container class from the surrounding project,
    # not part of scikit-learn.
    dataset = Dataset(**dict(dict_init))
    dataset["inputs/train_set"] = X
    dataset["outputs/train_set"] = y
    return dataset
Example 2
def test_random_trees_dense_equal():
    # Test that the `sparse_output` parameter of RandomTreesEmbedding
    # works by returning the same array for both argument values.

    # Create the RTEs
    hasher_dense = RandomTreesEmbedding(n_estimators=10, sparse_output=False,
                                        random_state=0)
    hasher_sparse = RandomTreesEmbedding(n_estimators=10, sparse_output=True,
                                         random_state=0)
    X, y = datasets.make_circles(factor=0.5)
    X_transformed_dense = hasher_dense.fit_transform(X)
    X_transformed_sparse = hasher_sparse.fit_transform(X)

    # Assert that dense and sparse hashers have same array.
    assert_array_equal(X_transformed_sparse.toarray(), X_transformed_dense)
Example 3
def test_random_hasher():
    # Test random forest hashing on the circles dataset:
    # make sure that it is linearly separable, even after being
    # projected to two SVD dimensions.
    # Note: Not all random_states produce perfect results.
    hasher = RandomTreesEmbedding(n_estimators=30, random_state=1)
    X, y = datasets.make_circles(factor=0.5)
    X_transformed = hasher.fit_transform(X)

    # test fit and transform:
    hasher = RandomTreesEmbedding(n_estimators=30, random_state=1)
    assert_array_equal(hasher.fit(X).transform(X).toarray(),
                       X_transformed.toarray())

    # one leaf active per data point per forest
    assert_equal(X_transformed.shape[0], X.shape[0])
    assert_array_equal(X_transformed.sum(axis=1), hasher.n_estimators)
    svd = TruncatedSVD(n_components=2)
    X_reduced = svd.fit_transform(X_transformed)
    linear_clf = LinearSVC()
    linear_clf.fit(X_reduced, y)
    assert_equal(linear_clf.score(X_reduced, y), 1.)
Example 4
def test_make_circles():
    factor = 0.3

    for (n_samples, n_outer, n_inner) in [(7, 3, 4), (8, 4, 4)]:
        # Testing odd and even case, because in the past make_circles always
        # created an even number of samples.
        X, y = make_circles(n_samples, shuffle=False, noise=None,
                            factor=factor)
        assert_equal(X.shape, (n_samples, 2), "X shape mismatch")
        assert_equal(y.shape, (n_samples,), "y shape mismatch")
        center = [0.0, 0.0]
        for x, label in zip(X, y):
            dist_sqr = ((x - center) ** 2).sum()
            dist_exp = 1.0 if label == 0 else factor ** 2
            assert_almost_equal(dist_sqr, dist_exp,
                                err_msg="Point is not on expected circle")

        assert_equal(X[y == 0].shape, (n_outer, 2),
                     "Samples not correctly distributed across circles.")
        assert_equal(X[y == 1].shape, (n_inner, 2),
                     "Samples not correctly distributed across circles.")

    assert_raises(ValueError, make_circles, factor=-0.01)
    assert_raises(ValueError, make_circles, factor=1.)
Example 5
def test_gridsearch_pipeline_precomputed():
    # Test if we can do a grid-search to find parameters to separate
    # circles with a perceptron model using a precomputed kernel.
    X, y = make_circles(n_samples=400, factor=.3, noise=.05,
                        random_state=0)
    kpca = KernelPCA(kernel="precomputed", n_components=2)
    pipeline = Pipeline([("kernel_pca", kpca),
                         ("Perceptron", Perceptron(max_iter=5))])
    param_grid = dict(Perceptron__max_iter=np.arange(1, 5))
    grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid)
    X_kernel = rbf_kernel(X, gamma=2.)
    grid_search.fit(X_kernel, y)
    assert_equal(grid_search.best_score_, 1)
Example 6
def test_nested_circles():
    # Test the linear separability of the first 2D KPCA transform
    X, y = make_circles(n_samples=400, factor=.3, noise=.05,
                        random_state=0)

    # 2D nested circles are not linearly separable
    train_score = Perceptron(max_iter=5).fit(X, y).score(X, y)
    assert_less(train_score, 0.8)

    # Project the circles data into the first 2 components of an RBF Kernel
    # PCA model.
    # Note that the gamma value is data dependent. If this test breaks
    # and the gamma value has to be updated, the Kernel PCA example will
    # have to be updated too.
    kpca = KernelPCA(kernel="rbf", n_components=2,
                     fit_inverse_transform=True, gamma=2.)
    X_kpca = kpca.fit_transform(X)

    # The data is perfectly linearly separable in that space
    train_score = Perceptron(max_iter=5).fit(X_kpca, y).score(X_kpca, y)
    assert_equal(train_score, 1.0)
Example 7
def test_rbf_kernel(ax, cost):
    train_x, train_y = make_circles(
        n_samples=500, noise=0.1, factor=0.1, random_state=1
    )
    train_y[train_y == 0] = -1
    scaler = StandardScaler()
    train_x_scaled = scaler.fit_transform(train_x, train_y)
    train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled))
    mykernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
    mysvm = SmoSVM(
        train=train_data,
        kernel_func=mykernel,
        cost=cost,
        tolerance=0.001,
        auto_norm=False,
    )
    mysvm.fit()
    plot_partition_boundary(mysvm, train_data, ax=ax)
Example 8
def generateData(n):
    """Generate four standardized 2D toy datasets: blobs, circles,
    moons, and blocks.
    """
    np.random.seed(12046)
    blobs = make_blobs(n_samples=n, centers=[[-2, -2], [2, 2]])
    circles = make_circles(n_samples=n, factor=.4, noise=.05)
    moons = make_moons(n_samples=n, noise=.05)
    blocks = np.random.rand(n, 2) - 0.5
    y = (blocks[:, 0] * blocks[:, 1] < 0) + 0
    blocks = (blocks, y)
    # Neural networks are sensitive to linear transformations of the
    # inputs, so standardize each dataset.
    scaler = StandardScaler()
    blobs = (scaler.fit_transform(blobs[0]), blobs[1])
    circles = (scaler.fit_transform(circles[0]), circles[1])
    moons = (scaler.fit_transform(moons[0]), moons[1])
    blocks = (scaler.fit_transform(blocks[0]), blocks[1])
    return blobs, circles, moons, blocks
Example 9
def test_random_trees_dense_type():
    # Test that the `sparse_output` parameter of RandomTreesEmbedding
    # works by returning a dense array.

    # Create the RTE with sparse=False
    hasher = RandomTreesEmbedding(n_estimators=10, sparse_output=False)
    X, y = datasets.make_circles(factor=0.5)
    X_transformed = hasher.fit_transform(X)

    # Assert that type is ndarray, not scipy.sparse.csr.csr_matrix
    assert_equal(type(X_transformed), np.ndarray)
Example 10
def test_single_linkage_clustering():
    # Check that we get the correct result in two emblematic cases
    moons, moon_labels = make_moons(noise=0.05, random_state=42)
    clustering = AgglomerativeClustering(n_clusters=2, linkage='single')
    clustering.fit(moons)
    assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
                                                     moon_labels), 1)
    circles, circle_labels = make_circles(factor=0.5, noise=0.025,
                                          random_state=42)
    clustering = AgglomerativeClustering(n_clusters=2, linkage='single')
    clustering.fit(circles)
    assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
                                                     circle_labels), 1)
Example 11
def test_format_mapper_data(self, jinja_env):
    mapper = KeplerMapper()
    data, labels = make_circles(1000, random_state=0)
    lens = mapper.fit_transform(data, projection=[0])
    graph = mapper.map(lens, data)
    color_function = lens[:, 0]
    inverse_X = data
    projected_X = lens
    projected_X_names = ["projected_%s" % (i) for i in range(projected_X.shape[1])]
    inverse_X_names = ["inverse_%s" % (i) for i in range(inverse_X.shape[1])]
    custom_tooltips = np.array(["customized_%s" % (l) for l in labels])

    graph_data = format_mapper_data(
        graph,
        color_function,
        inverse_X,
        inverse_X_names,
        projected_X,
        projected_X_names,
        custom_tooltips,
        jinja_env,
    )
    # print(graph_data)
    # Dump to json so we can easily tell what's in it.
    graph_data = json.dumps(graph_data)
    # TODO test more properties!
    assert "name" in graph_data
    assert """cube2_cluster0""" in graph_data
    assert """projected_0""" in graph_data
    assert """inverse_0""" in graph_data
    assert """customized_""" in graph_data
Example 12
def test_complete_pipeline(self, CoverClass):
    # TODO: add a mock that asserts the cover was called appropriately,
    # or test the number of cubes, etc.
    data, _ = datasets.make_circles()
    data = data.astype(np.float64)
    mapper = KeplerMapper()
    graph = mapper.map(data, cover=CoverClass())
    mapper.visualize(graph)
Example 13
def make_two_rings(num_samples):
    samples, labels = make_circles(num_samples, shuffle=True, noise=None,
                                   random_state=None, factor=0.6)
    return samples
Example 14
def generate_data(n_samples, dataset, noise):
    if dataset == 'moons':
        return datasets.make_moons(
            n_samples=n_samples,
            noise=noise,
            random_state=0
        )
    elif dataset == 'circles':
        return datasets.make_circles(
            n_samples=n_samples,
            noise=noise,
            factor=0.5,
            random_state=1
        )
    elif dataset == 'linear':
        X, y = datasets.make_classification(
            n_samples=n_samples,
            n_features=2,
            n_redundant=0,
            n_informative=2,
            random_state=2,
            n_clusters_per_class=1
        )
        rng = np.random.RandomState(2)
        X += noise * rng.uniform(size=X.shape)
        linearly_separable = (X, y)
        return linearly_separable
    else:
        raise ValueError(
            'Data type incorrectly specified. Please choose an existing '
            'dataset.')
Example 15
def rand_ring2d(batch_size):
    """Generate 2D samples from a hollowed-circle (ring) distribution.

    Args:
        batch_size (int): number of batch samples

    Return:
        torch.Tensor: tensor of size (batch_size, 2)
    """
    circles = make_circles(2 * batch_size, noise=.01)
    # Keep only the outer circle (label 0), i.e. batch_size of the
    # 2 * batch_size generated points.
    z = np.squeeze(circles[0][np.argwhere(circles[1] == 0), :])
    return torch.from_numpy(z).type(torch.FloatTensor)
Example 16
def test_n_clusters_circles(self):
    """ Tests that DBSCAN finds the correct number of clusters with
    circle data.
    """
    n_samples = 1500
    x, y = make_circles(n_samples=n_samples, factor=.5, noise=.05)
    dbscan = DBSCAN(n_regions=1, eps=.15)
    x = StandardScaler().fit_transform(x)
    ds_x = ds.array(x, block_size=(300, 2))
    dbscan.fit(ds_x)
    self.assertEqual(dbscan.n_clusters, 2)
Example 17
def test_n_clusters_circles_max_samples(self):
    """ Tests that DBSCAN finds the correct number of clusters when
    defining max_samples with circle data.
    """
    n_samples = 1500
    x, y = make_circles(n_samples=n_samples, factor=.5, noise=.05)
    dbscan = DBSCAN(n_regions=1, eps=.15, max_samples=500)
    x = StandardScaler().fit_transform(x)
    ds_x = ds.array(x, block_size=(300, 2))
    dbscan.fit(ds_x)
    self.assertEqual(dbscan.n_clusters, 2)
Example 18
def test_n_clusters_circles_grid(self):
    """ Tests that DBSCAN finds the correct number of clusters when
    setting n_regions > 1 with circle data.
    """
    n_samples = 1500
    x, y = make_circles(n_samples=n_samples, factor=.5, noise=.05)
    dbscan = DBSCAN(n_regions=4, eps=.15, max_samples=700)
    x = StandardScaler().fit_transform(x)
    ds_x = ds.array(x, block_size=(300, 2))
    dbscan.fit(ds_x)
    self.assertEqual(dbscan.n_clusters, 2)
Example 19
def generateCircles(n):
    """Generate concentric-circle data."""
    data, _ = make_circles(n_samples=n, factor=0.5, noise=0.06)
    return data
Example 20
def test_gridsearch_pipeline():
    # Test if we can do a grid-search to find parameters to separate
    # circles with a perceptron model.
    X, y = make_circles(n_samples=400, factor=.3, noise=.05,
                        random_state=0)
    kpca = KernelPCA(kernel="rbf", n_components=2)
    pipeline = Pipeline([("kernel_pca", kpca),
                         ("Perceptron", Perceptron(max_iter=5))])
    param_grid = dict(kernel_pca__gamma=2. ** np.arange(-2, 2))
    grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid)
    grid_search.fit(X, y)
    assert_equal(grid_search.best_score_, 1)
Example 21
def cluster_data():
    np.random.seed(0)

    # ============
    # Generate datasets. We choose the size big enough to see the scalability
    # of the algorithms, but not too big to avoid too long running times
    # ============
    n_samples = 1500
    noisy_circles = datasets.make_circles(n_samples=n_samples, factor=.5,
                                          noise=.05)
    noisy_moons = datasets.make_moons(n_samples=n_samples, noise=.05)
    blobs = datasets.make_blobs(n_samples=n_samples, random_state=8)

    # Anisotropically distributed data
    random_state = 170
    X, y = datasets.make_blobs(n_samples=n_samples, random_state=random_state)
    transformation = [[0.6, -0.6], [-0.4, 0.8]]
    X_aniso = np.dot(X, transformation)
    aniso = (X_aniso, y)

    # blobs with varied variances
    varied = datasets.make_blobs(n_samples=n_samples,
                                 cluster_std=[1.0, 2.5, 0.5],
                                 random_state=random_state)

    default_base = {'quantile': .3,
                    'eps': .3,
                    'damping': .9,
                    'preference': -200,
                    'n_neighbors': 10,
                    'n_clusters': 3,
                    'min_samples': 20,
                    'xi': 0.05,
                    'min_cluster_size': 0.1}

    data = [
        ('noisy_circles', noisy_circles, {'damping': .77, 'preference': -240,
                                          'quantile': .2, 'n_clusters': 2,
                                          'min_samples': 20, 'xi': 0.25}),
        ('noisy_moons', noisy_moons, {'damping': .75, 'preference': -220,
                                      'n_clusters': 2}),
        ('varied', varied, {'eps': .18, 'n_neighbors': 2,
                            'min_samples': 5, 'xi': 0.035,
                            'min_cluster_size': .2}),
        ('aniso', aniso, {'eps': .15, 'n_neighbors': 2,
                          'min_samples': 20, 'xi': 0.1,
                          'min_cluster_size': .2}),
        ('blobs', blobs, {}),
    ]
    yield data, default_base
Example 22
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--save', type=str, default='work')
    parser.add_argument('--nEpoch', type=int, default=100)
    # parser.add_argument('--testBatchSz', type=int, default=2048)
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--model', type=str, default="picnn",
                        choices=['picnn', 'ficnn'])
    parser.add_argument('--dataset', type=str, default="moons",
                        choices=['moons', 'circles', 'linear'])
    parser.add_argument('--noncvx', action='store_true')
    args = parser.parse_args()

    npr.seed(args.seed)
    tf.set_random_seed(args.seed)

    setproctitle.setproctitle('bamos.icnn.synthetic.{}.{}'.format(
        args.model, args.dataset))

    save = os.path.join(os.path.expanduser(args.save),
                        "{}.{}".format(args.model, args.dataset))
    if os.path.isdir(save):
        shutil.rmtree(save)
    os.makedirs(save, exist_ok=True)

    if args.dataset == "moons":
        (dataX, dataY) = make_moons(noise=0.3, random_state=0)
    elif args.dataset == "circles":
        (dataX, dataY) = make_circles(noise=0.2, factor=0.5, random_state=0)
        dataY = 1. - dataY
    elif args.dataset == "linear":
        (dataX, dataY) = make_classification(n_features=2, n_redundant=0,
                                             n_informative=2, random_state=1,
                                             n_clusters_per_class=1)
        rng = np.random.RandomState(2)
        dataX += 2 * rng.uniform(size=dataX.shape)
    else:
        assert False

    dataY = dataY.reshape((-1, 1)).astype(np.float32)

    nData = dataX.shape[0]
    nFeatures = dataX.shape[1]
    nLabels = 1
    nXy = nFeatures + nLabels

    config = tf.ConfigProto()  # log_device_placement=False
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model = Model(nFeatures, nLabels, sess, args.model, nGdIter=30)
        model.train(args, dataX, dataY)