Python source code examples: sklearn.datasets.make_moons()
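Before the project examples, a minimal self-contained sketch of the make_moons API itself (return shapes and dtypes follow the scikit-learn documentation; the plot is only illustrative):

import matplotlib.pyplot as plt
from sklearn.datasets import make_moons

# X is an (n_samples, 2) float array; y holds integer labels 0 and 1,
# one per interleaved half-circle.
X, y = make_moons(n_samples=200, noise=0.1, random_state=0)
print(X.shape, y.shape)  # (200, 2) (200,)
plt.scatter(X[:, 0], X[:, 1], c=y, s=10)
plt.title("make_moons(n_samples=200, noise=0.1)")
plt.show()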
Example 1
import numpy as np
from sklearn.datasets import make_moons, make_circles

def load_mini(N=1000):
    # Dataset is an external container class from the surrounding project.
    X, y = make_moons(N, noise=0.035, random_state=20)
    x_, y_ = make_circles(N, noise=0.02, random_state=20)
    x_[:, 1] += 2.0   # shift the circles above the moons
    y_ += 2           # circle classes become 2 and 3
    X = np.concatenate([X, x_], axis=0)
    y = np.concatenate([y, y_])
    X -= X.mean(0, keepdims=True)
    X /= X.max(0, keepdims=True)
    X = X.astype("float32")
    y = y.astype("int32")
    dict_init = [
        ("datum_shape", (2,)),
        ("n_classes", 4),
        ("name", "mini"),
        ("classes", [str(u) for u in range(4)]),
    ]
    dataset = Dataset(**dict(dict_init))
    dataset["inputs/train_set"] = X
    dataset["outputs/train_set"] = y
    return dataset
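Dataset here is not part of scikit-learn; it is a container class from the surrounding project. Judging only from how it is used above, a hypothetical call site would look like:

dataset = load_mini(N=1000)
X = dataset["inputs/train_set"]   # (2000, 2) float32: N moons + N circles
y = dataset["outputs/train_set"]  # (2000,) int32 labels in {0, 1, 2, 3}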
Example 2
def test_classifier_comparison():
    """Test that the classifier comparison example works."""
    X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                               random_state=1, n_clusters_per_class=1)
    rng = np.random.RandomState(2)
    X += 2 * rng.uniform(size=X.shape)
    linearly_separable = (X, y)
    datasets = [make_moons(noise=0.3, random_state=0),
                make_circles(noise=0.2, factor=0.5, random_state=1),
                linearly_separable]
    scores = []
    for ds in datasets:
        X, y = ds
        X = StandardScaler().fit_transform(X)
        X_train, X_test, y_train, y_test = \
            train_test_split(X, y, test_size=.4, random_state=42)
        clf = SymbolicClassifier(random_state=0)
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)
        scores.append(('%.2f' % score).lstrip('0'))
    assert_equal(scores, ['.95', '.93', '.95'])
Example 3
def generateData(n):
    """
    Generate four toy 2-D datasets: blobs, circles, moons, and an
    XOR-style block pattern.
    """
    np.random.seed(12046)
    blobs = make_blobs(n_samples=n, centers=[[-2, -2], [2, 2]])
    circles = make_circles(n_samples=n, factor=.4, noise=.05)
    moons = make_moons(n_samples=n, noise=.05)
    blocks = np.random.rand(n, 2) - 0.5
    y = (blocks[:, 0] * blocks[:, 1] < 0) + 0
    blocks = (blocks, y)
    # Neural networks are sensitive to linear transformations of the data,
    # so standardize each dataset.
    scaler = StandardScaler()
    blobs = (scaler.fit_transform(blobs[0]), blobs[1])
    circles = (scaler.fit_transform(circles[0]), circles[1])
    moons = (scaler.fit_transform(moons[0]), moons[1])
    blocks = (scaler.fit_transform(blocks[0]), blocks[1])
    return blobs, circles, moons, blocks
Example 4
def runKernelPCA():
    """
    Reduce the dimensionality of the data with kernel PCA.
    """
    data, labels = make_moons(n_samples=100, noise=0.05)
    fig = plt.figure(figsize=(10, 10), dpi=80)
    # Visualize the raw data
    ax = fig.add_subplot(2, 2, 1)
    visualizeKernelPCA(ax, data, labels)
    # Reduce the data with plain PCA and visualize the result
    ax = fig.add_subplot(2, 2, 2)
    model = trainPCA(data)
    x = model.transform(data)[:, 0]
    visualizeKernelPCA(ax, np.c_[x, [0] * len(x)], labels)
    # Reduce the data with kernel PCA and visualize the result
    ax = fig.add_subplot(2, 2, 3)
    model = trainKernelPCA(data)
    x = model.transform(data)[:, 0]
    visualizeKernelPCA(ax, np.c_[x, [0] * len(x)], labels)
    # Show the data projected onto the first two kernel PCA components
    ax = fig.add_subplot(2, 2, 4)
    visualizeKernelPCA(ax, model.transform(data), labels)
    plt.show()
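The helpers trainPCA, trainKernelPCA, and visualizeKernelPCA are not shown in the snippet. A minimal sketch of compatible implementations, assuming plain PCA versus RBF-kernel PCA from scikit-learn (the gamma value is an assumption, chosen as in the classic moons demo):

from sklearn.decomposition import PCA, KernelPCA

def trainPCA(data):
    # Linear PCA: cannot unfold the moons, so the 1-D projection overlaps.
    return PCA(n_components=2).fit(data)

def trainKernelPCA(data):
    # RBF-kernel PCA: the first component separates the two moons.
    return KernelPCA(n_components=2, kernel="rbf", gamma=15).fit(data)

def visualizeKernelPCA(ax, data, labels):
    # Scatter the (possibly projected) points, colored by class label.
    ax.scatter(data[:, 0], data[:, 1], c=labels, s=12)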
Example 5
def num_observations():
    # build_model and train are helpers defined elsewhere in the source
    # project; a compatible sketch appears after Example 8.
    obs_values = [10, 100, 1000]
    nn_input_dim = 2      # input layer dimensionality
    nn_output_dim = 2     # output layer dimensionality
    learning_rate = 0.01  # learning rate for gradient descent
    reg_lambda = 0.01     # regularization strength
    losses_store = []
    for i in obs_values:
        X, y = datasets.make_moons(i, noise=0.1)
        num_examples = len(X)  # training set size
        model = build_model(X, 32, 2)
        model, losses = train(model, X, y, reg_lambda=reg_lambda, learning_rate=learning_rate)
        losses_store.append(losses)
        print(losses)
    x = np.linspace(0, 145, 30)
    for i in range(len(losses_store)):
        lab = 'n_observations = ' + str(obs_values[i])
        plt.plot(x, losses_store[i], label=lab)
    plt.legend()
    plt.show()
Example 6
def noise():
    noise_values = [0.01, 0.1, 0.2, 0.3, 0.4]
    nn_input_dim = 2      # input layer dimensionality
    nn_output_dim = 2     # output layer dimensionality
    learning_rate = 0.01  # learning rate for gradient descent
    reg_lambda = 0.01     # regularization strength
    losses_store = []
    for i in noise_values:
        X, y = datasets.make_moons(200, noise=i)
        num_examples = len(X)  # training set size
        model = build_model(X, 32, 2)
        model, losses = train(model, X, y, reg_lambda=reg_lambda, learning_rate=learning_rate)
        losses_store.append(losses)
        print(losses)
    x = np.linspace(0, 145, 30)
    for i in range(len(losses_store)):
        lab = 'noise_value = ' + str(noise_values[i])
        plt.plot(x, losses_store[i], label=lab)
    plt.legend()
    plt.show()
Example 7
def reg():
    reg_values = [0.00, 0.01, 0.1, 0.2, 0.3]
    nn_input_dim = 2      # input layer dimensionality
    nn_output_dim = 2     # output layer dimensionality
    learning_rate = 0.01  # learning rate for gradient descent
    losses_store = []
    for i in reg_values:
        reg_lambda = i  # regularization strength
        X, y = datasets.make_moons(200, noise=0.2)
        num_examples = len(X)  # training set size
        model = build_model(X, 32, 2)
        model, losses = train(model, X, y, reg_lambda=reg_lambda, learning_rate=learning_rate)
        losses_store.append(losses)
        print(losses)
    x = np.linspace(0, 145, 30)
    for i in range(len(losses_store)):
        lab = 'regularization_value = ' + str(reg_values[i])
        plt.plot(x, losses_store[i], label=lab)
    plt.legend()
    plt.show()
Example 8
def test_num_nodes():
    X, y = datasets.make_moons(400, noise=0.2)
    num_examples = len(X)  # training set size
    nn_input_dim = 2      # input layer dimensionality
    nn_output_dim = 2     # output layer dimensionality
    learning_rate = 0.01  # learning rate for gradient descent
    reg_lambda = 0.01     # regularization strength
    node_vals = [4, 8, 16, 32, 64, 128]
    losses_store = []
    for val in node_vals:
        model = build_model(X, val, 2)
        model, losses = train(model, X, y, reg_lambda=reg_lambda, learning_rate=learning_rate)
        losses_store.append(losses)
        print(losses)
    x = np.linspace(0, 145, 30)
    for i in range(len(losses_store)):
        lab = 'n_nodes = ' + str(node_vals[i])
        plt.plot(x, losses_store[i], label=lab)
    plt.legend()
    plt.show()
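Examples 5 through 8 (and Example 24) call build_model, train, and feed_forward without defining them. A minimal compatible sketch, assuming the classic one-hidden-layer tanh network with softmax output from the well-known moons tutorial; the loss-recording interval is an assumption chosen to match the 30-point np.linspace(0, 145, 30) plots above:

import numpy as np

def build_model(X, hidden_dim, output_dim, seed=0):
    # Small random initialization, scaled by the fan-in.
    rng = np.random.RandomState(seed)
    input_dim = X.shape[1]
    return {'W1': rng.randn(input_dim, hidden_dim) / np.sqrt(input_dim),
            'b1': np.zeros(hidden_dim),
            'W2': rng.randn(hidden_dim, output_dim) / np.sqrt(hidden_dim),
            'b2': np.zeros(output_dim)}

def feed_forward(model, X):
    z1 = X @ model['W1'] + model['b1']
    a1 = np.tanh(z1)
    z2 = a1 @ model['W2'] + model['b2']
    exp_z = np.exp(z2 - z2.max(axis=1, keepdims=True))
    probs = exp_z / exp_z.sum(axis=1, keepdims=True)
    return z1, a1, z2, probs  # output[3] is the softmax, as used in Example 24

def train(model, X, y, reg_lambda=0.01, learning_rate=0.01, n_iter=150):
    losses = []
    for it in range(n_iter):
        z1, a1, z2, probs = feed_forward(model, X)
        if it % 5 == 0:  # record 30 loss values over 150 iterations
            losses.append(-np.log(probs[np.arange(len(y)), y]).mean())
        # Backpropagate the cross-entropy loss with L2 regularization.
        delta3 = probs.copy()
        delta3[np.arange(len(y)), y] -= 1
        dW2 = a1.T @ delta3 + reg_lambda * model['W2']
        db2 = delta3.sum(axis=0)
        delta2 = (delta3 @ model['W2'].T) * (1 - a1 ** 2)
        dW1 = X.T @ delta2 + reg_lambda * model['W1']
        db1 = delta2.sum(axis=0)
        for name, grad in zip(('W1', 'b1', 'W2', 'b2'), (dW1, db1, dW2, db2)):
            model[name] -= learning_rate * grad
    return model, losses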
Example 9
def load_data():
    x = ds.make_moons(n_samples=30000, shuffle=True, noise=0.05)[0]
    return x[:24000], x[24000:27000], x[27000:]
Example 10
def test_single_linkage_clustering():
    # Check that we get the correct result in two emblematic cases
    moons, moon_labels = make_moons(noise=0.05, random_state=42)
    clustering = AgglomerativeClustering(n_clusters=2, linkage='single')
    clustering.fit(moons)
    assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
                                                     moon_labels), 1)
    circles, circle_labels = make_circles(factor=0.5, noise=0.025,
                                          random_state=42)
    clustering = AgglomerativeClustering(n_clusters=2, linkage='single')
    clustering.fit(circles)
    assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
                                                     circle_labels), 1)
Example 11
def test_make_moons():
    X, y = make_moons(3, shuffle=False)
    for x, label in zip(X, y):
        center = [0.0, 0.0] if label == 0 else [1.0, 0.5]
        dist_sqr = ((x - center) ** 2).sum()
        assert_almost_equal(dist_sqr, 1.0,
                            err_msg="Point is not on expected unit circle")
Example 12
def test_as_classifier():
    X, y = make_moons(n_samples=100, random_state=1)
    y = 2 * y - 1  # use -1/+1 labels
    clf = as_classifier(DecisionTreeRegressor())
    clf.fit(X, y)
    probas = clf.predict_proba(X)
    predictions = clf.predict(X)
    assert_array_equal(probas.shape, (len(X), 2))
    assert_array_equal(predictions, y)
    y[-1] = 2
    clf = as_classifier(DecisionTreeRegressor())
    assert_raises(ValueError, clf.fit, X, y)
Example 13
def _download():
    # Note: all three calls share random_state=1234, so the train, test,
    # and validation sets are identical draws.
    train_x, train_t = make_moons(n_samples=10000, shuffle=True, noise=0.2, random_state=1234)
    test_x, test_t = make_moons(n_samples=10000, shuffle=True, noise=0.2, random_state=1234)
    valid_x, valid_t = make_moons(n_samples=10000, shuffle=True, noise=0.2, random_state=1234)
    train_x += np.abs(train_x.min())
    test_x += np.abs(test_x.min())
    valid_x += np.abs(valid_x.min())
    train_set = (train_x, train_t)
    test_set = (test_x, test_t)
    valid_set = (valid_x, valid_t)
    return train_set, test_set, valid_set
Example 14
def generate_data(n_samples, dataset, noise):
    if dataset == 'moons':
        return datasets.make_moons(
            n_samples=n_samples,
            noise=noise,
            random_state=0
        )
    elif dataset == 'circles':
        return datasets.make_circles(
            n_samples=n_samples,
            noise=noise,
            factor=0.5,
            random_state=1
        )
    elif dataset == 'linear':
        X, y = datasets.make_classification(
            n_samples=n_samples,
            n_features=2,
            n_redundant=0,
            n_informative=2,
            random_state=2,
            n_clusters_per_class=1
        )
        rng = np.random.RandomState(2)
        X += noise * rng.uniform(size=X.shape)
        linearly_separable = (X, y)
        return linearly_separable
    else:
        raise ValueError(
            'Data type incorrectly specified. Please choose an existing '
            'dataset.')
Example 15
def generate_data(n_samples=300, noise=0.05):
    noisy_moons = datasets.make_moons(n_samples=n_samples, noise=noise)
    X = noisy_moons[0]
    return X
Example 16
def data():
    n_samples = 60
    noisy_moons = datasets.make_moons(n_samples=n_samples, noise=.05,
                                      random_state=1)
    X = noisy_moons[0]
    return X
Example 17
def test_n_clusters_moons(self):
    """ Tests that DBSCAN finds the correct number of clusters with
    moon data.
    """
    n_samples = 1500
    x, y = make_moons(n_samples=n_samples, noise=.05)
    dbscan = DBSCAN(n_regions=1, eps=.3)
    x = StandardScaler().fit_transform(x)
    ds_x = ds.array(x, block_size=(300, 2))
    dbscan.fit(ds_x)
    self.assertEqual(dbscan.n_clusters, 2)
Example 18
def test_n_clusters_moons_max_samples(self):
    """ Tests that DBSCAN finds the correct number of clusters when
    defining max_samples with moon data.
    """
    n_samples = 1500
    x, y = make_moons(n_samples=n_samples, noise=.05)
    dbscan = DBSCAN(n_regions=1, eps=.3, max_samples=500)
    x = StandardScaler().fit_transform(x)
    ds_x = ds.array(x, block_size=(300, 2))
    dbscan.fit(ds_x)
    self.assertEqual(dbscan.n_clusters, 2)
Example 19
def test_n_clusters_moons_grid(self):
    """ Tests that DBSCAN finds the correct number of clusters when
    setting n_regions > 1 with moon data.
    """
    n_samples = 1500
    x, y = make_moons(n_samples=n_samples, noise=.05)
    dbscan = DBSCAN(n_regions=4, eps=.3, max_samples=600)
    x = StandardScaler().fit_transform(x)
    ds_x = ds.array(x, block_size=(300, 2))
    dbscan.fit(ds_x)
    self.assertEqual(dbscan.n_clusters, 2)
Example 20
def make_trans_moons(theta=40, nb=100, noise=.05):
    from math import cos, sin, pi
    X, y = make_moons(nb, noise=noise, random_state=1)
    Xt, yt = make_moons(nb, noise=noise, random_state=2)
    trans = -np.mean(X, axis=0)
    X = 2 * (X + trans)
    Xt = 2 * (Xt + trans)
    theta = -theta * pi / 180
    rotation = np.array([[cos(theta), sin(theta)], [-sin(theta), cos(theta)]])
    Xt = np.dot(Xt, rotation.T)
    return X, y, Xt, yt
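An illustrative call, to make the geometry concrete: the target set (Xt, yt) is the centered source distribution rotated by theta degrees, a toy domain-adaptation setup:

import matplotlib.pyplot as plt

X, y, Xt, yt = make_trans_moons(theta=40, nb=100)
plt.scatter(X[:, 0], X[:, 1], c=y, marker="o", label="source")
plt.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker="x", label="target")
plt.legend()
plt.show()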
Example 21
def generateMoons(n):
    """
    Generate moon-shaped data.
    """
    data, _ = make_moons(n_samples=n, noise=0.08)
    return data
Example 22
def generateData(n):
    """
    Generate linear and nonlinear data.
    """
    x = np.linspace(-5, 5, n)
    error = np.random.randn(n)
    y = 1 * x + error
    linear = np.c_[x, y]
    nonLinear, _ = make_moons(n_samples=n, noise=0.05)
    return linear, nonLinear
Example 23
def generateData(n):
    """
    Randomly generate training data.
    """
    X, Y = make_moons(n_samples=n, noise=0.05, random_state=2046)
    data = np.concatenate((Y.reshape(-1, 1), X), axis=1)
    data = pd.DataFrame(data, columns=["y", "x1", "x2"])
    return data
Example 24
def main():
    # toy dataset
    X, y = datasets.make_moons(16, noise=0.10)
    num_examples = len(X)  # training set size
    nn_input_dim = 2      # input layer dimensionality
    nn_output_dim = 2     # output layer dimensionality
    learning_rate = 0.01  # learning rate for gradient descent
    reg_lambda = 0.01     # regularization strength
    model = build_model(X, 20, 2)
    model, losses = train(model, X, y, reg_lambda=reg_lambda, learning_rate=learning_rate)
    output = feed_forward(model, X)
    preds = np.argmax(output[3], axis=1)
Example 25
def main():
    # Load the dataset
    X, y = datasets.make_moons(n_samples=300, noise=0.08, shuffle=False)
    # Cluster the data using DBSCAN
    clf = DBSCAN(eps=0.17, min_samples=5)
    y_pred = clf.predict(X)
    # Project the data onto the 2 primary principal components
    p = Plot()
    p.plot_in_2d(X, y_pred, title="DBSCAN")
    p.plot_in_2d(X, y, title="Actual Clustering")
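Note that the DBSCAN above exposes a predict method, so it comes from a from-scratch library rather than scikit-learn, whose DBSCAN has no predict. The scikit-learn equivalent of the clustering step would be:

from sklearn import datasets
from sklearn.cluster import DBSCAN

X, y = datasets.make_moons(n_samples=300, noise=0.08, shuffle=False)
y_pred = DBSCAN(eps=0.17, min_samples=5).fit_predict(X)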
Example 26
def cluster_data():
    np.random.seed(0)
    # ============
    # Generate datasets. We choose the size big enough to see the scalability
    # of the algorithms, but not too big to avoid too long running times
    # ============
    n_samples = 1500
    noisy_circles = datasets.make_circles(n_samples=n_samples, factor=.5,
                                          noise=.05)
    noisy_moons = datasets.make_moons(n_samples=n_samples, noise=.05)
    blobs = datasets.make_blobs(n_samples=n_samples, random_state=8)
    # Anisotropically distributed data
    random_state = 170
    X, y = datasets.make_blobs(n_samples=n_samples, random_state=random_state)
    transformation = [[0.6, -0.6], [-0.4, 0.8]]
    X_aniso = np.dot(X, transformation)
    aniso = (X_aniso, y)
    # Blobs with varied variances
    varied = datasets.make_blobs(n_samples=n_samples,
                                 cluster_std=[1.0, 2.5, 0.5],
                                 random_state=random_state)
    default_base = {'quantile': .3,
                    'eps': .3,
                    'damping': .9,
                    'preference': -200,
                    'n_neighbors': 10,
                    'n_clusters': 3,
                    'min_samples': 20,
                    'xi': 0.05,
                    'min_cluster_size': 0.1}
    data = [
        ('noisy_circles', noisy_circles, {'damping': .77, 'preference': -240,
                                          'quantile': .2, 'n_clusters': 2,
                                          'min_samples': 20, 'xi': 0.25}),
        ('noisy_moons', noisy_moons, {'damping': .75, 'preference': -220, 'n_clusters': 2}),
        ('varied', varied, {'eps': .18, 'n_neighbors': 2,
                            'min_samples': 5, 'xi': 0.035, 'min_cluster_size': .2}),
        ('aniso', aniso, {'eps': .15, 'n_neighbors': 2,
                          'min_samples': 20, 'xi': 0.1, 'min_cluster_size': .2}),
        ('blobs', blobs, {}),
    ]
    yield data, default_base
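A sketch of consuming the generator above, merging each dataset's parameter overrides into the shared defaults (the loop-variable names are illustrative):

for data, default_base in cluster_data():
    for name, (X, y), overrides in data:
        params = dict(default_base, **overrides)
        print(name, X.shape, params['n_clusters'])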
Example 27
def test_plain_lr():
    from sklearn.datasets import make_moons
    import functools
    # Change the flow_id, otherwise the in-memory table may be overwritten.
    session.init(mode=0)
    ns = str(uuid.uuid1())
    X = session.table('testX7', ns, partition=2)
    Y = session.table('testY7', ns, partition=2)
    b = np.array([0])
    eta = 1.2
    max_iter = 10
    total_num = 500
    _x, _y = make_moons(total_num, noise=0.25, random_state=12345)
    for i in range(np.shape(_y)[0]):
        X.put(i, _x[i])
        Y.put(i, _y[i])
    print(len([y for y in Y.collect()]))
    current_milli_time = lambda: int(round(time.time() * 1000))
    start = current_milli_time()
    # shape_w = [1, np.shape(_x)[1]]
    shape_w = [np.shape(_x)[1]]
    w = np.ones(shape_w)
    print(w)
    X = TensorInEgg(None, None, X)
    Y = TensorInEgg(None, None, Y)
    w = TensorInPy(None, None, w)
    b = TensorInPy(None, None, b)
    # lr = LR(shape_w)
    # lr.train(X, Y)
    itr = 0
    while itr < max_iter:
        # Sigmoid via the tensor map operation: H = 1 / (1 + exp(-(Xw + b)))
        H = 1.0 / (1 + ((X @ w + b) * -1).map(np.exp))
        R = H - Y
        gradient_w = (R * X).sum() / total_num
        gradient_b = R.sum() / total_num
        w = w - eta * gradient_w
        b = b - eta * gradient_b
        print("iter", itr, w, b)
        # self.plot(itr)
        itr += 1
    print("train total time: {}".format(current_milli_time() - start))
    _x_test, _y_test = make_moons(50, random_state=12345)
    _x_test = TensorInPy(None, None, _x_test)
    y_pred = 1.0 / (1 + ((_x_test @ w + b) * -1).map(np.exp))
    from sklearn import metrics
    auc = metrics.roc_auc_score(_y_test, y_pred.store.reshape(50))
    print("auc: {}".format(auc))
Example 28
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--save', type=str, default='work')
    parser.add_argument('--nEpoch', type=int, default=100)
    # parser.add_argument('--testBatchSz', type=int, default=2048)
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--model', type=str, default="picnn",
                        choices=['picnn', 'ficnn'])
    parser.add_argument('--dataset', type=str, default="moons",
                        choices=['moons', 'circles', 'linear'])
    parser.add_argument('--noncvx', action='store_true')
    args = parser.parse_args()
    npr.seed(args.seed)
    tf.set_random_seed(args.seed)
    setproctitle.setproctitle('bamos.icnn.synthetic.{}.{}'.format(args.model, args.dataset))
    save = os.path.join(os.path.expanduser(args.save),
                        "{}.{}".format(args.model, args.dataset))
    if os.path.isdir(save):
        shutil.rmtree(save)
    os.makedirs(save, exist_ok=True)
    if args.dataset == "moons":
        (dataX, dataY) = make_moons(noise=0.3, random_state=0)
    elif args.dataset == "circles":
        (dataX, dataY) = make_circles(noise=0.2, factor=0.5, random_state=0)
        dataY = 1. - dataY
    elif args.dataset == "linear":
        (dataX, dataY) = make_classification(n_features=2, n_redundant=0, n_informative=2,
                                             random_state=1, n_clusters_per_class=1)
        rng = np.random.RandomState(2)
        dataX += 2 * rng.uniform(size=dataX.shape)
    else:
        assert(False)
    dataY = dataY.reshape((-1, 1)).astype(np.float32)
    nData = dataX.shape[0]
    nFeatures = dataX.shape[1]
    nLabels = 1
    nXy = nFeatures + nLabels
    config = tf.ConfigProto()  # log_device_placement=False
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model = Model(nFeatures, nLabels, sess, args.model, nGdIter=30)
        model.train(args, dataX, dataY)