Python源码示例:sklearn.preprocessing.StandardScaler()
示例1
def classify_1nn(data_train, data_test):
    """Classify target-domain samples with a 1-nearest-neighbour model.

    Inputs:
        data_train, data_test: paths to train/test CSV files whose last
            column is the label and the remaining columns are features.
    Outputs:
        ypred: predicted labels for the test set
        acc: accuracy of ypred against the test labels
    """
    import numpy as np  # local import so the function is self-contained
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.metrics import accuracy_score
    from sklearn.preprocessing import StandardScaler

    data = {
        'src': np.loadtxt(data_train, delimiter=','),
        'tar': np.loadtxt(data_test, delimiter=','),
    }
    Xs, Ys = data['src'][:, :-1], data['src'][:, -1]
    Xt, Yt = data['tar'][:, :-1], data['tar'][:, -1]
    # Explicit booleans instead of 0/1 flags: with_mean=False reproduces the
    # original behaviour (no centering, variance scaling only).
    Xs = StandardScaler(with_mean=False, with_std=True).fit_transform(Xs)
    Xt = StandardScaler(with_mean=False, with_std=True).fit_transform(Xt)
    clf = KNeighborsClassifier(n_neighbors=1)
    clf.fit(Xs, Ys)
    ypred = clf.predict(Xt)
    acc = accuracy_score(y_true=Yt, y_pred=ypred)
    print('Acc: {:.4f}'.format(acc))
    return ypred, acc
示例2
def test_similar_results(self):
    """DP scaler with infinite epsilon should closely match sklearn's scaler."""
    global_seed(314159)  # fix the RNG so the comparison is reproducible
    X = np.random.rand(100000, 5)
    # epsilon=inf means no privacy noise is added, so the fitted statistics
    # should agree with the non-private sklearn implementation.
    dp_ss = StandardScaler(bounds=(0, 1), epsilon=float("inf"))
    dp_ss.fit(X)
    sk_ss = sk_pp.StandardScaler()
    sk_ss.fit(X)
    # NOTE(review): loose rtol=1 presumably allows for the effect of the
    # (0, 1) bounds clipping — confirm against the DP implementation.
    self.assertTrue(np.allclose(dp_ss.mean_, sk_ss.mean_, rtol=1, atol=1e-4), "Arrays %s and %s should be close" %
                    (dp_ss.mean_, sk_ss.mean_))
    self.assertTrue(np.allclose(dp_ss.var_, sk_ss.var_, rtol=1, atol=1e-4), "Arrays %s and %s should be close" %
                    (dp_ss.var_, sk_ss.var_))
    self.assertTrue(np.all(dp_ss.n_samples_seen_ == sk_ss.n_samples_seen_))
示例3
def test_accountant(self):
    """BudgetAccountant should track epsilon spent by StandardScaler.fit."""
    from diffprivlib.accountant import BudgetAccountant
    acc = BudgetAccountant()
    X = np.random.rand(10, 5)
    ss = StandardScaler(epsilon=1, bounds=(0, 1), accountant=acc)
    ss.fit(X)
    # One fit spends (epsilon=1, delta=0) from the explicit accountant.
    self.assertEqual((1, 0), acc.total())
    # Inside the context manager a default accountant with budget 1.5 is active.
    with BudgetAccountant(1.5, 0) as acc2:
        ss = StandardScaler(epsilon=1, bounds=(0, 1))
        ss.fit(X)
        self.assertEqual((1, 0), acc2.total())
        # A second fit would need another epsilon=1, exceeding the 1.5 budget.
        with self.assertRaises(BudgetError):
            ss.fit(X)
    # The first, explicit accountant was not charged by the context's fits.
    self.assertEqual((1, 0), acc.total())
示例4
def pca(self, **kwargs):
    """Fit a StandardScaler + PCA pipeline on this object's matrix.

    Keyword Args:
        n_components: PCA component count or explained-variance ratio;
            defaults to 0.995.
        dates: optional argument forwarded to ``to_matrix``.
        file: optional path; when present, the fitted pipeline is written out.

    Returns:
        The fitted Pipeline (also stored on ``self._pipeline``).
    """
    # kwargs.get keeps the original default while avoiding the if/else chain.
    n_comp = kwargs.get('n_components', 0.995)
    mat = self.to_matrix(kwargs['dates']) if 'dates' in kwargs else self.to_matrix()
    self._pipeline = Pipeline([('scaler', StandardScaler()),
                               ('pca', PCA(n_components=n_comp))])
    self._pipeline.fit(mat)
    if 'file' in kwargs:
        tofile(kwargs['file'], self._pipeline)
    return self._pipeline
示例5
def random_normal_draw(history, nb_samples, **kwargs):
    """Draw normal samples whose correlation matches the history.

    Arguments:
        history: numpy 2D array, history along axis=0, parameters along axis=1
        nb_samples: number of samples to draw
    Returns:
        numpy 2D array with samples along axis=0 and parameters along axis=1
    """
    scaler = StandardScaler().fit(history)
    standardized = scaler.transform(history)
    # Matrix square root of the empirical covariance correlates the draws.
    sqrt_cov = sqrtm(empirical_covariance(standardized)).real
    # Draws are generated transposed so correlating is a single dot product.
    raw = np.random.standard_normal((history.shape[-1], nb_samples))
    correlated = np.dot(sqrt_cov, raw).T
    # Map back to the original location and scale.
    return scaler.inverse_transform(correlated)
示例6
def fetch(self, n_tr, n_val, n_test, seed=0):
    """Load the dataset and split it into train/validation/test parts."""
    x, y = self.load()
    # First carve off the training set, then split the remainder in two.
    x_tr, x_rest, y_tr, y_rest = train_test_split(
        x, y, train_size=n_tr, test_size=n_val + n_test, random_state=seed)
    x_val, x_test, y_val, y_test = train_test_split(
        x_rest, y_rest, train_size=n_val, test_size=n_test, random_state=seed + 1)
    if self.normalize:
        # Fit the scaler on the training split only, then apply it everywhere.
        scaler = StandardScaler().fit(x_tr)
        x_tr, x_val, x_test = (scaler.transform(s) for s in (x_tr, x_val, x_test))
    if self.append_one:
        # Append a constant column of ones to every split.
        x_tr = np.c_[x_tr, np.ones(n_tr)]
        x_val = np.c_[x_val, np.ones(n_val)]
        x_test = np.c_[x_test, np.ones(n_test)]
    return (x_tr, y_tr), (x_val, y_val), (x_test, y_test)
示例7
def test_invalid_test_size(self):
    """Simulator should reject invalid test_size values."""
    rng = np.random.RandomState(seed=7)
    # test_size given as an int (1) instead of a float -> TypeError.
    with self.assertRaises(TypeError):
        Simulator(bandits=[("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))],
                  decisions=[rng.randint(0, 2) for _ in range(10)],
                  rewards=[rng.randint(0, 100) for _ in range(10)],
                  contexts=[[rng.rand() for _ in range(5)] for _ in range(10)],
                  scaler=StandardScaler(), test_size=1, batch_size=0,
                  is_ordered=True, seed=7)
    # test_size as a float far outside the valid fraction range -> ValueError.
    with self.assertRaises(ValueError):
        Simulator(bandits=[("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))],
                  decisions=[rng.randint(0, 2) for _ in range(10)],
                  rewards=[rng.randint(0, 100) for _ in range(10)],
                  contexts=[[rng.rand() for _ in range(5)] for _ in range(10)],
                  scaler=StandardScaler(), test_size=50.0, batch_size=0,
                  is_ordered=True, seed=7)
示例8
def test_invalid_batch_size(self):
    """Simulator should reject invalid batch_size values."""
    rng = np.random.RandomState(seed=7)
    # batch_size given as a float (0.5) instead of an int -> TypeError.
    with self.assertRaises(TypeError):
        Simulator(bandits=[("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))],
                  decisions=[rng.randint(0, 2) for _ in range(10)],
                  rewards=[rng.randint(0, 100) for _ in range(10)],
                  contexts=[[rng.rand() for _ in range(5)] for _ in range(10)],
                  scaler=StandardScaler(), test_size=0.4, batch_size=0.5,
                  is_ordered=True, seed=7)
    # batch_size=10 with only 10 rows and test_size=0.4 -> ValueError,
    # presumably because the batch exceeds the test split — confirm.
    with self.assertRaises(ValueError):
        Simulator(bandits=[("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))],
                  decisions=[rng.randint(0, 2) for _ in range(10)],
                  rewards=[rng.randint(0, 100) for _ in range(10)],
                  contexts=[[rng.rand() for _ in range(5)] for _ in range(10)],
                  scaler=StandardScaler(), test_size=0.4, batch_size=10,
                  is_ordered=True, seed=7)
示例9
def test_invalid_log_format(self):
    """Simulator should reject non-string log_format values."""
    rng = np.random.RandomState(seed=7)
    # log_format as an int -> TypeError.
    with self.assertRaises(TypeError):
        Simulator(bandits=[("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))],
                  decisions=[rng.randint(0, 2) for _ in range(10)],
                  rewards=[rng.randint(0, 100) for _ in range(10)],
                  contexts=[[rng.rand() for _ in range(5)] for _ in range(10)],
                  scaler=StandardScaler(), test_size=0.4, batch_size=0,
                  is_ordered=True, seed=7, log_format=7)
    # log_format as None -> TypeError as well.
    with self.assertRaises(TypeError):
        Simulator(bandits=[("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))],
                  decisions=[rng.randint(0, 2) for _ in range(10)],
                  rewards=[rng.randint(0, 100) for _ in range(10)],
                  contexts=[[rng.rand() for _ in range(5)] for _ in range(10)],
                  scaler=StandardScaler(), test_size=0.4, batch_size=0,
                  is_ordered=True, seed=7, log_format=None)
示例10
def test_simulator_mixed(self):
    """A mix of contextual and context-free bandits should simulate cleanly."""
    num_rows = 100
    chosen = [random.randint(0, 2) for _ in range(num_rows)]
    payoffs = [random.randint(0, 1000) for _ in range(num_rows)]
    feats = [[random.random() for _ in range(50)] for _ in range(num_rows)]
    jobs = 1
    # One contextual (Radius) bandit and one context-free bandit.
    mixed = [
        ('RandomRadius', MAB([0, 1], LearningPolicy.Random(),
                             NeighborhoodPolicy.Radius(10), n_jobs=jobs)),
        ('Random', MAB([0, 1], LearningPolicy.Random(), n_jobs=jobs)),
    ]
    sim = Simulator(mixed, chosen, payoffs, feats,
                    scaler=StandardScaler(), test_size=0.5, is_ordered=False,
                    batch_size=0, seed=123456)
    sim.run()
    self.assertTrue(sim.bandit_to_confusion_matrices)
    self.assertTrue(sim.bandit_to_predictions)
示例11
def test_simulator_hyper_parameter(self):
    """A hyper-parameter sweep over Radius sizes should simulate cleanly."""
    num_rows = 100
    chosen = [random.randint(0, 2) for _ in range(num_rows)]
    payoffs = [random.randint(0, 1000) for _ in range(num_rows)]
    feats = [[random.random() for _ in range(50)] for _ in range(num_rows)]
    jobs = 1
    # One candidate bandit per radius value in [6, 10).
    hyper_parameter_tuning = [
        ('Radius' + str(radius),
         MAB([0, 1], LearningPolicy.UCB1(1), NeighborhoodPolicy.Radius(radius),
             n_jobs=jobs))
        for radius in range(6, 10)
    ]
    sim = Simulator(hyper_parameter_tuning, chosen, payoffs, feats,
                    scaler=StandardScaler(), test_size=0.5, is_ordered=False,
                    batch_size=0, seed=123456, is_quick=True)
    sim.run()
    self.assertTrue(sim.bandit_to_confusion_matrices)
    self.assertTrue(sim.bandit_to_predictions)
示例12
def test_unused_arm_scaled2(self):
    """LinUCB should predict an arm that never appears in the decisions.

    Arm 4 is listed in ``arms`` but absent from ``decisions``; with scaled
    contexts the policy is expected to pick it for both prediction rows.
    """
    context_history = np.array([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0], [0, 2, 2, 3, 5],
                                [1, 3, 1, 1, 1], [0, 0, 0, 0, 0], [0, 1, 4, 3, 5], [0, 1, 2, 4, 5],
                                [1, 2, 1, 1, 3], [0, 2, 1, 0, 0]], dtype='float64')
    scaler = StandardScaler()
    scaled_contexts = scaler.fit_transform(context_history)
    # Transform (not fit) the prediction rows with the history's statistics.
    scaled_predict = scaler.transform(np.array([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]], dtype='float64'))
    arms, mab = self.predict(arms=[1, 2, 3, 4],
                             decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
                             rewards=[0, 0, 1, 0, 0, 0, 0, 1, 1, 1],
                             learning_policy=LearningPolicy.LinUCB(alpha=1),
                             context_history=scaled_contexts,
                             contexts=scaled_predict,
                             seed=123456,
                             num_run=1,
                             is_predict=True)
    self.assertEqual(arms, [4, 4])
示例13
def test_contextual_offline(self):
    """Build an offline (batch_size=0) Simulator over all policy combinations.

    NOTE(review): unlike the sibling tests, this one never calls
    ``sim.run()`` or asserts anything — confirm whether exercising the
    constructor alone is the intent.
    """
    rng = np.random.RandomState(seed=7)
    bandits = []
    counter = 0
    # One bandit per (neighborhood policy, learning policy) pair...
    for cp in TestSimulator.nps:
        for lp in TestSimulator.lps:
            bandits.append((str(counter), MAB([0, 1], lp, cp)))
            counter += 1
    # ...plus each parametric policy on its own.
    for para in TestSimulator.parametric:
        bandits.append((str(counter), MAB([0, 1], para)))
        counter += 1
    sim = Simulator(bandits=bandits,
                    decisions=[rng.randint(0, 2) for _ in range(20)],
                    rewards=[rng.randint(0, 2) for _ in range(20)],
                    contexts=[[rng.rand() for _ in range(5)] for _ in range(20)],
                    scaler=StandardScaler(), test_size=0.4, batch_size=0,
                    is_ordered=True, seed=7)
示例14
def test_contextual_offline_run_n_jobs(self):
    """Offline simulation with parallel bandits (n_jobs=2) runs end to end."""
    rand = np.random.RandomState(seed=7)
    bandits = []
    # Each bandit is named by its position in the list.
    for cp in TestSimulator.nps:
        for lp in TestSimulator.lps:
            bandits.append((str(len(bandits)), MAB([0, 1], lp, cp, n_jobs=2)))
    for para in TestSimulator.parametric:
        bandits.append((str(len(bandits)), MAB([0, 1], para, n_jobs=2)))
    n = 20
    sim = Simulator(bandits=bandits,
                    decisions=[rand.randint(0, 2) for _ in range(n)],
                    rewards=[rand.randint(0, 2) for _ in range(n)],
                    contexts=[[rand.rand() for _ in range(5)] for _ in range(n)],
                    scaler=StandardScaler(), test_size=0.4, batch_size=0,
                    is_ordered=True, seed=7)
    sim.run()
    self.assertTrue(bool(sim.arm_to_stats_total))
    self.assertTrue(bool(sim.bandit_to_predictions))
示例15
def test_contextual_online(self):
    """Online simulation (batch_size=5) across all policy combinations."""
    rand = np.random.RandomState(seed=7)
    bandits = []
    # Each bandit is named by its position in the list.
    for cp in TestSimulator.nps:
        for lp in TestSimulator.lps:
            bandits.append((str(len(bandits)), MAB([0, 1], lp, cp)))
    for para in TestSimulator.parametric:
        bandits.append((str(len(bandits)), MAB([0, 1], para)))
    n = 100
    sim = Simulator(bandits=bandits,
                    decisions=[rand.randint(0, 2) for _ in range(n)],
                    rewards=[rand.randint(0, 2) for _ in range(n)],
                    contexts=[[rand.rand() for _ in range(5)] for _ in range(n)],
                    scaler=StandardScaler(), test_size=0.4, batch_size=5,
                    is_ordered=True, seed=7)
    sim.run()
    self.assertTrue(bool(sim.arm_to_stats_total))
    self.assertTrue(bool(sim.bandit_to_predictions))
    self.assertTrue('total' in sim.bandit_to_arm_to_stats_max['0'])
示例16
def test_contextual_quick(self):
    """Offline simulation with is_quick=True should still produce stats."""
    rng = np.random.RandomState(seed=7)
    bandits = []
    counter = 0
    # One bandit per (neighborhood policy, learning policy) pair.
    for cp in TestSimulator.nps:
        for lp in TestSimulator.lps:
            bandits.append((str(counter), MAB([0, 1], lp, cp)))
            counter += 1
    # Parametric policies are added on their own.
    for para in TestSimulator.parametric:
        bandits.append((str(counter), MAB([0, 1], para)))
        counter += 1
    sim = Simulator(bandits=bandits,
                    decisions=[rng.randint(0, 2) for _ in range(20)],
                    rewards=[rng.randint(0, 2) for _ in range(20)],
                    contexts=[[rng.rand() for _ in range(5)] for _ in range(20)],
                    scaler=StandardScaler(), test_size=0.4, batch_size=0,
                    is_ordered=True, seed=7, is_quick=True)
    sim.run()
    self.assertTrue(bool(sim.arm_to_stats_total))
    self.assertTrue(bool(sim.bandit_to_predictions))
示例17
def test_contextual_online_quick(self):
    """Online simulation (batch_size=5) with is_quick=True should run."""
    rng = np.random.RandomState(seed=7)
    bandits = []
    counter = 0
    # One bandit per (neighborhood policy, learning policy) pair.
    for cp in TestSimulator.nps:
        for lp in TestSimulator.lps:
            bandits.append((str(counter), MAB([0, 1], lp, cp)))
            counter += 1
    # Parametric policies are added on their own.
    for para in TestSimulator.parametric:
        bandits.append((str(counter), MAB([0, 1], para)))
        counter += 1
    sim = Simulator(bandits=bandits,
                    decisions=[rng.randint(0, 2) for _ in range(100)],
                    rewards=[rng.randint(0, 2) for _ in range(100)],
                    contexts=[[rng.rand() for _ in range(5)] for _ in range(100)],
                    scaler=StandardScaler(), test_size=0.4, batch_size=5,
                    is_ordered=True, seed=7, is_quick=True)
    sim.run()
    self.assertTrue(bool(sim.arm_to_stats_total))
    self.assertTrue(bool(sim.bandit_to_predictions))
    # Online runs additionally expose a 'total' entry in the max-stats map.
    self.assertTrue('total' in sim.bandit_to_arm_to_stats_max['0'].keys())
示例18
def test_contextual_unordered(self):
    """Offline simulation with is_ordered=False should still produce stats."""
    rng = np.random.RandomState(seed=7)
    bandits = []
    counter = 0
    # One bandit per (neighborhood policy, learning policy) pair.
    for cp in TestSimulator.nps:
        for lp in TestSimulator.lps:
            bandits.append((str(counter), MAB([0, 1], lp, cp)))
            counter += 1
    # Parametric policies are added on their own.
    for para in TestSimulator.parametric:
        bandits.append((str(counter), MAB([0, 1], para)))
        counter += 1
    sim = Simulator(bandits=bandits,
                    decisions=[rng.randint(0, 2) for _ in range(20)],
                    rewards=[rng.randint(0, 2) for _ in range(20)],
                    contexts=[[rng.rand() for _ in range(5)] for _ in range(20)],
                    scaler=StandardScaler(), test_size=0.4, batch_size=0,
                    is_ordered=False, seed=7)
    sim.run()
    self.assertTrue(bool(sim.arm_to_stats_total))
    self.assertTrue(bool(sim.bandit_to_predictions))
示例19
def test_contextual_unordered_online(self):
    """Online simulation (batch_size=5) with is_ordered=False should run."""
    rng = np.random.RandomState(seed=7)
    bandits = []
    counter = 0
    # One bandit per (neighborhood policy, learning policy) pair.
    for cp in TestSimulator.nps:
        for lp in TestSimulator.lps:
            bandits.append((str(counter), MAB([0, 1], lp, cp)))
            counter += 1
    # Parametric policies are added on their own.
    for para in TestSimulator.parametric:
        bandits.append((str(counter), MAB([0, 1], para)))
        counter += 1
    sim = Simulator(bandits=bandits,
                    decisions=[rng.randint(0, 2) for _ in range(100)],
                    rewards=[rng.randint(0, 2) for _ in range(100)],
                    contexts=[[rng.rand() for _ in range(5)] for _ in range(100)],
                    scaler=StandardScaler(), test_size=0.4, batch_size=5,
                    is_ordered=False, seed=7)
    sim.run()
    self.assertTrue(bool(sim.arm_to_stats_total))
    self.assertTrue(bool(sim.bandit_to_predictions))
    # Online runs additionally expose a 'total' entry in the max-stats map.
    self.assertTrue('total' in sim.bandit_to_arm_to_stats_max['0'].keys())
示例20
def preprocess_data(csv_data):
    """Split the credit-card frame into scaled features and labels.

    Drops the 'Class' label and 'Time' column, standardizes 'Amount'
    in place, and returns (features, labels) as numpy arrays.
    """
    credit_card_data = csv_data.drop(labels=['Class', 'Time'], axis=1)
    # Standardize the 'Amount' column; reshape(-1, 1) gives the 2D input
    # StandardScaler expects.
    credit_card_data['Amount'] = StandardScaler().fit_transform(
        credit_card_data['Amount'].values.reshape(-1, 1))
    # DataFrame.as_matrix() was removed in pandas 1.0 — use to_numpy().
    credit_card_np_data = credit_card_data.to_numpy()
    y_true = csv_data['Class'].to_numpy()
    return credit_card_np_data, y_true
示例21
def preprocess_data(csv_data):
    """Split the credit-card frame into scaled features and labels.

    Drops the 'Class' label and 'Time' column, standardizes 'Amount'
    in place, and returns (features, labels) as numpy arrays.
    """
    credit_card_data = csv_data.drop(labels=['Class', 'Time'], axis=1)
    # Standardize the 'Amount' column; reshape(-1, 1) gives the 2D input
    # StandardScaler expects.
    credit_card_data['Amount'] = StandardScaler().fit_transform(
        credit_card_data['Amount'].values.reshape(-1, 1))
    # DataFrame.as_matrix() was removed in pandas 1.0 — use to_numpy().
    credit_card_np_data = credit_card_data.to_numpy()
    y_true = csv_data['Class'].to_numpy()
    return credit_card_np_data, y_true
示例22
def standard_scale(X_train, X_test):
    """Standardize both sets using statistics fitted on the training set only."""
    scaler = prep.StandardScaler()
    scaler.fit(X_train)
    return scaler.transform(X_train), scaler.transform(X_test)
示例23
def standard_scale(X_train, X_test):
    """Scale train and test data with a scaler fitted on the training data."""
    fitted = prep.StandardScaler().fit(X_train)
    train_scaled = fitted.transform(X_train)
    test_scaled = fitted.transform(X_test)
    return train_scaled, test_scaled
示例24
def standard_scale(X_train, X_test):
    """Apply train-fitted standardization to both the train and test sets."""
    scaler = prep.StandardScaler()
    scaler.fit(X_train)
    # Transform both splits with the same (train-derived) statistics.
    return tuple(scaler.transform(split) for split in (X_train, X_test))
示例25
def classify_1nn():
    """1-NN transfer baseline; file paths come from the global ``args``.

    Loads source/target CSVs (last column is the label), scales each with a
    variance-only StandardScaler, fits 1-NN on the source, and reports
    accuracy on the target.

    Returns:
        ypred, acc — predictions and accuracy (new, backward-compatible).
    """
    import numpy as np  # local import so the function is self-contained
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.metrics import accuracy_score
    from sklearn.preprocessing import StandardScaler

    data = {
        'src': np.loadtxt(args.source + '_' + args.source + '.csv', delimiter=','),
        'tar': np.loadtxt(args.source + '_' + args.target + '.csv', delimiter=','),
    }
    Xs, Ys = data['src'][:, :-1], data['src'][:, -1]
    Xt, Yt = data['tar'][:, :-1], data['tar'][:, -1]
    # Explicit booleans instead of 0/1 flags: with_mean=False reproduces the
    # original behaviour (no centering, variance scaling only).
    Xs = StandardScaler(with_mean=False, with_std=True).fit_transform(Xs)
    Xt = StandardScaler(with_mean=False, with_std=True).fit_transform(Xt)
    clf = KNeighborsClassifier(n_neighbors=1)
    clf.fit(Xs, Ys)
    ypred = clf.predict(Xt)
    acc = accuracy_score(y_true=Yt, y_pred=ypred)
    print('{} - {}: acc: {:.4f}'.format(args.source, args.target, acc))
    # Return the results for consistency with the two-argument variant.
    return ypred, acc
示例26
def test_class(self):
    """StandardScaler should be a scikit-learn BaseEstimator subclass."""
    from sklearn.base import BaseEstimator
    self.assertTrue(issubclass(StandardScaler, BaseEstimator))
示例27
def test_not_none(self):
    """Constructing a StandardScaler should yield a non-None object."""
    scaler = StandardScaler()
    self.assertIsNotNone(scaler)
示例28
def test_no_range(self):
    """Fitting without explicit bounds should raise a PrivacyLeakWarning."""
    X = np.random.rand(10, 5)
    ss = StandardScaler()
    # No bounds were given — presumably the scaler must derive them from the
    # data itself, which the library flags as a privacy leak; confirm.
    with self.assertWarns(PrivacyLeakWarning):
        ss.fit(X)
示例29
def test_inf_epsilon(self):
    """With epsilon=inf (no noise) the DP scaler should match sklearn's."""
    X = np.random.rand(10, 5)
    dp_ss = StandardScaler(bounds=(0, 1), epsilon=float("inf"))
    dp_ss.fit(X)
    sk_ss = sk_pp.StandardScaler()
    sk_ss.fit(X)
    # Default allclose tolerances: the statistics must agree essentially exactly.
    self.assertTrue(np.allclose(dp_ss.mean_, sk_ss.mean_), "Arrays %s and %s should be the same" %
                    (dp_ss.mean_, sk_ss.mean_))
    self.assertTrue(np.allclose(dp_ss.var_, sk_ss.var_), "Arrays %s and %s should be the same" %
                    (dp_ss.var_, sk_ss.var_))
    self.assertTrue(np.all(dp_ss.n_samples_seen_ == sk_ss.n_samples_seen_))
示例30
def test_different_results(self):
    """Two DP fits on the same data should differ due to random noise."""
    X = np.random.rand(10, 5)
    ss1 = StandardScaler(bounds=(0, 1))
    ss1.fit(X)
    ss2 = StandardScaler(bounds=(0, 1))
    ss2.fit(X)
    # Fixed misleading failure messages: assertFalse(np.allclose(...)) checks
    # that the arrays are NOT close, so the message must say "should differ",
    # not "should be the same".
    self.assertFalse(np.allclose(ss1.mean_, ss2.mean_), "Arrays %s and %s should differ" %
                     (ss1.mean_, ss2.mean_))
    self.assertFalse(np.allclose(ss1.var_, ss2.var_), "Arrays %s and %s should differ" %
                     (ss1.var_, ss2.var_))