Python源码示例:sklearn.preprocessing.StandardScaler()

示例1
def classify_1nn(data_train, data_test):
    '''
    Classification using 1NN
    Inputs: data_train, data_test: train and test csv file path
    Outputs: yprediction and accuracy
    '''
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.metrics import accuracy_score
    from sklearn.preprocessing import StandardScaler
    data = {'src': np.loadtxt(data_train, delimiter=','),
            'tar': np.loadtxt(data_test, delimiter=','),
            }
    Xs, Ys, Xt, Yt = data['src'][:, :-1], data['src'][:, -
                                                      1], data['tar'][:, :-1], data['tar'][:, -1]
    Xs = StandardScaler(with_mean=0, with_std=1).fit_transform(Xs)
    Xt = StandardScaler(with_mean=0, with_std=1).fit_transform(Xt)
    clf = KNeighborsClassifier(n_neighbors=1)
    clf.fit(Xs, Ys)
    ypred = clf.predict(Xt)
    acc = accuracy_score(y_true=Yt, y_pred=ypred)
    print('Acc: {:.4f}'.format(acc))
    return ypred, acc 
示例2
def test_similar_results(self):
        global_seed(314159)

        X = np.random.rand(100000, 5)

        dp_ss = StandardScaler(bounds=(0, 1), epsilon=float("inf"))
        dp_ss.fit(X)

        sk_ss = sk_pp.StandardScaler()
        sk_ss.fit(X)

        self.assertTrue(np.allclose(dp_ss.mean_, sk_ss.mean_, rtol=1, atol=1e-4), "Arrays %s and %s should be close" %
                        (dp_ss.mean_, sk_ss.mean_))
        self.assertTrue(np.allclose(dp_ss.var_, sk_ss.var_, rtol=1, atol=1e-4), "Arrays %s and %s should be close" %
                        (dp_ss.var_, sk_ss.var_))
        self.assertTrue(np.all(dp_ss.n_samples_seen_ == sk_ss.n_samples_seen_)) 
示例3
def test_accountant(self):
        from diffprivlib.accountant import BudgetAccountant
        acc = BudgetAccountant()

        X = np.random.rand(10, 5)
        ss = StandardScaler(epsilon=1, bounds=(0, 1), accountant=acc)
        ss.fit(X)
        self.assertEqual((1, 0), acc.total())

        with BudgetAccountant(1.5, 0) as acc2:
            ss = StandardScaler(epsilon=1, bounds=(0, 1))
            ss.fit(X)
            self.assertEqual((1, 0), acc2.total())

            with self.assertRaises(BudgetError):
                ss.fit(X)

        self.assertEqual((1, 0), acc.total()) 
示例4
def pca(self, **kwargs):
        if 'n_components' in kwargs:
            nComp = kwargs['n_components']
        else:
            nComp = 0.995

        if 'dates' in kwargs:
            mat = self.to_matrix(kwargs['dates'])
        else:
            mat = self.to_matrix()
        scaler = StandardScaler()
        pca = PCA(n_components=nComp)
        self._pipeline = Pipeline([('scaler', scaler), ('pca', pca)])
        self._pipeline.fit(mat)
        
        if 'file' in kwargs:
            tofile(kwargs['file'], self._pipeline)
        
        return self._pipeline 
示例5
def random_normal_draw(history, nb_samples, **kwargs):
    """Random normal distributed draws
    
    Arguments:
        history: numpy 2D array, with history along axis=0 and parameters 
            along axis=1
        nb_samples: number of samples to draw
        
    Returns:
        numpy 2D array, with samples along axis=0 and parameters along axis=1
    """
    scaler = StandardScaler()
    scaler.fit(history)
    scaled = scaler.transform(history)
    sqrt_cov = sqrtm(empirical_covariance(scaled)).real
    
    #Draw correlated random variables
    #draws are generated transposed for convenience of the dot operation
    draws = np.random.standard_normal((history.shape[-1], nb_samples))
    draws = np.dot(sqrt_cov, draws)
    draws = np.transpose(draws)
    return scaler.inverse_transform(draws) 
示例6
def fetch(self, n_tr, n_val, n_test, seed=0):
        x, y = self.load()
        
        # split data
        x_tr, x_val, y_tr, y_val = train_test_split(
            x, y, train_size=n_tr, test_size=n_val+n_test, random_state=seed)
        x_val, x_test, y_val, y_test = train_test_split(
            x_val, y_val, train_size=n_val, test_size=n_test, random_state=seed+1)
        
        # process x
        if self.normalize:
            scaler = StandardScaler()
            scaler.fit(x_tr)
            x_tr = scaler.transform(x_tr)
            x_val = scaler.transform(x_val)
            x_test = scaler.transform(x_test)
        if self.append_one:
            x_tr = np.c_[x_tr, np.ones(n_tr)]
            x_val = np.c_[x_val, np.ones(n_val)]
            x_test = np.c_[x_test, np.ones(n_test)]
        
        return (x_tr, y_tr), (x_val, y_val), (x_test, y_test) 
示例7
def test_invalid_test_size(self):
        rng = np.random.RandomState(seed=7)

        with self.assertRaises(TypeError):
            Simulator(bandits=[("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))],
                      decisions=[rng.randint(0, 2) for _ in range(10)],
                      rewards=[rng.randint(0, 100) for _ in range(10)],
                      contexts=[[rng.rand() for _ in range(5)] for _ in range(10)],
                      scaler=StandardScaler(), test_size=1, batch_size=0,
                      is_ordered=True, seed=7)

        with self.assertRaises(ValueError):
            Simulator(bandits=[("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))],
                      decisions=[rng.randint(0, 2) for _ in range(10)],
                      rewards=[rng.randint(0, 100) for _ in range(10)],
                      contexts=[[rng.rand() for _ in range(5)] for _ in range(10)],
                      scaler=StandardScaler(), test_size=50.0, batch_size=0,
                      is_ordered=True, seed=7) 
示例8
def test_invalid_batch_size(self):
        rng = np.random.RandomState(seed=7)

        with self.assertRaises(TypeError):
            Simulator(bandits=[("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))],
                      decisions=[rng.randint(0, 2) for _ in range(10)],
                      rewards=[rng.randint(0, 100) for _ in range(10)],
                      contexts=[[rng.rand() for _ in range(5)] for _ in range(10)],
                      scaler=StandardScaler(), test_size=0.4, batch_size=0.5,
                      is_ordered=True, seed=7)

        with self.assertRaises(ValueError):
            Simulator(bandits=[("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))],
                      decisions=[rng.randint(0, 2) for _ in range(10)],
                      rewards=[rng.randint(0, 100) for _ in range(10)],
                      contexts=[[rng.rand() for _ in range(5)] for _ in range(10)],
                      scaler=StandardScaler(), test_size=0.4, batch_size=10,
                      is_ordered=True, seed=7) 
示例9
def test_invalid_log_format(self):
        rng = np.random.RandomState(seed=7)
        with self.assertRaises(TypeError):
            Simulator(bandits=[("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))],
                      decisions=[rng.randint(0, 2) for _ in range(10)],
                      rewards=[rng.randint(0, 100) for _ in range(10)],
                      contexts=[[rng.rand() for _ in range(5)] for _ in range(10)],
                      scaler=StandardScaler(), test_size=0.4, batch_size=0,
                      is_ordered=True, seed=7, log_format=7)

        with self.assertRaises(TypeError):
            Simulator(bandits=[("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))],
                      decisions=[rng.randint(0, 2) for _ in range(10)],
                      rewards=[rng.randint(0, 100) for _ in range(10)],
                      contexts=[[rng.rand() for _ in range(5)] for _ in range(10)],
                      scaler=StandardScaler(), test_size=0.4, batch_size=0,
                      is_ordered=True, seed=7, log_format=None) 
示例10
def test_simulator_mixed(self):
        size = 100

        decisions = [random.randint(0, 2) for _ in range(size)]
        rewards = [random.randint(0, 1000) for _ in range(size)]
        contexts = [[random.random() for _ in range(50)] for _ in range(size)]

        n_jobs = 1
        mixed = [('RandomRadius', MAB([0, 1], LearningPolicy.Random(), NeighborhoodPolicy.Radius(10), n_jobs=n_jobs)),
                 ('Random', MAB([0, 1], LearningPolicy.Random(), n_jobs=n_jobs))]

        sim = Simulator(mixed, decisions, rewards, contexts,
                        scaler=StandardScaler(), test_size=0.5, is_ordered=False, batch_size=0, seed=123456)
        sim.run()

        self.assertTrue(sim.bandit_to_confusion_matrices)
        self.assertTrue(sim.bandit_to_predictions) 
示例11
def test_simulator_hyper_parameter(self):
        size = 100

        decisions = [random.randint(0, 2) for _ in range(size)]
        rewards = [random.randint(0, 1000) for _ in range(size)]
        contexts = [[random.random() for _ in range(50)] for _ in range(size)]

        n_jobs = 1
        hyper_parameter_tuning = []
        for radius in range(6, 10):
            hyper_parameter_tuning.append(('Radius' + str(radius),
                                           MAB([0, 1], LearningPolicy.UCB1(1), NeighborhoodPolicy.Radius(radius),
                                               n_jobs=n_jobs)))

        sim = Simulator(hyper_parameter_tuning, decisions, rewards, contexts,
                        scaler=StandardScaler(), test_size=0.5, is_ordered=False, batch_size=0, seed=123456,
                        is_quick=True)
        sim.run()

        self.assertTrue(sim.bandit_to_confusion_matrices)
        self.assertTrue(sim.bandit_to_predictions) 
示例12
def test_unused_arm_scaled2(self):

        context_history = np.array([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1], [0, 0, 1, 0, 0], [0, 2, 2, 3, 5],
                                    [1, 3, 1, 1, 1], [0, 0, 0, 0, 0], [0, 1, 4, 3, 5], [0, 1, 2, 4, 5],
                                    [1, 2, 1, 1, 3], [0, 2, 1, 0, 0]], dtype='float64')

        scaler = StandardScaler()
        scaled_contexts = scaler.fit_transform(context_history)
        scaled_predict = scaler.transform(np.array([[0, 1, 2, 3, 5], [1, 1, 1, 1, 1]], dtype='float64'))

        arms, mab = self.predict(arms=[1, 2, 3, 4],
                                 decisions=[1, 1, 1, 2, 2, 3, 3, 3, 3, 3],
                                 rewards=[0, 0, 1, 0, 0, 0, 0, 1, 1, 1],
                                 learning_policy=LearningPolicy.LinUCB(alpha=1),
                                 context_history=scaled_contexts,
                                 contexts=scaled_predict,
                                 seed=123456,
                                 num_run=1,
                                 is_predict=True)

        self.assertEqual(arms, [4, 4]) 
示例13
def test_contextual_offline(self):
        rng = np.random.RandomState(seed=7)
        bandits = []
        counter = 0
        for cp in TestSimulator.nps:
            for lp in TestSimulator.lps:
                bandits.append((str(counter), MAB([0, 1], lp, cp)))
                counter += 1

        for para in TestSimulator.parametric:
            bandits.append((str(counter), MAB([0, 1], para)))
            counter += 1

        sim = Simulator(bandits=bandits,
                        decisions=[rng.randint(0, 2) for _ in range(20)],
                        rewards=[rng.randint(0, 2) for _ in range(20)],
                        contexts=[[rng.rand() for _ in range(5)] for _ in range(20)],
                        scaler=StandardScaler(), test_size=0.4, batch_size=0,
                        is_ordered=True, seed=7) 
示例14
def test_contextual_offline_run_n_jobs(self):
        rng = np.random.RandomState(seed=7)
        bandits = []
        counter = 0
        for cp in TestSimulator.nps:
            for lp in TestSimulator.lps:
                bandits.append((str(counter), MAB([0, 1], lp, cp, n_jobs=2)))
                counter += 1

        for para in TestSimulator.parametric:
            bandits.append((str(counter), MAB([0, 1], para, n_jobs=2)))
            counter += 1

        sim = Simulator(bandits=bandits,
                        decisions=[rng.randint(0, 2) for _ in range(20)],
                        rewards=[rng.randint(0, 2) for _ in range(20)],
                        contexts=[[rng.rand() for _ in range(5)] for _ in range(20)],
                        scaler=StandardScaler(), test_size=0.4, batch_size=0,
                        is_ordered=True, seed=7)
        sim.run()
        self.assertTrue(bool(sim.arm_to_stats_total))
        self.assertTrue(bool(sim.bandit_to_predictions)) 
示例15
def test_contextual_online(self):
        rng = np.random.RandomState(seed=7)
        bandits = []
        counter = 0
        for cp in TestSimulator.nps:
            for lp in TestSimulator.lps:
                bandits.append((str(counter), MAB([0, 1], lp, cp)))
                counter += 1

        for para in TestSimulator.parametric:
            bandits.append((str(counter), MAB([0, 1], para)))
            counter += 1

        sim = Simulator(bandits=bandits,
                        decisions=[rng.randint(0, 2) for _ in range(100)],
                        rewards=[rng.randint(0, 2) for _ in range(100)],
                        contexts=[[rng.rand() for _ in range(5)] for _ in range(100)],
                        scaler=StandardScaler(), test_size=0.4, batch_size=5,
                        is_ordered=True, seed=7)
        sim.run()
        self.assertTrue(bool(sim.arm_to_stats_total))
        self.assertTrue(bool(sim.bandit_to_predictions))
        self.assertTrue('total' in sim.bandit_to_arm_to_stats_max['0'].keys()) 
示例16
def test_contextual_quick(self):
        rng = np.random.RandomState(seed=7)
        bandits = []
        counter = 0
        for cp in TestSimulator.nps:
            for lp in TestSimulator.lps:
                bandits.append((str(counter), MAB([0, 1], lp, cp)))
                counter += 1

        for para in TestSimulator.parametric:
            bandits.append((str(counter), MAB([0, 1], para)))
            counter += 1

        sim = Simulator(bandits=bandits,
                        decisions=[rng.randint(0, 2) for _ in range(20)],
                        rewards=[rng.randint(0, 2) for _ in range(20)],
                        contexts=[[rng.rand() for _ in range(5)] for _ in range(20)],
                        scaler=StandardScaler(), test_size=0.4, batch_size=0,
                        is_ordered=True, seed=7, is_quick=True)
        sim.run()
        self.assertTrue(bool(sim.arm_to_stats_total))
        self.assertTrue(bool(sim.bandit_to_predictions)) 
示例17
def test_contextual_online_quick(self):
        rng = np.random.RandomState(seed=7)
        bandits = []
        counter = 0
        for cp in TestSimulator.nps:
            for lp in TestSimulator.lps:
                bandits.append((str(counter), MAB([0, 1], lp, cp)))
                counter += 1

        for para in TestSimulator.parametric:
            bandits.append((str(counter), MAB([0, 1], para)))
            counter += 1

        sim = Simulator(bandits=bandits,
                        decisions=[rng.randint(0, 2) for _ in range(100)],
                        rewards=[rng.randint(0, 2) for _ in range(100)],
                        contexts=[[rng.rand() for _ in range(5)] for _ in range(100)],
                        scaler=StandardScaler(), test_size=0.4, batch_size=5,
                        is_ordered=True, seed=7, is_quick=True)
        sim.run()
        self.assertTrue(bool(sim.arm_to_stats_total))
        self.assertTrue(bool(sim.bandit_to_predictions))
        self.assertTrue('total' in sim.bandit_to_arm_to_stats_max['0'].keys()) 
示例18
def test_contextual_unordered(self):
        rng = np.random.RandomState(seed=7)
        bandits = []
        counter = 0
        for cp in TestSimulator.nps:
            for lp in TestSimulator.lps:
                bandits.append((str(counter), MAB([0, 1], lp, cp)))
                counter += 1

        for para in TestSimulator.parametric:
            bandits.append((str(counter), MAB([0, 1], para)))
            counter += 1

        sim = Simulator(bandits=bandits,
                        decisions=[rng.randint(0, 2) for _ in range(20)],
                        rewards=[rng.randint(0, 2) for _ in range(20)],
                        contexts=[[rng.rand() for _ in range(5)] for _ in range(20)],
                        scaler=StandardScaler(), test_size=0.4, batch_size=0,
                        is_ordered=False, seed=7)
        sim.run()
        self.assertTrue(bool(sim.arm_to_stats_total))
        self.assertTrue(bool(sim.bandit_to_predictions)) 
示例19
def test_contextual_unordered_online(self):
        rng = np.random.RandomState(seed=7)
        bandits = []
        counter = 0
        for cp in TestSimulator.nps:
            for lp in TestSimulator.lps:
                bandits.append((str(counter), MAB([0, 1], lp, cp)))
                counter += 1

        for para in TestSimulator.parametric:
            bandits.append((str(counter), MAB([0, 1], para)))
            counter += 1

        sim = Simulator(bandits=bandits,
                        decisions=[rng.randint(0, 2) for _ in range(100)],
                        rewards=[rng.randint(0, 2) for _ in range(100)],
                        contexts=[[rng.rand() for _ in range(5)] for _ in range(100)],
                        scaler=StandardScaler(), test_size=0.4, batch_size=5,
                        is_ordered=False, seed=7)
        sim.run()
        self.assertTrue(bool(sim.arm_to_stats_total))
        self.assertTrue(bool(sim.bandit_to_predictions))
        self.assertTrue('total' in sim.bandit_to_arm_to_stats_max['0'].keys()) 
示例20
def preprocess_data(csv_data):
    credit_card_data = csv_data.drop(labels=['Class', 'Time'], axis=1)
    credit_card_data['Amount'] = StandardScaler().fit_transform(credit_card_data['Amount'].values.reshape(-1, 1))
    # print(credit_card_data.head())
    credit_card_np_data = credit_card_data.as_matrix()
    y_true = csv_data['Class'].as_matrix()
    return credit_card_np_data, y_true 
示例21
def preprocess_data(csv_data):
    credit_card_data = csv_data.drop(labels=['Class', 'Time'], axis=1)
    credit_card_data['Amount'] = StandardScaler().fit_transform(credit_card_data['Amount'].values.reshape(-1, 1))
    # print(credit_card_data.head())
    credit_card_np_data = credit_card_data.as_matrix()
    y_true = csv_data['Class'].as_matrix()
    return credit_card_np_data, y_true 
示例22
def standard_scale(X_train, X_test):
    preprocessor = prep.StandardScaler().fit(X_train)
    X_train = preprocessor.transform(X_train)
    X_test = preprocessor.transform(X_test)
    return X_train, X_test 
示例23
def standard_scale(X_train, X_test):
    preprocessor = prep.StandardScaler().fit(X_train)
    X_train = preprocessor.transform(X_train)
    X_test = preprocessor.transform(X_test)
    return X_train, X_test 
示例24
def standard_scale(X_train, X_test):
    preprocessor = prep.StandardScaler().fit(X_train)
    X_train = preprocessor.transform(X_train)
    X_test = preprocessor.transform(X_test)
    return X_train, X_test 
示例25
def classify_1nn():
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.metrics import accuracy_score
    from sklearn.preprocessing import StandardScaler
    data = {'src': np.loadtxt(args.source + '_' + args.source + '.csv', delimiter=','),
            'tar': np.loadtxt(args.source + '_' + args.target + '.csv', delimiter=','),
            }
    Xs, Ys, Xt, Yt = data['src'][:, :-1], data['src'][:, -1], data['tar'][:, :-1], data['tar'][:, -1]
    Xs = StandardScaler(with_mean=0, with_std=1).fit_transform(Xs)
    Xt = StandardScaler(with_mean=0, with_std=1).fit_transform(Xt)
    clf = KNeighborsClassifier(n_neighbors=1)
    clf.fit(Xs, Ys)
    ypred = clf.predict(Xt)
    acc = accuracy_score(y_true=Yt, y_pred=ypred)
    print('{} - {}: acc: {:.4f}'.format(args.source, args.target, acc)) 
示例26
def test_class(self):
        from sklearn.base import BaseEstimator
        self.assertTrue(issubclass(StandardScaler, BaseEstimator)) 
示例27
def test_not_none(self):
        ss = StandardScaler()
        self.assertIsNotNone(ss) 
示例28
def test_no_range(self):
        X = np.random.rand(10, 5)
        ss = StandardScaler()

        with self.assertWarns(PrivacyLeakWarning):
            ss.fit(X) 
示例29
def test_inf_epsilon(self):
        X = np.random.rand(10, 5)

        dp_ss = StandardScaler(bounds=(0, 1), epsilon=float("inf"))
        dp_ss.fit(X)

        sk_ss = sk_pp.StandardScaler()
        sk_ss.fit(X)

        self.assertTrue(np.allclose(dp_ss.mean_, sk_ss.mean_), "Arrays %s and %s should be the same" %
                        (dp_ss.mean_, sk_ss.mean_))
        self.assertTrue(np.allclose(dp_ss.var_, sk_ss.var_), "Arrays %s and %s should be the same" %
                        (dp_ss.var_, sk_ss.var_))
        self.assertTrue(np.all(dp_ss.n_samples_seen_ == sk_ss.n_samples_seen_)) 
示例30
def test_different_results(self):
        X = np.random.rand(10, 5)

        ss1 = StandardScaler(bounds=(0, 1))
        ss1.fit(X)

        ss2 = StandardScaler(bounds=(0, 1))
        ss2.fit(X)

        self.assertFalse(np.allclose(ss1.mean_, ss2.mean_), "Arrays %s and %s should be the same" %
                        (ss1.mean_, ss2.mean_))
        self.assertFalse(np.allclose(ss1.var_, ss2.var_), "Arrays %s and %s should be the same" %
                        (ss1.var_, ss2.var_))