Python源码示例:sklearn.datasets.load_wine()

示例1
def test_load_wine():
    """Check the dimensions of the wine bunch and the ``return_X_y`` option."""
    wine_bunch = load_wine()

    # Expected dimensions: 178 samples, 13 features, 3 target names.
    n_samples, n_features, n_classes = 178, 13, 3
    assert_equal(wine_bunch.data.shape, (n_samples, n_features))
    assert_equal(wine_bunch.target.size, n_samples)
    assert_equal(wine_bunch.target_names.size, n_classes)

    # The description text must be present and non-empty.
    assert wine_bunch.DESCR

    # test return_X_y option
    loader = partial(load_wine)
    check_return_X_y(wine_bunch, loader)
示例2
def generate_multiclass_figures():
    """Plot the wine dataset before and after multiclass oversampling.

    One scatter plot of the first two features is saved (and shown) for the
    raw data, followed by one per selected oversampler. Oversampling runs on
    standardized features; the samples are mapped back to the original scale
    before plotting.
    """
    # Keep the oversamplers that are not tagged ``cat_changes_majority`` and
    # that expose a 'proportion' parameter.
    candidates = sv.get_all_oversamplers()
    selected = [
        sampler_cls
        for sampler_cls in candidates
        if sv.OverSampling.cat_changes_majority not in sampler_cls.categories
        and 'proportion' in sampler_cls().get_params()
    ]

    import sklearn.datasets as datasets

    wine = datasets.load_wine()
    X, y = wine['data'], wine['target']

    import matplotlib.pyplot as plt
    import sklearn.preprocessing as preprocessing

    ss = preprocessing.StandardScaler()
    X_ss = ss.fit_transform(X)

    def plot_and_save(X, y, filename, oversampler_name):
        """Scatter the first two features per class, then save and show."""
        # (class value, colour, marker, legend label) for each plotted class.
        class_styles = (
            (0, 'r', 'o', 'class 0'),
            (1, 'b', 'P', 'class 1'),
            (2, 'green', 'x', 'class 2'),
        )
        plt.figure(figsize=(4, 3))
        for class_value, colour, marker, legend in class_styles:
            members = X[y == class_value]
            plt.scatter(members[:, 0], members[:, 1],
                        c=colour, marker=marker, label=legend)
        plt.xlabel('feature 0')
        plt.ylabel('feature 1')
        plt.title(", ".join(["wine dataset", oversampler_name]))
        plt.savefig(filename)
        plt.show()

    plot_and_save(X, y, 'figures/multiclass-base.png', "No Oversampling")

    for sampler_cls in selected:
        sampler_name = sampler_cls.__name__
        print(sampler_name)
        multiclass = sv.MulticlassOversampling(sampler_cls())
        X_samp, y_samp = multiclass.sample(X_ss, y)
        restored = ss.inverse_transform(X_samp)
        plot_and_save(restored, y_samp,
                      "figures/multiclass-%s" % sampler_name, sampler_name)
示例3
def test_multiclass(self):
        """Multiclass oversampling of the wine data must yield samples.

        Exercised once with the default arguments and once with the
        'equalize_1_vs_many' strategy.
        """
        wine = datasets.load_wine()

        # Keyword arguments for each MulticlassOversampling configuration.
        configurations = ({}, {'strategy': 'equalize_1_vs_many'})

        for extra_kwargs in configurations:
            multiclass = sv.MulticlassOversampling(sv.distance_SMOTE(),
                                                   **extra_kwargs)

            X_samp, _ = multiclass.sample(wine['data'], wine['target'])

            self.assertTrue(len(X_samp) > 0)
示例4
def test_mlp_wrapper(self):
        """Fit the MLP classifier wrapper on the wine dataset."""
        wine = datasets.load_wine()
        mlp = sv.MLPClassifierWrapper()

        features, labels = wine['data'], wine['target']
        mlp.fit(features, labels)

        # Reaching this point means fitting did not raise.
        self.assertTrue(mlp is not None)
示例5
def test_cross_validate(self):
        """Run ``sv.cross_validate`` with SMOTE and k-NN on two datasets.

        The first dataset is assembled from the module-level ``data_min``
        and ``data_maj`` samples (labelled 1 and 0 respectively); the second
        is the wine dataset. Both runs must return a non-empty result.
        """
        X = np.vstack([data_min, data_maj])
        y = np.hstack([np.repeat(1, len(data_min)),
                       np.repeat(0, len(data_maj))])

        # setting cache path
        # makedirs(exist_ok=True) avoids the check-then-create race of
        # os.path.exists() followed by os.mkdir().
        # NOTE(review): cache_path is created but not passed to anything in
        # this test -- confirm whether it is still needed.
        cache_path = os.path.join(os.path.expanduser('~'), 'smote_test')
        os.makedirs(cache_path, exist_ok=True)

        # prepare dataset
        dataset = {'data': X, 'target': y, 'name': 'ballpark_data'}

        # instantiating classifiers
        knn_classifier = KNeighborsClassifier()

        # instantiate the validation object
        results = sv.cross_validate(dataset=dataset,
                                    sampler=sv.SMOTE(),
                                    classifier=knn_classifier)

        self.assertTrue(len(results) > 0)

        # Same validation on the wine dataset, reusing the classifier.
        dataset = datasets.load_wine()

        results = sv.cross_validate(dataset=dataset,
                                    sampler=sv.SMOTE(),
                                    classifier=knn_classifier)

        self.assertTrue(len(results) > 0)
示例6
def wine():
    """Write the wine features to ``wine.hdf5``.

    The feature matrix is passed through ``create_data`` and the two arrays
    it returns are stored under the 'missing' and 'full' keys.
    """
    from sklearn.datasets import load_wine
    data = load_wine().data
    missing_data, full_data = create_data(data)
    # The context manager closes the file even if a dataset write raises.
    with h5py.File('wine.hdf5', 'w') as h5_file:
        h5_file['missing'] = missing_data
        h5_file['full'] = full_data
示例7
def objective(trial):
    """Train a model on the wine data for ``trial`` and return its evaluation.

    The features are standardized and split into training and validation
    parts; the value returned is ``model.evaluate`` on the validation part.
    """
    # Clear clutter from previous Keras session graphs.
    clear_session()

    features, labels = load_wine(return_X_y=True)
    features = standardize(features)

    split = train_test_split(
        features, labels, test_size=TEST_SIZE, random_state=42
    )
    X_train, X_valid, y_train, y_valid = split

    n_features = features.shape[1]
    model = create_model(n_features, trial)

    fit_options = dict(
        shuffle=True, batch_size=BATCHSIZE, epochs=EPOCHS, verbose=False
    )
    model.fit(X_train, y_train, **fit_options)

    score = model.evaluate(X_valid, y_valid, verbose=0)
    return score
示例8
def wine():
    """Return the result of passing ``load_wine`` to ``Xy_dataset``."""
    dataset = Xy_dataset(load_wine)
    return dataset
示例9
def test_load_wine():
    """Check the dimensions of the wine bunch and the ``return_X_y`` option."""
    wine_bunch = load_wine()

    # Expected dimensions: 178 samples, 13 features, 3 target names.
    n_samples, n_features, n_classes = 178, 13, 3
    assert_equal(wine_bunch.data.shape, (n_samples, n_features))
    assert_equal(wine_bunch.target.size, n_samples)
    assert_equal(wine_bunch.target_names.size, n_classes)
    assert_true(wine_bunch.DESCR)

    # test return_X_y option
    xy_pair = load_wine(return_X_y=True)
    reference = load_wine()
    assert_true(isinstance(xy_pair, tuple))
    # The tuple must hold the same data and target as a freshly loaded bunch.
    for position, expected in enumerate((reference.data, reference.target)):
        assert_array_equal(xy_pair[position], expected)
示例10
def wine(training_size, test_size, n, plot_data=False):
    """Return the wine dataset, standardized, PCA-reduced and scaled to (-1, +1).

    Args:
        training_size: maximum number of training samples kept per class.
        test_size: forwarded to ``train_test_split`` as ``test_size`` and also
            used as the maximum number of test samples kept per class.
        n: number of PCA components to keep.
        plot_data: when true, scatter-plot the first two components of the
            selected training samples (this indexes component 1, so it
            needs ``n >= 2``).

    Returns:
        A tuple ``(sample_train, training_input, test_input, class_labels)``.
        ``sample_train`` is the full transformed training array;
        ``training_input`` and ``test_input`` map each class label to its
        selected samples.

    Raises:
        NameError: if ``plot_data`` is true and matplotlib cannot be imported.
    """
    class_labels = ['A', 'B', 'C']

    data, target = datasets.load_wine(return_X_y=True)
    sample_train, sample_test, label_train, label_test = \
        train_test_split(data, target, test_size=test_size, random_state=7)

    # Now we standardize for gaussian around 0 with unit variance
    std_scale = StandardScaler().fit(sample_train)
    sample_train = std_scale.transform(sample_train)
    sample_test = std_scale.transform(sample_test)

    # Now reduce number of features to number of qubits
    pca = PCA(n_components=n).fit(sample_train)
    sample_train = pca.transform(sample_train)
    sample_test = pca.transform(sample_test)

    # Scale to the range (-1,+1); the scaler is fitted on train and test
    # samples together.
    samples = np.append(sample_train, sample_test, axis=0)
    minmax_scale = MinMaxScaler((-1, 1)).fit(samples)
    sample_train = minmax_scale.transform(sample_train)
    sample_test = minmax_scale.transform(sample_test)

    # Pick training size number of samples from each distro
    training_input = {key: (sample_train[label_train == k, :])[:training_size]
                      for k, key in enumerate(class_labels)}
    test_input = {key: (sample_test[label_test == k, :])[:test_size]
                  for k, key in enumerate(class_labels)}

    if plot_data:
        try:
            import matplotlib.pyplot as plt
        except ImportError as ex:
            # Keep the NameError type, but chain the original import failure
            # so its cause is not lost.
            raise NameError(
                'Matplotlib not installed. Please install it before plotting'
            ) from ex
        # One scatter series per class, driven by class_labels rather than a
        # hard-coded count.
        for k in range(len(class_labels)):
            plt.scatter(sample_train[label_train == k, 0][:training_size],
                        sample_train[label_train == k, 1][:training_size])

        plt.title("PCA dim. reduced Wine dataset")
        plt.show()

    return sample_train, training_input, test_input, class_labels