Python source code examples: sklearn.datasets.load_wine()
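The snippets below are collected from different open-source projects. For orientation, here is a minimal sketch (not taken from any of those projects) of the two call patterns they rely on; the shapes in the comments are the fixed dimensions of the bundled wine data:

from sklearn.datasets import load_wine

# Bunch-style access: attributes data, target, target_names, DESCR, ...
wine = load_wine()
print(wine.data.shape)     # (178, 13)
print(wine.target.shape)   # (178,)
print(wine.target_names)   # ['class_0' 'class_1' 'class_2']

# Tuple-style access via return_X_y
X, y = load_wine(return_X_y=True)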
Example 1
def test_load_wine():
    # assert_equal, check_return_X_y and partial come from the imports of the
    # original scikit-learn test module
    res = load_wine()
    assert_equal(res.data.shape, (178, 13))
    assert_equal(res.target.size, 178)
    assert_equal(res.target_names.size, 3)
    assert res.DESCR

    # test return_X_y option
    check_return_X_y(res, partial(load_wine))
Example 2
def generate_multiclass_figures():
    # sv refers to the smote_variants package, imported at module level as:
    # import smote_variants as sv
    oversamplers = sv.get_all_oversamplers()
    oversamplers = [o for o in oversamplers
                    if sv.OverSampling.cat_changes_majority not in o.categories
                    and 'proportion' in o().get_params()]

    import sklearn.datasets as datasets
    dataset = datasets.load_wine()

    X = dataset['data']
    y = dataset['target']

    import matplotlib.pyplot as plt
    import sklearn.preprocessing as preprocessing

    ss = preprocessing.StandardScaler()
    X_ss = ss.fit_transform(X)

    def plot_and_save(X, y, filename, oversampler_name):
        plt.figure(figsize=(4, 3))
        plt.scatter(X[y == 0][:, 0], X[y == 0][:, 1], c='r', marker='o', label='class 0')
        plt.scatter(X[y == 1][:, 0], X[y == 1][:, 1], c='b', marker='P', label='class 1')
        plt.scatter(X[y == 2][:, 0], X[y == 2][:, 1], c='green', marker='x', label='class 2')
        plt.xlabel('feature 0')
        plt.ylabel('feature 1')
        plt.title(", ".join(["wine dataset", oversampler_name]))
        plt.savefig(filename)
        plt.show()

    plot_and_save(X, y, 'figures/multiclass-base.png', "No Oversampling")

    for o in oversamplers:
        print(o.__name__)
        mcos = sv.MulticlassOversampling(o())
        X_samp, y_samp = mcos.sample(X_ss, y)
        plot_and_save(ss.inverse_transform(X_samp), y_samp,
                      "figures/multiclass-%s" % o.__name__, o.__name__)
Example 3
def test_multiclass(self):
    dataset = datasets.load_wine()

    oversampler = sv.MulticlassOversampling(sv.distance_SMOTE())
    X_samp, y_samp = oversampler.sample(dataset['data'], dataset['target'])
    self.assertTrue(len(X_samp) > 0)

    oversampler = sv.MulticlassOversampling(sv.distance_SMOTE(),
                                            strategy='equalize_1_vs_many')
    X_samp, y_samp = oversampler.sample(dataset['data'], dataset['target'])
    self.assertTrue(len(X_samp) > 0)
Example 4
def test_mlp_wrapper(self):
    dataset = datasets.load_wine()
    classifier = sv.MLPClassifierWrapper()
    classifier.fit(dataset['data'], dataset['target'])
    self.assertTrue(classifier is not None)
Example 5
def test_cross_validate(self):
    # data_min and data_maj are minority/majority sample arrays defined in the
    # surrounding test module
    X = np.vstack([data_min, data_maj])
    y = np.hstack([np.repeat(1, len(data_min)),
                   np.repeat(0, len(data_maj))])

    # setting cache path
    cache_path = os.path.join(os.path.expanduser('~'), 'smote_test')
    if not os.path.exists(cache_path):
        os.mkdir(cache_path)

    # prepare dataset
    dataset = {'data': X, 'target': y, 'name': 'ballpark_data'}

    # instantiating classifiers
    knn_classifier = KNeighborsClassifier()

    # instantiate the validation object
    results = sv.cross_validate(dataset=dataset,
                                sampler=sv.SMOTE(),
                                classifier=knn_classifier)
    self.assertTrue(len(results) > 0)

    dataset = datasets.load_wine()
    results = sv.cross_validate(dataset=dataset,
                                sampler=sv.SMOTE(),
                                classifier=knn_classifier)
    self.assertTrue(len(results) > 0)
Example 6
def wine():
    import h5py
    from sklearn.datasets import load_wine

    data = load_wine().data
    # create_data is a helper defined elsewhere in the source module (not shown here)
    missing_data, full_data = create_data(data)

    h5_file = h5py.File('wine.hdf5', 'w')
    h5_file['missing'] = missing_data
    h5_file['full'] = full_data
    h5_file.close()
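A possible way to read the file back, as a sketch that simply assumes the 'missing' and 'full' dataset names written above:

import h5py

with h5py.File('wine.hdf5', 'r') as h5_file:
    missing_data = h5_file['missing'][...]
    full_data = h5_file['full'][...]

print(missing_data.shape, full_data.shape)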
Example 7
def objective(trial):
    # standardize, create_model, TEST_SIZE, BATCHSIZE and EPOCHS are defined at
    # module level in the original script
    # Clear clutter from previous Keras session graphs.
    clear_session()

    X, y = load_wine(return_X_y=True)
    X = standardize(X)
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, test_size=TEST_SIZE, random_state=42
    )

    model = create_model(X.shape[1], trial)
    model.fit(X_train, y_train, shuffle=True, batch_size=BATCHSIZE,
              epochs=EPOCHS, verbose=False)

    return model.evaluate(X_valid, y_valid, verbose=0)
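The trial argument indicates this objective is meant to be driven by an Optuna study. A minimal driver sketch, assuming model.evaluate returns a single scalar loss here and using placeholder values for the optimization direction and trial count:

import optuna

study = optuna.create_study(direction="minimize")  # direction is an assumption
study.optimize(objective, n_trials=100)            # trial count is illustrative

print(study.best_trial.value, study.best_trial.params)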
Example 8
def wine():
    return Xy_dataset(load_wine)
Example 9
def test_load_wine():
    res = load_wine()
    assert_equal(res.data.shape, (178, 13))
    assert_equal(res.target.size, 178)
    assert_equal(res.target_names.size, 3)
    assert_true(res.DESCR)

    # test return_X_y option
    X_y_tuple = load_wine(return_X_y=True)
    bunch = load_wine()
    assert_true(isinstance(X_y_tuple, tuple))
    assert_array_equal(X_y_tuple[0], bunch.data)
    assert_array_equal(X_y_tuple[1], bunch.target)
Example 10
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA


def wine(training_size, test_size, n, plot_data=False):
    """Return the wine dataset: standardized, PCA-reduced to n features,
    scaled to (-1, +1) and split into per-class training/test inputs."""
    class_labels = [r'A', r'B', r'C']

    data, target = datasets.load_wine(return_X_y=True)
    sample_train, sample_test, label_train, label_test = \
        train_test_split(data, target, test_size=test_size, random_state=7)

    # Now we standardize for Gaussian around 0 with unit variance
    std_scale = StandardScaler().fit(sample_train)
    sample_train = std_scale.transform(sample_train)
    sample_test = std_scale.transform(sample_test)

    # Now reduce number of features to number of qubits
    pca = PCA(n_components=n).fit(sample_train)
    sample_train = pca.transform(sample_train)
    sample_test = pca.transform(sample_test)

    # Scale to the range (-1, +1)
    samples = np.append(sample_train, sample_test, axis=0)
    minmax_scale = MinMaxScaler((-1, 1)).fit(samples)
    sample_train = minmax_scale.transform(sample_train)
    sample_test = minmax_scale.transform(sample_test)

    # Pick training size number of samples from each distro
    training_input = {key: (sample_train[label_train == k, :])[:training_size]
                      for k, key in enumerate(class_labels)}
    test_input = {key: (sample_test[label_test == k, :])[:test_size]
                  for k, key in enumerate(class_labels)}

    if plot_data:
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            raise NameError('Matplotlib not installed. Please install it before plotting')
        for k in range(0, 3):
            plt.scatter(sample_train[label_train == k, 0][:training_size],
                        sample_train[label_train == k, 1][:training_size])
        plt.title("PCA dim. reduced Wine dataset")
        plt.show()

    return sample_train, training_input, test_input, class_labels
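A possible call of the helper above (argument values are purely illustrative; n is the number of features kept by PCA):

sample_train, training_input, test_input, class_labels = wine(
    training_size=20, test_size=30, n=2, plot_data=False
)
print({label: samples.shape for label, samples in training_input.items()})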