Python源码示例:sklearn.datasets.load_iris()
示例1
def test_different_results(self):
    """Check that repeated fits on iris do not all predict identically.

    Fits the in-scope ``LogisticRegression(data_norm=12)`` twice and
    scikit-learn's ``linear_model.LogisticRegression`` once on the same
    train split, then asserts that the two in-scope fits disagree somewhere
    and that the scikit-learn fit does not match both of them.
    """
    from sklearn import datasets
    from sklearn import linear_model
    from sklearn.model_selection import train_test_split

    iris = datasets.load_iris()
    X_train, X_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=0.2)

    # First fit with the in-scope estimator.
    model_a = LogisticRegression(data_norm=12)
    model_a.fit(X_train, y_train)
    predict1 = model_a.predict(X_test)

    # Second, identically configured fit.
    model_b = LogisticRegression(data_norm=12)
    model_b.fit(X_train, y_train)
    predict2 = model_b.predict(X_test)

    # Reference fit with scikit-learn's estimator.
    reference = linear_model.LogisticRegression(solver="lbfgs", multi_class="ovr")
    reference.fit(X_train, y_train)
    predict3 = reference.predict(X_test)

    self.assertFalse(np.all(predict1 == predict2))

    matches_first = np.all(predict3 == predict1)
    matches_second = np.all(predict3 == predict2)
    self.assertFalse(matches_first and matches_second)
示例2
def test_same_results(self):
    """Check that ``epsilon=inf`` reproduces scikit-learn's predictions.

    Fits the in-scope ``LogisticRegression`` with ``data_norm=12`` and an
    infinite ``epsilon`` alongside scikit-learn's logistic regression on the
    same iris split, and asserts their test predictions are all equal.
    """
    from sklearn import datasets
    from sklearn import linear_model
    from sklearn.model_selection import train_test_split

    iris = datasets.load_iris()
    X_train, X_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=0.2)

    candidate = LogisticRegression(data_norm=12, epsilon=float("inf"))
    candidate.fit(X_train, y_train)
    predict1 = candidate.predict(X_test)

    reference = linear_model.LogisticRegression(solver="lbfgs", multi_class="ovr")
    reference.fit(X_train, y_train)
    predict2 = reference.predict(X_test)

    all_equal = np.all(predict1 == predict2)
    self.assertTrue(all_equal)
示例3
def test_different_results(self):
    """Check that repeated regression fits on iris do not all agree.

    Fits the in-scope ``LinearRegression`` twice with the same bounds and
    ``data_norm``, plus scikit-learn's ``linear_model.LinearRegression``
    once, and asserts the predictions are not all identical.
    """
    from sklearn import datasets
    from sklearn import linear_model
    from sklearn.model_selection import train_test_split

    iris = datasets.load_iris()
    X_train, X_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=0.2)

    # First fit with the in-scope estimator.
    model_a = LinearRegression(
        data_norm=12,
        bounds_X=([4.3, 2.0, 1.1, 0.1], [7.9, 4.4, 6.9, 2.5]),
        bounds_y=(0, 2))
    model_a.fit(X_train, y_train)
    predict1 = model_a.predict(X_test)

    # Second fit with exactly the same configuration.
    model_b = LinearRegression(
        data_norm=12,
        bounds_X=([4.3, 2.0, 1.1, 0.1], [7.9, 4.4, 6.9, 2.5]),
        bounds_y=(0, 2))
    model_b.fit(X_train, y_train)
    predict2 = model_b.predict(X_test)

    # Reference fit with scikit-learn's estimator.
    reference = linear_model.LinearRegression()
    reference.fit(X_train, y_train)
    predict3 = reference.predict(X_test)

    self.assertFalse(np.all(predict1 == predict2))

    matches_first = np.all(predict3 == predict1)
    matches_second = np.all(predict3 == predict2)
    self.assertFalse(matches_first and matches_second)
示例4
def test_same_results(self):
    """Check that ``epsilon=inf`` reproduces scikit-learn's linear regression.

    Fits the in-scope ``LinearRegression`` with an infinite ``epsilon`` and
    scikit-learn's ``linear_model.LinearRegression`` on the same iris split,
    and asserts the two sets of predictions are numerically close.
    """
    from sklearn import datasets
    from sklearn.model_selection import train_test_split
    from sklearn import linear_model

    dataset = datasets.load_iris()
    X_train, X_test, y_train, y_test = train_test_split(
        dataset.data, dataset.target, test_size=0.2)

    clf = LinearRegression(
        data_norm=12, epsilon=float("inf"),
        bounds_X=([4.3, 2.0, 1.0, 0.1], [7.9, 4.4, 6.9, 2.5]),
        bounds_y=(0, 2))
    clf.fit(X_train, y_train)
    predict1 = clf.predict(X_test)

    # ``normalize`` was removed from scikit-learn's LinearRegression (it
    # raised TypeError once removed). False was its default, so omitting the
    # argument keeps the same reference model on every version.
    clf = linear_model.LinearRegression()
    clf.fit(X_train, y_train)
    predict2 = clf.predict(X_test)

    self.assertTrue(np.allclose(predict1, predict2))
示例5
def test_different_results(self):
    """Check that the in-scope GaussianNB differs from scikit-learn's.

    After seeding via ``global_seed(12345)``, fits the in-scope
    ``GaussianNB(epsilon=1.0, bounds=...)`` and scikit-learn's GaussianNB on
    the same iris split and asserts at least one test prediction differs.
    """
    from sklearn.naive_bayes import GaussianNB as sk_nb
    from sklearn import datasets

    global_seed(12345)
    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=.2)

    feature_bounds = ([4.3, 2.0, 1.0, 0.1], [7.9, 4.4, 6.9, 2.5])
    clf_dp = GaussianNB(epsilon=1.0, bounds=feature_bounds)
    clf_non_private = sk_nb()

    # Fit in the same order as before: in-scope model first, then sklearn's.
    clf_dp.fit(x_train, y_train)
    clf_non_private.fit(x_train, y_train)

    dp_labels = clf_dp.predict(x_test)
    plain_labels = clf_non_private.predict(x_test)
    self.assertFalse(np.all(dp_labels == plain_labels))
示例6
def test_with_iris(self):
    """Fit GaussianNB on iris, check accuracy, then check partial_fit counts.

    Asserts the test accuracy exceeds 0.5, and that calling ``partial_fit``
    with the same training data doubles the total of ``class_count_``.
    """
    global_seed(12345)
    from sklearn import datasets

    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=.2)

    feature_bounds = ([4.3, 2.0, 1.0, 0.1], [7.9, 4.4, 6.9, 2.5])
    model = GaussianNB(epsilon=5.0, bounds=feature_bounds)
    model.fit(x_train, y_train)

    accuracy = model.score(x_test, y_test)
    # Copy now: class_count_ is read again after partial_fit below.
    counts_before = model.class_count_.copy()
    self.assertGreater(accuracy, 0.5)

    model.partial_fit(x_train, y_train)
    counts_after = model.class_count_
    self.assertEqual(np.sum(counts_after), np.sum(counts_before) * 2)
示例7
def setUp(self):
    """Train a two-layer Keras model on iris and convert it with keras2pmml.

    Stores the model, the ``keras2pmml`` result, the input/output sizes read
    from the model's shapes, the layer count, and generated feature and
    class names on ``self`` for use by the tests.
    """
    iris = load_iris()
    theano.config.floatX = 'float32'
    inputs = iris.data.astype(theano.config.floatX)
    labels = iris.target.astype(np.int32)
    labels_one_hot = np_utils.to_categorical(labels)

    hidden_layer = Dense(input_dim=inputs.shape[1], output_dim=5, activation='tanh')
    output_layer = Dense(input_dim=5, output_dim=labels_one_hot.shape[1], activation='sigmoid')

    net = Sequential()
    net.add(hidden_layer)
    net.add(output_layer)
    net.compile(loss='categorical_crossentropy', optimizer='sgd')
    net.fit(inputs, labels_one_hot, nb_epoch=10, batch_size=1, verbose=3, validation_data=None)

    self.model = net
    self.pmml = keras2pmml(self.model, copyright='Václav Čadek', model_name='Iris Model')

    self.num_inputs = self.model.input_shape[1]
    self.num_outputs = self.model.output_shape[1]
    self.num_connection_layers = len(self.model.layers)

    # Names are positional: x0..x{n-1} for inputs, y0..y{m-1} for outputs.
    self.features = list(map('x{}'.format, range(self.num_inputs)))
    self.class_values = list(map('y{}'.format, range(self.num_outputs)))
示例8
def test_bagged_imputer_classification():
    """Check that BaggedCategoricalImputer fills every missing species label.

    Builds an iris DataFrame with a ``species`` column, shuffles it, nulls
    out a random subset of the labels (those whose uniform draw exceeds
    0.85), and asserts that ``fit_transform`` leaves no nulls, both with the
    default estimator and with a ``RandomForestClassifier``.
    """
    iris = load_iris()

    # make DF, add species col. Stored as float so that NaN can be written
    # below without relying on an implicit int -> float upcast.
    X = pd.DataFrame.from_records(data=iris.data, columns=iris.feature_names)
    X['species'] = iris.target.astype(float)

    # shuffle...
    X = shuffle_dataframe(X)

    # set random indices to be null.. 15% should be good
    rands = np.random.rand(X.shape[0])
    mask = rands > 0.85

    # Single .loc write instead of the chained ``X['species'].iloc[mask] = ``
    # form: chained assignment writes to a temporary object under pandas
    # Copy-on-Write and would leave X without any missing values, making
    # the assertions below vacuous. A boolean ndarray mask is positional.
    X.loc[mask, 'species'] = np.nan

    # define imputer, assert no missing
    imputer = BaggedCategoricalImputer(cols=['species'])
    y = imputer.fit_transform(X)
    assert y['species'].isnull().sum() == 0, 'expected no null...'

    # now test with a different estimator
    imputer = BaggedCategoricalImputer(cols=['species'], base_estimator=RandomForestClassifier())
    y = imputer.fit_transform(X)
    assert y['species'].isnull().sum() == 0, 'expected no null...'
示例9
def test_few_classification():
    """test_few.py: tests default classification settings"""
    np.random.seed(42)
    X, y = load_iris(return_X_y=True)
    row_ids = np.arange(X.shape[0])
    train_idx, test_idx = train_test_split(row_ids, train_size=0.75,
                                           test_size=0.25)
    X_tr, y_tr = X[train_idx], y[train_idx]

    default_few = FEW(classification=True, population_size='1x', generations=10)
    default_few.fit(X_tr, y_tr)
    print('train score:', default_few.score(X[train_idx], y[train_idx]))
    print('test score:', default_few.score(X[test_idx], y[test_idx]))

    # test boolean output
    boolean_few = FEW(classification=True, otype='b', population_size='2x',
                      seed_with_ml=False, generations=10)
    # Re-seed after construction and before fitting, as in the first run.
    np.random.seed(42)
    boolean_few.fit(X_tr, y_tr)
    print('train score:', boolean_few.score(X[train_idx], y[train_idx]))
    print('test score:', boolean_few.score(X[test_idx], y[test_idx]))
    boolean_few.print_model()
示例10
def main():
    """Run iris through a FeatureUnion of two column selectors and print means.

    Each branch is a pipeline whose only step is a ``FunctionTransformer``
    selecting one sepal column from the DataFrame. The union's output is
    then indexed by column name.
    NOTE(review): the final lookups assume the union output supports
    string column indexing -- confirm for the FeatureUnion in use.
    """
    raw_data = load_iris()
    frame = pd.DataFrame(raw_data["data"], columns=raw_data["feature_names"])

    length_branch = make_pipeline(
        FunctionTransformer(lambda X: X.loc[:, ["sepal length (cm)"]]),
        # other transformations
    )
    width_branch = make_pipeline(
        FunctionTransformer(lambda X: X.loc[:, ["sepal width (cm)"]]),
        # other transformations
    )
    union = FeatureUnion([("1", length_branch), ("2", width_branch)])

    X = union.fit_transform(frame)
    print(X["sepal length (cm)"].mean())
    print(X["sepal width (cm)"].mean())
示例11
def main():
    """Run iris through a PandasFeatureUnion and print two column means.

    Builds a DataFrame with the iris features plus a ``class`` column, then
    combines two single-step pipelines, each a ``PandasTransform`` selecting
    one sepal column, and prints the mean of each selected column.
    """
    raw_data = load_iris()
    frame = pd.DataFrame(raw_data["data"], columns=raw_data["feature_names"])
    frame.loc[:, "class"] = raw_data["target"]

    length_branch = make_pipeline(
        PandasTransform(lambda X: X.loc[:, ["sepal length (cm)"]]),
        # other transformations
    )
    width_branch = make_pipeline(
        PandasTransform(lambda X: X.loc[:, ["sepal width (cm)"]]),
        # other transformations
    )
    union = PandasFeatureUnion([("1", length_branch), ("2", width_branch)])

    X = union.fit_transform(frame)
    print(X["sepal length (cm)"].mean())
    print(X["sepal width (cm)"].mean())
示例12
def main():
    """Run iris through a FeatureUnion of PandasTransform steps, print means.

    Builds a DataFrame with the iris features plus a ``class`` column, then
    combines two single-step pipelines, each a ``PandasTransform`` selecting
    one sepal column, and indexes the result by column name.
    NOTE(review): the final lookups assume the union output supports
    string column indexing -- confirm for the FeatureUnion in use.
    """
    raw_data = load_iris()
    frame = pd.DataFrame(raw_data["data"], columns=raw_data["feature_names"])
    frame.loc[:, "class"] = raw_data["target"]

    length_branch = make_pipeline(
        PandasTransform(lambda X: X.loc[:, ["sepal length (cm)"]]),
        # other transformations
    )
    width_branch = make_pipeline(
        PandasTransform(lambda X: X.loc[:, ["sepal width (cm)"]]),
        # other transformations
    )
    union = FeatureUnion([("1", length_branch), ("2", width_branch)])

    X = union.fit_transform(frame)
    print(X["sepal length (cm)"].mean())
    print(X["sepal width (cm)"].mean())
示例13
def test_build_meowa_factory():
    """Cross-validate a FuzzyPatternClassifier using MEOWAFactory on iris.

    Features are min-max scaled first; the mean of the 10-fold scores must
    exceed 0.80.
    """
    from sklearn.model_selection import cross_val_score
    from sklearn.preprocessing import MinMaxScaler

    iris = datasets.load_iris()
    scaled_X = MinMaxScaler().fit_transform(iris.data)
    y = iris.target

    classifier = nfpc.FuzzyPatternClassifier(
        membership_factory=t_factory,
        aggregation_factory=nfpc.MEOWAFactory(),
    )

    fold_scores = cross_val_score(classifier, scaled_X, y, cv=10)
    mean = np.mean(fold_scores)
    assert 0.80 < mean
示例14
def test_build_ps_owa_factory():
    """Cross-validate a FuzzyPatternClassifier using GAOWAFactory on iris.

    The aggregation factory is built with ``nfpc.ps_owa_optimizer()``.
    Features are min-max scaled first; the mean of the 10-fold scores is
    printed and must exceed 0.92.
    """
    from sklearn.model_selection import cross_val_score
    from sklearn.preprocessing import MinMaxScaler

    iris = datasets.load_iris()
    scaled_X = MinMaxScaler().fit_transform(iris.data)
    y = iris.target

    aggregation = nfpc.GAOWAFactory(optimizer=nfpc.ps_owa_optimizer())
    classifier = nfpc.FuzzyPatternClassifier(
        membership_factory=t_factory,
        aggregation_factory=aggregation,
    )

    fold_scores = cross_val_score(classifier, scaled_X, y, cv=10)
    mean = np.mean(fold_scores)
    print("mean", mean)
    assert 0.92 < mean
示例15
def test_classifier_iris():
    """Cross-validate FuzzyPatternClassifierGA on min-max scaled iris.

    Expects ten fold scores, a mean above 0.6, and a mean within the
    ``pytest.approx(mean, 0.01)`` tolerance of 0.92.
    """
    from sklearn.model_selection import cross_val_score
    from sklearn.preprocessing import MinMaxScaler

    iris = load_iris()
    scaled_X = MinMaxScaler().fit_transform(iris.data)
    y = iris.target

    classifier = fpcga.FuzzyPatternClassifierGA(iterations=100, random_state=1)
    fold_scores = cross_val_score(classifier, scaled_X, y, cv=10)
    mean = np.mean(fold_scores)

    assert len(fold_scores) == 10
    assert mean > 0.6

    print("mean", mean)
    assert 0.92 == pytest.approx(mean, 0.01)
示例16
def test_graphical_lasso_iris():
    """Compare graphical_lasso on iris against a hard-coded R reference.

    Runs both the 'cd' and 'lars' modes with alpha=1.0 on the empirical
    covariance of the full iris data.
    """
    # Hard-coded solution from R glasso package for alpha=1.0
    # (need to set penalize.diagonal to FALSE)
    cov_R = np.array([
        [0.68112222, 0.0000000, 0.265820, 0.02464314],
        [0.00000000, 0.1887129, 0.000000, 0.00000000],
        [0.26582000, 0.0000000, 3.095503, 0.28697200],
        [0.02464314, 0.0000000, 0.286972, 0.57713289],
    ])
    icov_R = np.array([
        [1.5190747, 0.000000, -0.1304475, 0.0000000],
        [0.0000000, 5.299055, 0.0000000, 0.0000000],
        [-0.1304475, 0.000000, 0.3498624, -0.1683946],
        [0.0000000, 0.000000, -0.1683946, 1.8164353],
    ])

    iris_features = datasets.load_iris().data
    emp_cov = empirical_covariance(iris_features)

    for solver_mode in ('cd', 'lars'):
        estimate = graphical_lasso(emp_cov, alpha=1.0, return_costs=False,
                                   mode=solver_mode)
        cov, icov = estimate
        assert_array_almost_equal(cov, cov_R)
        assert_array_almost_equal(icov, icov_R)
示例17
def test_graphical_lasso_iris_singular():
    """Compare graphical_lasso on three iris rows against an R reference.

    Runs both the 'cd' and 'lars' modes with alpha=0.01 and compares to
    five decimals.
    """
    # Small subset of rows to test the rank-deficient case
    # Need to choose samples such that none of the variances are zero
    row_ids = np.arange(10, 13)

    # Hard-coded solution from R glasso package for alpha=0.01
    cov_R = np.array([
        [0.08, 0.056666662595, 0.00229729713223, 0.00153153142149],
        [0.056666662595, 0.082222222222, 0.00333333333333, 0.00222222222222],
        [0.002297297132, 0.003333333333, 0.00666666666667, 0.00009009009009],
        [0.001531531421, 0.002222222222, 0.00009009009009, 0.00222222222222],
    ])
    icov_R = np.array([
        [24.42244057, -16.831679593, 0.0, 0.0],
        [-16.83168201, 24.351841681, -6.206896552, -12.5],
        [0.0, -6.206896171, 153.103448276, 0.0],
        [0.0, -12.499999143, 0.0, 462.5],
    ])

    subset = datasets.load_iris().data[row_ids, :]
    emp_cov = empirical_covariance(subset)

    for solver_mode in ('cd', 'lars'):
        estimate = graphical_lasso(emp_cov, alpha=0.01, return_costs=False,
                                   mode=solver_mode)
        cov, icov = estimate
        assert_array_almost_equal(cov, cov_R, decimal=5)
        assert_array_almost_equal(icov, icov_R, decimal=5)
示例18
def test_graph_lasso_iris():
    """Compare graph_lasso on iris against a hard-coded R reference.

    Both the 'cd' and 'lars' modes are run with alpha=1.0 on the empirical
    covariance of the full iris data.
    """
    # Hard-coded solution from R glasso package for alpha=1.0
    # (need to set penalize.diagonal to FALSE)
    expected_cov_rows = [
        [0.68112222, 0.0000000, 0.265820, 0.02464314],
        [0.00000000, 0.1887129, 0.000000, 0.00000000],
        [0.26582000, 0.0000000, 3.095503, 0.28697200],
        [0.02464314, 0.0000000, 0.286972, 0.57713289],
    ]
    expected_icov_rows = [
        [1.5190747, 0.000000, -0.1304475, 0.0000000],
        [0.0000000, 5.299055, 0.0000000, 0.0000000],
        [-0.1304475, 0.000000, 0.3498624, -0.1683946],
        [0.0000000, 0.000000, -0.1683946, 1.8164353],
    ]
    cov_R = np.array(expected_cov_rows)
    icov_R = np.array(expected_icov_rows)

    emp_cov = empirical_covariance(datasets.load_iris().data)

    for solver_mode in ('cd', 'lars'):
        cov, icov = graph_lasso(emp_cov, alpha=1.0, return_costs=False,
                                mode=solver_mode)
        assert_array_almost_equal(cov, cov_R)
        assert_array_almost_equal(icov, icov_R)
示例19
def test_graph_lasso_iris_singular():
    """Compare graph_lasso on three iris rows against an R reference.

    Both the 'cd' and 'lars' modes are run with alpha=0.01 and compared to
    five decimals.
    """
    # Small subset of rows to test the rank-deficient case
    # Need to choose samples such that none of the variances are zero
    row_ids = np.arange(10, 13)

    # Hard-coded solution from R glasso package for alpha=0.01
    expected_cov_rows = [
        [0.08, 0.056666662595, 0.00229729713223, 0.00153153142149],
        [0.056666662595, 0.082222222222, 0.00333333333333, 0.00222222222222],
        [0.002297297132, 0.003333333333, 0.00666666666667, 0.00009009009009],
        [0.001531531421, 0.002222222222, 0.00009009009009, 0.00222222222222],
    ]
    expected_icov_rows = [
        [24.42244057, -16.831679593, 0.0, 0.0],
        [-16.83168201, 24.351841681, -6.206896552, -12.5],
        [0.0, -6.206896171, 153.103448276, 0.0],
        [0.0, -12.499999143, 0.0, 462.5],
    ]
    cov_R = np.array(expected_cov_rows)
    icov_R = np.array(expected_icov_rows)

    emp_cov = empirical_covariance(datasets.load_iris().data[row_ids, :])

    for solver_mode in ('cd', 'lars'):
        cov, icov = graph_lasso(emp_cov, alpha=0.01, return_costs=False,
                                mode=solver_mode)
        assert_array_almost_equal(cov, cov_R, decimal=5)
        assert_array_almost_equal(icov, icov_R, decimal=5)
示例20
def test_classification_report_multiclass_with_label_detection():
    """Check classification_report's text when labels are not passed in.

    Predictions come from ``make_prediction`` on iris (multiclass); the
    report is compared verbatim against the expected text.
    """
    iris = datasets.load_iris()
    prediction = make_prediction(dataset=iris, binary=False)
    y_true, y_pred, _ = prediction

    # print classification report with label detection
    expected_report = """\
precision recall f1-score support
0 0.83 0.79 0.81 24
1 0.33 0.10 0.15 31
2 0.42 0.90 0.57 20
accuracy 0.53 75
macro avg 0.53 0.60 0.51 75
weighted avg 0.51 0.53 0.47 75
"""
    actual_report = classification_report(y_true, y_pred)
    assert_equal(actual_report, expected_report)
示例21
def test_classification_report_multiclass_with_digits():
    """Check classification_report's text with class names and digits=5."""
    # Test performance report with added digits in floating point values
    iris = datasets.load_iris()
    prediction = make_prediction(dataset=iris, binary=False)
    y_true, y_pred, _ = prediction

    # print classification report with class names
    expected_report = """\
precision recall f1-score support
setosa 0.82609 0.79167 0.80851 24
versicolor 0.33333 0.09677 0.15000 31
virginica 0.41860 0.90000 0.57143 20
accuracy 0.53333 75
macro avg 0.52601 0.59615 0.50998 75
weighted avg 0.51375 0.53333 0.47310 75
"""
    class_names = iris.target_names
    actual_report = classification_report(
        y_true, y_pred,
        labels=np.arange(len(class_names)),
        target_names=class_names,
        digits=5)
    assert_equal(actual_report, expected_report)
示例22
def test_correct_labelsize():
    """Check silhouette_score rejects label counts outside the valid range.

    Two invalid labelings are tried: one label per sample, and a single
    label for all samples. Each must raise ValueError with a message that
    reports the number of distinct labels.
    """
    # Assert 1 < n_labels < n_samples
    X = datasets.load_iris().data
    n_samples = X.shape[0]

    message_template = (r'Number of labels is %d\. Valid values are 2 '
                        r'to n_samples - 1 \(inclusive\)')

    invalid_labelings = (
        np.arange(n_samples),  # n_labels = n_samples
        np.zeros(n_samples),   # n_labels = 1
    )
    for y in invalid_labelings:
        expected_message = message_template % len(np.unique(y))
        assert_raises_regexp(ValueError, expected_message,
                             silhouette_score, X, y)
示例23
def test_safe_split_with_precomputed_kernel():
    """Check _safe_split slices a precomputed kernel like the raw features.

    For both the train split and the test split (taken relative to train),
    the kernel slice must equal the dot product of the matching feature
    slices, and the target slices must agree.
    """
    feature_clf = SVC()
    kernel_clf = SVC(kernel="precomputed")

    iris = datasets.load_iris()
    X, y = iris.data, iris.target
    gram = np.dot(X, X.T)

    splitter = ShuffleSplit(test_size=0.25, random_state=0)
    first_split = list(splitter.split(X))[0]
    train, test = first_split

    # Train portion.
    X_train, y_train = _safe_split(feature_clf, X, y, train)
    K_train, y_train2 = _safe_split(kernel_clf, gram, y, train)
    assert_array_almost_equal(K_train, np.dot(X_train, X_train.T))
    assert_array_almost_equal(y_train, y_train2)

    # Test portion, passing the train indices as well.
    X_test, y_test = _safe_split(feature_clf, X, y, test, train)
    K_test, y_test2 = _safe_split(kernel_clf, gram, y, test, train)
    assert_array_almost_equal(K_test, np.dot(X_test, X_train.T))
    assert_array_almost_equal(y_test, y_test2)
示例24
def test_rfe_features_importance():
    """Check RFE picks the same features with a forest as with a linear SVC.

    Six columns of normal noise are appended to the iris features; both RFE
    runs select four features and their support masks must match.
    """
    generator = check_random_state(0)
    iris = load_iris()
    noise = generator.normal(size=(len(iris.data), 6))
    X = np.c_[iris.data, noise]
    y = iris.target

    forest = RandomForestClassifier(n_estimators=20,
                                    random_state=generator, max_depth=2)
    rfe_forest = RFE(estimator=forest, n_features_to_select=4, step=0.1)
    rfe_forest.fit(X, y)
    assert_equal(len(rfe_forest.ranking_), X.shape[1])

    linear_svc = SVC(kernel="linear")
    rfe_svc = RFE(estimator=linear_svc, n_features_to_select=4, step=0.1)
    rfe_svc.fit(X, y)

    # Check if the supports are equal
    assert_array_equal(rfe_forest.get_support(), rfe_svc.get_support())
示例25
def test_rfecv_verbose_output():
    """Check that fitting RFECV with verbose=1 writes output to stdout.

    Standard output is captured only for the duration of ``fit`` and is
    restored afterwards, even if ``fit`` raises.
    """
    # Check verbose=1 is producing an output.
    from contextlib import redirect_stdout
    from io import StringIO

    generator = check_random_state(0)
    iris = load_iris()
    X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
    y = list(iris.target)

    rfecv = RFECV(estimator=SVC(kernel="linear"), step=1, cv=5, verbose=1)

    # Previously sys.stdout was reassigned and never put back, which left
    # stdout redirected for everything that ran after this test.
    verbose_output = StringIO()
    with redirect_stdout(verbose_output):
        rfecv.fit(X, y)

    verbose_output.seek(0)
    assert_greater(len(verbose_output.readline()), 0)
示例26
def test_rfecv_grid_scores_size():
    """Check the sizes of RFECV's grid_scores_ and ranking_ after fitting.

    Uses iris plus six columns of normal noise, with targets passed as a
    plain list, across several (step, min_features_to_select) pairs.
    """
    generator = check_random_state(0)
    iris = load_iris()
    noise = generator.normal(size=(len(iris.data), 6))
    X = np.c_[iris.data, noise]
    y = list(iris.target)  # regression test: list should be supported
    n_features = X.shape[1]

    # Non-regression test for varying combinations of step and
    # min_features_to_select.
    combinations = ((2, 1), (2, 2), (3, 3))
    for step, min_features_to_select in combinations:
        rfecv = RFECV(estimator=MockClassifier(), step=step,
                      min_features_to_select=min_features_to_select, cv=5)
        rfecv.fit(X, y)

        n_eliminated = n_features - min_features_to_select
        score_len = np.ceil(n_eliminated / step) + 1

        assert len(rfecv.grid_scores_) == score_len
        assert len(rfecv.ranking_) == n_features
        assert rfecv.n_features_ >= min_features_to_select
示例27
def test_cross_val_score_mask():
    """Check cross_val_score gives the same scores for masks as for indices.

    The 5-fold splits are scored once via the KFold object (index arrays)
    and once via an explicit list of boolean (train, test) masks built from
    the same folds; both score arrays must be equal.
    """
    # test that cross_val_score works with boolean masks
    svm = SVC(kernel="linear")
    iris = load_iris()
    X, y = iris.data, iris.target
    kfold = KFold(5)
    scores_indices = cross_val_score(svm, X, y, cv=kfold)

    kfold = KFold(5)
    cv_masks = []
    for train, test in kfold.split(X, y):
        # Builtin ``bool``: the ``np.bool`` alias no longer exists in
        # current NumPy releases and raises AttributeError.
        mask_train = np.zeros(len(y), dtype=bool)
        mask_test = np.zeros(len(y), dtype=bool)
        mask_train[train] = True
        mask_test[test] = True
        # Append the masks themselves. Appending the index arrays (as this
        # test used to) left the masks unused, so the mask path was never
        # exercised.
        cv_masks.append((mask_train, mask_test))

    scores_masks = cross_val_score(svm, X, y, cv=cv_masks)
    assert_array_equal(scores_indices, scores_masks)
示例28
def test_cross_val_score_precomputed():
    """Check cross_val_score with precomputed, linear and callable kernels.

    The three kernels must give matching scores on iris. A non-square
    input and a list-valued kernel must both raise ValueError when the
    estimator uses ``kernel="precomputed"``.
    """
    iris = load_iris()
    X, y = iris.data, iris.target
    linear_kernel = np.dot(X, X.T)

    # test for svm with precomputed kernel
    precomputed_svm = SVC(kernel="precomputed")
    score_precomputed = cross_val_score(precomputed_svm, linear_kernel, y)

    linear_svm = SVC(kernel="linear")
    score_linear = cross_val_score(linear_svm, X, y)
    assert_array_almost_equal(score_precomputed, score_linear)

    # test with callable
    callable_svm = SVC(gamma='scale', kernel=lambda x, y: np.dot(x, y.T))
    score_callable = cross_val_score(callable_svm, X, y)
    assert_array_almost_equal(score_precomputed, score_callable)

    # Error raised for non-square X
    fresh_precomputed_svm = SVC(kernel="precomputed")
    assert_raises(ValueError, cross_val_score, fresh_precomputed_svm, X, y)

    # test error is raised when the precomputed kernel is not array-like
    # or sparse
    kernel_as_list = linear_kernel.tolist()
    assert_raises(ValueError, cross_val_score, fresh_precomputed_svm,
                  kernel_as_list, y)
示例29
def test_cross_val_score_with_score_func_classification():
    """Check 5-fold scores of a linear SVC on iris under three scorings.

    The default scoring, "accuracy" and "f1_weighted" are each compared to
    the same expected per-fold values, to two decimals.
    """
    iris = load_iris()
    data, target = iris.data, iris.target
    clf = SVC(kernel='linear')
    expected_scores = [0.97, 1., 0.97, 0.97, 1.]

    # Default score (should be the accuracy score)
    scores = cross_val_score(clf, data, target, cv=5)
    assert_array_almost_equal(scores, expected_scores, 2)

    # Correct classification score (aka. zero / one score) - should be the
    # same as the default estimator score
    zo_scores = cross_val_score(clf, data, target,
                                scoring="accuracy", cv=5)
    assert_array_almost_equal(zo_scores, expected_scores, 2)

    # F1 score (class are balanced so f1_score should be equal to zero/one
    # score
    f1_scores = cross_val_score(clf, data, target,
                                scoring="f1_weighted", cv=5)
    assert_array_almost_equal(f1_scores, expected_scores, 2)
示例30
def test_iris(self):
    """Check clip_to_norm on iris with a threshold between two row norms.

    The threshold is the midpoint of the norms of the first two rows. After
    clipping, neither row's norm may have grown, and at least one of them
    must be close to the threshold.
    """
    from sklearn import datasets

    iris = datasets.load_iris()
    X_train, y_train = iris.data, iris.target

    row_norms = np.linalg.norm(X_train, axis=1)
    clip = (row_norms[0] + row_norms[1]) / 2

    X_clipped = clip_to_norm(X_train, clip)
    clipped_norms = np.linalg.norm(X_clipped, axis=1)

    for row in (0, 1):
        self.assertLessEqual(clipped_norms[row], row_norms[row])

    hits_threshold = any(np.isclose(clipped_norms[row], clip) for row in (0, 1))
    self.assertTrue(hits_threshold)