Python source examples: sklearn.datasets.fetch_lfw_people()
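Before the project-specific examples below, here is a minimal sketch of the call itself (the parameter values are illustrative, not required):

from sklearn.datasets import fetch_lfw_people

# Fetch the Labeled Faces in the Wild dataset (downloaded on first use),
# keeping only people with at least 70 pictures; each face is scaled to 40%.
lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

print(lfw_people.images.shape)   # (n_samples, h, w) grayscale face images
print(lfw_people.data.shape)     # (n_samples, h * w) flattened pixel features
print(lfw_people.target_names)   # person names, indexed by the integer targets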
Example 1

def load_data():
    global training_data, testing_data
    lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)
    xs = lfw_people.data
    ys = lfw_people.target
    inputs = []
    labels = list(ys)
    for face in xs:
        # Vol and augment are project-level helpers (a ConvNetJS-style volume);
        # 50x37 matches the image size produced by resize=0.4.
        V = Vol(50, 37, 1, 0.0)
        V.w = list(face)
        inputs.append(augment(V, 30))
    x_tr, x_te, y_tr, y_te = train_test_split(inputs, labels, test_size=0.25)
    training_data = list(zip(x_tr, y_tr))
    testing_data = list(zip(x_te, y_te))
    print('Dataset made...')
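Example 1 hard-codes Vol(50, 37, 1, 0.0) because, with min_faces_per_person=70 and resize=0.4, the LFW faces come back as 50x37 grayscale images. A quick check:

from sklearn.datasets import fetch_lfw_people

lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)
n_samples, h, w = lfw_people.images.shape
print(h, w)                    # 50 37
print(lfw_people.data.shape)   # (n_samples, 1850) == (n_samples, h * w)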
Example 2

def test_load_empty_lfw_people():
    assert_raises(IOError, fetch_lfw_people, data_home=SCIKIT_LEARN_EMPTY_DATA,
                  download_if_missing=False)
Example 3

def test_load_fake_lfw_people():
    lfw_people = fetch_lfw_people(data_home=SCIKIT_LEARN_DATA,
                                  min_faces_per_person=3,
                                  download_if_missing=False)

    # The data is cropped around the center as a rectangular bounding box
    # around the face. Colors are converted to gray levels:
    assert_equal(lfw_people.images.shape, (10, 62, 47))
    assert_equal(lfw_people.data.shape, (10, 2914))

    # the target is an array of person integer ids
    assert_array_equal(lfw_people.target, [2, 0, 1, 0, 2, 0, 2, 1, 1, 2])

    # names of the persons can be found using the target_names array
    expected_classes = ['Abdelatif Smith', 'Abhati Kepler', 'Onur Lopez']
    assert_array_equal(lfw_people.target_names, expected_classes)

    # It is possible to ask for the original data without any cropping or color
    # conversion, and with no limit on the number of pictures per person
    lfw_people = fetch_lfw_people(data_home=SCIKIT_LEARN_DATA, resize=None,
                                  slice_=None, color=True,
                                  download_if_missing=False)
    assert_equal(lfw_people.images.shape, (17, 250, 250, 3))

    # the ids and class names are the same as previously
    assert_array_equal(lfw_people.target,
                       [0, 0, 1, 6, 5, 6, 3, 6, 0, 3, 6, 1, 2, 4, 5, 1, 2])
    assert_array_equal(lfw_people.target_names,
                       ['Abdelatif Smith', 'Abhati Kepler', 'Camara Alvaro',
                        'Chen Dupont', 'John Lee', 'Lin Bauman', 'Onur Lopez'])

    # test return_X_y option
    fetch_func = partial(fetch_lfw_people, data_home=SCIKIT_LEARN_DATA,
                         resize=None,
                         slice_=None, color=True,
                         download_if_missing=False)
    check_return_X_y(lfw_people, fetch_func)
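Outside the test suite, the return_X_y option exercised above (available in scikit-learn 0.20+) simply returns the flattened pixel matrix and target vector directly instead of a Bunch:

from sklearn.datasets import fetch_lfw_people

# return_X_y=True yields the (data, target) pair; parameters are illustrative
X, y = fetch_lfw_people(min_faces_per_person=70, resize=0.4, return_X_y=True)
print(X.shape, y.shape)   # (n_samples, h * w) and (n_samples,)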
Example 4

def test_load_fake_lfw_people_too_restrictive():
    assert_raises(ValueError, fetch_lfw_people, data_home=SCIKIT_LEARN_DATA,
                  min_faces_per_person=100, download_if_missing=False)
Example 5

def get_lfw(max_size=None):
    dataset = fetch_lfw_people(color=True)
    # keep only one image per person
    return image_per_label(
        dataset.images,
        dataset.target,
        dataset.target_names,
        max_size=max_size)
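The image_per_label helper is project-specific and not shown on this page. A plausible sketch of the "one image per person" behavior it describes, using NumPy (the signature and return shape here are assumptions):

import numpy as np

def image_per_label(images, targets, target_names, max_size=None):
    # np.unique with return_index=True yields the first occurrence of each
    # label, i.e. exactly one image per person.
    labels, first_idx = np.unique(targets, return_index=True)
    images = images[first_idx]
    names = [target_names[label] for label in labels]
    if max_size is not None:  # optionally cap the number of people returned
        images, names = images[:max_size], names[:max_size]
    return images, names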
Example 6

def dictionary_learn_ex():
    patch_shape = (18, 18)
    n_atoms = 225
    n_plot_atoms = 225
    n_nonzero_coefs = 2
    n_jobs = 8

    lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4, color=False)
    n_imgs, h, w = lfw_people.images.shape
    imgs = []
    for i in range(n_imgs):
        img = lfw_people.images[i, :, :].reshape((h, w))
        img /= 255.
        imgs.append(img)

    print('Extracting reference patches...')
    X = extract_patches(imgs, patch_size=patch_shape[0], scale=False,
                        n_patches=int(1e5), verbose=True, n_jobs=n_jobs)
    print("number of patches:", X.shape[1])

    se = sparse_encoder(algorithm='bomp',
                        params={'n_nonzero_coefs': n_nonzero_coefs}, n_jobs=n_jobs)
    odc = online_dictionary_coder(n_atoms=n_atoms, sparse_coder=se, n_epochs=2,
                                  batch_size=1000, non_neg=False, verbose=True,
                                  n_jobs=n_jobs)
    odc.fit(X)
    D = odc.D

    # plot the learned dictionary atoms
    plt.figure(figsize=(4.2, 4))
    for i in range(n_plot_atoms):
        plt.subplot(15, 15, i + 1)
        plt.imshow(D[:, i].reshape(patch_shape), cmap=plt.cm.gray)
        plt.xticks(())
        plt.yticks(())
    plt.subplots_adjust(left=0.0, bottom=0.0, right=1.0, top=1.0, wspace=0.0, hspace=0.0)
    plt.show()
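The extract_patches, sparse_encoder, and online_dictionary_coder objects above come from the lyssa library. scikit-learn ships comparable built-ins, so a rough equivalent can be sketched with extract_patches_2d and MiniBatchDictionaryLearning (parameter values are illustrative):

import numpy as np
from sklearn.datasets import fetch_lfw_people
from sklearn.decomposition import MiniBatchDictionaryLearning
from sklearn.feature_extraction.image import extract_patches_2d

lfw = fetch_lfw_people(min_faces_per_person=70, resize=0.4)
rng = np.random.RandomState(0)

# sample 18x18 patches from each face and flatten them to rows
patches = np.vstack([
    extract_patches_2d(img, (18, 18), max_patches=50, random_state=rng)
    for img in lfw.images
]).reshape(-1, 18 * 18)
patches -= patches.mean(axis=1, keepdims=True)  # remove per-patch DC component

# learn 225 atoms with an OMP-style sparsity constraint (2 nonzero coefs)
dico = MiniBatchDictionaryLearning(n_components=225,
                                   transform_algorithm='omp',
                                   transform_n_nonzero_coefs=2,
                                   random_state=0)
dico.fit(patches)
D = dico.components_  # (n_atoms, patch_dim), one atom per row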
Example 7

def visualize():
    """
    Writes out various visualizations of our testing data.
    """
    print("Preparing visualizations...")
    tile_faces(fetch_lfw_people()["images"],
               constants.LOG_DIR + "/all_faces_tiled.png")
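tile_faces is a helper from the surrounding project. A plausible sketch of the tiling it performs, using matplotlib (the name, signature, and grid width are assumptions):

import math
import matplotlib.pyplot as plt

def tile_faces(images, out_path, cols=32):
    # lay the grayscale faces out on a cols-wide grid and save the figure
    rows = math.ceil(len(images) / cols)
    fig, axes = plt.subplots(rows, cols, figsize=(cols, rows))
    for ax in axes.ravel():
        ax.axis('off')
    for ax, img in zip(axes.ravel(), images):
        ax.imshow(img, cmap='gray')
    fig.savefig(out_path, bbox_inches='tight')
    plt.close(fig)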
Example 8

def test_load_empty_lfw_people():
    # an empty data_home with download_if_missing=False should raise an IOError
    with pytest.raises(IOError):
        fetch_lfw_people(data_home=SCIKIT_LEARN_EMPTY_DATA,
                         download_if_missing=False)
Example 9

def test_load_fake_lfw_people():
    lfw_people = fetch_lfw_people(data_home=SCIKIT_LEARN_DATA,
                                  min_faces_per_person=3,
                                  download_if_missing=False)

    # The data is cropped around the center as a rectangular bounding box
    # around the face. Colors are converted to gray levels:
    assert_equal(lfw_people.images.shape, (10, 62, 47))
    assert_equal(lfw_people.data.shape, (10, 2914))

    # the target is an array of person integer ids
    assert_array_equal(lfw_people.target, [2, 0, 1, 0, 2, 0, 2, 1, 1, 2])

    # names of the persons can be found using the target_names array
    expected_classes = ['Abdelatif Smith', 'Abhati Kepler', 'Onur Lopez']
    assert_array_equal(lfw_people.target_names, expected_classes)

    # It is possible to ask for the original data without any cropping or color
    # conversion, and with no limit on the number of pictures per person
    lfw_people = fetch_lfw_people(data_home=SCIKIT_LEARN_DATA, resize=None,
                                  slice_=None, color=True,
                                  download_if_missing=False)
    assert_equal(lfw_people.images.shape, (17, 250, 250, 3))

    # the ids and class names are the same as previously
    assert_array_equal(lfw_people.target,
                       [0, 0, 1, 6, 5, 6, 3, 6, 0, 3, 6, 1, 2, 4, 5, 1, 2])
    assert_array_equal(lfw_people.target_names,
                       ['Abdelatif Smith', 'Abhati Kepler', 'Camara Alvaro',
                        'Chen Dupont', 'John Lee', 'Lin Bauman', 'Onur Lopez'])
Example 10

def test_load_fake_lfw_people_too_restrictive():
    # min_faces_per_person=100 filters out every person in the fake dataset,
    # so the fetch should raise a ValueError
    with pytest.raises(ValueError):
        fetch_lfw_people(data_home=SCIKIT_LEARN_DATA, min_faces_per_person=100,
                         download_if_missing=False)
Example 11

def whitened_rgb_atoms():
    # a small dataset of images
    imgs = get_images(colored=True)
    # alternatively we could use the lfw dataset:
    """
    lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4, color=True)
    faces = lfw_people.data
    n_imgs, h, w, n_channels = lfw_people.images.shape
    imgs = []
    for i in range(n_imgs):
        img = lfw_people.images[i, :, :, :].reshape((h, w, n_channels))
        imgs.append(img)
    """
    patch_shape = (8, 8)
    n_atoms = 100
    n_plot_atoms = 100
    n_nonzero_coefs = 1

    print('Extracting reference patches...')
    X = extract_patches(imgs, patch_size=patch_shape[0], scale=False,
                        n_patches=int(5e5), mem="low")
    print("number of patches:", X.shape[1])

    wn = preproc("whitening")
    from lyssa.feature_extract.preproc import local_contrast_normalization
    # apply local contrast normalization and then whiten the patches
    X = wn(local_contrast_normalization(X))

    # learn the dictionary using Batch Orthogonal Matching Pursuit and KSVD
    se = sparse_encoder(algorithm='bomp',
                        params={'n_nonzero_coefs': n_nonzero_coefs}, n_jobs=8)
    kc = ksvd_coder(n_atoms=n_atoms, sparse_coder=se, init_dict="data",
                    max_iter=3, verbose=True, approx=False, n_jobs=8)
    kc.fit(X)
    D = kc.D

    # rescale each atom to [0, 1] for display
    for i in range(n_atoms):
        D[:, i] = (D[:, i] - D[:, i].min()) / float(D[:, i].max() - D[:, i].min())

    # plot the learned dictionary
    plt.figure(figsize=(4.2, 4))
    for i in range(n_plot_atoms):
        plt.subplot(10, 10, i + 1)
        plt.imshow(D[:, i].reshape((patch_shape[0], patch_shape[1], 3)))
        plt.xticks(())
        plt.yticks(())
    plt.subplots_adjust(left=0.0, bottom=0.0, right=1.0, top=1.0, wspace=0.0, hspace=0.0)
    plt.show()
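The preproc("whitening") and local_contrast_normalization steps above are lyssa-specific. If only the whitening step is needed, scikit-learn's PCA(whiten=True) is a common stand-in; a sketch under that assumption (note that lyssa stores patches as columns, while sklearn expects one sample per row):

import numpy as np
from sklearn.decomposition import PCA

def whiten_patches(X):
    # X: (n_patches, patch_dim), one flattened patch per row.
    # PCA(whiten=True) centers the data and rescales each principal
    # component to unit variance, decorrelating the patch dimensions.
    return PCA(whiten=True).fit_transform(X)

X_white = whiten_patches(np.random.rand(1000, 64))  # e.g. 8x8 patches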