Python源码示例:sklearn.datasets.fetch_lfw_people()

示例1
def load_data():
    global training_data, testing_data

    lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

    xs = lfw_people.data
    ys = lfw_people.target

    inputs = []
    labels = list(ys)

    for face in xs:
        V = Vol(50, 37, 1, 0.0)
        V.w = list(face)
        inputs.append(augment(V, 30))

    x_tr, x_te, y_tr, y_te = train_test_split(inputs, labels, test_size=0.25)

    training_data = zip(x_tr, y_tr)
    testing_data = zip(x_te, y_te)

    print 'Dataset made...' 
示例2
def test_load_empty_lfw_people():
    assert_raises(IOError, fetch_lfw_people, data_home=SCIKIT_LEARN_EMPTY_DATA,
                  download_if_missing=False) 
示例3
def test_load_fake_lfw_people():
    lfw_people = fetch_lfw_people(data_home=SCIKIT_LEARN_DATA,
                                  min_faces_per_person=3,
                                  download_if_missing=False)

    # The data is croped around the center as a rectangular bounding box
    # around the face. Colors are converted to gray levels:
    assert_equal(lfw_people.images.shape, (10, 62, 47))
    assert_equal(lfw_people.data.shape, (10, 2914))

    # the target is array of person integer ids
    assert_array_equal(lfw_people.target, [2, 0, 1, 0, 2, 0, 2, 1, 1, 2])

    # names of the persons can be found using the target_names array
    expected_classes = ['Abdelatif Smith', 'Abhati Kepler', 'Onur Lopez']
    assert_array_equal(lfw_people.target_names, expected_classes)

    # It is possible to ask for the original data without any croping or color
    # conversion and not limit on the number of picture per person
    lfw_people = fetch_lfw_people(data_home=SCIKIT_LEARN_DATA, resize=None,
                                  slice_=None, color=True,
                                  download_if_missing=False)
    assert_equal(lfw_people.images.shape, (17, 250, 250, 3))

    # the ids and class names are the same as previously
    assert_array_equal(lfw_people.target,
                       [0, 0, 1, 6, 5, 6, 3, 6, 0, 3, 6, 1, 2, 4, 5, 1, 2])
    assert_array_equal(lfw_people.target_names,
                       ['Abdelatif Smith', 'Abhati Kepler', 'Camara Alvaro',
                        'Chen Dupont', 'John Lee', 'Lin Bauman', 'Onur Lopez'])

    # test return_X_y option
    fetch_func = partial(fetch_lfw_people, data_home=SCIKIT_LEARN_DATA,
                         resize=None,
                         slice_=None, color=True,
                         download_if_missing=False)
    check_return_X_y(lfw_people, fetch_func) 
示例4
def test_load_fake_lfw_people_too_restrictive():
    assert_raises(ValueError, fetch_lfw_people, data_home=SCIKIT_LEARN_DATA,
                  min_faces_per_person=100, download_if_missing=False) 
示例5
def get_lfw(max_size=None):
    dataset = fetch_lfw_people(color=True)
    # keep only one image per person
    return image_per_label(
        dataset.images,
        dataset.target,
        dataset.target_names,
        max_size=max_size) 
示例6
def dictionary_learn_ex():

    patch_shape = (18, 18)
    n_atoms = 225
    n_plot_atoms = 225
    n_nonzero_coefs = 2
    n_jobs = 8
    lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4,color=False)
    n_imgs, h, w = lfw_people.images.shape

    imgs = []
    for i in range(n_imgs):
        img = lfw_people.images[i, :, :].reshape((h, w))
        img /= 255.
        imgs.append(img)

    print 'Extracting reference patches...'
    X = extract_patches(imgs, patch_size=patch_shape[0],scale=False,n_patches=int(1e5),verbose=True,n_jobs=n_jobs)
    print "number of patches:", X.shape[1]

    se = sparse_encoder(algorithm='bomp',params={'n_nonzero_coefs': n_nonzero_coefs}, n_jobs=n_jobs)

    odc = online_dictionary_coder(n_atoms=n_atoms, sparse_coder=se, n_epochs=2,
                                  batch_size=1000, non_neg=False, verbose=True, n_jobs=n_jobs)
    odc.fit(X)
    D = odc.D
    plt.figure(figsize=(4.2, 4))
    for i in range(n_plot_atoms):
        plt.subplot(15, 15, i + 1)
        plt.imshow(D[:, i].reshape(patch_shape), cmap=plt.cm.gray)
        plt.subplots_adjust(left=0.0, bottom=0.0, right=1.0, top=1.0, wspace=0.0, hspace=0.0)
        plt.xticks(())
        plt.yticks(())
    plt.show() 
示例7
def visualize():
  """
  Writes out various visualizations of our testing data."
  """
  print "Preparing visualizations..."

  tile_faces(fetch_lfw_people()["images"], constants.LOG_DIR + "/all_faces_tiled.png") 
示例8
def test_load_empty_lfw_people():
    fetch_lfw_people(data_home=SCIKIT_LEARN_EMPTY_DATA,
                     download_if_missing=False) 
示例9
def test_load_fake_lfw_people():
    lfw_people = fetch_lfw_people(data_home=SCIKIT_LEARN_DATA,
                                  min_faces_per_person=3,
                                  download_if_missing=False)

    # The data is croped around the center as a rectangular bounding box
    # around the face. Colors are converted to gray levels:
    assert_equal(lfw_people.images.shape, (10, 62, 47))
    assert_equal(lfw_people.data.shape, (10, 2914))

    # the target is array of person integer ids
    assert_array_equal(lfw_people.target, [2, 0, 1, 0, 2, 0, 2, 1, 1, 2])

    # names of the persons can be found using the target_names array
    expected_classes = ['Abdelatif Smith', 'Abhati Kepler', 'Onur Lopez']
    assert_array_equal(lfw_people.target_names, expected_classes)

    # It is possible to ask for the original data without any croping or color
    # conversion and not limit on the number of picture per person
    lfw_people = fetch_lfw_people(data_home=SCIKIT_LEARN_DATA, resize=None,
                                  slice_=None, color=True,
                                  download_if_missing=False)
    assert_equal(lfw_people.images.shape, (17, 250, 250, 3))

    # the ids and class names are the same as previously
    assert_array_equal(lfw_people.target,
                       [0, 0, 1, 6, 5, 6, 3, 6, 0, 3, 6, 1, 2, 4, 5, 1, 2])
    assert_array_equal(lfw_people.target_names,
                       ['Abdelatif Smith', 'Abhati Kepler', 'Camara Alvaro',
                        'Chen Dupont', 'John Lee', 'Lin Bauman', 'Onur Lopez']) 
示例10
def test_load_fake_lfw_people_too_restrictive():
    fetch_lfw_people(data_home=SCIKIT_LEARN_DATA, min_faces_per_person=100,
                     download_if_missing=False) 
示例11
def whitened_rgb_atoms():

    # a small dataset of images
    imgs = get_images(colored=True)

    # alternatively we could use the lfw dataset
    """
    lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4,color=True)
    faces = lfw_people.data
    n_imgs,h,w,n_channels = lfw_people.images.shape
    imgs = []
    for i in range(n_imgs):
        img = lfw_people.images[i,:,:,:].reshape((h,w,n_channels))
        imgs.append(img)
    """

    patch_shape = (8, 8)
    n_atoms = 100
    n_plot_atoms = 100
    n_nonzero_coefs = 1

    print 'Extracting reference patches...'
    X = extract_patches(imgs, patch_size=patch_shape[0], scale=False, n_patches=int(5e5), mem="low")
    print "number of patches:", X.shape[1]

    wn = preproc("whitening")
    from lyssa.feature_extract.preproc import local_contrast_normalization
    # apply lcn and then whiten the patches
    X = wn(local_contrast_normalization(X))
    # learn the dictionary using Batch Orthognal Matching Pursuit and KSVD
    se = sparse_encoder(algorithm='bomp', params={'n_nonzero_coefs': n_nonzero_coefs}, n_jobs=8)
    kc = ksvd_coder(n_atoms=n_atoms, sparse_coder=se, init_dict="data",
                    max_iter=3, verbose=True, approx=False, n_jobs=8)

    kc.fit(X)
    D = kc.D
    for i in range(n_atoms):
        D[:, i] = (D[:, i] - D[:, i].min()) / float((D[:, i].max() - D[:, i].min()))
    # plot the learned dictionary
    plt.figure(figsize=(4.2, 4))
    for i in range(n_plot_atoms):
        plt.subplot(10, 10, i + 1)
        plt.imshow(D[:, i].reshape((patch_shape[0], patch_shape[1], 3)))
        plt.subplots_adjust(left=0.0, bottom=0.0, right=1.0, top=1.0, wspace=0.0, hspace=0.0)
        plt.xticks(())
        plt.yticks(())
    plt.show()