Python源码示例:sklearn.datasets.load_files()
示例1
def test_default_empty_load_files(load_files_root):
    """Loading `load_files_root` alone must give an empty result.

    No category fixtures are requested here, and the test expects zero
    filenames, zero target names and a `DESCR` of None.
    """
    bunch = load_files(load_files_root)

    n_files = len(bunch.filenames)
    assert_equal(n_files, 0)

    n_categories = len(bunch.target_names)
    assert_equal(n_categories, 0)

    assert_equal(bunch.DESCR, None)
示例2
def test_default_load_files(test_category_dir_1, test_category_dir_2,
                            load_files_root):
    """Default `load_files` call with both category fixtures requested.

    Expects one filename, two target names, a `DESCR` of None, and the
    file content returned as raw bytes.
    """
    bunch = load_files(load_files_root)

    n_files = len(bunch.filenames)
    assert_equal(n_files, 1)

    n_categories = len(bunch.target_names)
    assert_equal(n_categories, 2)

    assert_equal(bunch.DESCR, None)

    expected_data = [b"Hello World!\n"]
    assert_equal(bunch.data, expected_data)
示例3
def test_load_files_w_categories_desc_and_encoding(
        test_category_dir_1, test_category_dir_2, load_files_root):
    """`load_files` with an explicit category, description and encoding.

    Restricting the load to the first category directory must give one
    filename and one target name, echo the description back in `DESCR`,
    and return the content decoded to text (encoding is "utf-8").
    """
    # basename() honours the platform's path separator, whereas splitting on
    # a hard-coded '/' returns the whole path when the separator differs.
    category = os.path.basename(os.path.abspath(test_category_dir_1))
    # `categories` is documented as a list of names; with a bare string the
    # membership test degrades to a substring match.
    res = load_files(load_files_root, description="test",
                     categories=[category], encoding="utf-8")
    assert_equal(len(res.filenames), 1)
    assert_equal(len(res.target_names), 1)
    assert_equal(res.DESCR, "test")
    assert_equal(res.data, ["Hello World!\n"])
示例4
def test_load_files_wo_load_content(
        test_category_dir_1, test_category_dir_2, load_files_root):
    """`load_files(..., load_content=False)` must not expose a 'data' entry.

    Filenames and target names are still expected (one file, two target
    names) and `DESCR` is expected to be None.
    """
    bunch = load_files(load_files_root, load_content=False)

    n_files = len(bunch.filenames)
    assert_equal(n_files, 1)

    n_categories = len(bunch.target_names)
    assert_equal(n_categories, 2)

    assert_equal(bunch.DESCR, None)

    data_entry = bunch.get('data')
    assert_equal(data_entry, None)
示例5
def _load_data_from_local(
        model, categories=None, encoding=None):
    '''
    Load the dataset for `model`, going through a local cache file.

    1. Look for the cache file ``<DATA_DIR>/<model>/<model>.pkz``.
    2. If the cache file does not exist:
       2.1 Build the dataset with `load_files` from the files under
           ``<DATA_DIR>/<model>`` and write it to the cache file.
       2.2 Any error raised while loading or writing propagates to the
           caller.

    :param model: name of the sub-directory of DATA_DIR (also the cache
        file stem)
    :param categories: forwarded to `load_files` when the cache is built
    :param encoding: forwarded to `load_files` when the cache is built
    :return: the object stored under the 'all' key of the cache
    :raises NotSupportError: if an existing cache file cannot be read,
        decompressed or unpickled

    NOTE(review): when a cache file already exists it is returned as-is;
    `categories` and `encoding` are not consulted on that path, so a cache
    built with different arguments is served unchanged.
    '''
    model_path = os.path.join(DATA_DIR, model)
    cache_path = os.path.join(model_path, model + '.pkz')
    if os.path.exists(cache_path):
        try:
            with open(cache_path, 'rb') as f:
                compressed_content = f.read()
            uncompressed_content = codecs.decode(
                compressed_content, 'zlib_codec')
            # SECURITY: pickle.loads can execute arbitrary code; the cache
            # file must only ever come from a trusted location.
            return pickle.loads(uncompressed_content)['all']
        except Exception:
            # Can't load cache files
            error = ('Can\'t load cached data from {0}. '
                     'Please try again after delete cache files.'.format(model))
            raise NotSupportError(error)
    # No cache yet: load from the raw files, then persist a zlib-compressed
    # pickle so the next call can take the fast path above.
    cache = dict(all=load_files(
        model_path, categories=categories, encoding=encoding))
    compressed_content = codecs.encode(pickle.dumps(cache), 'zlib_codec')
    with open(cache_path, 'wb') as f:
        f.write(compressed_content)
    return cache['all']
示例6
def __call__(self):
    """Download the data, load the 'pos'/'neg' folders and return (X, y).

    X is a NumPy array of the documents decoded from bytes to str; y is the
    'target' entry of the loaded dataset.
    """
    download()
    bunch = load_files(self.path, categories=['pos', 'neg'])
    raw_docs = bunch['data']
    labels = bunch['target']
    # load_files was called without an encoding, so documents are bytes.
    texts = np.asarray([doc.decode() for doc in raw_docs])
    return texts, labels
示例7
def load_lease_dataset(root):
    """Return whatever `load_files` produces for the directory `root`."""
    dataset = load_files(root)
    return dataset
示例8
def get_datasets_localdata(container_path=None, categories=None, load_content=True,
                           encoding='utf-8', shuffle=True, random_state=42):
    """
    Thin wrapper that forwards every argument to `load_files`.
    Text files are expected in a two-level folder structure, with the
    sub-folder names acting as categories.
    :param container_path: The path of the container
    :param categories: List of classes to choose, all classes are chosen by default (if empty or omitted)
    :param load_content: passed through to `load_files` unchanged
    :param encoding: passed through to `load_files` unchanged
    :param shuffle: shuffle the list or not
    :param random_state: seed integer to shuffle the dataset
    :return: the object returned by `load_files`
    """
    return load_files(
        container_path=container_path,
        categories=categories,
        load_content=load_content,
        shuffle=shuffle,
        encoding=encoding,
        random_state=random_state,
    )
示例9
def __init__(self, cfg=None):
    """
    Load a dataset with `load_files`, configured entirely through `cfg`.
    The result is stored on the instance as `__dataset__`.
    :param cfg: mapping providing the keys 'container_path', 'categories',
        'load_content', 'shuffle', 'encoding' and 'random_state'; each value
        is forwarded to `load_files` under the keyword of the same name
    :raises TypeError: if `cfg` is None
    """
    super().__init__()
    if cfg is None:
        # Same exception type the subscript below would raise on None, but
        # with a message that says what is actually missing.
        raise TypeError(
            "cfg must be a mapping with the keys 'container_path', "
            "'categories', 'load_content', 'shuffle', 'encoding' and "
            "'random_state'")
    self.__dataset__ = load_files(container_path=cfg['container_path'], categories=cfg['categories'],
                                  load_content=cfg['load_content'], shuffle=cfg['shuffle'],
                                  encoding=cfg['encoding'], random_state=cfg['random_state'])
示例10
def get_datasets_localdata(container_path=None, categories=None, load_content=True,
                           encoding='utf-8', shuffle=True, random_state=42):
    """
    Thin wrapper that forwards every argument to `load_files`.
    Text files are expected in a two-level folder structure, with the
    sub-folder names acting as categories.
    :param container_path: The path of the container
    :param categories: List of classes to choose, all classes are chosen by default (if empty or omitted)
    :param load_content: passed through to `load_files` unchanged
    :param encoding: passed through to `load_files` unchanged
    :param shuffle: shuffle the list or not
    :param random_state: seed integer to shuffle the dataset
    :return: the object returned by `load_files`
    """
    return load_files(
        container_path=container_path,
        categories=categories,
        load_content=load_content,
        shuffle=shuffle,
        encoding=encoding,
        random_state=random_state,
    )
示例11
def test_default_empty_load_files():
    """Loading LOAD_FILES_ROOT here must give an empty result.

    The test expects zero filenames, zero target names and a `DESCR` of
    None.
    """
    bunch = load_files(LOAD_FILES_ROOT)

    n_files = len(bunch.filenames)
    assert_equal(n_files, 0)

    n_categories = len(bunch.target_names)
    assert_equal(n_categories, 0)

    assert_equal(bunch.DESCR, None)
示例12
def test_default_load_files():
    """Default `load_files` call on LOAD_FILES_ROOT.

    Expects one filename, two target names, a `DESCR` of None, and the
    file content equal to `b("Hello World!\\n")`.
    """
    bunch = load_files(LOAD_FILES_ROOT)

    n_files = len(bunch.filenames)
    assert_equal(n_files, 1)

    n_categories = len(bunch.target_names)
    assert_equal(n_categories, 2)

    assert_equal(bunch.DESCR, None)

    expected_data = [b("Hello World!\n")]
    assert_equal(bunch.data, expected_data)
示例13
def test_load_files_w_categories_desc_and_encoding():
    """`load_files` with an explicit category, description and encoding.

    Restricting the load to the TEST_CATEGORY_DIR1 folder must give one
    filename and one target name, echo the description back in `DESCR`,
    and return the content equal to `u("Hello World!\\n")`.
    """
    # basename() honours the platform's path separator, whereas splitting on
    # a hard-coded '/' returns the whole path when the separator differs.
    category = os.path.basename(os.path.abspath(TEST_CATEGORY_DIR1))
    # `categories` is documented as a list of names; with a bare string the
    # membership test degrades to a substring match.
    res = load_files(LOAD_FILES_ROOT, description="test",
                     categories=[category], encoding="utf-8")
    assert_equal(len(res.filenames), 1)
    assert_equal(len(res.target_names), 1)
    assert_equal(res.DESCR, "test")
    assert_equal(res.data, [u("Hello World!\n")])
示例14
def test_load_files_wo_load_content():
    """`load_files(..., load_content=False)` must not expose a 'data' entry.

    Filenames and target names are still expected (one file, two target
    names) and `DESCR` is expected to be None.
    """
    bunch = load_files(LOAD_FILES_ROOT, load_content=False)

    n_files = len(bunch.filenames)
    assert_equal(n_files, 1)

    n_categories = len(bunch.target_names)
    assert_equal(n_categories, 2)

    assert_equal(bunch.DESCR, None)

    data_entry = bunch.get('data')
    assert_equal(data_entry, None)
示例15
def main(args):
    """Compute an embedding for every file under ``args.data_dir`` and save them.

    Reads ``args.output_dir``, ``args.trained_model_dir``, ``args.data_dir``
    and ``args.batch_size``. Writes two files into the output directory:
    ``gallery.npy`` (the 'target' array returned by `load_files`) and
    ``signatures.npy`` (one embedding row per file, in the same order).
    """
    with tf.Graph().as_default():
        with tf.Session() as sess:
            # create output directory if it doesn't exist
            output_dir = os.path.expanduser(args.output_dir)
            if not os.path.isdir(output_dir):
                os.makedirs(output_dir)

            # load the model
            print("Loading trained model...\n")
            # Expand '~' once and hand the same path to both calls, so the
            # model files are looked up and loaded from the same directory.
            model_dir = os.path.expanduser(args.trained_model_dir)
            meta_file, ckpt_file = facenet.get_model_filenames(model_dir)
            facenet.load_model(model_dir, meta_file, ckpt_file)

            # grab all image paths and labels
            print("Finding image paths and targets...\n")
            # shuffle=False keeps labels and paths in a stable order.
            data = load_files(args.data_dir, load_content=False, shuffle=False)
            labels_array = data['target']
            paths = data['filenames']

            # Get input and output tensors
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            image_size = images_placeholder.get_shape()[1]
            embedding_size = embeddings.get_shape()[1]

            # Run forward pass to calculate embeddings
            print('Generating embeddings from images...\n')
            start_time = time.time()
            batch_size = args.batch_size
            nrof_images = len(paths)
            nrof_batches = int(np.ceil(1.0*nrof_images / batch_size))
            emb_array = np.zeros((nrof_images, embedding_size))
            # range() exists on both Python 2 and 3; xrange is Python 2 only.
            for i in range(nrof_batches):
                start_index = i*batch_size
                end_index = min((i+1)*batch_size, nrof_images)
                paths_batch = paths[start_index:end_index]
                images = facenet.load_data(paths_batch, do_random_crop=False, do_random_flip=False, image_size=image_size, do_prewhiten=True)
                feed_dict = {images_placeholder: images, phase_train_placeholder: False}
                emb_array[start_index:end_index, :] = sess.run(embeddings, feed_dict=feed_dict)

            elapsed = time.time() - start_time
            # An empty data directory would otherwise divide by zero here.
            time_avg_forward_pass = elapsed / float(nrof_images) if nrof_images else 0.0
            print("Forward pass took avg of %.3f[seconds/image] for %d images\n" % (time_avg_forward_pass, nrof_images))

            print("Finally saving embeddings and gallery to: %s" % (output_dir))
            # save the gallery and embeddings (signatures) as numpy arrays to disk
            np.save(os.path.join(output_dir, "gallery.npy"), labels_array)
            np.save(os.path.join(output_dir, "signatures.npy"), emb_array)
示例16
def main(args):
    """Compute an embedding for every file under ``args.data_dir`` and save them.

    Reads ``args.output_dir``, ``args.trained_model_dir``, ``args.data_dir``
    and ``args.batch_size``. Writes two files into the output directory:
    ``gallery.npy`` (the 'target' array returned by `load_files`) and
    ``signatures.npy`` (one embedding row per file, in the same order).
    """
    with tf.Graph().as_default():
        with tf.Session() as sess:
            # create output directory if it doesn't exist
            output_dir = os.path.expanduser(args.output_dir)
            if not os.path.isdir(output_dir):
                os.makedirs(output_dir)

            # load the model
            print("Loading trained model...\n")
            # Expand '~' once and hand the same path to both calls, so the
            # model files are looked up and loaded from the same directory.
            model_dir = os.path.expanduser(args.trained_model_dir)
            meta_file, ckpt_file = facenet.get_model_filenames(model_dir)
            facenet.load_model(model_dir, meta_file, ckpt_file)

            # grab all image paths and labels
            print("Finding image paths and targets...\n")
            # shuffle=False keeps labels and paths in a stable order.
            data = load_files(args.data_dir, load_content=False, shuffle=False)
            labels_array = data['target']
            paths = data['filenames']

            # Get input and output tensors
            images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0")
            embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0")
            phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0")
            image_size = images_placeholder.get_shape()[1]
            embedding_size = embeddings.get_shape()[1]

            # Run forward pass to calculate embeddings
            print('Generating embeddings from images...\n')
            start_time = time.time()
            batch_size = args.batch_size
            nrof_images = len(paths)
            nrof_batches = int(np.ceil(1.0*nrof_images / batch_size))
            emb_array = np.zeros((nrof_images, embedding_size))
            # range() exists on both Python 2 and 3; xrange is Python 2 only.
            for i in range(nrof_batches):
                start_index = i*batch_size
                end_index = min((i+1)*batch_size, nrof_images)
                paths_batch = paths[start_index:end_index]
                images = facenet.load_data(paths_batch, do_random_crop=False, do_random_flip=False, image_size=image_size, do_prewhiten=True)
                feed_dict = {images_placeholder: images, phase_train_placeholder: False}
                emb_array[start_index:end_index, :] = sess.run(embeddings, feed_dict=feed_dict)

            elapsed = time.time() - start_time
            # An empty data directory would otherwise divide by zero here.
            time_avg_forward_pass = elapsed / float(nrof_images) if nrof_images else 0.0
            print("Forward pass took avg of %.3f[seconds/image] for %d images\n" % (time_avg_forward_pass, nrof_images))

            print("Finally saving embeddings and gallery to: %s" % (output_dir))
            # save the gallery and embeddings (signatures) as numpy arrays to disk
            np.save(os.path.join(output_dir, "gallery.npy"), labels_array)
            np.save(os.path.join(output_dir, "signatures.npy"), emb_array)