Python源码示例:concurrent.futures.ProcessPoolExecutor()
示例1
def main():
    """Time ``is_prime`` over ``PRIMES`` with three execution strategies.

    The same work is run through a 4-worker process pool, a 4-worker thread
    pool and a plain sequential loop. Every individual result is printed,
    followed by the elapsed wall-clock time of the strategy.
    """
    # Strategy 1: process pool.
    started = timeit.default_timer()
    with ProcessPoolExecutor(max_workers=4) as pool:
        verdicts = pool.map(is_prime, PRIMES)
        for candidate, verdict in zip(PRIMES, verdicts):
            print('%d is prime: %s' % (candidate, verdict))
    print("{} Seconds Needed for ProcessPoolExecutor".format(timeit.default_timer() - started))

    # Strategy 2: thread pool.
    started = timeit.default_timer()
    with ThreadPoolExecutor(max_workers=4) as pool:
        verdicts = pool.map(is_prime, PRIMES)
        for candidate, verdict in zip(PRIMES, verdicts):
            print('%d is prime: %s' % (candidate, verdict))
    print("{} Seconds Needed for ThreadPoolExecutor".format(timeit.default_timer() - started))

    # Strategy 3: sequential baseline in the calling thread.
    started = timeit.default_timer()
    for candidate in PRIMES:
        verdict = is_prime(candidate)
        print("{} is prime: {}".format(candidate, verdict))
    print("{} Seconds needed for single threaded execution".format(timeit.default_timer() - started))
示例2
def test_smoke_one_file(self):
    """Smoke test: convert a JSON Lines stream read from a FIFO into SQLite.

    A named pipe is created inside an isolated filesystem, ``fifo_writer`` is
    submitted to a process pool with the pipe name, and the CLI command is
    invoked against the pipe. The test expects a successful exit code and
    8 records in the ``jsonl_fifo`` table.
    """
    db_path = "test.sqlite"
    cli = CliRunner()

    with cli.isolated_filesystem():
        fifo_name = "jsonl_fifo"
        os.mkfifo(fifo_name)
        cli_args = ["-o", db_path, "file", fifo_name, "--format", "jsonl"]

        # The writer is submitted before the CLI runs so that both ends of
        # the pipe are active at the same time.
        with ProcessPoolExecutor() as pool:
            pool.submit(fifo_writer, fifo_name)
            result = cli.invoke(cmd, cli_args)

        print_traceback(result)

        assert result.exit_code == ExitCode.SUCCESS, fifo_name
        record_count = SimpleSQLite(db_path).fetch_num_records("jsonl_fifo")
        assert record_count == 8
示例3
def launch(experiment: str, num_workers: int = 1, seed: tp.Optional[int] = None,
           cap_index: tp.Optional[int] = None, output: tp.Optional[PathLike] = None) -> Path:
    """Compute an experiment and store its results as CSV.

    Parameters
    ----------
    experiment: str
        Name of the experiment; also used to build the default CSV file name.
    num_workers: int
        With 1 the computation runs in-process, otherwise a process pool with
        that many workers is handed to ``core.compute``.
    seed: optional int
        Forwarded to ``core.compute``.
    cap_index: optional int
        Forwarded to ``core.compute``.
    output: optional path
        Destination CSV; defaults to ``<experiment>.csv``.

    Returns
    -------
    Path
        The path the data was finally written to.
    """
    if output is None:
        csvpath = Path(experiment + ".csv")
    else:
        csvpath = Path(output)

    # create the data
    if num_workers != 1:
        with futures.ProcessPoolExecutor(max_workers=num_workers) as pool:
            df = core.compute(experiment, seed=seed, cap_index=cap_index, executor=pool, num_workers=num_workers)
    else:
        df = core.compute(experiment, cap_index=cap_index, seed=seed)

    # save data to csv, retrying once with the default file name on failure
    try:
        core.save_or_append_to_csv(df, csvpath)
    except Exception:  # pylint: disable=broad-except
        csvpath = Path(experiment + ".csv")
        print(f"Failed to save to {output}, falling back to {csvpath}")
        core.save_or_append_to_csv(df, csvpath)
    else:
        print(f"Saved data to {csvpath}")
    return csvpath
示例4
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    '''Preprocesses the LJ Speech dataset from a given input path into a given output directory.

    Args:
      in_dir: The directory where you have downloaded the LJ Speech dataset
      out_dir: The directory to write the output into
      num_workers: Optional number of worker processes to parallelize across
      tqdm: You can optionally pass tqdm to get a nice progress bar

    Returns:
      A list with the non-None results of _process_utterance, one per line of
      ``metadata.train``. This should be written to train.txt
    '''
    # We use ProcessPoolExecutor to parallelize across processes. This is just an optimization and
    # you can omit it and just call _process_utterance on each input if you want.
    futures = []
    # The context manager shuts the pool down (and reaps the workers) even
    # when reading the metadata or collecting a result raises.
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        with open(os.path.join(in_dir, 'metadata.train'), encoding='utf-8') as f:
            # Utterance indices are 1-based and follow the line order.
            for index, line in enumerate(f, start=1):
                parts = line.strip().split('|')
                wav_path = os.path.join(in_dir, 'wavs', '%s.wav' % parts[0])
                text = parts[1]
                futures.append(executor.submit(_process_utterance, out_dir, index, wav_path, text))
        results = [future.result() for future in tqdm(futures)]
    return [r for r in results if r is not None]
示例5
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    """Preprocess every accepted utterance of every book in ``books``.

    For each book, ``<in_dir>/<book>/sentence_index.txt`` is read line by
    line. A line is accepted when it does not start with ``#``, has exactly
    8 tab-separated fields and its 4th field exceeds ``_min_confidence``.
    Accepted lines are handed to ``_process_utterance`` in worker processes.

    Args:
      in_dir: Root directory containing one sub-directory per book
      out_dir: Passed through to ``_process_utterance``
      num_workers: Number of worker processes
      tqdm: Optional progress wrapper applied to the list of futures

    Returns:
      The non-None results of ``_process_utterance`` in submission order.
    """
    futures = []
    index = 1
    # The context manager guarantees the worker processes are shut down.
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        for book in books:
            with open(os.path.join(in_dir, book, 'sentence_index.txt')) as f:
                for line in f:
                    parts = line.strip().split('\t')
                    # Compare by value: `is not` on a str literal only works
                    # by accident of interning and raises a SyntaxWarning.
                    if line[0] != '#' and len(parts) == 8 and float(parts[3]) > _min_confidence:
                        wav_path = os.path.join(in_dir, book, 'wav', '%s.wav' % parts[0])
                        labels_path = os.path.join(in_dir, book, 'lab', '%s.lab' % parts[0])
                        text = parts[5]
                        futures.append(executor.submit(
                            _process_utterance, out_dir, index, wav_path, labels_path, text))
                        index += 1
        results = [future.result() for future in tqdm(futures)]
    return [r for r in results if r is not None]
示例6
def __init__(self, config: Config = None) -> None:
    """Build the bot state: event loop, API wrapper, cache and executors.

    When no ``config`` is given, one is created from ``DEFAULT`` with a
    placeholder token and empty channel/user mappings.
    """
    if config is None:
        config = Config(**DEFAULT, TOKEN='asdf', CHANNELS={}, USERS={})

    # Register this instance as the bot on Namespace.
    Namespace._bot = self

    self.loop = asyncio.get_event_loop()
    self.call_queue: List[Call] = []
    self.api = SlackAPI(self)

    # Channel collections all start empty.
    self.channels: List[PublicChannel] = []
    self.ims: List[DirectMessageChannel] = []
    self.groups: List[PrivateChannel] = []

    # Cache client wrapped in CacheMock with the 'YUI_TEST_' prefix.
    self.mc = aiomcache.Client(host=config.CACHE['HOST'], port=config.CACHE['PORT'])
    self.cache: CacheMock = CacheMock(self.mc, 'YUI_TEST_')

    system_user = User(id='U0', team_id='T0', name='system')
    self.users: List[User] = [system_user]
    self.responses: Dict[str, Callable] = {}
    self.config = config

    self.process_pool_executor = ProcessPoolExecutor()
    self.thread_pool_executor = ThreadPoolExecutor()
示例7
def par_crop(args):
    """
    Dataset curation: crop data and transform the format of a label.

    Annotation XML files are collected per sub-set below
    ``<download_dir>/ILSVRC/Annotations/DET/train/`` and each one is handed to
    ``crop_xml`` in a process pool of ``args.num_threads`` workers. Output goes
    below ``<download_dir>/crop<instance_size>/<sub_set>``.
    """
    crop_path = os.path.join(args.download_dir, './crop{:d}'.format(args.instance_size))
    if not os.path.isdir(crop_path):
        makedirs(crop_path)

    vid_root = os.path.join(args.download_dir, './ILSVRC')
    ann_base_path = os.path.join(vid_root, 'Annotations/DET/train/')

    for sub_set in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i'):
        sub_set_base_path = os.path.join(ann_base_path, sub_set)
        # Sub-set 'a' is globbed one directory level deeper than the others.
        if sub_set == 'a':
            pattern = os.path.join(sub_set_base_path, '*', '*.xml')
        else:
            pattern = os.path.join(sub_set_base_path, '*.xml')
        xmls = sorted(glob.glob(pattern))
        n_imgs = len(xmls)
        sub_set_crop_path = os.path.join(crop_path, sub_set)

        with futures.ProcessPoolExecutor(max_workers=args.num_threads) as pool:
            jobs = [pool.submit(crop_xml, args, xml, sub_set_crop_path, args.instance_size)
                    for xml in xmls]
            # Progress is reported as each job finishes; results are not read.
            for finished, _ in enumerate(futures.as_completed(jobs)):
                printProgress(finished, n_imgs, prefix=sub_set, suffix='Done ', barLength=80)
示例8
def par_crop(args, ann_base_path):
    """
    Dataset curation: crop data and transform the format of the labels.

    Every entry of ``<ann_base_path>/<sub_set>`` (sub-sets 'a' to 'e') is
    handed to ``crop_video`` in a process pool of ``args.num_threads`` workers.

    Parameters
    ----------
    args: namespace providing ``download_dir``, ``instance_size`` and
        ``num_threads``
    ann_base_path: str, Annotations base path
    """
    crop_dir_name = './crop{:d}'.format(int(args.instance_size))
    crop_path = os.path.join(args.download_dir, crop_dir_name)
    if not os.path.isdir(crop_path):
        makedirs(crop_path)

    for sub_set in ('a', 'b', 'c', 'd', 'e'):
        videos = sorted(os.listdir(os.path.join(ann_base_path, sub_set)))
        n_videos = len(videos)
        with futures.ProcessPoolExecutor(max_workers=args.num_threads) as pool:
            jobs = [pool.submit(crop_video, args, sub_set, video, crop_path, ann_base_path)
                    for video in videos]
            # Progress is reported as each job finishes; results are not read.
            for finished, _ in enumerate(futures.as_completed(jobs)):
                printProgress(finished, n_videos, prefix=sub_set, suffix='Done ', barLength=40)
示例9
def __init__(self, threads: int = None) -> None:
    """Set up the event loop, semaphores, filter list and process pool.

    Args:
      threads: Number of worker processes; when falsy, the value of
        ``threads_to_use()`` is used instead.
    """
    # get or create loop (threads don't have one)
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    #: our asyncio loop
    self.loop = loop

    #: number of threads to use
    if threads:
        self.threads = threads
    else:
        self.threads = threads_to_use()

    #: semaphore to limit io parallelism
    self.io_sem: asyncio.Semaphore = asyncio.Semaphore(1)
    #: must never run more than one conda at the same time
    #: (used by PyPi when running skeleton)
    self.conda_sem: asyncio.Semaphore = asyncio.Semaphore(1)
    #: the filters successively applied to each item
    self.filters: List[AsyncFilter] = []
    #: executor running things in separate python processes
    self.proc_pool_executor = ProcessPoolExecutor(self.threads)

    self._shutting_down = False
示例10
def run(self) -> bool:
    """Enters the asyncio loop and manages shutdown.

    NOTE(review): the signature promises ``bool`` but no return statement is
    visible in this block, so it returns ``None`` as written.
    """
    def _on_sigint():
        # Schedule the asynchronous shutdown on the running loop.
        asyncio.ensure_future(self.shutdown(signal.SIGINT))

    # We need to handle KeyboardInterrupt "manually" to get clean shutdown
    # for the ProcessPoolExecutor
    self.loop.add_signal_handler(signal.SIGINT, _on_sigint)

    try:
        pipeline = asyncio.ensure_future(self._async_run())
        self.loop.run_until_complete(pipeline)
        logger.warning("Finished update")
    except asyncio.CancelledError:
        # Cancellation is an expected way for the run to end.
        pass
    except EndProcessing:
        logger.error("Terminating...")
        self.loop.run_until_complete(self.shutdown())

    # Give every filter the chance to finish its work.
    for item_filter in self.filters:
        item_filter.finalize()
示例11
def preprocess(wav_dirs, out_dir, num_workers, params):
    """Run ``process_wav`` over every ``*.wav`` file and write the metadata.

    Args:
      wav_dirs: Iterable of directories searched (non-recursively) for
        ``*.wav`` files
      out_dir: Output root; ``audio`` and ``mels`` sub-directories are created
      num_workers: Number of worker processes
      params: Passed through to ``process_wav`` and ``write_metadata``
    """
    audio_out_dir = os.path.join(out_dir, "audio")
    mel_out_dir = os.path.join(out_dir, "mels")
    os.makedirs(out_dir, exist_ok=True)
    os.makedirs(audio_out_dir, exist_ok=True)
    os.makedirs(mel_out_dir, exist_ok=True)

    # `wav_dir` instead of `dir`, which shadowed the builtin.
    wav_paths = chain.from_iterable(
        glob.iglob("{}/*.wav".format(wav_dir), recursive=True) for wav_dir in wav_dirs)

    # The context manager makes sure the worker processes are shut down.
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        futures = []
        for wav_path in wav_paths:
            # Swap only the extension; str.replace would also rewrite a
            # ".wav" occurring in the middle of the file name.
            fid = os.path.splitext(os.path.basename(wav_path))[0] + ".npy"
            audio_path = os.path.join(audio_out_dir, fid)
            mel_path = os.path.join(mel_out_dir, fid)
            futures.append(executor.submit(process_wav, wav_path, audio_path, mel_path, params))
        metadata = [future.result() for future in tqdm(futures)]

    write_metadata(metadata, out_dir, params)
示例12
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    """Queue ``_process_utterance`` for each accepted line of every book.

    Each ``<in_dir>/<book>/sentence_index.txt`` (for ``book`` in ``books``) is
    scanned. Lines starting with ``#``, lines without exactly 8 tab-separated
    fields and lines whose 4th field is not above ``_min_confidence`` are
    skipped.

    Args:
      in_dir: Root directory containing one sub-directory per book
      out_dir: Passed through to ``_process_utterance``
      num_workers: Number of worker processes
      tqdm: Optional progress wrapper applied to the list of futures

    Returns:
      The non-None results of ``_process_utterance`` in submission order.
    """
    pending = []
    index = 1
    # Using the pool as a context manager releases its workers on exit.
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        for book in books:
            index_file = os.path.join(in_dir, book, 'sentence_index.txt')
            with open(index_file) as f:
                for line in f:
                    # Value comparison replaces the identity test
                    # `line[0] is not '#'`, which is not guaranteed to work.
                    if line.startswith('#'):
                        continue
                    parts = line.strip().split('\t')
                    if len(parts) != 8 or not float(parts[3]) > _min_confidence:
                        continue
                    wav_path = os.path.join(in_dir, book, 'wav', '%s.wav' % parts[0])
                    labels_path = os.path.join(in_dir, book, 'lab', '%s.lab' % parts[0])
                    text = parts[5]
                    pending.append(executor.submit(
                        _process_utterance, out_dir, index, wav_path, labels_path, text))
                    index += 1
        results = [future.result() for future in tqdm(pending)]
    return [r for r in results if r is not None]
示例13
def __init__(self, eggroll_session):
    """Initialise the standalone runtime bound to ``eggroll_session``.

    Records the data directory and session id, creates the meta table and the
    executor pool, registers this instance as the singleton and as the
    session's GC table, and resolves the local host name and address.
    """
    project_root = file_utils.get_project_base_directory()
    self.data_dir = os.path.join(project_root, 'data')
    self.session_id = eggroll_session.get_session_id()
    self.meta_table = _DTable('__META__', '__META__', 'fragments', 10)
    self.pool = Executor()
    Standalone.__instance = self

    self.eggroll_session = eggroll_session
    self.unique_id_template = '_EggRoll_%s_%s_%s_%.20f_%d'

    eggroll_session.set_gc_table(self)
    eggroll_session.add_cleanup_task(eggroll_session.clean_duplicated_table)

    # todo: move to eggrollSession
    try:
        self.host_name = socket.gethostname()
        self.host_ip = socket.gethostbyname(self.host_name)
    except socket.gaierror:
        # Name resolution failed: fall back to placeholders for both values.
        self.host_name = 'unknown'
        self.host_ip = 'unknown'
示例14
def convert_dataset(path, filemap, name, num_processes, max_num_support, max_tokens, is_web=True):
    """Load a pickled dataset and convert it with ``process``.

    Args:
      path: Path of the pickled dataset
      filemap, max_num_support, max_tokens, is_web: Passed through to
        ``process`` unchanged
      name: Stored as the ``source`` of the returned metadata
      num_processes: With 1 the whole dataset is processed in-process;
        otherwise it is split into chunks of 1000 items that are mapped over
        a process pool of that size

    Returns:
      ``{"meta": {"source": name}, "instances": [...]}``
    """
    # NOTE(security): pickle.load can execute arbitrary code; only use this
    # on dataset files from a trusted source.
    with open(path, 'rb') as f:
        dataset = pickle.load(f)

    if num_processes == 1:
        instances = process((dataset, filemap, max_num_support, max_tokens, is_web), True)
    else:
        chunk_size = 1000
        total = len(dataset)
        # Stepping by chunk_size avoids the empty trailing chunk that
        # `total // chunk_size + 1` produced for exact multiples.
        chunks = [(dataset[start:start + chunk_size], filemap, max_num_support, max_tokens, is_web)
                  for start in range(0, total, chunk_size)]
        instances = []
        done = 0
        # The context manager shuts the worker processes down afterwards.
        with ProcessPoolExecutor(num_processes) as executor:
            for processed in executor.map(process, chunks):
                instances.extend(processed)
                done += chunk_size
                print("%d/%d done" % (min(total, done), total))

    return {"meta": {"source": name}, 'instances': instances}
示例15
def simulate_walks(self, num_walks, walk_length):
    """Generate random walks in a single-worker child process.

    Graphs with more than 500000 entries use
    ``generate_random_walks_large_graphs``; all others use
    ``generate_random_walks``. The call blocks until the job has finished
    and re-raises any exception from it.
    """
    # The original author notes the large-graph variant exists because of
    # memory use.
    if len(self.G) > 500000:
        walker = generate_random_walks_large_graphs
    else:
        walker = generate_random_walks

    with ProcessPoolExecutor(max_workers=1) as pool:
        pending = pool.submit(walker, num_walks, walk_length, self.workers, self.G.keys())
        pending.result()
    return
示例16
def main():
    """Run ``task`` with the arguments 2, 3 and 4 in a 3-worker process pool.

    The returned futures are not inspected, so exceptions raised by ``task``
    are not reported. Leaving the ``with`` block waits for all three tasks.
    """
    # The message used to say "ThreadPoolExecutor" although a process pool
    # is what gets started.
    print("Starting ProcessPoolExecutor")
    with ProcessPoolExecutor(max_workers=3) as executor:
        for argument in (2, 3, 4):
            executor.submit(task, argument)
    print("All tasks complete")
示例17
def main():
    """Run ``task`` twice in a 3-worker process pool and wait for both.

    The pool is used as a context manager so its worker processes are shut
    down when the function returns, instead of lingering until interpreter
    exit. The futures are not inspected.
    """
    with ProcessPoolExecutor(max_workers=3) as executor:
        executor.submit(task)
        executor.submit(task)
示例18
def init_pool(self, worker_count):
    """Return a new ProcessPoolExecutor limited to ``worker_count`` workers."""
    pool = ProcessPoolExecutor(max_workers=worker_count)
    return pool
示例19
def parallelize(func, arg_lst, show_progress=False, max_workers=None):
    """Run ``func(*args)`` for every ``args`` in ``arg_lst`` in worker processes.

    Both ``func`` and all arguments must be picklable.

    Parameters
    ----------
    func:
        Function to execute repeatedly, must be picklable.
    arg_lst: iterable
        Iterable of positional-argument sequences; each element ``args`` is
        passed as ``func(*args)``.
    show_progress: bool
        Whether or not to wrap the result collection in a ``tqdm`` bar.
    max_workers: int
        Maximum number of parallel processes to execute simultaneously.

    Returns
    -------
    results: list
        Outcomes of ``func(*args)``, in the same order as ``arg_lst``.
    """
    with ProcessPoolExecutor(max_workers=max_workers) as pool:
        # Submit everything first so that work runs while results are read.
        pending = [pool.submit(func, *args) for args in arg_lst]
        waiting = tqdm(pending) if show_progress else pending
        return [job.result() for job in waiting]
示例20
def load_adjacencylist(file_, undirected=False, chunksize=10000, unchecked=True):
    """Parse an adjacency-list file in parallel and convert it to a graph.

    Args:
      file_: Path of the adjacency-list file
      undirected: When true, ``make_undirected()`` is applied to the result
      chunksize: Number of lines handed to each parsing task
      unchecked: Selects the ``*_unchecked`` parse/convert functions

    Returns:
      The graph built by the selected convert function.
    """
    if unchecked:
        parse_func = parse_adjacencylist_unchecked
        convert_func = from_adjlist_unchecked
    else:
        parse_func = parse_adjacencylist
        convert_func = from_adjlist

    adjlist = []
    total = 0
    # Initialised up front so that an empty file logs 0 chunks instead of
    # failing on an unbound loop variable.
    n_chunks = 0

    t0 = time()
    with open(file_) as f:
        with ProcessPoolExecutor(max_workers=cpu_count()) as executor:
            chunks = executor.map(parse_func, grouper(int(chunksize), f))
            # start=1 makes the logged value a count, not the last index.
            for n_chunks, adj_chunk in enumerate(chunks, start=1):
                adjlist.extend(adj_chunk)
                total += len(adj_chunk)
    t1 = time()
    logger.info('Parsed {} edges with {} chunks in {}s'.format(total, n_chunks, t1-t0))

    t0 = time()
    G = convert_func(adjlist)
    t1 = time()
    logger.info('Converted edges to graph in {}s'.format(t1-t0))

    if undirected:
        t0 = time()
        G = G.make_undirected()
        t1 = time()
        logger.info('Made graph undirected in {}s'.format(t1-t0))

    return G
示例21
def count_textfiles(files, workers=1):
    """Merge the per-file results of ``count_words`` into one Counter.

    ``count_words`` is applied to each entry of ``files`` in a pool of
    ``workers`` processes.
    """
    totals = Counter()
    with ProcessPoolExecutor(max_workers=workers) as pool:
        per_file_counts = pool.map(count_words, files)
        for file_counts in per_file_counts:
            totals.update(file_counts)
    return totals
示例22
def write_walks_to_disk(G, filebase, num_paths, path_length, alpha=0, rand=random.Random(0), num_workers=cpu_count(),
                        always_rebuild=True):
    """Write random walks over ``G`` to ``<filebase>.<n>`` files in parallel.

    Args:
      G: Graph; ``len(G)`` and ``G.nodes()`` are used here
      filebase: Prefix of the output files
      num_paths: Number of paths; also the number of candidate output files
      path_length, alpha: Passed through to ``_write_walks_to_disk``
      rand: Source of the per-task seeds. NOTE: the default instance is
        created once at definition time and shared between calls.
      num_workers: Number of worker processes
      always_rebuild: When false, a file whose line count already equals
        ``paths_per_worker * len(G)`` is kept and not rewritten

    Returns:
      List of the file names that were kept or written.
    """
    # The graph and its vertex names are published as module globals.
    global __current_graph
    global __vertex2str
    __current_graph = G
    __vertex2str = {v: str(v) for v in G.nodes()}

    # `range` works on Python 2 and 3, `xrange` only on Python 2.
    files_list = ["{}.{}".format(filebase, str(x)) for x in range(num_paths)]
    expected_size = len(G)
    args_list = []
    files = []

    if num_paths <= num_workers:
        paths_per_worker = [1 for x in range(num_paths)]
    else:
        # Count the non-None members of each group. `len(filter(...))` fails
        # on Python 3, where filter returns an iterator.
        paths_per_worker = [sum(1 for y in group if y is not None)
                            for group in graph.grouper(int(num_paths / num_workers) + 1,
                                                       range(1, num_paths + 1))]

    # First pass: decide which files have to be (re)built.
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        for size, file_, ppw in zip(executor.map(count_lines, files_list), files_list, paths_per_worker):
            if always_rebuild or size != (ppw*expected_size):
                args_list.append((ppw, path_length, alpha, random.Random(rand.randint(0, 2**31)), file_))
            else:
                files.append(file_)

    # Second pass: write the walks for every file that needs them.
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        for file_ in executor.map(_write_walks_to_disk, args_list):
            files.append(file_)

    return files
示例23
def load_adjacencylist(file_, undirected=False, chunksize=10000, unchecked=True):
    """Read an adjacency-list file with a process pool and build a graph.

    Args:
      file_: Path of the adjacency-list file
      undirected: When true, the graph is passed through ``make_undirected()``
      chunksize: Number of lines grouped into one parsing task
      unchecked: Use the ``*_unchecked`` parse/convert functions when true

    Returns:
      The graph produced by the selected convert function.
    """
    if unchecked:
        parse_func, convert_func = parse_adjacencylist_unchecked, from_adjlist_unchecked
    else:
        parse_func, convert_func = parse_adjacencylist, from_adjlist

    adjlist = []
    total = 0
    # Defined before the loop: with an empty file the loop body never runs
    # and the log line below would otherwise hit an unbound name.
    chunk_count = 0

    t0 = time()
    with open(file_) as f, ProcessPoolExecutor(max_workers=cpu_count()) as executor:
        parsed_chunks = executor.map(parse_func, grouper(int(chunksize), f))
        for adj_chunk in parsed_chunks:
            chunk_count += 1  # number of chunks seen, not the last index
            adjlist.extend(adj_chunk)
            total += len(adj_chunk)
    t1 = time()
    logger.info('Parsed {} edges with {} chunks in {}s'.format(total, chunk_count, t1-t0))

    t0 = time()
    G = convert_func(adjlist)
    t1 = time()
    logger.info('Converted edges to graph in {}s'.format(t1-t0))

    if undirected:
        t0 = time()
        G = G.make_undirected()
        t1 = time()
        logger.info('Made graph undirected in {}s'.format(t1-t0))

    return G
示例24
def count_textfiles(files, workers=1):
    """Return one Counter accumulated from ``count_words`` over ``files``.

    The files are processed by a pool of ``workers`` processes and the
    partial results are folded together in the calling process.
    """
    combined = Counter()
    with ProcessPoolExecutor(max_workers=workers) as pool:
        for partial_count in pool.map(count_words, files):
            combined.update(partial_count)
        return combined
示例25
def preprocess_midi_files_under(midi_root, save_dir, num_workers):
    """Preprocess every MIDI file below ``midi_root`` and save the results.

    Files with the extensions ``.mid`` and ``.midi`` are handed to
    ``preprocess_midi`` in a pool of ``num_workers`` processes. Each result
    is stored with ``torch.save`` as ``<save_dir>/<basename>-<md5 of path>.data``.

    Args:
      midi_root: Directory searched for MIDI files
      save_dir: Output directory, created if missing
      num_workers: Number of worker processes
    """
    midi_paths = list(utils.find_files_by_extensions(midi_root, ['.mid', '.midi']))
    os.makedirs(save_dir, exist_ok=True)
    out_fmt = '{}-{}.data'

    results = []
    # The context manager shuts the worker processes down on every exit path.
    with ProcessPoolExecutor(num_workers) as executor:
        for path in midi_paths:
            try:
                results.append((path, executor.submit(preprocess_midi, path)))
            except KeyboardInterrupt:
                print(' Abort')
                # Drop the work that has not started yet so that the abort
                # does not wait for the whole queue.
                for _, queued in results:
                    queued.cancel()
                return
            except Exception:
                # Narrowed from a bare `except:` so that SystemExit and
                # similar are no longer swallowed.
                print(' Error')
                continue

        # NOTE(review): an exception raised by preprocess_midi surfaces here
        # through future.result() and stops the run; confirm that is intended.
        for path, future in Bar('Processing').iter(results):
            print(' ', end='[{}]'.format(path), flush=True)
            name = os.path.basename(path)
            # The hash only disambiguates equal base names in the output dir.
            code = hashlib.md5(path.encode()).hexdigest()
            save_path = os.path.join(save_dir, out_fmt.format(name, code))
            torch.save(future.result(), save_path)

    print('Done')
示例26
def executor():
    """Return a new ProcessPoolExecutor created with default arguments."""
    pool = ProcessPoolExecutor()
    return pool
示例27
def _assert_urls_exists(urls):
    """Assert that ``_url_status`` yields 200 for every URL in ``urls``.

    The statuses are fetched by a pool of 20 processes in batches of 10 and
    consumed behind a ``tqdm`` progress bar.

    Raises:
      AssertionError: on the first URL whose status is not 200 (the message
        carries ``(url, code)``), or when fewer statuses than URLs came back.
    """
    checked = 0  # explicit counter so that an empty `urls` is handled
    with ProcessPoolExecutor(20) as ex:
        statuses = ex.map(_url_status, urls, chunksize=10)
        progress = tqdm(statuses, total=len(urls), desc='checking urls')
        for url, code in zip(urls, progress):
            assert code == 200, (url, code)
            checked += 1
    # Every URL must have produced a status.
    assert checked == len(urls)
示例28
def scan_more(urls, data=None, headers=None, encoding="UTF-8"):
    """Scan a batch of URLs in parallel worker processes.

    Args:
      urls: Iterable of URLs, each passed to ``scan_one``
      data, headers, encoding: Forwarded to every ``scan_one`` call

    Returns:
      List of the ``scan_one`` results in the order of ``urls``. The list
      used to be built and then discarded.
    """
    scan = partial(scan_one, data=data, headers=headers, encoding=encoding)
    # `process` is a module-level name; presumably the worker count - verify.
    with futures.ProcessPoolExecutor(max_workers=process) as executor:
        return list(executor.map(scan, urls))
示例29
def preprocess(args):
    """Process every utterance listed in ``args.old_meta`` and write new metadata.

    Reads the YAML config at ``args.config``, builds an ``AudioProcessor``
    from its ``audio`` section and runs ``process_utterance`` for each line of
    the old metadata file in ``args.n_jobs`` worker processes. The results are
    written, ``|``-separated, to ``<output_dir>/ljspeech_meta.txt``.

    Args:
      args: Namespace providing ``config``, ``output_dir``, ``data_dir``,
        ``n_jobs`` and ``old_meta``
    """
    # NOTE(security): FullLoader still constructs some Python objects; use
    # yaml.safe_load if the config can come from an untrusted source.
    with open(args.config) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    # Make directory if not exist
    os.makedirs(args.output_dir, exist_ok=True)
    print('')
    print('[INFO] Root directory:', args.data_dir)

    AP = AudioProcessor(**config['audio'])
    futures = []
    # The context manager shuts the worker processes down when done.
    with ProcessPoolExecutor(max_workers=args.n_jobs) as executor:
        with open(args.old_meta, encoding='utf-8') as f:
            for line in f:
                parts = line.strip().split('|')
                fpath = os.path.join(args.data_dir, '%s.wav' % parts[0])
                text = parts[2]
                futures.append(executor.submit(process_utterance, fpath, text, args.output_dir, AP))

        print('[INFO] Preprocessing', end=' => ')
        print(len(futures), 'audio files found')
        results = [future.result() for future in tqdm(futures)]

    fpath_meta = os.path.join(args.output_dir, 'ljspeech_meta.txt')
    # Written as UTF-8 to match how the source metadata is read; the platform
    # default encoding may not be able to represent the text.
    with open(fpath_meta, 'w', encoding='utf-8') as f:
        for x in results:
            f.write('|'.join(map(str, x)) + '\n')
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    '''Preprocesses the LJ Speech dataset from a given input path into a given output directory.

    Args:
      in_dir: The directory where you have downloaded the LJ Speech dataset
      out_dir: The directory to write the output into
      num_workers: Optional number of worker processes to parallelize across
      tqdm: You can optionally pass tqdm to get a nice progress bar

    Returns:
      A list with the results of _process_utterance, one per line of
      ``metadata.csv`` whose wav file exists. This should be written to
      train.txt
    '''
    # We use ProcessPoolExecutor to parallelize across processes. This is just an optimization and
    # you can omit it and just call _process_utterance on each input if you want.
    futures = []
    index = 1
    # The context manager shuts the pool down (and reaps the workers) even
    # when reading the metadata or collecting a result raises.
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        with open(os.path.join(in_dir, 'metadata.csv'), encoding='utf-8') as f:
            for line in f:
                parts = line.strip().split('|')
                wav_path = os.path.join(in_dir, 'wavs', '%s.wav' % parts[0])
                text = parts[2]
                # Entries without audio are skipped and consume no index.
                if not os.path.exists(wav_path):
                    continue
                futures.append(executor.submit(_process_utterance, out_dir, index, wav_path, text))
                index += 1
        return [future.result() for future in tqdm(futures)]