diff --git a/tacos.py b/tacos.py index 415088656b1a0005147f73dfdddb0710fe0a4111..1485db6510160d37631e03112c3aaecf30f5f1b4 100644 --- a/tacos.py +++ b/tacos.py @@ -2,10 +2,10 @@ import click import numpy as np import pandas as pd import itertools -import multiprocessing as mp +from multiprocessing import get_context import igraph from functools import partial - +import tqdm def my_hamming(seq1, seq2, missing='X', gap='-', ignore_gap=False, adjust=False): """Return the Hamming distance between two given sequence of characters. Sequences must have equal length. @@ -92,20 +92,28 @@ def get_distance_matrix(missing, gap, ignore_gap, adjust, ncores, type, input_, if ncores>1: click.echo('Starting pool with {} processes ...'.format(ncores)) - pool = mp.Pool(ncores) + pool = get_context("spawn").Pool(ncores) get_distance_partial = partial(get_distance_helper, data_df=positions_genomes, missing=missing, gap=gap, ignore_gap=ignore_gap, adjust=adjust) - distance_mp = pool.map(get_distance_partial, np.array_split(genome_pairs, ncores)) + genome_pairs_chunks = np.array_split(genome_pairs, ncores) + distance_mp = tqdm.tqdm(pool.imap_unordered(get_distance_partial, genome_pairs_chunks, chunksize=1), + total=len(genome_pairs_chunks)) pool.close() pool.join() + #with click.progressbar(distance_mp, + # length=len(genome_pairs_chunks), + # label='Computing pairwise distances') as bar: + # pair_distances = [] + # for item in bar: + # pair_distances.extend([pair_dist for pair_dist in item]) pair_distances = [pair_dist for item in distance_mp for pair_dist in item] del distance_mp else: pair_distances = [(g1, g2, my_hamming(positions_genomes.get(g1).values, positions_genomes.get(g2).values, missing, gap, ignore_gap, adjust)) - for g1,g2 in genome_pairs] + for g1,g2 in tqdm.tqdm(genome_pairs)] del positions_genomes @@ -115,6 +123,7 @@ def get_distance_matrix(missing, gap, ignore_gap, adjust, ncores, type, input_, genomes_names = genomes_graph.vs['name'] genomes_adj=genomes_graph.get_adjacency(type=np.int(type), attribute='weight') del genomes_graph + with click.open_file(output, 'w') as f: f.write('\t'.join(['']+genomes_names)) f.write('\n')