diff --git a/barrOs.py b/barrOs.py index e934064fb3ef3a5eacabca5f2a20f6dc19faa37d..3d20bf738c8eaf8d51346c5d877d5ae252083878 100644 --- a/barrOs.py +++ b/barrOs.py @@ -9,7 +9,7 @@ number_of_jobs = 1 barros.print_hello() # Get inputs -input_files, input_types, input_mode, outf, complexes_only, multimer_only = barros.get_inputs(sys.argv) +input_files, input_types, input_mode, outf, complexes_only, multimer_only, delete = barros.get_inputs(sys.argv) # Check if inputs are correct in case we are not dealing with a 'help' check @@ -36,7 +36,7 @@ if __name__ == '__main__': # Prepare all parallel jobs and run the main barrOs method for each pdbid separated_jobs = barros.chunk_list(pdbIDs, number_of_jobs) - list_arguments = [i for i in zip(range(number_of_jobs), [input_mode for job in separated_jobs],[complexes_only for job in separated_jobs],[multimer_only for job in separated_jobs], separated_jobs)] + list_arguments = [i for i in zip(range(number_of_jobs), [input_mode for job in separated_jobs],[complexes_only for job in separated_jobs],[multimer_only for job in separated_jobs],[delete for job in separated_jobs], separated_jobs)] pool = mp.Pool(number_of_jobs) data = pool.map(barros.run_barros, list_arguments) diff --git a/barrOs_library.py b/barrOs_library.py index af455b18d6619464ce2ae0d2dc6d2468cbdc36df..44a3feb9d951bf6de1d2a6062e72a99ab3f81242 100644 --- a/barrOs_library.py +++ b/barrOs_library.py @@ -68,7 +68,8 @@ def print_help(): print(" -mode: \tdefines the mode of running, i.e. either we want to deal only with membrane proteins, all, or non-membrane proteins") print(" \tvalues accepted: {}".format(accepted_modes)) print(" -outputf: \tdefines the name of the output file") - print(" \tit is not mandatory. Default: BARRoS_results.csv") + print(" \tit is not mandatory. Default: BARRoS_results.csv") + print(" -nodelete: \tflag to define if files downloaded but without detected barrels should not be deleted. Default: False") def print_summary(input_files, input_types, input_mode): @@ -84,6 +85,7 @@ def get_inputs(argv): found_mode = False complexes_only = False multimer_only = False + delete = True input_files = [] input_types = [] input_type = 'nan' @@ -111,6 +113,8 @@ def get_inputs(argv): complexes_only = True elif '-multimer' in arg: multimer_only = True + elif '-nodelete' in arg: + delete = False if found_input and not found_mode: if input_type in accepted_input_types: @@ -122,7 +126,7 @@ def get_inputs(argv): elif not found_input and found_mode: input_mode = tmp_mode - return input_files, input_types, input_mode, output_file, complexes_only, multimer_only + return input_files, input_types, input_mode, output_file, complexes_only, multimer_only, delete ## 1.3. Functions to check if the inputs are correct @@ -2239,7 +2243,7 @@ def plot_parameter(x_col, y_col, df, saveto, fit_line = False): def run_barros(arguments, offset = 1, step = 2, local_angle_threshold = 25, distance_threshold = 5, max_loop_size = 0): - job_number, input_mode, complexes_only, multimer_only, in_queue = arguments + job_number, input_mode, complexes_only, multimer_only, delete, in_queue = arguments # create output files to save the sequences outfasta = "full_sequences_matched_pdbs_job{}.fasta".format(job_number) @@ -2303,7 +2307,6 @@ def run_barros(arguments, offset = 1, step = 2, local_angle_threshold = 25, dist pdb_file, protein_type, membrane_thickness = download_pdb(pdbID) pdbID, chainID = pdbID.split('_') - print(pdbID) if complexes_only and pdb_file != 'not available': chains_inpdb, pdb_file = get_chains_in_pdb(pdb_file) target_chains = [] @@ -2328,7 +2331,7 @@ def run_barros(arguments, offset = 1, step = 2, local_angle_threshold = 25, dist if pdb_file != 'not available' and protein_type != 'not available' and target_chains is not None: if input_mode != 'all': - if protein_type != input_mode: + if protein_type != input_mode and delete: os.system("rm {}".format(pdb_file)) #print(" ... ... pdbID '{}' is '{}' (different from '{}'). Will delete it!".format(pdbID, protein_type, input_mode)) deleted_it = True @@ -2454,11 +2457,19 @@ def run_barros(arguments, offset = 1, step = 2, local_angle_threshold = 25, dist outbb.write('{}\n'.format(barrel_seq)) else: print('... ... There is a barrel in {}_{} but it does not cross the membrane\n'.format(pdbID, chainID)) - os.system("rm {}*".format(pdb_file[:-4])) + if delete: + os.system("rm {}*".format(pdb_file[:-4])) else: print('... ... Not able to detect barrel topology for {}_{}\n'.format(pdbID, chainID)) - os.system("rm {}*".format(pdb_file[:-4])) - + if delete: + os.system("rm {}*".format(pdb_file[:-4])) + + else: + seqstruct, pdb_sequence, chains = get_secondary_structure(pdb_file) + + outbb.write('>NaN_TM_BARREL_{}_{}_{}\n'.format(protein_type, pdbID, chainID)) + outbb.write('{}\n'.format(pdb_sequence)) + else: print(" ... ... pdbID '{}_{}' impossible to get".format(pdbID, chainID))