"""Launch the genome assembly Snakemake workflow through SLURM (sbatch).

The pipeline location (Snakefile and working directory) is inferred from the
location of this script, so the script must stay inside the ``assembly``
directory of the pipeline.
"""

import argparse
import os
import shlex
import subprocess
import sys

# Host directories that are bind-mounted into the Singularity container.
SINGULARITY_BINDS = (
    "/scicore/home/gagneux/GROUP/tbresearch/genomes/IN_PROGRESS/PacBio_genomes/Gagneux",
    "/scicore/home/gagneux/stritt0001",
    "/scratch",
)

# Used when neither -t nor the config key `threads_per_job` provides a value.
DEFAULT_THREADS = 4


def get_args(argv=None):
    """Parse the command-line options of the launcher.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with the attributes ``config`` (str),
        ``njobs`` (int) and ``threads`` (int, 0 meaning "not given").
    """
    parser = argparse.ArgumentParser(
        description='Run the genome assembly Snakemake workflow via sbatch.')

    parser.add_argument(
        '-c', '--configfile',
        dest='config',
        required=True,
        help='Path to the YAML config file of the run.'
    )

    parser.add_argument(
        '-j', '--njobs',
        dest='njobs',
        required=True, type=int,
        help='Maximum number of jobs to run in parallel (snakemake --jobs).'
    )

    parser.add_argument(
        '-t', '--threads',
        dest='threads',
        type=int, default=0,
        help='Threads per job (sbatch --cpus-per-task). If omitted, '
             'threads_per_job from the config file is used.'
    )

    args = parser.parse_args(argv)

    if args.njobs < 1:
        parser.error('--njobs must be a positive integer')
    if args.threads < 0:
        parser.error('--threads must not be negative')

    return args


def _load_config(path):
    """Read the YAML config file at *path* and return it as a dict.

    Raises:
        ValueError: if the file does not contain a YAML mapping.
    """
    # Imported here so that argument parsing (e.g. --help) and command
    # construction do not require PyYAML to be installed.
    import yaml

    with open(path, 'r') as file:
        config = yaml.safe_load(file)

    if config is None:  # empty file
        return {}
    if not isinstance(config, dict):
        raise ValueError(
            'Config file {} must contain a YAML mapping'.format(path))
    return config


def _is_inside(path, root):
    """Return True if *path* equals *root* or lies below it."""
    return os.path.commonpath([path, root]) == root


def build_command(config_path, njobs, threads, pipeline_dir=None):
    """Assemble the snakemake command line as an argument list.

    Args:
        config_path: Path to the YAML config file; made absolute.
        njobs: Value passed to ``--jobs``.
        threads: Value passed to sbatch as ``--cpus-per-task``.
        pipeline_dir: Directory holding ``workflow/Snakefile``. Defaults to
            the directory that contains this script.

    Returns:
        list[str]: one list element per command-line argument, suitable for
        ``subprocess.run`` without a shell.
    """
    if pipeline_dir is None:
        # Infer pipeline location from path of run_assembly_pipeline.py
        pipeline_dir = os.path.dirname(os.path.abspath(__file__))

    config_path = os.path.abspath(config_path)
    # SLURM stdout/stderr files are written next to the config file.
    run_dir = os.path.dirname(config_path)

    binds = list(SINGULARITY_BINDS)
    # Make sure the config file itself is visible inside the container.
    if not any(_is_inside(run_dir, root) for root in binds):
        binds.append(run_dir)
    singularity_args = " ".join(
        "--bind {}".format(shlex.quote(path)) for path in binds)

    sbatch = " ".join([
        "sbatch",
        "--job-name=pbassembly",
        "--cpus-per-task={}".format(threads),
        "--mem-per-cpu=4G",
        "--time=06:00:00",
        "--qos=6hours",
        "--output={}".format(
            shlex.quote(os.path.join(run_dir, "pbassembly.o%j"))),
        "--error={}".format(
            shlex.quote(os.path.join(run_dir, "pbassembly.e%j"))),
    ])

    # `--profile` and `--shadow-prefix` were previously passed without a
    # value, which makes the option that follows them get swallowed or
    # rejected. They are left out until real values are available.
    return [
        "snakemake",
        "--snakefile", os.path.join(pipeline_dir, "workflow", "Snakefile"),
        "--directory", pipeline_dir,
        "--configfile", config_path,
        "--jobs", str(njobs),
        "--latency-wait", "60",
        # NOTE(review): confirm that --cleanup-shadow does not make snakemake
        # exit right after the cleanup instead of running the workflow.
        "--cleanup-shadow",
        "--verbose",
        "--use-singularity",
        "--singularity-args", singularity_args,
        "--cluster", sbatch,
    ]


def main(argv=None):
    """Parse the options, build the snakemake command and run it.

    Args:
        argv: Optional list of argument strings (defaults to ``sys.argv[1:]``).

    Returns:
        int: the exit status of the snakemake process.
    """
    args = get_args(argv)
    config = _load_config(args.config)

    # An explicit -t wins; otherwise fall back to the config file.
    threads = args.threads or int(
        config.get('threads_per_job', DEFAULT_THREADS))

    cmd = build_command(args.config, args.njobs, threads)

    # Echo the exact command so a failed run can be reproduced by hand.
    print(" ".join(shlex.quote(part) for part in cmd), file=sys.stderr)

    # An argument list (no shell) keeps the multi-word values of
    # --singularity-args and --cluster together as single arguments.
    return subprocess.run(cmd).returncode


if __name__ == '__main__':
    sys.exit(main())