diff --git a/cluster/config.yaml b/cluster/config.yaml
index e4ec8291c8c7c987c4632272466c1ea09d471d18..381cd8c5622a0102833b07cebbfc052489dedbb4 100644
--- a/cluster/config.yaml
+++ b/cluster/config.yaml
@@ -16,7 +16,7 @@ default-resources:
 restart-times: 3
 max-jobs-per-second: 10
 max-status-checks-per-second: 1
-local-cores: 1
+local-cores: 20
 latency-wait: 60
 jobs: 500
 keep-going: True
diff --git a/config/config.yaml b/config/config.yaml
index eb11d3bea3047d9be05641541f294eb53ad152b0..7feb5f7ba4240674bb812ef322e873fc7de5c39c 100755
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -2,8 +2,10 @@
 #
 ##############################
 
-samples: config/samples.tsv
-outdir: ./results
+samples: config/samples.tsv # overwritten by run_assembly_pipeline.py
+outdir: ./results # overwritten by run_assembly_pipeline.py
+
+annotate: "No"
 
 ref:
   genome_size: 4.4m
@@ -12,7 +14,7 @@ ref:
   bakta_db: /scicore/home/gagneux/GROUP/PacbioSnake_resources/databases/bakta_db
   container: /scicore/home/gagneux/GROUP/PacbioSnake_resources/containers/assemblySC.sif
 
-threads_per_job: 4
+threads_per_job: 10 # Max. 20
 
 assembly_iterations: 3
 
diff --git a/workflow/rules/circularize.smk b/workflow/rules/circularize.smk
index 89f2f783376deb23fa141fdaddd49b69c669e514..94d3a62f95204b94689d88a6d848ec0679db847d 100755
--- a/workflow/rules/circularize.smk
+++ b/workflow/rules/circularize.smk
@@ -24,8 +24,26 @@ rule circlator_bam2reads:
         """
 
 
+
+# Collapse reads that share a FASTA ID; the result feeds circlator_localassembly.
+rule circlator_removeduplicates:
+    input: config["outdir"] + "/{sample}/circlator/02.bam2reads.fasta"
+    output: config["outdir"] + "/{sample}/circlator/02.bam2reads.nodup.fasta"
+    run:
+        from Bio import SeqIO
+
+        # SeqIO.to_dict() raises on duplicate IDs, so build the mapping by hand:
+        # a repeated ID keeps its first position and its last-seen record.
+        records_by_id = {
+            record.id: record for record in SeqIO.parse(input[0], "fasta")
+        }
+
+        with open(output[0], "w") as output_handle:
+            SeqIO.write(records_by_id.values(), output_handle, "fasta")
+
+
 rule circlator_localassembly:
-    input: config["outdir"] + "/{sample}/circlator/02.bam2reads.fasta"
+    input: config["outdir"] + "/{sample}/circlator/02.bam2reads.nodup.fasta"
     output: config["outdir"] + "/{sample}/circlator/03.assemble/assembly.fasta"
     params:
         outdir = config["outdir"] + "/{sample}/circlator/03.assemble",