From d18d5994709595b4396513a79dbebff1368c92cf Mon Sep 17 00:00:00 2001
From: BIOPZ-Gypas Foivos <foivos.gypas@unibas.ch>
Date: Wed, 28 Nov 2018 19:48:39 +0100
Subject: [PATCH] Creation of bed file for the CDS coordinates

---
Reviewer notes (free text between the "---" marker and the diffstat; not
part of the commit):

* prepare_annotation: adds the rule `create_bed_CDS_file`, lists it in
  `localrules`, and makes `finish` request
  `transcript_id_gene_id_CDS.bed` instead of
  `transcript_id_gene_id_CDS.tsv`. The new rule takes that .tsv as its
  input; presumably it is still produced by
  `create_tab_delimited_CDS_file` -- confirm against the full Snakefile.
* The new rule's shell command skips the first line of the .tsv
  (`tail -n+2`) and prints field 1, field 3 minus 1, field 4 and field 2,
  separated by tabs, into the .bed file; stdout/stderr of the pipeline
  group are redirected to the rule's log file.
* NOTE(review): presumably the skipped line is a header and field 3 is a
  1-based start coordinate that is shifted to a 0-based BED start --
  confirm against the script that writes the .tsv.
* NOTE(review): awk is invoked without -F, so it splits on its default
  field separator rather than on tabs only; this assumes no .tsv field
  contains a space -- confirm.
* The `singularity:` directive of the new rule is present only as a
  commented-out line, so the rule declares no container.
* process_data: `sam2bam_sort_and_index` now takes
  `{sample}/transcripts.mapped.unique.sam` as input instead of
  `{sample}/transcripts.mapped.sam`; its outputs are unchanged.

 snakemake/prepare_annotation/Snakefile | 22 ++++++++++++++++++++--
 snakemake/process_data/Snakefile       |  2 +-
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/snakemake/prepare_annotation/Snakefile b/snakemake/prepare_annotation/Snakefile
index 7240fe2..5320edf 100644
--- a/snakemake/prepare_annotation/Snakefile
+++ b/snakemake/prepare_annotation/Snakefile
@@ -1,6 +1,6 @@
 configfile: "config.yaml"
 
-localrules: create_output_and_log_directories, create_tab_delimited_CDS_file, finish
+localrules: create_output_and_log_directories, create_tab_delimited_CDS_file, create_bed_CDS_file, finish
 
 #################################################################################
 ### Finish rule
@@ -10,7 +10,7 @@ rule finish:
     input:
         idx_other = os.path.join(config["output_dir"], "other_RNAs_sequence.idx"),
         idx_transcripts = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.idx"),
-        tsv = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.tsv")
+        bed = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.bed")
 
 #################################################################################
 ### Create output and log directories
@@ -115,6 +115,24 @@ rule create_tab_delimited_CDS_file:
         --fasta {input.transcripts} \
         --out {output.tsv}) &> {log}"
 
+#################################################################################
+### BED CDS table
+#################################################################################
+
+rule create_bed_CDS_file:
+    input:
+        tsv = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.tsv")
+    output:
+        bed = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.bed")
+    params:
+        cluster_log = os.path.join(config["cluster_log"], "create_bed_CDS_file.log")
+    log:
+        os.path.join(config["local_log"], "create_bed_CDS_file.log")
+    # singularity:
+    #     "docker://zavolab/python_htseq_biopython:3.6.5_0.10.0_1.71"
+    shell:
+        "(tail -n+2 {input.tsv} | awk \'{{print $1 \"\t\" $3-1 \"\t\" $4 \"\t\" $2 }}\' > {output.bed}) &> {log}"
+
 #################################################################################
 ### Generate segemehl index for transcripts
 #################################################################################
diff --git a/snakemake/process_data/Snakefile b/snakemake/process_data/Snakefile
index d74b958..a1db044 100644
--- a/snakemake/process_data/Snakefile
+++ b/snakemake/process_data/Snakefile
@@ -233,7 +233,7 @@ rule remove_multimappers:
 
 rule sam2bam_sort_and_index:
     input:
-        sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.sam")
+        sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam")
     output:
         bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sorted.bam"),
         bai = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sorted.bam.bai")
-- 
GitLab