diff --git a/snakemake/prepare_annotation/Snakefile b/snakemake/prepare_annotation/Snakefile index 7240fe26b0a4d849d2cf87b9d1858b2cc10ab9eb..5320edf13f98b35f8ece36533e03f6b27f4e697d 100644 --- a/snakemake/prepare_annotation/Snakefile +++ b/snakemake/prepare_annotation/Snakefile @@ -1,6 +1,6 @@ configfile: "config.yaml" -localrules: create_output_and_log_directories, create_tab_delimited_CDS_file, finish +localrules: create_output_and_log_directories, create_tab_delimited_CDS_file, create_bed_CDS_file, finish ################################################################################# ### Finish rule @@ -10,7 +10,7 @@ rule finish: input: idx_other = os.path.join(config["output_dir"], "other_RNAs_sequence.idx"), idx_transcripts = os.path.join(config["output_dir"], "longest_pc_transcript_per_gene.idx"), - tsv = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.tsv") + bed = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.bed") ################################################################################# ### Create output and log directories @@ -115,6 +115,26 @@ rule create_tab_delimited_CDS_file: --fasta {input.transcripts} \ --out {output.tsv}) &> {log}" +################################################################################# +### BED CDS table +################################################################################# + +rule create_bed_CDS_file: + input: + tsv = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.tsv") + output: + bed = os.path.join(config["output_dir"], "transcript_id_gene_id_CDS.bed") + params: + cluster_log = os.path.join(config["cluster_log"], "create_bed_CDS_file.log") + log: + os.path.join(config["local_log"], "create_bed_CDS_file.log") + # singularity: + # "docker://zavolab/python_htseq_biopython:3.6.5_0.10.0_1.71" + # Drop the first line of the TSV (presumably a header), then write columns 1, 3-1, 4, 2. + # FS/OFS are pinned to tab so awk does not re-split fields that contain spaces. + shell: + "(tail -n+2 {input.tsv} | awk \'BEGIN {{FS=OFS=\"\\t\"}} {{print $1, $3-1, $4, $2}}\' > {output.bed}) &> {log}" +
################################################################################# ### Generate segemehl index for transcripts ################################################################################# diff --git a/snakemake/process_data/Snakefile b/snakemake/process_data/Snakefile index d74b9588097051464cd5df317d3189d574d6ba02..a1db0443509f0d41f70ab65a1750df71cdadec1c 100644 --- a/snakemake/process_data/Snakefile +++ b/snakemake/process_data/Snakefile @@ -233,7 +233,7 @@ rule remove_multimappers: rule sam2bam_sort_and_index: input: - sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.sam") + sam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sam") output: bam = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sorted.bam"), bai = os.path.join(config["output_dir"], "{sample}/transcripts.mapped.unique.sorted.bam.bai")