From cd2052dd15a3b559618afcaafd15e875a0f4acaf Mon Sep 17 00:00:00 2001 From: BIOPZ-Herrmann Christina <christina.herrmann@unibas.ch> Date: Fri, 13 Dec 2019 12:04:36 +0100 Subject: [PATCH] Started working on subpipelines --- prepare_annotation/Snakefile | 24 +-------------------- process_data/Snakefile | 31 ++++++++++++++++++++++++---- process_data/config.yaml | 3 +++ process_data/preprocessing.snakefile | 28 +++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 27 deletions(-) create mode 100644 process_data/preprocessing.snakefile diff --git a/prepare_annotation/Snakefile b/prepare_annotation/Snakefile index efd220e..94c9db7 100644 --- a/prepare_annotation/Snakefile +++ b/prepare_annotation/Snakefile @@ -110,29 +110,7 @@ rule generate_segemehl_index_other_RNAs: ### Index genome STAR ################################################################################# -rule index_genome_STAR: - input: - genome = os.path.join(config["output_dir"], "genome.fa"), - annotation = os.path.join(config["output_dir"], "annotation.gtf") - output: - output = os.path.join(config["output_dir"], "STAR_index") - params: - outputdir = os.path.join(config["output_dir"],"STAR_index"), - sjdbOverhang = config["sjdbOverhang"] - threads: 8 - singularity: - "docker://zavolab/star:2.6.0a" - log: - os.path.join(config["local_log"],"index_genome_STAR.log") - shell: - "mkdir -p {output.output}; \ - chmod -R 777 {output.output}; \ - (STAR --runMode genomeGenerate \ - --sjdbOverhang {params.sjdbOverhang} \ - --genomeDir {params.outputdir} \ - --genomeFastaFiles {input.genome} \ - --runThreadN {threads} \ - --sjdbGTFfile {input.annotation}) &> {log}" + ################################################################################## ### Filter protein coding and lncRNA transcripts diff --git a/process_data/Snakefile b/process_data/Snakefile index edd9baa..0343922 100644 --- a/process_data/Snakefile +++ b/process_data/Snakefile @@ -1,5 +1,16 @@ +import pandas as pd + configfile: 
"config.yaml" + + + +samples = pd.read_table(config['samples_table'], header=0, index_col=0, comment='#', engine='python') + +samples['out_name'] = samples['Sample_name'] + samples['Library_Type'] + + + localrules: finish ################################################################################# @@ -8,10 +19,22 @@ localrules: finish rule finish: input: - fastqc = expand(os.path.join(config["output_dir"], "{sample}", "fastqc"), sample=config["sample"]), - htseq_qa = expand(os.path.join(config["output_dir"], "{sample}", "htseq_qa", "htseq_quality.pdf"), sample=config["sample"]), - gn_estimates = expand(os.path.join(config["output_dir"], "{sample}", "salmon_quant", "quant.genes.sf"), sample=config["sample"]), - bam = expand(os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.bam"), sample=config["sample"]) + final_sample = expand(os.path.join(config["output_dir"], "{sample}", "fastqc"), sample=samples['out_name'].values), + + #fastqc = expand(os.path.join(config["output_dir"], "{sample}", "fastqc"), sample=config["sample"]), + #htseq_qa = expand(os.path.join(config["output_dir"], "{sample}", "htseq_qa", "htseq_quality.pdf"), sample=config["sample"]), + #gn_estimates = expand(os.path.join(config["output_dir"], "{sample}", "salmon_quant", "quant.genes.sf"), sample=config["sample"]), + #bam = expand(os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.bam"), sample=config["sample"]) + + +################################################################################## +# Execution dependent on sequencing mode +################################################################################## + + +include: 'paired_end.snakefile' +include: 'single_end.snakefile' + ################################################################################## ### Fastqc diff --git a/process_data/config.yaml b/process_data/config.yaml index d2c34ba..65bdfa9 100644 --- a/process_data/config.yaml +++ b/process_data/config.yaml @@ -2,6 +2,7 @@ 
############################################################################## ### Annotation ############################################################################## + organism: "Homo_sapiens" annotation: "../prepare_annotation/results/annotation.gtf" genome: "../prepare_annotation/results/genome.fa" annotation_filtered: "../prepare_annotation/results/filtered_transcripts.gtf" @@ -12,6 +13,8 @@ ############################################################################## ### Output and log directories ############################################################################## + database_path: "/scicore/home/zavolan/GROUP/Rna_Seq_pipeline/Blabla" + STAR_idx_folder: "STAR_indices" output_dir: "results" local_log: "logs/local_log" cluster_log: "logs/cluster_log"
diff --git a/process_data/preprocessing.snakefile b/process_data/preprocessing.snakefile
new file mode 100644
index 0000000..54d1d32
--- /dev/null
+++ b/process_data/preprocessing.snakefile
@@ -0,0 +1,28 @@
+# Preprocessing: build the STAR genome index below config["database_path"].
+# The {sjdb} wildcard in the index path is handed to STAR as --sjdbOverhang.
+rule index_genome_STAR:
+    '''
+    Create the STAR index; STAR writes straight into the declared output directory
+    '''
+    input:
+        genome = config["genome"],
+        annotation = config["annotation"]
+    output:
+        output = os.path.join(config["database_path"], config["organism"], config["STAR_idx_folder"], "STAR_index{sjdb}")
+    params:
+        sjdbOverhang = lambda wildcards: wildcards.sjdb
+    threads: 8
+    singularity:
+        "docker://zavolab/star:2.6.0a"
+    log:
+        # Snakemake requires log paths to use the same wildcards as the output
+        os.path.join(config["local_log"], "index_genome_STAR_{sjdb}.log")
+    shell:
+        "mkdir -p {output.output}; \
+        chmod -R 777 {output.output}; \
+        (STAR --runMode genomeGenerate \
+        --sjdbOverhang {params.sjdbOverhang} \
+        --genomeDir {output.output} \
+        --genomeFastaFiles {input.genome} \
+        --runThreadN {threads} \
+        --sjdbGTFfile {input.annotation}) &> {log}"
\ No newline at end of file
-- 
GitLab