diff --git a/snakemake/Snakefile b/snakemake/Snakefile index 0343922f47131004c41a1f723c922b8e01ba949b..e0b8f6f44b84ac3f5cc20a0d6e255c8b53fdc695 100644 --- a/snakemake/Snakefile +++ b/snakemake/Snakefile @@ -2,15 +2,6 @@ import pandas as pd configfile: "config.yaml" - - - -samples = pd.read_table(config['samples_table'], header=0, index_col=0, comment='#', engine='python') - -samples['out_name'] = samples['Sample_name'] + samples['Library_Type'] - - - localrules: finish ################################################################################# @@ -19,187 +10,11 @@ localrules: finish rule finish: input: - final_sample = expand(os.path.join(config["output_dir"], "{sample}", "fastqc"), sample=samples['out_name'].values), - - #fastqc = expand(os.path.join(config["output_dir"], "{sample}", "fastqc"), sample=config["sample"]), - #htseq_qa = expand(os.path.join(config["output_dir"], "{sample}", "htseq_qa", "htseq_quality.pdf"), sample=config["sample"]), - #gn_estimates = expand(os.path.join(config["output_dir"], "{sample}", "salmon_quant", "quant.genes.sf"), sample=config["sample"]), - #bam = expand(os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.bam"), sample=config["sample"]) - + final_sample = [] # TODO: list the final targets here; expand() needs a file pattern ################################################################################## # Execution dependend on sequencing mode ################################################################################## - include: 'paired_end.snakefile' include: 'single_end.snakefile' - - -################################################################################## -### Fastqc -################################################################################## - -rule fastqc: - input: - reads = os.path.join(config["input_dir"], "{sample}.fastq.gz") - output: - outdir = os.path.join(config["output_dir"], "{sample}", "fastqc") - singularity: - "docker://zavolab/fastqc:0.11.8" - log: - os.path.join(config["local_log"], "fastqc_{sample}.log") - shell: - 
"(mkdir -p {output.outdir}; \ - fastqc \ - --outdir {output.outdir} \ - {input.reads}) &> {log}" - -################################################################################## -### HTSeq quality assessment of the fastq file -################################################################################## - -rule htseq_qa: - input: - reads = os.path.join(config["input_dir"], "{sample}.fastq.gz") - output: - qual_pdf = os.path.join(config["output_dir"], "{sample}", "htseq_qa", "htseq_quality.pdf") - singularity: - "docker://zavolab/python_htseq:3.6.5_0.10.0" - log: - os.path.join(config["local_log"], "htseq_qa_{sample}.log") - shell: - "(htseq-qa \ - -t fastq \ - -o {output.qual_pdf} \ - {input.reads} ) &> {log}" - -################################################################################## -### Map to other RNAs with Segemehl -################################################################################## - -rule map_to_other_RNAs: - input: - reads = os.path.join(config["input_dir"], "{sample}.fastq.gz"), - index = config["other_RNAs_index"], - sequence = config["other_RNAs_sequence"] - output: - sam = os.path.join(config["output_dir"], "{sample}", "other_genes.mapped.sam"), - reads = os.path.join(config["output_dir"], "{sample}", "other_genes.unmapped.fastq.gz") - params: - reads = os.path.join(config["output_dir"], "{sample}", "other_genes.unmapped.fastq"), - silent = "--silent", - accuracy = "90" - log: - os.path.join(config["local_log"], "map_to_other_genes_{sample}.log") - threads: 8 - singularity: - "docker://zavolab/segemehl:0.2.0" - shell: - "(segemehl.x \ - {params.silent} \ - -i {input.index} \ - -d {input.sequence} \ - -q {input.reads} \ - --accuracy {params.accuracy} \ - --threads {threads} \ - -o {output.sam} \ - -u {params.reads}; \ - gzip -c {params.reads} > {output.reads}; \ - rm {params.reads}) &> {log}" - -################################################################################## -### salmon quant 
-################################################################################## - -rule salmon_quant: - input: - reads = os.path.join(config["output_dir"], "{sample}", "other_genes.unmapped.fastq.gz"), - gtf = config["annotation_filtered"], - index = config["salmon_index"] - output: - output_dir = os.path.join(config["output_dir"], "{sample}", "salmon_quant"), - gn_estimates = os.path.join(config["output_dir"], "{sample}", "salmon_quant", "quant.genes.sf"), - tr_estimates = os.path.join(config["output_dir"], "{sample}", "salmon_quant", "quant.sf") - params: - libType = lambda wildcards: config[wildcards.sample]['libType'], - fldMean = lambda wildcards: config[wildcards.sample]['fldMean'], - fldSD = lambda wildcards: config[wildcards.sample]['fldSD'], - log: - os.path.join(config["local_log"], "salmon_quant_{sample}.log") - threads: 6 - singularity: - "docker://zavolab/salmon:0.11.0" - shell: - "(salmon quant \ - --index {input.index} \ - --libType {params.libType} \ - --unmatedReads <(zcat {input.reads}) \ - --seqBias \ - --geneMap {input.gtf} \ - --fldMean {params.fldMean} \ - --fldSD {params.fldSD} \ - --threads {threads} \ - --output {output.output_dir}) &> {log}" - -################################################################################# -### Align reads STAR -################################################################################# - -rule align_reads_STAR: - input: - index = config["STAR_index"], - reads = os.path.join(config["output_dir"], "{sample}", "other_genes.unmapped.fastq.gz"), - gtf = config["annotation"] - output: - outputfile = os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.bam") - params: - outFileNamePrefix = os.path.join(config["output_dir"], "{sample}", "STAR_") - log: - os.path.join(config["local_log"],"align_reads_STAR_{sample}.log") - threads: 8 - singularity: - "docker://zavolab/star:2.6.0a" - shell: - "(STAR --runMode alignReads \ - --twopassMode Basic \ - --runThreadN {threads} \ - --genomeDir 
{input.index} \ - --sjdbGTFfile {input.gtf} \ - --readFilesIn {input.reads} \ - --readFilesCommand zcat \ - --outFileNamePrefix {params.outFileNamePrefix} \ - --outSAMtype BAM Unsorted) &> {log}" - -################################################################################ -### Sort alignment file -################################################################################ - -rule sort_bam: - input: - bam = os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.bam") - output: - bam = os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.sorted.bam") - threads: 8 - log: - os.path.join(config["local_log"],"sort_bam_{sample}.log") - singularity: - "docker://zavolab/samtools:1.8" - shell: - "(samtools sort -@ {threads} {input.bam} > {output.bam}) &> {log}" - -################################################################################ -### Index alignment file -################################################################################ - -rule samtools_index: - input: - bam = os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.sorted.bam") - output: - bai = os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.sorted.bam.bai") - log: - os.path.join(config["local_log"],"samtools_index_{sample}.log") - singularity: - "docker://zavolab/samtools:1.8" - shell: - "(samtools index {input.bam} > {output.bai}) &> {log}" diff --git a/snakemake/paired_end.snakemake b/snakemake/paired_end.snakemake index 820a59a6b47b4b6c14be39b4d5aa318cdb0e0e05..b9443eb12d5fd7583fe039737ea65b8d23df9b00 100644 --- a/snakemake/paired_end.snakemake +++ b/snakemake/paired_end.snakemake @@ -27,12 +27,14 @@ rule htseq_qa: output: qual_pdf_mate1 = os.path.join(config["output_dir"], "paired_end", "{sample}", "htseq_quality_mate1.pdf"), qual_pdf_mate2 = os.path.join(config["output_dir"], "paired_end", "{sample}", "htseq_quality_mate2.pdf") + threads: + 2 singularity: "docker://zavolab/python_htseq:3.6.5_0.10.0" log: 
os.path.join(config["local_log"], "paired_end", "{sample}", "htseq_qa_.log") shell: - "(htseq-qa -t fastq -o {output.qual_pdf_mate1} {input.reads1}; \ - htseq-qa -t fastq -o {output.qual_pdf_mate2} {input.reads2}; ) &> {log}" + "(htseq-qa -t fastq -o {output.qual_pdf_mate1} {input.reads1} & \ + htseq-qa -t fastq -o {output.qual_pdf_mate2} {input.reads2}; wait $!) &> {log}" diff --git a/tests/RNA_Seq_data_template_test.tsv b/tests/RNA_Seq_data_template_test.tsv new file mode 100644 index 0000000000000000000000000000000000000000..34c7bb10db59c7ca9d693ded26e5cf1b5ab79b1d --- /dev/null +++ b/tests/RNA_Seq_data_template_test.tsv @@ -0,0 +1,3 @@ +Entry_Date Path_Fastq_Files Condition_Name Replicate_Name Single_Paired Mate1_File Mate2_File Mate1_Direction Mate2_Direction Mate1_5p_Adapter Mate1_3p_Adapter Mate2_5p_Adapter Mate2_3p_Adapter Fragment_Length_Mean Fragment_Length_SD Quality_Control_Flag Checksum_Raw_FASTQ_Mate1 Checksum_Raw_FASTQ_Mate2 File_Name_Metadata_File Name_Quality_Control_File_Mate1 Name_Quality_Control_File_Mate2 Organism TaxonID Strain_Isolate_Breed_Ecotype Strain_Isolate_Breed_Ecotype_ID Biomaterial_Provider Source_Tissue_Name Tissue_Code Additional_Tissue_Description Genotype_Short_Name Genotype_Description Disease_Short_Name Disease_Description Treatment_Short_Name Treatment_Description Gender Age Developmental_Stage Passage_Number Sample_Preparation_Date Prepared_By Documentation Protocol_File Sequencing_Date Sequencing_Instrument Library_preparation_kit Cycles Molecule Contaminant_Sequences BioAnalyzer_File +Fri Dec 20 00:00:00 CET 2019 /scicore/projects/openbis/userstore/biozentrum_zavolan/20191119031355465-60677668 LN18C LN18C_rep1 PAIRED BSSE_QGF_131557_HHK5FDRXX_1_7_1_LN18C_1_GAATGAGA_GAGGCATT_S1_L001_R1_001_MM_1.fastq.gz BSSE_QGF_131557_HHK5FDRXX_1_7_1_LN18C_1_GAATGAGA_GAGGCATT_S1_L001_R2_001_MM_1.fastq.gz ANTISENSE ANTISENSE AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA 300.0 100.0 xxx xxx xxx xxx xxx xxx Homo sapiens 9606 xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx 
xxx xxx +Fri Dec 20 00:00:00 CET 2019 /scicore/projects/openbis/userstore/biozentrum_zavolan/20191119031410069-60677669 LN18C LN18C_rep2 PAIRED BSSE_QGF_131558_HHK5FDRXX_1_7_2_LN18C_2_AGGCAGAG_AGAATGCC_S2_L001_R1_001_MM_1.fastq.gz BSSE_QGF_131558_HHK5FDRXX_1_7_2_LN18C_2_AGGCAGAG_AGAATGCC_S2_L001_R2_001_MM_1.fastq.gz ANTISENSE ANTISENSE AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA 300.0 100.0 xxx xxx xxx xxx xxx xxx Homo sapiens 9606 xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx