From 5094be702ba7a900b4dff7c45bb785f65b04df24 Mon Sep 17 00:00:00 2001 From: fgypas <fgypas@gmail.com> Date: Fri, 20 Dec 2019 16:13:49 +0100 Subject: [PATCH] Clean up unused code in snakemake/Snakefile. Add 2 threads in htseq_qa (paired end mode). Add example of tsv files from LabKey. --- snakemake/Snakefile | 187 +-------------------------- snakemake/paired_end.snakemake | 4 +- tests/RNA_Seq_data_template_test.tsv | 3 + 3 files changed, 7 insertions(+), 187 deletions(-) create mode 100644 tests/RNA_Seq_data_template_test.tsv diff --git a/snakemake/Snakefile b/snakemake/Snakefile index 0343922..e0b8f6f 100644 --- a/snakemake/Snakefile +++ b/snakemake/Snakefile @@ -2,15 +2,6 @@ import pandas as pd configfile: "config.yaml" - - - -samples = pd.read_table(config['samples_table'], header=0, index_col=0, comment='#', engine='python') - -samples['out_name'] = samples['Sample_name'] + samples['Library_Type'] - - - localrules: finish ################################################################################# @@ -19,187 +10,11 @@ localrules: finish rule finish: input: - final_sample = expand(os.path.join(config["output_dir"], "{sample}", "fastqc"), sample=samples['out_name'].values), - - #fastqc = expand(os.path.join(config["output_dir"], "{sample}", "fastqc"), sample=config["sample"]), - #htseq_qa = expand(os.path.join(config["output_dir"], "{sample}", "htseq_qa", "htseq_quality.pdf"), sample=config["sample"]), - #gn_estimates = expand(os.path.join(config["output_dir"], "{sample}", "salmon_quant", "quant.genes.sf"), sample=config["sample"]), - #bam = expand(os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.bam"), sample=config["sample"]) - + final_sample = expand() ################################################################################## # Execution dependend on sequencing mode ################################################################################## - include: 'paired_end.snakefile' include: 'single_end.snakefile' - - -################################################################################## -### Fastqc -################################################################################## - -rule fastqc: - input: - reads = os.path.join(config["input_dir"], "{sample}.fastq.gz") - output: - outdir = os.path.join(config["output_dir"], "{sample}", "fastqc") - singularity: - "docker://zavolab/fastqc:0.11.8" - log: - os.path.join(config["local_log"], "fastqc_{sample}.log") - shell: - "(mkdir -p {output.outdir}; \ - fastqc \ - --outdir {output.outdir} \ - {input.reads}) &> {log}" - -################################################################################## -### HTSeq quality assessment of the fastq file -################################################################################## - -rule htseq_qa: - input: - reads = os.path.join(config["input_dir"], "{sample}.fastq.gz") - output: - qual_pdf = os.path.join(config["output_dir"], "{sample}", "htseq_qa", "htseq_quality.pdf") - singularity: - "docker://zavolab/python_htseq:3.6.5_0.10.0" - log: - os.path.join(config["local_log"], "htseq_qa_{sample}.log") - shell: - "(htseq-qa \ - -t fastq \ - -o {output.qual_pdf} \ - {input.reads} ) &> {log}" - -################################################################################## -### Map to other RNAs with Segemehl -################################################################################## - -rule map_to_other_RNAs: - input: - reads = os.path.join(config["input_dir"], "{sample}.fastq.gz"), - index = config["other_RNAs_index"], - sequence = config["other_RNAs_sequence"] - output: - sam = os.path.join(config["output_dir"], "{sample}", "other_genes.mapped.sam"), - reads = os.path.join(config["output_dir"], "{sample}", "other_genes.unmapped.fastq.gz") - params: - reads = os.path.join(config["output_dir"], "{sample}", "other_genes.unmapped.fastq"), - silent = "--silent", - accuracy = "90" - log: - os.path.join(config["local_log"], "map_to_other_genes_{sample}.log") - threads: 8 - singularity: - "docker://zavolab/segemehl:0.2.0" - shell: - "(segemehl.x \ - {params.silent} \ - -i {input.index} \ - -d {input.sequence} \ - -q {input.reads} \ - --accuracy {params.accuracy} \ - --threads {threads} \ - -o {output.sam} \ - -u {params.reads}; \ - gzip -c {params.reads} > {output.reads}; \ - rm {params.reads}) &> {log}" - -################################################################################## -### salmon quant -################################################################################## - -rule salmon_quant: - input: - reads = os.path.join(config["output_dir"], "{sample}", "other_genes.unmapped.fastq.gz"), - gtf = config["annotation_filtered"], - index = config["salmon_index"] - output: - output_dir = os.path.join(config["output_dir"], "{sample}", "salmon_quant"), - gn_estimates = os.path.join(config["output_dir"], "{sample}", "salmon_quant", "quant.genes.sf"), - tr_estimates = os.path.join(config["output_dir"], "{sample}", "salmon_quant", "quant.sf") - params: - libType = lambda wildcards: config[wildcards.sample]['libType'], - fldMean = lambda wildcards: config[wildcards.sample]['fldMean'], - fldSD = lambda wildcards: config[wildcards.sample]['fldSD'], - log: - os.path.join(config["local_log"], "salmon_quant_{sample}.log") - threads: 6 - singularity: - "docker://zavolab/salmon:0.11.0" - shell: - "(salmon quant \ - --index {input.index} \ - --libType {params.libType} \ - --unmatedReads <(zcat {input.reads}) \ - --seqBias \ - --geneMap {input.gtf} \ - --fldMean {params.fldMean} \ - --fldSD {params.fldSD} \ - --threads {threads} \ - --output {output.output_dir}) &> {log}" - -################################################################################# -### Align reads STAR -################################################################################# - -rule align_reads_STAR: - input: - index = config["STAR_index"], - reads = os.path.join(config["output_dir"], "{sample}", "other_genes.unmapped.fastq.gz"), - gtf = config["annotation"] - output: - outputfile = os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.bam") - params: - outFileNamePrefix = os.path.join(config["output_dir"], "{sample}", "STAR_") - log: - os.path.join(config["local_log"],"align_reads_STAR_{sample}.log") - threads: 8 - singularity: - "docker://zavolab/star:2.6.0a" - shell: - "(STAR --runMode alignReads \ - --twopassMode Basic \ - --runThreadN {threads} \ - --genomeDir {input.index} \ - --sjdbGTFfile {input.gtf} \ - --readFilesIn {input.reads} \ - --readFilesCommand zcat \ - --outFileNamePrefix {params.outFileNamePrefix} \ - --outSAMtype BAM Unsorted) &> {log}" - -################################################################################ -### Sort alignment file -################################################################################ - -rule sort_bam: - input: - bam = os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.bam") - output: - bam = os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.sorted.bam") - threads: 8 - log: - os.path.join(config["local_log"],"sort_bam_{sample}.log") - singularity: - "docker://zavolab/samtools:1.8" - shell: - "(samtools sort -@ {threads} {input.bam} > {output.bam}) &> {log}" - -################################################################################ -### Index alignment file -################################################################################ - -rule samtools_index: - input: - bam = os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.sorted.bam") - output: - bai = os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.sorted.bam.bai") - log: - os.path.join(config["local_log"],"samtools_index_{sample}.log") - singularity: - "docker://zavolab/samtools:1.8" - shell: - "(samtools index {input.bam} > {output.bai}) &> {log}" diff --git a/snakemake/paired_end.snakemake b/snakemake/paired_end.snakemake index 820a59a..b9443eb 100644 --- a/snakemake/paired_end.snakemake +++ b/snakemake/paired_end.snakemake @@ -27,12 +27,14 @@ rule htseq_qa: output: qual_pdf_mate1 = os.path.join(config["output_dir"], "paired_end", "{sample}", "htseq_quality_mate1.pdf"), qual_pdf_mate2 = os.path.join(config["output_dir"], "paired_end", "{sample}", "htseq_quality_mate2.pdf") + threads: + 2 singularity: "docker://zavolab/python_htseq:3.6.5_0.10.0" log: os.path.join(config["local_log"], "paired_end", "{sample}", "htseq_qa_.log") shell: - "(htseq-qa -t fastq -o {output.qual_pdf_mate1} {input.reads1}; \ + "(htseq-qa -t fastq -o {output.qual_pdf_mate1} {input.reads1}; & \ htseq-qa -t fastq -o {output.qual_pdf_mate2} {input.reads2}; ) &> {log}" diff --git a/tests/RNA_Seq_data_template_test.tsv b/tests/RNA_Seq_data_template_test.tsv new file mode 100644 index 0000000..34c7bb1 --- /dev/null +++ b/tests/RNA_Seq_data_template_test.tsv @@ -0,0 +1,3 @@ +Entry_Date Path_Fastq_Files Condition_Name Replicate_Name Single_Paired Mate1_File Mate2_File Mate1_Direction Mate2_Direction Mate1_5p_Adapter Mate1_3p_Adapter Mate2_5p_Adapter Mate2_3p_Adapter Fragment_Length_Mean Fragment_Length_SD Quality_Control_Flag Checksum_Raw_FASTQ_Mate1 Checksum_Raw_FASTQ_Mate2 File_Name_Metadata_File Name_Quality_Control_File_Mate1 Name_Quality_Control_File_Mate2 Organism TaxonID Strain_Isolate_Breed_Ecotype Strain_Isolate_Breed_Ecotype_ID Biomaterial_Provider Source_Tissue_Name Tissue_Code Additional_Tissue_Description Genotype_Short_Name Genotype_Description Disease_Short_Name Disease_Description Treatment_Short_Name Treatment_Description Gender Age Developmental_Stage Passage_Number Sample_Preparation_Date Prepared_By Documentation Protocol_File Sequencing_Date Sequencing_Instrument Library_preparation_kit Cycles Molecule Contaminant_Sequences BioAnalyzer_File +Fri Dec 20 00:00:00 CET 2019 /scicore/projects/openbis/userstore/biozentrum_zavolan/20191119031355465-60677668 LN18C LN18C_rep1 PAIRED BSSE_QGF_131557_HHK5FDRXX_1_7_1_LN18C_1_GAATGAGA_GAGGCATT_S1_L001_R1_001_MM_1.fastq.gz BSSE_QGF_131557_HHK5FDRXX_1_7_1_LN18C_1_GAATGAGA_GAGGCATT_S1_L001_R2_001_MM_1.fastq.gz ANTISENSE ANTISENSE AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA 300.0 100.0 xxx xxx xxx xxx xxx xxx Homo sapiens 9606 xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx +Fri Dec 20 00:00:00 CET 2019 /scicore/projects/openbis/userstore/biozentrum_zavolan/20191119031410069-60677669 LN18C LN18C_rep2 PAIRED BSSE_QGF_131558_HHK5FDRXX_1_7_2_LN18C_2_AGGCAGAG_AGAATGCC_S2_L001_R1_001_MM_1.fastq.gz BSSE_QGF_131558_HHK5FDRXX_1_7_2_LN18C_2_AGGCAGAG_AGAATGCC_S2_L001_R2_001_MM_1.fastq.gz ANTISENSE ANTISENSE AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA 300.0 100.0 xxx xxx xxx xxx xxx xxx Homo sapiens 9606 xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx -- GitLab