Skip to content
Snippets Groups Projects
Commit 5094be70 authored by BIOPZ-Gypas Foivos's avatar BIOPZ-Gypas Foivos
Browse files

Clean up unused code in snakemake/Snakefile. Add 2 threads in htseq_qa (paired...

Clean up unused code in snakemake/Snakefile. Add 2 threads in htseq_qa (paired end mode). Add example of tsv files from LabKey.
parent cb46aa34
No related branches found
No related tags found
1 merge request!4Subpipelines
......@@ -2,15 +2,6 @@ import pandas as pd
configfile: "config.yaml"

# Load the sample sheet named by config['samples_table']: the first row is the
# header, the first column becomes the index, and '#' starts a comment.
samples = pd.read_table(
    config['samples_table'],
    header=0,
    index_col=0,
    comment='#',
    engine='python',
)

# Per-sample output label built from Sample_name followed by Library_Type
# (presumably both are text columns; verify against the sample sheet).
samples['out_name'] = samples['Sample_name'] + samples['Library_Type']

# `finish` is declared as a local rule.
localrules: finish
#################################################################################
......@@ -19,187 +10,11 @@ localrules: finish
rule finish:
    # Aggregation target: requests the per-sample "fastqc" output directory
    # for every value of samples['out_name'].
    #
    # The input previously assigned `final_sample` twice, the second time
    # with an empty `expand()` call; only the populated assignment is kept,
    # and the commented-out targets that still indexed config["sample"]
    # were dropped.
    input:
        final_sample = expand(
            os.path.join(config["output_dir"], "{sample}", "fastqc"),
            sample=samples['out_name'].values,
        )
##################################################################################
# Execution dependent on sequencing mode
##################################################################################
include: 'paired_end.snakefile'
include: 'single_end.snakefile'
##################################################################################
### Fastqc
##################################################################################
rule fastqc:
    # Runs fastqc on the gzipped FASTQ of one sample. The report directory is
    # created first, then passed to fastqc via --outdir; stdout and stderr of
    # the whole command group are captured in the rule log.
    input:
        reads = os.path.join(config["input_dir"], "{sample}.fastq.gz")
    output:
        outdir = os.path.join(config["output_dir"], "{sample}", "fastqc")
    log:
        os.path.join(config["local_log"], "fastqc_{sample}.log")
    singularity:
        "docker://zavolab/fastqc:0.11.8"
    shell:
        "(mkdir -p {output.outdir}; "
        "fastqc "
        "--outdir {output.outdir} "
        "{input.reads}) &> {log}"
##################################################################################
### HTSeq quality assessment of the fastq file
##################################################################################
rule htseq_qa:
    # Runs htseq-qa in fastq mode (-t fastq) on one sample's reads and writes
    # the quality plot to a PDF under the sample's htseq_qa directory.
    # stdout and stderr are captured in the rule log.
    input:
        reads = os.path.join(config["input_dir"], "{sample}.fastq.gz")
    output:
        qual_pdf = os.path.join(
            config["output_dir"], "{sample}", "htseq_qa", "htseq_quality.pdf"
        )
    log:
        os.path.join(config["local_log"], "htseq_qa_{sample}.log")
    singularity:
        "docker://zavolab/python_htseq:3.6.5_0.10.0"
    shell:
        "(htseq-qa "
        "-t fastq "
        "-o {output.qual_pdf} "
        "{input.reads} ) &> {log}"
##################################################################################
### Map to other RNAs with Segemehl
##################################################################################
rule map_to_other_RNAs:
    # Maps one sample's reads with segemehl.x against the index/sequence
    # configured as "other_RNAs_*". The SAM goes to output.sam; the file
    # given to -u (named as the unmapped reads) is written uncompressed to
    # params.reads, gzipped into output.reads, and then deleted.
    input:
        reads = os.path.join(config["input_dir"], "{sample}.fastq.gz"),
        index = config["other_RNAs_index"],
        sequence = config["other_RNAs_sequence"]
    output:
        sam = os.path.join(
            config["output_dir"], "{sample}", "other_genes.mapped.sam"
        ),
        reads = os.path.join(
            config["output_dir"], "{sample}", "other_genes.unmapped.fastq.gz"
        )
    params:
        # Uncompressed intermediate; removed at the end of the shell command.
        reads = os.path.join(
            config["output_dir"], "{sample}", "other_genes.unmapped.fastq"
        ),
        silent = "--silent",
        accuracy = "90"
    threads: 8
    log:
        os.path.join(config["local_log"], "map_to_other_genes_{sample}.log")
    singularity:
        "docker://zavolab/segemehl:0.2.0"
    shell:
        "(segemehl.x "
        "{params.silent} "
        "-i {input.index} "
        "-d {input.sequence} "
        "-q {input.reads} "
        "--accuracy {params.accuracy} "
        "--threads {threads} "
        "-o {output.sam} "
        "-u {params.reads}; "
        "gzip -c {params.reads} > {output.reads}; "
        "rm {params.reads}) &> {log}"
##################################################################################
### salmon quant
##################################################################################
rule salmon_quant:
    # Runs `salmon quant` on the reads left over from map_to_other_RNAs
    # (other_genes.unmapped.fastq.gz), streamed through zcat as unmated
    # reads. Library type and fragment-length mean/SD are looked up per
    # sample in config[<sample>]. Declares the salmon output directory plus
    # the quant.genes.sf and quant.sf files inside it as outputs.
    input:
        reads = os.path.join(
            config["output_dir"], "{sample}", "other_genes.unmapped.fastq.gz"
        ),
        gtf = config["annotation_filtered"],
        index = config["salmon_index"]
    output:
        output_dir = os.path.join(
            config["output_dir"], "{sample}", "salmon_quant"
        ),
        gn_estimates = os.path.join(
            config["output_dir"], "{sample}", "salmon_quant", "quant.genes.sf"
        ),
        tr_estimates = os.path.join(
            config["output_dir"], "{sample}", "salmon_quant", "quant.sf"
        )
    params:
        # Per-sample settings read from the config entry keyed by sample name.
        libType = lambda wildcards: config[wildcards.sample]['libType'],
        fldMean = lambda wildcards: config[wildcards.sample]['fldMean'],
        fldSD = lambda wildcards: config[wildcards.sample]['fldSD'],
    threads: 6
    log:
        os.path.join(config["local_log"], "salmon_quant_{sample}.log")
    singularity:
        "docker://zavolab/salmon:0.11.0"
    shell:
        "(salmon quant "
        "--index {input.index} "
        "--libType {params.libType} "
        "--unmatedReads <(zcat {input.reads}) "
        "--seqBias "
        "--geneMap {input.gtf} "
        "--fldMean {params.fldMean} "
        "--fldSD {params.fldSD} "
        "--threads {threads} "
        "--output {output.output_dir}) &> {log}"
#################################################################################
### Align reads STAR
#################################################################################
rule align_reads_STAR:
    # Aligns the reads left over from map_to_other_RNAs with STAR
    # (alignReads, two-pass "Basic" mode), reading the gzipped FASTQ via
    # zcat and writing an unsorted BAM. STAR derives its file names from
    # params.outFileNamePrefix, so output.outputfile is that prefix plus
    # "Aligned.out.bam".
    input:
        index = config["STAR_index"],
        reads = os.path.join(
            config["output_dir"], "{sample}", "other_genes.unmapped.fastq.gz"
        ),
        gtf = config["annotation"]
    output:
        outputfile = os.path.join(
            config["output_dir"], "{sample}", "STAR_Aligned.out.bam"
        )
    params:
        outFileNamePrefix = os.path.join(
            config["output_dir"], "{sample}", "STAR_"
        )
    threads: 8
    log:
        os.path.join(config["local_log"],"align_reads_STAR_{sample}.log")
    singularity:
        "docker://zavolab/star:2.6.0a"
    shell:
        "(STAR --runMode alignReads "
        "--twopassMode Basic "
        "--runThreadN {threads} "
        "--genomeDir {input.index} "
        "--sjdbGTFfile {input.gtf} "
        "--readFilesIn {input.reads} "
        "--readFilesCommand zcat "
        "--outFileNamePrefix {params.outFileNamePrefix} "
        "--outSAMtype BAM Unsorted) &> {log}"
################################################################################
### Sort alignment file
################################################################################
rule sort_bam:
    # Sorts the STAR BAM with `samtools sort`, using {threads} additional
    # threads (-@). The sorted BAM is taken from stdout and redirected into
    # output.bam; stderr ends up in the rule log.
    input:
        bam = os.path.join(
            config["output_dir"], "{sample}", "STAR_Aligned.out.bam"
        )
    output:
        bam = os.path.join(
            config["output_dir"], "{sample}", "STAR_Aligned.out.sorted.bam"
        )
    log:
        os.path.join(config["local_log"],"sort_bam_{sample}.log")
    threads: 8
    singularity:
        "docker://zavolab/samtools:1.8"
    shell:
        "(samtools sort -@ {threads} {input.bam} > {output.bam}) &> {log}"
################################################################################
### Index alignment file
################################################################################
rule samtools_index:
    # Builds the .bai index for the coordinate-sorted STAR BAM.
    #
    # `samtools index` writes the index to a file rather than to stdout, so
    # the index path is passed as the second positional argument. The
    # previous `> {output.bai}` redirect only produced the right file
    # because output.bai happens to equal the default "<bam>.bai" name.
    input:
        bam = os.path.join(
            config["output_dir"], "{sample}", "STAR_Aligned.out.sorted.bam"
        )
    output:
        bai = os.path.join(
            config["output_dir"], "{sample}", "STAR_Aligned.out.sorted.bam.bai"
        )
    log:
        os.path.join(config["local_log"],"samtools_index_{sample}.log")
    singularity:
        "docker://zavolab/samtools:1.8"
    shell:
        "(samtools index {input.bam} {output.bai}) &> {log}"
......@@ -27,12 +27,14 @@ rule htseq_qa:
output:
qual_pdf_mate1 = os.path.join(config["output_dir"], "paired_end", "{sample}", "htseq_quality_mate1.pdf"),
qual_pdf_mate2 = os.path.join(config["output_dir"], "paired_end", "{sample}", "htseq_quality_mate2.pdf")
threads:
2
singularity:
"docker://zavolab/python_htseq:3.6.5_0.10.0"
log:
os.path.join(config["local_log"], "paired_end", "{sample}", "htseq_qa_.log")
shell:
"(htseq-qa -t fastq -o {output.qual_pdf_mate1} {input.reads1}; \
"(htseq-qa -t fastq -o {output.qual_pdf_mate1} {input.reads1}; & \
htseq-qa -t fastq -o {output.qual_pdf_mate2} {input.reads2}; ) &> {log}"
......
Entry_Date Path_Fastq_Files Condition_Name Replicate_Name Single_Paired Mate1_File Mate2_File Mate1_Direction Mate2_Direction Mate1_5p_Adapter Mate1_3p_Adapter Mate2_5p_Adapter Mate2_3p_Adapter Fragment_Length_Mean Fragment_Length_SD Quality_Control_Flag Checksum_Raw_FASTQ_Mate1 Checksum_Raw_FASTQ_Mate2 File_Name_Metadata_File Name_Quality_Control_File_Mate1 Name_Quality_Control_File_Mate2 Organism TaxonID Strain_Isolate_Breed_Ecotype Strain_Isolate_Breed_Ecotype_ID Biomaterial_Provider Source_Tissue_Name Tissue_Code Additional_Tissue_Description Genotype_Short_Name Genotype_Description Disease_Short_Name Disease_Description Treatment_Short_Name Treatment_Description Gender Age Developmental_Stage Passage_Number Sample_Preparation_Date Prepared_By Documentation Protocol_File Sequencing_Date Sequencing_Instrument Library_preparation_kit Cycles Molecule Contaminant_Sequences BioAnalyzer_File
Fri Dec 20 00:00:00 CET 2019 /scicore/projects/openbis/userstore/biozentrum_zavolan/20191119031355465-60677668 LN18C LN18C_rep1 PAIRED BSSE_QGF_131557_HHK5FDRXX_1_7_1_LN18C_1_GAATGAGA_GAGGCATT_S1_L001_R1_001_MM_1.fastq.gz BSSE_QGF_131557_HHK5FDRXX_1_7_1_LN18C_1_GAATGAGA_GAGGCATT_S1_L001_R2_001_MM_1.fastq.gz ANTISENSE ANTISENSE AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA 300.0 100.0 xxx xxx xxx xxx xxx xxx Homo sapiens 9606 xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx
Fri Dec 20 00:00:00 CET 2019 /scicore/projects/openbis/userstore/biozentrum_zavolan/20191119031410069-60677669 LN18C LN18C_rep2 PAIRED BSSE_QGF_131558_HHK5FDRXX_1_7_2_LN18C_2_AGGCAGAG_AGAATGCC_S2_L001_R1_001_MM_1.fastq.gz BSSE_QGF_131558_HHK5FDRXX_1_7_2_LN18C_2_AGGCAGAG_AGAATGCC_S2_L001_R2_001_MM_1.fastq.gz ANTISENSE ANTISENSE AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA 300.0 100.0 xxx xxx xxx xxx xxx xxx Homo sapiens 9606 xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx xxx
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment