Skip to content
Snippets Groups Projects
Commit 52c1de75 authored by BIOPZ-Gypas Foivos's avatar BIOPZ-Gypas Foivos
Browse files

Addition of basic steps for process data

parent 609db153
No related branches found
No related tags found
No related merge requests found
download_annotation
process_data
.*
Log.out
results
logs
dag.png
nohup.out
samples
# Imported explicitly: the rules in this file build their paths with
# os.path.join, so the name should not depend on being implicitly in scope.
import os

# Workflow settings; the values are read through the global `config` mapping.
configfile: "config.yaml"

# Rules listed here are executed on the submitting host instead of being
# sent to the cluster.
localrules: finish
#################################################################################
### Final rule
#################################################################################
rule finish:
    """Target rule: request the final per-sample results of the workflow.

    Every entry is expanded over the sample names listed in
    config["sample"].
    """
    input:
        fastqc = expand(
            os.path.join(config["output_dir"], "{sample}", "fastqc"),
            sample=config["sample"]),
        htseq_qa = expand(
            os.path.join(config["output_dir"], "{sample}", "htseq_qa", "htseq_quality.pdf"),
            sample=config["sample"]),
        gn_estimates = expand(
            os.path.join(config["output_dir"], "{sample}", "salmon_quant", "quant.genes.sf"),
            sample=config["sample"]),
        bam = expand(
            os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.bam"),
            sample=config["sample"]),
        # Requesting the index is what schedules sort_bam and samtools_index;
        # nothing else depends on their outputs, so without this target they
        # would never run.
        bai = expand(
            os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.sorted.bam.bai"),
            sample=config["sample"])
##################################################################################
### Fastqc
##################################################################################
rule fastqc:
    """Run fastqc on the gzipped FASTQ file of one sample.

    Input:  <input_dir>/{sample}.fastq.gz
    Output: <output_dir>/{sample}/fastqc (a directory)
    Both stdout and stderr of the command go to the rule's log file.
    """
    input:
        reads = os.path.join(config["input_dir"], "{sample}.fastq.gz")
    output:
        # The path is created with mkdir and handed to fastqc as --outdir,
        # i.e. it is a directory. Directory outputs have to be flagged with
        # directory(); otherwise Snakemake's output type check fails the job.
        outdir = directory(os.path.join(config["output_dir"], "{sample}", "fastqc"))
    singularity:
        "docker://zavolab/fastqc:0.11.8"
    log:
        os.path.join(config["local_log"], "fastqc_{sample}.log")
    shell:
        "(mkdir -p {output.outdir}; "
        "fastqc "
        "--outdir {output.outdir} "
        "{input.reads}) &> {log}"
##################################################################################
### HTSeq quality assessment of the fastq file
##################################################################################
rule htseq_qa:
    """Run htseq-qa on the gzipped FASTQ file of one sample.

    Input:  <input_dir>/{sample}.fastq.gz
    Output: <output_dir>/{sample}/htseq_qa/htseq_quality.pdf
    Both stdout and stderr of the command go to the rule's log file.
    """
    input:
        reads = os.path.join(config["input_dir"], "{sample}.fastq.gz")
    output:
        qual_pdf = os.path.join(
            config["output_dir"], "{sample}", "htseq_qa", "htseq_quality.pdf")
    log:
        os.path.join(config["local_log"], "htseq_qa_{sample}.log")
    singularity:
        "docker://zavolab/python_htseq:3.6.5_0.10.0"
    shell:
        "(htseq-qa "
        "-t fastq "
        "-o {output.qual_pdf} "
        "{input.reads} ) &> {log}"
##################################################################################
### Map to other RNAs with Segemehl
##################################################################################
rule map_to_other_RNAs:
    """Map the reads of one sample against the "other RNAs" reference.

    segemehl.x writes the alignments to other_genes.mapped.sam and the
    unmatched reads (-u) to an uncompressed FASTQ file. That file is then
    gzipped into the declared output and the uncompressed copy is removed.
    """
    input:
        reads = os.path.join(config["input_dir"], "{sample}.fastq.gz"),
        index = config["other_RNAs_index"],
        sequence = config["other_RNAs_sequence"]
    output:
        sam = os.path.join(
            config["output_dir"], "{sample}", "other_genes.mapped.sam"),
        reads = os.path.join(
            config["output_dir"], "{sample}", "other_genes.unmapped.fastq.gz")
    params:
        # Temporary uncompressed file; it only exists while the job runs.
        reads = os.path.join(
            config["output_dir"], "{sample}", "other_genes.unmapped.fastq"),
        silent = "--silent",
        accuracy = "90"
    threads: 8
    singularity:
        "docker://zavolab/segemehl:0.2.0"
    log:
        os.path.join(config["local_log"], "map_to_other_genes_{sample}.log")
    shell:
        "(segemehl.x "
        "{params.silent} "
        "-i {input.index} "
        "-d {input.sequence} "
        "-q {input.reads} "
        "--accuracy {params.accuracy} "
        "--threads {threads} "
        "-o {output.sam} "
        "-u {params.reads}; "
        "gzip -c {params.reads} > {output.reads}; "
        "rm {params.reads}) &> {log}"
##################################################################################
### salmon quant
##################################################################################
rule salmon_quant:
    """Run `salmon quant` on the reads left unmapped by map_to_other_RNAs.

    Input:  the gzipped unmapped reads, the filtered annotation (passed as
            --geneMap) and the salmon index.
    Output: quant.genes.sf and quant.sf inside <output_dir>/{sample}/salmon_quant.
    libType, fldMean and fldSD are looked up per sample in the config.
    """
    input:
        reads = os.path.join(config["output_dir"], "{sample}", "other_genes.unmapped.fastq.gz"),
        gtf = config["annotation_filtered"],
        index = config["salmon_index"]
    output:
        gn_estimates = os.path.join(config["output_dir"], "{sample}", "salmon_quant", "quant.genes.sf"),
        tr_estimates = os.path.join(config["output_dir"], "{sample}", "salmon_quant", "quant.sf")
    params:
        # The result folder is a parameter rather than an output: it is the
        # parent of the two declared files, and an unflagged directory among
        # the outputs is rejected by Snakemake's output type check.
        output_dir = os.path.join(config["output_dir"], "{sample}", "salmon_quant"),
        libType = lambda wildcards: config[wildcards.sample]['libType'],
        fldMean = lambda wildcards: config[wildcards.sample]['fldMean'],
        fldSD = lambda wildcards: config[wildcards.sample]['fldSD'],
    log:
        os.path.join(config["local_log"], "salmon_quant_{sample}.log")
    threads: 6
    singularity:
        "docker://zavolab/salmon:0.11.0"
    shell:
        "(salmon quant "
        "--index {input.index} "
        "--libType {params.libType} "
        "--unmatedReads <(zcat {input.reads}) "
        "--seqBias "
        "--geneMap {input.gtf} "
        "--fldMean {params.fldMean} "
        "--fldSD {params.fldSD} "
        "--threads {threads} "
        "--output {params.output_dir}) &> {log}"
#################################################################################
### Align reads STAR
#################################################################################
rule align_reads_STAR:
    """Align the reads left unmapped by map_to_other_RNAs with STAR.

    STAR is given the "STAR_" file-name prefix inside the sample folder and
    asked for unsorted BAM output; the rule declares STAR_Aligned.out.bam
    as its result.
    """
    input:
        index = config["STAR_index"],
        reads = os.path.join(
            config["output_dir"], "{sample}", "other_genes.unmapped.fastq.gz"),
        gtf = config["annotation"]
    output:
        outputfile = os.path.join(
            config["output_dir"], "{sample}", "STAR_Aligned.out.bam")
    params:
        outFileNamePrefix = os.path.join(
            config["output_dir"], "{sample}", "STAR_")
    threads: 8
    singularity:
        "docker://zavolab/star:2.6.0a"
    log:
        os.path.join(config["local_log"], "align_reads_STAR_{sample}.log")
    shell:
        "(STAR --runMode alignReads "
        "--twopassMode Basic "
        "--runThreadN {threads} "
        "--genomeDir {input.index} "
        "--sjdbGTFfile {input.gtf} "
        "--readFilesIn {input.reads} "
        "--readFilesCommand zcat "
        "--outFileNamePrefix {params.outFileNamePrefix} "
        "--outSAMtype BAM Unsorted) &> {log}"
################################################################################
### Sort alignment file
################################################################################
rule sort_bam:
    """Sort the STAR alignment of one sample with `samtools sort`.

    The sorted BAM is taken from samtools' stdout and redirected into the
    output file; stderr ends up in the rule's log file.
    """
    input:
        bam = os.path.join(
            config["output_dir"], "{sample}", "STAR_Aligned.out.bam")
    output:
        bam = os.path.join(
            config["output_dir"], "{sample}", "STAR_Aligned.out.sorted.bam")
    singularity:
        "docker://zavolab/samtools:1.8"
    log:
        os.path.join(config["local_log"], "sort_bam_{sample}.log")
    threads: 8
    shell:
        "(samtools sort -@ {threads} {input.bam} "
        "> {output.bam}) &> {log}"
################################################################################
### Index alignment file
################################################################################
rule samtools_index:
    """Create the .bai index for the sorted alignment of one sample.

    Input:  <output_dir>/{sample}/STAR_Aligned.out.sorted.bam
    Output: <output_dir>/{sample}/STAR_Aligned.out.sorted.bam.bai
    """
    input:
        bam = os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.sorted.bam")
    output:
        bai = os.path.join(config["output_dir"], "{sample}", "STAR_Aligned.out.sorted.bam.bai")
    log:
        os.path.join(config["local_log"], "samtools_index_{sample}.log")
    singularity:
        "docker://zavolab/samtools:1.8"
    shell:
        # `samtools index` writes the index to a file, not to stdout, so the
        # target is given as the optional second argument instead of being
        # captured with a shell redirect.
        "(samtools index {input.bam} {output.bai}) &> {log}"
{
"__default__" :
{
"queue": "6hours",
"time": "01:00:00",
"threads": "1",
"mem": "4G",
"name": "{rule}.{wildcards}",
"out": "$PWD/logs/cluster_log/{rule}.{wildcards}-%j-%N.out"
},
"generate_segemehl_index_other_RNAs":
{
"time": "06:00:00",
"threads":"8",
"mem":"50G"
},
"map_to_other_RNAs":
{
"time": "06:00:00",
"threads":"8",
"mem":"50G"
},
"index_genome_STAR":
{
"time": "06:00:00",
"threads":"8",
"mem":"75G"
},
"align_reads_STAR":
{
"time": "06:00:00",
"threads":"8",
"mem":"75G"
},
"salmon_quant":
{
"time": "02:00:00",
"threads":"6",
"mem":"32G"
}
}
---
##############################################################################
### Annotation
##############################################################################
annotation: "../prepare_annotation/results/annotation.gtf"
genome: "../prepare_annotation/results/genome.fa"
annotation_filtered: "../prepare_annotation/results/filtered_transcripts.gtf"
STAR_index: "../prepare_annotation/results/STAR_index/"
other_RNAs_sequence: "../prepare_annotation/other.fa"
other_RNAs_index: "../prepare_annotation/results/other_RNAs_sequence.idx"
salmon_index: "../prepare_annotation/results/filtered_transcripts_salmon.idx/"
##############################################################################
### Output and log directories
##############################################################################
output_dir: "results"
local_log: "logs/local_log"
cluster_log: "logs/cluster_log"
##############################################################################
### Sample info
##############################################################################
input_dir: "samples"
sample: ["test"]
test: {libType: A, fldMean: 300, fldSD: 100}
...
# Dry-run the workflow (-n), emit its job graph (--dag) and let Graphviz `dot`
# render it to dag.png.
snakemake --dag -np | dot -Tpng > dag.png
# set -e

# Create the folders for the per-rule logs and the cluster job logs.
mkdir -p logs/cluster_log logs/local_log

# Run the workflow with jobs submitted through sbatch; the {cluster.*}
# placeholders are filled from cluster.json. Tools run inside Singularity
# containers with the current working directory bound into them.
snakemake \
    --use-singularity \
    --singularity-args "--bind ${PWD}" \
    --cluster-config cluster.json \
    --cluster "sbatch --cpus-per-task={cluster.threads} --mem={cluster.mem} --qos={cluster.queue} --time={cluster.time} --job-name={cluster.name} -o {cluster.out} -p scicore" \
    --cores 256 \
    --rerun-incomplete \
    -p
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment