Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Z
ZARP
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
zavolan_group
pipelines
ZARP
Commits
0b6e541f
Commit
0b6e541f
authored
5 years ago
by
BIOPZ-Iborra de Toledo Paula
Browse files
Options
Downloads
Patches
Plain Diff
Single end sequencing analysis subpipeline
parent
cd2052dd
No related branches found
No related tags found
1 merge request
!4
Subpipelines
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
process_data/single_end.snakefile
+378
-0
378 additions, 0 deletions
process_data/single_end.snakefile
with
378 additions
and
0 deletions
process_data/single_end.snakefile
0 → 100644
+
378
−
0
View file @
0b6e541f
rule fastqc:
''' A quality control tool for high throughput sequence data. '''
input:
reads = os.path.join(get_fq_1("{sample}"))
output:
outdir = directory(os.path.join(config["output_dir"], "single_end", "{sample}", "fastqc"))
singularity:
"docker://zavolab/fastqc:0.11.8"
log:
os.path.join(config["local_log"], "single_end", "{sample}", "fastqc.log")
shell:
"(mkdir -p {output.outdir}; \
fastqc \
--outdir {output.outdir} \
{input.reads}) &> {log}"
rule htseq_qa:
''' Assess the technical quality of a run. '''
input:
reads = os.path.join(get_fq_1("{sample}"))
output:
qual_pdf = os.path.join(config["output_dir"], "single_end", "{sample}", "htseq_quality.pdf")
singularity:
"docker://zavolab/python_htseq:3.6.5_0.10.0"
log:
os.path.join(config["local_log"], "single_end", "{sample}", "htseq_qa.log")
shell:
"(htseq-qa \
-t fastq \
-o {output.qual_pdf} \
{input.reads} ) &> {log}"
rule remove_adapters_cutadapt:
''' Remove adapters '''
input:
reads = os.path.join(get_fq_1("{sample}"))
output:
reads = os.path.join(config["output_dir"], "single_end", "{sample}", "{sample}.remove_adapters.fastq.gz")
params:
adapters_3 = lambda wildcards:
sample_table.loc[wildcards.sample, 'fq1_3p']
adapters_5 = lambda wildcards:
sample_table.loc[wildcards.sample, 'fq1_5p']
singularity:
"docker://zavolab/cutadapt:1.16"
threads: 8
log:
os.path.join(config["local_log"], "single_end", "{sample}", "remove_adapters_cutadapt.log")
shell:
"cutadapt \
-e 0.1 \
-O 1 \
-j {threads} \
-m 10 \
-n 3 \
-a {params.adapters_3} \
-g {params.adapters_5} \
-o {output.reads} \
{input.reads}) &> {log}"
rule remove_polya_cutadapt:
''' Remove ployA tails'''
input:
reads = os.path.join(get_fq_1("{sample}"))
output:
reads = os.path.join(config["output_dir"], "single_end", "{sample}", "{sample}.remove_polya.fastq.gz")
params:
polya_3 =
adapters_3 = lambda wildcards:
sample_table.loc[wildcards.sample, "fq1_polya"]
singularity:
"docker://zavolab/cutadapt:1.16"
threads: 8
log:
os.path.join(config["local_log"], "single_end", "{sample}", "remove_polya_cutadapt.log")
shell:
"(cutadapt \
--match-read-wildcards \
-j {threads} \
-n 2 \
-e 0.1 \
-O 1 \
-q 6 \
-m 10 \
-a {params.polya_3} \
-o {output.reads} \
{input.reads}) &> {log}"
rule create_index_star:
''' Create index using STAR. '''
input:
genome = sample_table.loc["{sample}", "genome"],
gtf = sample_table.loc["{sample}", "gtf"]
output:
chromosome_info = os.path.join(
config["star_indexes"],
sample_table.loc["{sample}", "organism"],
sample_table.loc["{sample}", "index_size"],
"STAR_index","chrNameLength.txt"),
chromosome_names = os.path.join(
config["star_indexes"],
sample_table.loc["{sample}", "organism"],
sample_table.loc["{sample}", "index_size"],
"STAR_index", "chrName.txt")
params:
output_dir = lambda wildcards:
os.path.join(
config["star_indexes"],
sample_table.loc["{sample}", "organism"],
sample_table.loc[wildcards.sample, "index_size"],
"STAR_index"),
outFileNamePrefix = lambda wildcards:
os.path.join(
config["star_indexes"],
sample_table.loc["{sample}", "organism"],
sample_table.loc[wildcards.sample, "index_size"],
"STAR_index/STAR_"),
sjdbOverhang = lambda wildcards:
sample_table.loc[wildcards.sample, "index_size"]
singularity:
"docker://zavolab/star:2.6.0a"
threads: 12
log:
os.path.join(config["local_log"], "single_end", "{sample}", "create_index_star.log")
shell:
"(mkdir -p {params.output_dir}; \
chmod -R 777 {params.output_dir}; \
STAR \
--runMode genomeGenerate \
--sjdbOverhang {params.sjdbOverhang} \
--genomeDir {params.output_dir} \
--genomeFastaFiles {input.genome} \
--runThreadN {threads} \
--outFileNamePrefix {params.outFileNamePrefix} \
--sjdbGTFfile {input.gtf}) &> {log}"
rule map_genome_star:
''' Map to genome using STAR. '''
input:
index = os.path.join(
config["star_indexes"],
sample_table.loc["{sample}", "organism"],
sample_table.loc["{sample}", "index_size"],
"STAR_index","chrNameLength.txt"),
reads = os.path.join(config["output_dir"], "single_end", "{sample}", "{sample}.remove_polya.fastq.gz")
output:
bam = os.path.join(config["output_dir"], "single_end",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam"),
logfile = os.path.join(config["output_dir"], "single_end",
"{sample}",
"map_genome",
"{sample}_Log.final.out")
params:
sample_id = "{sample}"
index = lambda wildcards:
os.path.join(
config["star_indexes"],
sample_table.loc["{sample}", "organism"],
sample_table.loc[wildcards.sample, "index_size"],
"STAR_index"),
outFileNamePrefix = lambda wildcards:
os.path.join(
config["output_dir"],
"single_end",
"{sample}", "map_genome", "{sample}_"),
multimappers = lambda wildcards:
sample_table.loc[wildcards.sample, "multimappers"],
soft_clip = lambda wildcards:
sample_table.loc[wildcards.sample, "soft_clip"],
pass_mode = lambda wildcards:
sample_table.loc[wildcards.sample, "pass_mode"],
singularity:
"docker://zavolab/star:2.6.0a"
threads: 12
log:
os.path.join(config["local_log"], "single_end", "{sample}", "map_genome_star.log")
shell:
"(STAR \
--runMode alignReads \
-- twopassMode {params.pass_mode} \
--runThreadN {threads} \
--genomeDir {params.index} \
--readFilesIn {input.reads} \
--readFilesCommand zcat \
--outSAMunmapped None \
--outFilterMultimapNmax {params.multimappers} \
--outFilterMultimapScoreRange 1 \
--outFileNamePrefix {params.outFileNamePrefix} \
--outSAMattributes All \
--outStd BAM_SortedByCoordinate \
--outSAMtype BAM SortedByCoordinate \
--outFilterMismatchNoverLmax 0.04 \
--outFilterScoreMinOverLread 0.3 \
--outFilterMatchNminOverLread 0.3 \
--outFilterType BySJout \
--outReadsUnmapped None \
--outSAMattrRGline ID:rcrunch SM:{params.sample_id} \
--alignEndsType {params.soft_clip}} > {output.bam};) &> {log}"
rule index_genomic_alignment_samtools:
'''Index genome bamfile using samtools.'''
input:
bam = os.path.join(config["output_dir"],
"single_end",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam")
output:
bai = os.path.join(config["output_dir"],
"single_end",
"{sample}",
"map_genome",
"{sample}_Aligned.sortedByCoord.out.bam.bai")
singularity:
"docker://zavolab/samtools:1.8"
threads: 1
log:
os.path.join(config["local_log"], "single_end", "{sample}", "index_genomic_alignment_samtools.log")
shell:
"(samtools index {input.bam} {output.bai};) &> {log}"
rule create_index_salmon:
''' Create index for Salmon quantification. '''
input:
transcriptome = sample_table.loc[wildcards.sample, 'tr_fasta_filtered']
output:
index = os.path.join(
config["salmon_indexes"],
sample_table["{sample}", 'organism'],
"salmon.idx")
params:
kmerLen = lambda wildcards:
sample_table.loc[wildcards.sample, 'kmer']
singularity:
"docker://zavolab/salmon:0.11.0"
log:
os.path.join(config["local_log"], "single_end", "{sample}", "create_index_salmon.log")
threads: 8
shell:
"(salmon index \
--t {input.transcriptome} \
--i {output.index} \
--k {params.kmerLen} \
--threads {threads}) &> {log}"
rule quantification_salmon:
''' Quantification at transcript and gene level using Salmon. '''
input:
reads = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"{sample}.remove_polya.fastq.gz"),
index = os.path.join(
config["salmon_indexes"],
sample_table["{sample}", 'organism'],
"salmon.idx"),
gtf = sample_table.loc["{sample}", "gtf_filtered"]
output:
gn_estimates = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"salmon_quant",
"quant.genes.sf"),
tr_estimates = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"salmon_quant",
"quant.sf")
params:
output_dir = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"salmon_quant"),
libType = lambda wildcards:
sample_table.loc[wildcards.sample, "libtype"]
log:
os.path.join(config["local_log"], "single_end", "{sample}", "quantification_salmon.log")
threads: 12
conda:
"envs/salmon.yaml"
shell:
"(salmon quant \
--libType {params.libType} \
--seqBias \
--validateMappings \
--threads {threads} \
--writeUnmappedNames \
--index {input.index} \
--geneMap {input.gtf} \
--unmatedReads {input.reads} \
-o {params.output_dir}) &> {log}"
rule create_index_kallisto:
''' Create index for running Kallisto. '''
input:
transcriptome = sample_table.loc["{sample}", 'tr_fasta_filtered']
output:
index = os.path.join(
config["kallisto_indexes"],
sample_table.loc["{sample}", "organism"],
"kallisto.idx")
params:
output_dir = lambda wildcards:
os.path.join(
config["kallisto_indexes"],
sample_table.loc["{sample}", "organism"]),
log:
os.path.join(config["local_log"], "single_end", "{sample}", "create_index_kallisto.log")
singularity:
"docker://zavolab/kallisto:0.9"
shell:
"(mkdir -p {params.output_dir}; \
chmod -R 777 {params.output_dir}; \
kallisto index -i {output.index} {input.transcriptome}) &> {log}"
rule genome_quantification_kallisto:
''' Quantification at transcript and gene level using Kallisto. '''
input:
reads = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"{sample}.remove_polya.fastq.gz"),
index = os.path.join(
config["kallisto_indexes"],
sample_table.loc["{sample}", "organism"],
"kallisto.idx")
output:
pseudoalignment = os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"{sample}.kallisto.pseudo.sam")
params:
output_dir = lambda wildcards:
os.path.join(
config["output_dir"],
"single_end",
"{sample}",
"quant_kallisto"),
fraglen = lambda wildcards: sample_table.loc[wildcards.sample, 'mean'],
fragsd = lambda wildcards: sample_table.loc[wildcards.sample, 'sd'],
directionality = lambda wildcards: sample_table.loc[wildcards.sample, 'kallisto_directionality']
threads: 8
log:
os.path.join(config["local_log"],"kallisto_align_{sample}.log")
singularity:
"docker://zavolab/kallisto:0.9"
shell:
"(kallisto quant \
-i {input.index} \
-o {params.output_dir} \
--single \
-l {params.fraglen} \
-s {params.fragsd} \
--pseudobam \
--{params.directionality}-stranded \
{input.reads} > {output.pseudoalignment}) &> {log}"
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment