Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision
  • dev
  • v0.1.0
  • v0.2.0
3 results

Target

Select target project
  • zavolan_group/pipelines/zarp
1 result
Select Git revision
  • dev
  • v0.1.0
  • v0.2.0
3 results
Show changes
Commits on Source (6)
......@@ -2,7 +2,11 @@
import os
import pandas as pd
import shutil
import yaml
from shlex import quote
from typing import Tuple
## Preparations
# Get sample table
samples_table = pd.read_csv(
config['samples'],
......@@ -13,8 +17,33 @@ samples_table = pd.read_csv(
sep="\t",
)
# Parse YAML rule config file
# `rule_config` ends up as a dict in every non-failing branch, so later code
# can treat "no file given" and "empty file given" identically.
if 'rule_config' in config and config['rule_config']:
    try:
        with open(config['rule_config']) as _file:
            # An empty YAML document is parsed to None; fall back to an empty
            # dict so the type matches the "no rule config" branch below
            rule_config = yaml.safe_load(_file) or {}
        logger.info(f"Loaded rule_config from {config['rule_config']}.")
    except FileNotFoundError:
        # Log a hint for the user, then re-raise so the workflow still aborts
        logger.error(f"No rule config file found at {config['rule_config']}. Either provide file or remove rule_config parameter from config.yaml! ")
        raise
else:
    rule_config = {}
    logger.warning("No rule config specified: using default values for all tools.")
# Make sure the directory for cluster logs exists when a cluster config is in
# use; the directory is taken from the default 'out' entry and resolved
# against the current working directory
if cluster_config:
    os.makedirs(
        os.path.join(os.getcwd(), os.path.dirname(cluster_config['__default__']['out'])),
        exist_ok=True,
    )
## Function definitions
def get_sample(column_id, search_id=None, search_value=None):
""" Get relevant per sample information from samples table"""
if search_id:
if search_id == 'index':
return str(samples_table[column_id][samples_table.index == search_value][0])
......@@ -22,22 +51,59 @@ def get_sample(column_id, search_id=None, search_value=None):
return str(samples_table[column_id][samples_table[search_id] == search_value][0])
else:
return str(samples_table[column_id][0])
# Global config
localrules: start, finish, rename_star_rpm_for_alfa, prepare_multiqc_config
if cluster_config:
os.makedirs(
os.path.join(
os.getcwd(),
os.path.dirname(cluster_config['__default__']['out']),
),
exist_ok=True)
def parse_rule_config(rule_config: dict, current_rule: str, immutable: Tuple[str, ...] = ()):
    """Get rule specific parameters from rule_config file

    Args:
        rule_config: Parsed content of the rule config file, keyed by rule
            name; each entry maps parameter names to string values. A falsy
            value (empty dict, None) means no rule config is in use.
        current_rule: Name of the rule whose section is looked up.
        immutable: Parameter names that must not be set via the rule config.

    Returns:
        The parameters and their values as a single string, each item
        shell-quoted and separated by single spaces. An empty string is
        returned if there is no rule config or no (non-empty) section for
        ``current_rule``.

    Raises:
        ValueError: If the section for ``current_rule`` is not a mapping, if
            it contains a parameter listed in ``immutable``, or if the value
            of a parameter is not a string.
    """
    # If rule config file not present, empty string will be returned
    if not rule_config:
        logger.info("No rule config specified: using default values for all tools.")
        return ''

    # Same if current rule not specified in rule config
    if current_rule not in rule_config or not rule_config[current_rule]:
        logger.info(f"No additional parameters for rule {current_rule} specified: using default settings.")
        return ''

    # Subset only section for current rule
    rule_params = rule_config[current_rule]

    # A list or scalar here would otherwise fail with an opaque AttributeError
    # on `.items()`; report it like the other validation errors instead
    if not isinstance(rule_params, dict):
        raise ValueError(
            f"Parameters for rule {current_rule} have to be specified as a "
            "mapping of parameter names to string values: Found type "
            f"'{type(rule_params).__name__}'"
        )

    # Build list of parameters and values
    params_vals = []
    for param, val in rule_params.items():
        # Do not allow the user to change wiring-critical, fixed arguments, or
        # arguments that are passed through samples table
        if param in immutable:
            raise ValueError(
                f"The following parameter in rule {current_rule} is critical for the pipeline to "
                f"function as expected and cannot be modified: {param}"
            )
        # Accept only strings; this prevents unintended results potentially
        # arising from users entering reserved YAML keywords or nested
        # structures (lists, dictionaries)
        if not isinstance(val, str):
            raise ValueError(
                "Only string values allowed for tool parameters: Found type "
                f"'{type(val).__name__}' for value of parameter '{param}'"
            )
        params_vals.append(str(param))
        # Do not include a value for flags (signified by empty strings)
        if val:
            params_vals.append(val)

    # Return quoted string
    add_params = ' '.join(quote(item) for item in params_vals)
    logger.info(f"User specified additional parameters for rule {current_rule}:\n {add_params}")
    return add_params
# Global config
localrules: start, finish, rename_star_rpm_for_alfa, prepare_multiqc_config
# Include subworkflows
include: os.path.join("workflow", "rules", "paired_end.snakefile.smk")
include: os.path.join("workflow", "rules", "single_end.snakefile.smk")
rule finish:
"""
Rule for collecting outputs
......@@ -80,6 +146,8 @@ rule finish:
"summary_kallisto",
"genes_tpm.tsv")
current_rule = 'start'
rule start:
'''
Get samples
......@@ -104,21 +172,22 @@ rule start:
config["log_dir"],
"samples",
"{sample}",
"start_{sample}.{mate}.stderr.log"),
current_rule + "_{sample}.{mate}.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"samples",
"{sample}",
"start_{sample}.{mate}.stdout.log")
current_rule + "_{sample}.{mate}.stdout.log")
singularity:
"docker://bash:5.0.16"
"docker://ubuntu:focal-20210416"
shell:
"(cat {input.reads} > {output.reads}) \
1> {log.stdout} 2> {log.stderr} "
current_rule = 'fastqc'
rule fastqc:
'''
A quality control tool for high throughput sequence data
......@@ -139,30 +208,43 @@ rule fastqc:
"{sample}",
"fastqc",
"{mate}"))
params:
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'--outdir',
)
)
threads: 2
singularity:
"docker://zavolab/fastqc:0.11.9-slim"
"docker://quay.io/biocontainers/fastqc:0.11.9--hdfd78af_1"
log:
stderr = os.path.join(
config["log_dir"],
"samples",
"{sample}",
"fastqc_{mate}.stderr.log"),
current_rule + "_{mate}.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"samples",
"{sample}",
"fastqc_{mate}.stdout.log")
current_rule + "_{mate}.stdout.log")
shell:
"(mkdir -p {output.outdir}; \
fastqc --outdir {output.outdir} --threads {threads} {input.reads}) \
fastqc --outdir {output.outdir} \
--threads {threads} \
{params.additional_params} \
{input.reads}) \
1> {log.stdout} 2> {log.stderr}"
current_rule = 'create_index_star'
rule create_index_star:
"""
Create index for STAR alignments
......@@ -205,20 +287,32 @@ rule create_index_star:
"{organism}",
"{index_size}",
"STAR_index/STAR_"),
sjdbOverhang = "{index_size}"
sjdbOverhang = "{index_size}",
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'--runMode',
'--sjdbOverhang',
'--genomeDir',
'--genomeFastaFiles',
'--outFileNamePrefix',
'--sjdbGTFfile',
)
)
singularity:
"docker://zavolab/star:2.7.3a-slim"
"docker://quay.io/biocontainers/star:2.7.8a--h9ee0642_1"
threads: 12
log:
stderr = os.path.join(
config['log_dir'],
"{organism}_{index_size}_create_index_star.stderr.log"),
current_rule + "_{organism}_{index_size}.stderr.log"),
stdout = os.path.join(
config['log_dir'],
"{organism}_{index_size}_create_index_star.stdout.log")
current_rule + "_{organism}_{index_size}.stdout.log")
shell:
"(mkdir -p {params.output_dir}; \
......@@ -231,9 +325,11 @@ rule create_index_star:
--runThreadN {threads} \
--outFileNamePrefix {params.outFileNamePrefix} \
--sjdbGTFfile {input.gtf}) \
{params.additional_params} \
1> {log.stdout} 2> {log.stderr}"
current_rule = 'extract_transcriptome'
rule extract_transcriptome:
"""
Create transcriptome from genome and gene annotations
......@@ -256,24 +352,37 @@ rule extract_transcriptome:
"{organism}",
"transcriptome.fa"))
params:
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'-w',
'-g',
)
)
log:
stderr = os.path.join(
config['log_dir'],
"{organism}_extract_transcriptome.log"),
current_rule + "_{organism}.log"),
stdout = os.path.join(
config['log_dir'],
"{organism}_extract_transcriptome.log")
current_rule + "_{organism}.log")
singularity:
"docker://zavolab/gffread:0.11.7-slim"
"docker://quay.io/biocontainers/gffread:0.12.1--h2e03b76_1"
shell:
"(gffread \
-w {output.transcriptome} \
-g {input.genome} {input.gtf}) \
-g {input.genome} \
{params.additional_params} \
{input.gtf}) \
1> {log.stdout} 2> {log.stderr}"
current_rule = 'concatenate_transcriptome_and_genome'
rule concatenate_transcriptome_and_genome:
"""
Concatenate genome and transcriptome
......@@ -299,12 +408,12 @@ rule concatenate_transcriptome_and_genome:
"genome_transcriptome.fa"))
singularity:
"docker://bash:5.0.16"
"docker://ubuntu:focal-20210416"
log:
stderr = os.path.join(
config['log_dir'],
"{organism}_concatenate_transcriptome_and_genome.stderr.log")
current_rule + "_{organism}.stderr.log")
shell:
"(cat {input.transcriptome} {input.genome} \
......@@ -312,6 +421,7 @@ rule concatenate_transcriptome_and_genome:
2> {log.stderr}"
current_rule = 'create_index_salmon'
rule create_index_salmon:
"""
Create index for Salmon quantification
......@@ -339,18 +449,28 @@ rule create_index_salmon:
"salmon.idx"))
params:
kmerLen = "{kmer}"
kmerLen = "{kmer}",
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'--transcripts',
'--decoys',
'--index',
'--kmerLen',
)
)
singularity:
"docker://zavolab/salmon:1.1.0-slim"
"docker://quay.io/biocontainers/salmon:1.4.0--h84f40af_1"
log:
stderr = os.path.join(
config['log_dir'],
"{organism}_{kmer}_create_index_salmon.stderr.log"),
current_rule + "_{organism}_{kmer}.stderr.log"),
stdout = os.path.join(
config['log_dir'],
"{organism}_{kmer}_create_index_salmon.stdout.log")
current_rule + "_{organism}_{kmer}.stdout.log")
threads: 8
......@@ -361,9 +481,11 @@ rule create_index_salmon:
--index {output.index} \
--kmerLen {params.kmerLen} \
--threads {threads}) \
{params.additional_params} \
1> {log.stdout} 2> {log.stderr}"
current_rule = 'create_index_kallisto'
rule create_index_kallisto:
"""
Create index for Kallisto quantification
......@@ -384,26 +506,37 @@ rule create_index_kallisto:
params:
output_dir = os.path.join(
config['kallisto_indexes'],
"{organism}")
"{organism}"),
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'-i',
)
)
singularity:
"docker://zavolab/kallisto:0.46.1-slim"
"docker://quay.io/biocontainers/kallisto:0.46.2--h60f4f9f_2"
log:
stderr = os.path.join(
config['log_dir'],
"{organism}_create_index_kallisto.stderr.log"),
current_rule + "_{organism}.stderr.log"),
stdout = os.path.join(
config['log_dir'],
"{organism}_create_index_kallisto.stdout.log")
current_rule + "_{organism}.stdout.log")
shell:
"(mkdir -p {params.output_dir}; \
chmod -R 777 {params.output_dir}; \
kallisto index -i {output.index} {input.transcriptome}) \
kallisto index \
{params.additional_params} \
-i {output.index} \
{input.transcriptome}) \
1> {log.stdout} 2> {log.stderr}"
current_rule = 'extract_transcripts_as_bed12'
rule extract_transcripts_as_bed12:
"""
Convert transcripts to BED12 format
......@@ -417,6 +550,16 @@ rule extract_transcripts_as_bed12:
config['output_dir'],
"full_transcripts_protein_coding.bed"))
params:
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'--gtf',
'--bed12',
)
)
singularity:
"docker://zavolab/zgtf:0.1"
......@@ -425,18 +568,20 @@ rule extract_transcripts_as_bed12:
log:
stdout = os.path.join(
config['log_dir'],
"extract_transcripts_as_bed12.stdout.log"),
current_rule + ".stdout.log"),
stderr = os.path.join(
config['log_dir'],
"extract_transcripts_as_bed12.stderr.log")
current_rule + ".stderr.log")
shell:
"(gtf2bed12 \
--gtf {input.gtf} \
--bed12 {output.bed12}); \
{params.additional_params} \
1> {log.stdout} 2> {log.stderr}"
current_rule = 'index_genomic_alignment_samtools'
rule index_genomic_alignment_samtools:
'''
Index genome bamfile using samtools
......@@ -456,8 +601,15 @@ rule index_genomic_alignment_samtools:
"map_genome",
"{sample}.{seqmode}.Aligned.sortedByCoord.out.bam.bai")
params:
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=()
)
singularity:
"docker://zavolab/samtools:1.10-slim"
"docker://quay.io/biocontainers/samtools:1.3.1--h1b8c3c0_8"
threads: 1
......@@ -466,18 +618,21 @@ rule index_genomic_alignment_samtools:
config["log_dir"],
"samples",
"{sample}",
"index_genomic_alignment_samtools.{seqmode}.stderr.log"),
current_rule + ".{seqmode}.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"samples",
"{sample}",
"index_genomic_alignment_samtools.{seqmode}.stdout.log")
current_rule + ".{seqmode}.stdout.log")
shell:
"(samtools index {input.bam} {output.bai};) \
"(samtools index \
{params.additional_params} \
{input.bam} {output.bai};) \
1> {log.stdout} 2> {log.stderr}"
current_rule = 'calculate_TIN_scores'
rule calculate_TIN_scores:
"""
Calculate transcript integrity (TIN) score
......@@ -522,14 +677,23 @@ rule calculate_TIN_scores:
"TIN_score.tsv"))
params:
sample = "{sample}"
sample = "{sample}",
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'-i',
'-r',
'--names',
)
)
log:
stderr = os.path.join(
config['log_dir'],
"samples",
"{sample}",
"calculate_TIN_scores.log")
current_rule + ".log")
threads: 8
......@@ -540,11 +704,12 @@ rule calculate_TIN_scores:
"(tin_score_calculation.py \
-i {input.bam} \
-r {input.transcripts_bed12} \
-c 0 \
--names {params.sample} \
{params.additional_params} \
> {output.TIN_score};) 2> {log.stderr}"
current_rule = 'salmon_quantmerge_genes'
rule salmon_quantmerge_genes:
'''
Merge gene quantifications into a single file
......@@ -589,20 +754,32 @@ rule salmon_quantmerge_genes:
sample_name_list = expand(
"{sample}",
sample=pd.unique(samples_table.index.values)),
salmon_merge_on = "{salmon_merge_on}"
salmon_merge_on = "{salmon_merge_on}",
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'--quants',
'--genes',
'--transcripts',
'--names',
'--column',
'--output',
)
)
log:
stderr = os.path.join(
config["log_dir"],
"salmon_quantmerge_genes_{salmon_merge_on}.stderr.log"),
current_rule + "_{salmon_merge_on}.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"salmon_quantmerge_genes_{salmon_merge_on}.stdout.log")
current_rule + "_{salmon_merge_on}.stdout.log")
threads: 1
singularity:
"docker://zavolab/salmon:1.1.0-slim"
"docker://quay.io/biocontainers/salmon:1.4.0--h84f40af_1"
shell:
"(salmon quantmerge \
......@@ -611,9 +788,11 @@ rule salmon_quantmerge_genes:
--names {params.sample_name_list} \
--column {params.salmon_merge_on} \
--output {output.salmon_out};) \
{params.additional_params} \
1> {log.stdout} 2> {log.stderr}"
current_rule = 'salmon_quantmerge_transcripts'
rule salmon_quantmerge_transcripts:
'''
Merge transcript quantifications into a single file
......@@ -655,24 +834,35 @@ rule salmon_quantmerge_transcripts:
search_id='index',
search_value=i)
for i in pd.unique(samples_table.index.values)]),
sample_name_list = expand(
"{sample}",
sample=pd.unique(samples_table.index.values)),
salmon_merge_on = "{salmon_merge_on}"
salmon_merge_on = "{salmon_merge_on}",
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'--quants',
'--genes',
'--transcripts',
'--names',
'--column',
'--output',
)
)
log:
stderr = os.path.join(
config["log_dir"],
"salmon_quantmerge_transcripts_{salmon_merge_on}.stderr.log"),
current_rule + "_{salmon_merge_on}.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"salmon_quantmerge_transcripts_{salmon_merge_on}.stdout.log")
current_rule + "_{salmon_merge_on}.stdout.log")
threads: 1
singularity:
"docker://zavolab/salmon:1.1.0-slim"
"docker://quay.io/biocontainers/salmon:1.4.0--h84f40af_1"
shell:
"(salmon quantmerge \
......@@ -680,9 +870,11 @@ rule salmon_quantmerge_transcripts:
--names {params.sample_name_list} \
--column {params.salmon_merge_on} \
--output {output.salmon_out}) \
{params.additional_params} \
1> {log.stdout} 2> {log.stderr}"
current_rule= 'kallisto_merge_genes'
rule kallisto_merge_genes:
'''
Merge gene quantifications into single file
......@@ -729,14 +921,25 @@ rule kallisto_merge_genes:
sample_name_list = ','.join(expand(
"{sample}",
sample=pd.unique(samples_table.index.values))),
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'--input',
'--names',
'--txOut',
'--anno',
'--output',
)
)
log:
stderr = os.path.join(
config["log_dir"],
"kallisto_merge_genes.stderr.log"),
current_rule + ".stderr.log"),
stdout = os.path.join(
config["log_dir"],
"kallisto_merge_genes.stdout.log")
current_rule + ".stdout.log")
threads: 1
......@@ -750,10 +953,11 @@ rule kallisto_merge_genes:
--txOut FALSE \
--anno {input.gtf} \
--output {params.dir_out} \
--verbose) \
{params.additional_params} ) \
1> {log.stdout} 2> {log.stderr}"
current_rule = 'kallisto_merge_transcripts'
rule kallisto_merge_transcripts:
'''
Merge transcript quantifications into a single files
......@@ -799,14 +1003,24 @@ rule kallisto_merge_transcripts:
sample_name_list = ','.join(expand(
"{sample}",
sample=pd.unique(samples_table.index.values))),
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'--input',
'--names',
'--txOut',
'--output',
)
)
log:
stderr = os.path.join(
config["log_dir"],
"kallisto_merge_transcripts.stderr.log"),
current_rule + ".stderr.log"),
stdout = os.path.join(
config["log_dir"],
"kallisto_merge_transcripts.stdout.log")
current_rule + ".stdout.log")
threads: 1
......@@ -818,10 +1032,11 @@ rule kallisto_merge_transcripts:
--input {params.tables} \
--names {params.sample_name_list} \
--output {params.dir_out} \
--verbose) \
{params.additional_params}) \
1> {log.stdout} 2> {log.stderr}"
current_rule = 'pca_salmon'
rule pca_salmon:
input:
tpm = os.path.join(
......@@ -836,27 +1051,38 @@ rule pca_salmon:
"zpca",
"pca_salmon_{molecule}"))
params:
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'--tpm',
'--out',
)
)
log:
stderr = os.path.join(
config["log_dir"],
"pca_salmon_{molecule}.stderr.log"),
current_rule + "_{molecule}.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"pca_salmon_{molecule}.stdout.log")
current_rule + "_{molecule}.stdout.log")
threads: 1
singularity:
"docker://zavolab/zpca:0.8"
"docker://zavolab/zpca:0.8.3-1"
shell:
"(zpca-tpm \
--tpm {input.tpm} \
--out {output.out} \
--verbose) \
{params.additional_params}) \
1> {log.stdout} 2> {log.stderr}"
current_rule = 'pca_kallisto'
rule pca_kallisto:
input:
tpm = os.path.join(
......@@ -871,27 +1097,38 @@ rule pca_kallisto:
"zpca",
"pca_kallisto_{molecule}"))
params:
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'--tpm',
'--out',
)
)
log:
stderr = os.path.join(
config["log_dir"],
"pca_kallisto_{molecule}.stderr.log"),
current_rule + "_{molecule}.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"pca_kallisto_{molecule}.stdout.log")
current_rule + "_{molecule}.stdout.log")
threads: 1
singularity:
"docker://zavolab/zpca:0.8"
"docker://zavolab/zpca:0.8.3-1"
shell:
"(zpca-tpm \
--tpm {input.tpm} \
--out {output.out} \
--verbose) \
{params.additional_params}) \
1> {log.stdout} 2> {log.stderr}"
current_rule = 'star_rpm'
rule star_rpm:
'''
Create stranded bedgraph coverage with STARs RPM normalisation
......@@ -958,22 +1195,32 @@ rule star_rpm:
prefix = lambda wildcards, output:
os.path.join(
os.path.dirname(output.str1),
str(wildcards.sample) + "_")
str(wildcards.sample) + "_"),
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'--runMode',
'--inputBAMfile',
'--outWigType',
'--outFileNamePrefix',
)
)
singularity:
"docker://zavolab/star:2.7.3a-slim"
"docker://quay.io/biocontainers/star:2.7.8a--h9ee0642_1"
log:
stderr = os.path.join(
config["log_dir"],
"samples",
"{sample}",
"star_rpm.stderr.log"),
current_rule + ".stderr.log"),
stdout = os.path.join(
config["log_dir"],
"samples",
"{sample}",
"star_rpm.stdout.log")
current_rule + ".stdout.log")
threads: 4
......@@ -986,9 +1233,11 @@ rule star_rpm:
--inputBAMfile {input.bam} \
--outWigType bedGraph \
--outFileNamePrefix {params.prefix}) \
{params.additional_params} \
1> {log.stdout} 2> {log.stderr}"
current_rule = 'rename_star_rpm_for_alfa'
rule rename_star_rpm_for_alfa:
input:
plus = lambda wildcards:
......@@ -1041,15 +1290,15 @@ rule rename_star_rpm_for_alfa:
config["log_dir"],
"samples",
"{sample}",
"rename_star_rpm_for_alfa__{unique}.stderr.log"),
current_rule + "_{unique}.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"samples",
"{sample}",
"rename_star_rpm_for_alfa__{unique}.stdout.log")
current_rule + "_{unique}.stdout.log")
singularity:
"docker://bash:5.0.16"
"docker://ubuntu:focal-20210416"
shell:
"(cp {input.plus} {output.plus}; \
......@@ -1057,6 +1306,7 @@ rule rename_star_rpm_for_alfa:
1>{log.stdout} 2>{log.stderr}"
current_rule = 'generate_alfa_index'
rule generate_alfa_index:
''' Generate ALFA index files from sorted GTF file '''
input:
......@@ -1065,7 +1315,6 @@ rule generate_alfa_index:
'gtf',
search_id='organism',
search_value=wildcards.organism)),
chr_len = os.path.join(
config["star_indexes"],
"{organism}",
......@@ -1090,26 +1339,39 @@ rule generate_alfa_index:
params:
genome_index = "sorted_genes",
out_dir = lambda wildcards, output:
os.path.dirname(output.index_stranded)
os.path.dirname(output.index_stranded),
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'-a',
'-g',
'--chr_len',
'-o',
)
)
threads: 4
singularity:
"docker://zavolab/alfa:1.1.1-slim"
"docker://quay.io/biocontainers/alfa:1.1.1--pyh5e36f6f_0"
log:
os.path.join(
config["log_dir"],
"{organism}_{index_size}_generate_alfa_index.log")
current_rule + "_{organism}_{index_size}.log")
shell:
"(alfa -a {input.gtf} \
-g {params.genome_index} \
--chr_len {input.chr_len} \
-p {threads} \
-o {params.out_dir}) &> {log}"
-o {params.out_dir} \
{params.additional_params}) \
&> {log}"
current_rule = 'alfa_qc'
rule alfa_qc:
'''
Run ALFA from stranded bedgraph files
......@@ -1169,40 +1431,52 @@ rule alfa_qc:
params:
out_dir = lambda wildcards, output:
os.path.dirname(output.biotypes),
genome_index = lambda wildcards, input:
os.path.abspath(
os.path.join(
os.path.dirname(input.gtf),
"sorted_genes")),
plus = lambda wildcards, input:
os.path.basename(input.plus),
minus = lambda wildcards, input:
os.path.basename(input.minus),
name = "{sample}",
alfa_orientation = lambda wildcards:
get_sample(
'alfa_directionality',
search_id='index',
search_value=wildcards.sample),
genome_index = lambda wildcards, input:
os.path.abspath(
os.path.join(
os.path.dirname(input.gtf),
"sorted_genes")),
name = "{sample}"
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'-g',
'--bedgraph',
'-s',
)
)
singularity:
"docker://zavolab/alfa:1.1.1-slim"
"docker://quay.io/biocontainers/alfa:1.1.1--pyh5e36f6f_0"
log:
os.path.join(
config["log_dir"],
"samples",
"{sample}",
"alfa_qc.{unique}.log")
current_rule + ".{unique}.log")
shell:
"(cd {params.out_dir}; \
alfa \
-g {params.genome_index} \
--bedgraph {params.plus} {params.minus} {params.name} \
-s {params.alfa_orientation}) &> {log}"
-s {params.alfa_orientation} \
{params.additional_params}) \
&> {log}"
current_rule = 'prepare_multiqc_config'
rule prepare_multiqc_config:
'''
Prepare config for the MultiQC
......@@ -1222,25 +1496,36 @@ rule prepare_multiqc_config:
params:
logo_path = config['report_logo'],
multiqc_intro_text = config['report_description'],
url = config['report_url']
url = config['report_url'],
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'--config',
'--intro-text',
'--custom-logo',
'--url',
)
)
log:
stderr = os.path.join(
config["log_dir"],
"prepare_multiqc_config.stderr.log"),
current_rule + ".stderr.log"),
stdout = os.path.join(
config["log_dir"],
"prepare_multiqc_config.stdout.log")
current_rule + ".stdout.log")
shell:
"(python {input.script} \
--config {output.multiqc_config} \
--intro-text '{params.multiqc_intro_text}' \
--custom-logo {params.logo_path} \
--url '{params.url}') \
--url '{params.url}' \
{params.additional_params}) \
1> {log.stdout} 2> {log.stderr}"
current_rule = 'multiqc_report'
rule multiqc_report:
'''
Create report with MultiQC
......@@ -1326,28 +1611,37 @@ rule multiqc_report:
params:
results_dir = os.path.join(
config["output_dir"]),
log_dir = config["log_dir"]
log_dir = config["log_dir"],
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'--outdir',
'--config',
)
)
log:
stderr = os.path.join(
config["log_dir"],
"multiqc_report.stderr.log"),
current_rule + ".stderr.log"),
stdout = os.path.join(
config["log_dir"],
"multiqc_report.stdout.log")
current_rule + ".stdout.log")
singularity:
"docker://zavolab/multiqc-plugins:1.0.0"
"docker://zavolab/multiqc-plugins:1.2.1"
shell:
"(multiqc \
--outdir {output.multiqc_report} \
--config {input.multiqc_config} \
{params.additional_params} \
{params.results_dir} \
{params.log_dir};) \
1> {log.stdout} 2> {log.stderr}"
current_rule = 'sort_bed_4_big'
rule sort_bed_4_big:
'''
sort bedGraphs in order to work with bedGraphtobigWig
......@@ -1370,21 +1664,33 @@ rule sort_bed_4_big:
"{unique}",
"{sample}_{unique}_{strand}.sorted.bg"))
params:
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'-i',
)
)
singularity:
"docker://cjh4zavolab/bedtools:2.27"
"docker://quay.io/biocontainers/bedtools:2.27.1--h9a82719_5"
log:
stderr = os.path.join(
config["log_dir"],
"samples",
"{sample}",
"sort_bg_{unique}_{strand}.stderr.log")
current_rule + "_{unique}_{strand}.stderr.log")
shell:
"(sortBed \
-i {input.bg} \
> {output.sorted_bg};) 2> {log.stderr}"
-i {input.bg} \
{params.additional_params} \
> {output.sorted_bg};) 2> {log.stderr}"
current_rule = 'prepare_bigWig'
rule prepare_bigWig:
'''
bedGraphtobigWig, for viewing in genome browsers
......@@ -1420,25 +1726,33 @@ rule prepare_bigWig:
"{unique}",
"{sample}_{unique}_{strand}.bw")
params:
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=()
)
singularity:
"docker://zavolab/bedgraphtobigwig:4-slim"
"docker://quay.io/biocontainers/ucsc-bedgraphtobigwig:377--h0b8a92a_2"
log:
stderr = os.path.join(
config["log_dir"],
"samples",
"{sample}",
"bigwig_{unique}_{strand}.stderr.log"),
current_rule + "_{unique}_{strand}.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"samples",
"{sample}",
"bigwig_{unique}_{strand}.stdout.log")
current_rule + "_{unique}_{strand}.stdout.log")
shell:
"(bedGraphToBigWig \
{input.sorted_bg} \
{input.chr_sizes} \
{output.bigWig};) \
1> {log.stdout} 2> {log.stderr}"
{params.additional_params} \
{input.sorted_bg} \
{input.chr_sizes} \
{output.bigWig};) \
1> {log.stdout} 2> {log.stderr}"
......@@ -104,7 +104,7 @@ sample | Descriptive sample name | `str`
seqmode | Required for various steps of the workflow. One of `pe` (for paired-end libraries) or `se` (for single-end libraries). | `str`
fq1 | Path of library file in `.fastq.gz` format (or mate 1 read file for paired-end libraries) | `str`
index_size | Required for [STAR](#third-party-software-used). Ideally the maximum read length minus 1 (`max(ReadLength)-1`). Values lower than maximum read length may result in lower mapping accuracy, while higher values may result in longer processing times. | `int`
kmer | Required for [Salmon](#third-party-software-used). Default value of 31 usually works fine for reads of 75 bp or longer. Consider using lower values of poor mapping is observed. | `int`
kmer | Required for [Salmon](#third-party-software-used). Default value of 31 usually works fine for reads of 75 bp or longer. Consider using lower values if poor mapping is observed. | `int`
fq2 | Path of mate 2 read file in `.fastq.gz` format. Value ignored for single-end libraries. | `str`
fq1_3p | Required for [Cutadapt](#third-party-software-used). 3' adapter of mate 1. Use value such as `XXXXXXXXXXXXXXX` if no adapter present or if no trimming is desired. | `str`
fq1_5p | Required for [Cutadapt](#third-party-software-used). 5' adapter of mate 1. Use value such as `XXXXXXXXXXXXXXX` if no adapter present or if no trimming is desired. | `str`
......@@ -156,9 +156,8 @@ Create index for [**STAR**](#third-party-software-used) short read aligner.
- Genome sequence file (`.fasta`)
- Gene annotation file (`.gtf`)
- **Parameters**
- `--sjdbOverhang`: maximum read length - 1; lower values may reduce accuracy,
higher values may increase STAR runtime; specify in sample table column
`index_size`
- **samples.tsv**
- `--sjdbOverhang`: maximum read length - 1; lower values may reduce accuracy, higher values may increase STAR runtime; specify in sample table column `index_size`
- **Output**
- STAR index; used in [**map_genome_star**](#map_genome_star)
- Index includes files:
......@@ -215,7 +214,8 @@ Create index for [**Salmon**](#third-party-software-used) quantification.
- Chromosome name list `chrName.txt`; from
[**create_index_star**](#create_index_star)
- **Parameters**
- `--kmerLen`: k-mer length; specify in sample table column `kmer`
- **samples.tsv**
- `--kmerLen`: k-mer length; specify in sample table column `kmer`
- **Output**
- Salmon index; used in [**quantification_salmon**](#quantification_salmon)
......@@ -354,11 +354,13 @@ Calculates the Transcript Integrity Number (TIN) for each transcript with
[**index_genomic_alignment_samtools**](#index_genomic_alignment_samtools)
- Transcript annotations file (12-column `.bed`); from
[**extract_transcripts_as_bed12**](#extract_transcripts_as_bed12)
- **Parameters**
- **rule_config.yaml**
- `-c 0`: minimum number of reads mapped to a transcript (default 10)
- **Output**
- TIN score table (custom `tsv`); used in
[**merge_TIN_scores**](#merge_tin_scores)
- **Non-configurable & non-default**
- `-c 0`: minimum number of reads mapped to a transcript
#### `salmon_quantmerge_genes`
......@@ -470,8 +472,8 @@ Annotate alignments with [**ALFA**](#third-party-software-used).
[**rename_star_rpm_for_alfa**](#rename_star_rpm_for_alfa)
- ALFA index, stranded; from [**generate_alfa_index**](#generate_alfa_index)
- **Parameters**
- `-s`: library orientation; specified by user in sample table column
`kallisto_directionality`
- **samples.tsv**
- `-s`: library orientation; specified by user in sample table column `alfa_directionality`
- **Output**
- Figures for biotypes and feature categories (`.pdf`)
- Feature counts table (custom `.tsv`); used in
......@@ -486,6 +488,11 @@ Prepare config file for [**MultiQC**](#third-party-software-used).
- **Input**
- Directories created during
[**prepare_files_for_report**](#prepare_files_for_report)
- **Parameters**
All parameters for this rule have to be specified in the main `config.yaml`
- `--intro-text`
- `--custom-logo`
- `--url`
- **Output**
- Config file (`.yaml`); used in [**multiqc_report**](#multiqc_report)
......@@ -529,6 +536,8 @@ Target rule as required by [Snakemake][docs-snakemake-target-rule].
- Coverage files, one per strand and sample (`.bw`); used in
[**prepare_bigWig**](#prepare_bigwig)
### Sequencing mode-specific
> Steps described here have two variants, one with the specified names for
......@@ -544,16 +553,19 @@ Remove adapter sequences from reads with
- **Input**
- Reads file (`.fastq.gz`); from [**start**](#start)
- **Parameters**
- Adapters to be removed; specify in sample table columns `fq1_3p`, `fq1_5p`,
- **samples.tsv**
- Adapters to be removed; specify in sample table columns `fq1_3p`, `fq1_5p`,
`fq2_3p`, `fq2_5p`
- **rule_config.yaml**
- `-m 10`: Discard processed reads that are shorter than 10 (default 0, that might cause problems in downstream programs)
- `-n 2`: search for all the given adapter sequences repeatedly, either until
no adapter match was found or until 2 rounds have been performed. (default 1)
- **Output**
- Reads file (`.fastq.gz`); used in
[**remove_polya_cutadapt**](#remove_polya_cutadapt)
- **Non-configurable & non-default**
- `-j 8`: use 8 threads
- `-m 10`: Discard processed reads that are shorter than 10 (default 0, that might cause problems in downstream programs)
- `-n 2`: search for all the given adapter sequences repeatedly, either until
no adapter match was found or until 2 rounds have been performed. (default 1)
#### `remove_polya_cutadapt`
......@@ -564,17 +576,18 @@ Remove poly(A) tails from reads with
- Reads file (`.fastq.gz`); from
[**remove_adapters_cutadapt**](#remove_adapters_cutadapt)
- **Parameters**
- Poly(A) stretches to be removed; specify in sample table columns `fq1_polya`
and `fq2_polya`
- **samples.tsv**
- Poly(A) stretches to be removed; specify in sample table columns `fq1_polya` and `fq2_polya`
- **rule_config.yaml**
- `-m 10`: Discard processed reads that are shorter than 10 (default 0, which allows empty reads that might cause problems in downstream programs)
- `-O 1`: minimal overlap of 1 (default: 3)
- **Output**
- Reads file (`.fastq.gz`); used in
[**genome_quantification_kallisto**](#genome_quantification_kallisto),
[**map_genome_star**](#map_genome_star) and
[**quantification_salmon**](#quantification_salmon)
- **Non-configurable & non-default**
- `-j 8`: use 8 threads
- `-m 10`: Discard processed reads that are shorter than 10
- `-O 1`: minimal overlap of 1 (default: 3)
#### `map_genome_star`
......@@ -586,15 +599,13 @@ Align short reads to reference genome and/or transcriptome with
[**remove_polya_cutadapt**](#remove_polya_cutadapt)
- Index; from [**create_index_star**](#create_index_star)
- **Parameters**
- `--outFilterMultimapNmax`: maximum number of multiple alignments allowed;
if exceeded, read is considered unmapped; specify in sample table column
`multimappers`
- `--alignEndsType`: one of `Local` (standard local alignment with
soft-clipping allowed) or `EndToEnd` (force end-to-end read alignment, do
not soft-clip); specify in sample table column `soft_clip`
- `--twopassMode`: one of `None` (1-pass mapping) or `Basic` (basic 2-pass
mapping, with all 1st-pass junctions inserted into the genome indices on
the fly); specify in sample table column `pass_mode`
- **samples.tsv**
- `--outFilterMultimapNmax`: maximum number of multiple alignments allowed; if exceeded, read is considered unmapped; specify in sample table column `multimappers`
- `--alignEndsType`: one of `Local` (standard local alignment with soft-clipping allowed) or `EndToEnd` (force end-to-end read alignment, do not soft-clip); specify in sample table column `soft_clip`
- `--twopassMode`: one of `None` (1-pass mapping) or `Basic` (basic 2-pass mapping, with all 1st-pass junctions inserted into the genome indices on the fly); specify in sample table column `pass_mode`
- **rule_config.yaml**
- `--outFilterMultimapScoreRange=0`: the score range below the maximum score for multimapping alignments (default 1)
- `--outFilterType=BySJout`: reduces the number of "spurious" junctions
- **Output**
- Aligned reads file (`.bam`); used in
[**calculate_TIN_scores**](#calculate_TIN_scores),
......@@ -602,12 +613,9 @@ Align short reads to reference genome and/or transcriptome with
and [**star_rpm**](#star_rpm)
- STAR log file
- **Non-configurable & non-default**
- `--outFilterMultimapScoreRange=0`: the score range below the maximum score
for multimapping alignments (default 1)
- `--outSAMattributes=All`: NH HI AS nM NM MD jM jI MC ch
- `--outStd=BAM_SortedByCoordinate`: which output will be directed to `STDOUT` (default 'Log')
- `--outSAMtype=BAM SortedByCoordinate`: type of SAM/BAM output (default SAM)
- `--outFilterType=BySJout`: reduces the number of "spurious" junctions
- `--outSAMattrRGline`: ID:rnaseq_pipeline SM: *sampleID*
#### `quantification_salmon`
......@@ -621,12 +629,15 @@ Estimate transcript- and gene-level expression with
- Filtered annotation file (`.gtf`)
- Index; from [**create_index_salmon**](#create_index_salmon)
- **Parameters**
- `libType`: see [Salmon manual][docs-salmon] for allowed values; specify in
sample table column `libtype`
- `--fldMean`: mean of distribution of fragment lengths; specify in sample
table column `mean` **(single-end only)**
- `--fldSD`: standard deviation of distribution of fragment lengths; specify
in sample table column `sd` **(single-end only)**
- **samples.tsv**
- `libType`: see [Salmon manual][docs-salmon] for allowed values; specify in sample table column `libtype`
- `--fldMean`: mean of distribution of fragment lengths; specify in sample table column `mean` **(single-end only)**
- `--fldSD`: standard deviation of distribution of fragment lengths; specify in sample table column `sd` **(single-end only)**
- **rule_config.yaml**
- `--seqBias`: [correct for sequence specific
biases](https://salmon.readthedocs.io/en/latest/salmon.html#seqbias)
- `--validateMappings`: enables selective alignment of the sequencing reads when mapping them to the transcriptome; this can improve both the sensitivity and specificity of mapping and, as a result, can [improve quantification accuracy](https://salmon.readthedocs.io/en/latest/salmon.html#validatemappings).
- `--writeUnmappedNames`: write out the names of reads (or mates in paired-end reads) that do not map to the transcriptome. For paired-end this gives flags that indicate how a read failed to map **(paired-end only)**
- **Output**
- Gene expression table (`quant.sf`); used in
[**salmon_quantmerge_genes**](#salmon_quantmerge_genes)
......@@ -634,14 +645,7 @@ Estimate transcript- and gene-level expression with
[**salmon_quantmerge_transcripts**](#salmon_quantmerge_transcripts)
- `meta_info.json`
- `flenDist.txt`
- **Non-configurable & non-default**
- `--seqBias`: [correct for sequence specific
biases](https://salmon.readthedocs.io/en/latest/salmon.html#seqbias)
- `--validateMappings`: enables selective alignment of the sequencing reads
when mapping them to the transcriptome; this can improve both the
sensitivity and specificity of mapping and, as a result, can [improve
quantification
accuracy](https://salmon.readthedocs.io/en/latest/salmon.html#validatemappings).
#### `genome_quantification_kallisto`
......@@ -653,16 +657,16 @@ Generate pseudoalignments of reads to transcripts with
[**remove_polya_cutadapt**](#remove_polya_cutadapt)
- Index; from [**create_index_kallisto**](#create_index_kallisto)
- **Parameters**
- `directionality`; specify in sample table column `kallisto_directionality`
- `-l`: mean of distribution of fragment lengths; specify in sample table
column `mean` **(single-end only)**
- `-s`: standard deviation of distribution of fragment lengths; specify in
sample table column `sd` **(single-end only)**
- **samples.tsv**
- `directionality`; specify in sample table column `kallisto_directionality`
- `-l`: mean of distribution of fragment lengths; specify in sample table column `mean` **(single-end only)**
- `-s`: standard deviation of distribution of fragment lengths; specify in sample table column `sd` **(single-end only)**
- **Output**
- Pseudoalignments file (`.sam`) and
- abundance (`.h5`)
used in [**kallisto_merge_genes**](#kallisto_merge_genes)
- **Non-configurable & non-default**
- `--single`: Quantify single-end reads **(single-end only)**
- `--pseudobam`: Save pseudoalignments to transcriptome to BAM file
[code-alfa]: <https://github.com/biocompibens/ALFA>
......
---
samples: "../input_files/samples.multiple_lanes.tsv"
rule_config: "../input_files/rule_config.yaml"
output_dir: "results"
log_dir: "logs"
kallisto_indexes: "results/kallisto_indexes"
......@@ -9,4 +10,4 @@
report_description: "No description provided by user"
report_logo: "../../images/logo.128px.png"
report_url: "https://zavolan.biozentrum.unibas.ch/"
...
\ No newline at end of file
...
---
samples: "../input_files/samples.tsv"
rule_config: "../input_files/rule_config.yaml"
output_dir: "results"
log_dir: "logs"
kallisto_indexes: "results/kallisto_indexes"
......@@ -9,4 +10,4 @@
report_description: "No description provided by user"
report_logo: "../../images/logo.128px.png"
report_url: "https://zavolan.biozentrum.unibas.ch/"
...
\ No newline at end of file
...
#############################################################################
#
# __________________________________________________________________
# | WARNING: ONLY CHANGE THIS FILE IF YOU KNOW WHAT YOU'RE DOING!!! |
# | ZARP DOES NOT GUARANTEE SENSIBLE RESULTS IF PARAMETERS |
# | ARE CHANGED HERE. |
# |__________________________________________________________________|
#
# RULE CONFIGURATION
#
# For RUN SPECIFIC PARAMETERS (sample specific parameters have to be
# defined in the samples table!)
#
# Specify path to this file in main config.yaml under key 'rule_config'
#
# One top-level keyword per RULE (not per tool, as one tool might be used
# with different settings by more than one rule)
#
# Parameters have to be specified exactly as they appear on the
# command line (e.g. -n or --name)
#
# All values need to be QUOTED STRINGS; to specify flags (i.e., parameters
# without values), specify an empty string as value.
#
# Note: number of threads has to be set in the respective Snakefile
#
#############################################################################
# Specify parameters for individual rules:
################################################
# MAIN SNAKEFILE / SEQUENCING-MODE INDEPENDENT #
################################################
#start: No parameters to change here
fastqc:
create_index_star:
extract_transcriptome:
#concatenate_transcriptome_and_genome: No parameters to change here
create_index_salmon:
create_index_kallisto:
extract_transcripts_as_bed12:
index_genomic_alignment_samtools:
calculate_TIN_scores:
# Minimum number of reads mapped to a transcript (default 10, ZARP recommends 0)
-c: '0'
salmon_quantmerge_genes:
salmon_quantmerge_transcripts:
kallisto_merge_genes:
--verbose: ''
kallisto_merge_transcripts:
--verbose: ''
pca_salmon:
--verbose: ''
pca_kallisto:
--verbose: ''
star_rpm:
# rename_star_rpm_for_alfa: No parameters to change here
generate_alfa_index:
alfa_qc:
prepare_multiqc_config:
multiqc_report:
sort_bed_4_big:
prepare_bigWig:
##########################################
# SEQUENCING-MODE SPECIFIC #
# single-end: rule name without prefix, #
# paired-end: rule name with prefix 'pe' #
##########################################
remove_adapters_cutadapt:
# search for all the given adapter sequences repeatedly, either until no adapter match was found or until n rounds have been performed. (default 1, ZARP recommends 2)
-n: '2'
# Discard processed reads that are shorter than 10 (default 0, ZARP strongly recommends m > 0, because empty reads might cause problems in downstream programs)
-m: '10'
pe_remove_adapters_cutadapt:
# search for all the given adapter sequences repeatedly, either until no adapter match was found or until n rounds have been performed. (default 1, ZARP recommends 2)
-n: '2'
# Discard processed reads that are shorter than 10 (default 0, ZARP strongly recommends m > 0, because empty reads might cause problems in downstream programs)
-m: '10'
remove_polya_cutadapt:
# Discard processed reads that are shorter than 10 (default 0, ZARP strongly recommends m > 0, because empty reads might cause problems in downstream programs)
-m: '10'
# Minimal overlap of adapter and read (default 3, ZARP recommends 1 in order to remove all 3' As)
-O: '1'
pe_remove_polya_cutadapt:
# Discard processed reads that are shorter than 10 (default 0, ZARP strongly recommends m > 0, because empty reads might cause problems in downstream programs)
-m: '10'
# Minimal overlap of adapter and read (default 3, ZARP recommends 1 in order to remove all 3' As)
-O: '1'
map_genome_star:
# the score range below the maximum score for multimapping alignments (default 1, ZARP recommends 0)
--outFilterMultimapScoreRange: '0'
# keep only those reads that contain junctions that passed filtering into SJ.out.tab. (default 'Normal', ZARP recommends 'BySJout', as this reduces the number of "spurious" junctions)
--outFilterType: 'BySJout'
pe_map_genome_star:
# the score range below the maximum score for multimapping alignments (default 1, ZARP recommends 0)
--outFilterMultimapScoreRange: '0'
# keep only those reads that contain junctions that passed filtering into SJ.out.tab. (default 'Normal', ZARP recommends 'BySJout', as this reduces the number of "spurious" junctions)
--outFilterType: 'BySJout'
quantification_salmon:
# correct for sequence-specific biases (https://salmon.readthedocs.io/en/latest/salmon.html#seqbias)
--seqBias: ''
# enables selective alignment of the sequencing reads when mapping them to the transcriptome; this can improve both the sensitivity and specificity of mapping and, as a result, can improve quantification accuracy (https://salmon.readthedocs.io/en/latest/salmon.html#validatemappings)
--validateMappings: ''
pe_quantification_salmon:
# correct for sequence-specific biases (https://salmon.readthedocs.io/en/latest/salmon.html#seqbias)
--seqBias: ''
# enables selective alignment of the sequencing reads when mapping them to the transcriptome; this can improve both the sensitivity and specificity of mapping and, as a result, can improve quantification accuracy (https://salmon.readthedocs.io/en/latest/salmon.html#validatemappings)
--validateMappings: ''
# write out the names of reads (or mates in paired-end reads) that do not map to the transcriptome. For paired-end this gives flags that indicate how a read failed to map
--writeUnmappedNames: ''
genome_quantification_kallisto:
pe_genome_quantification_kallisto:
cbaebdb67aee4784b64aff7fec9fda42 results/kallisto_indexes/homo_sapiens/kallisto.idx
0ac1afd9a4f380afd70be75b21814c64 results/salmon_indexes/homo_sapiens/31/salmon.idx/versionInfo.json
204865f645102587c4953fccb256797c results/salmon_indexes/homo_sapiens/31/salmon.idx/versionInfo.json
51b5292e3a874119c0e1aa566e95d70c results/salmon_indexes/homo_sapiens/31/salmon.idx/duplicate_clusters.tsv
7f8679a6e6622e1b611642b5735f357c results/salmon_indexes/homo_sapiens/31/salmon.idx/info.json
4e10114bb8f9096d594776181424a302 results/salmon_indexes/homo_sapiens/31/salmon.idx/info.json
dee7cdc194d5d0617552b7a3b5ad8dfb results/star_indexes/homo_sapiens/75/STAR_index/chrLength.txt
8e2e96e2d6b7f29940ad5de40662b7cb results/star_indexes/homo_sapiens/75/STAR_index/chrNameLength.txt
d0826904b8afa45352906ad9591f2bfb results/star_indexes/homo_sapiens/75/STAR_index/chrName.txt
......@@ -11,7 +11,7 @@ bad9d837f9a988694cc7080ee6d2997a results/star_indexes/homo_sapiens/75/STAR_inde
0c0b013fb8cbb8f3cb7a7bf92f3b1544 results/star_indexes/homo_sapiens/75/STAR_index/geneInfo.tab
00dda17b3c3983873d1474e9a758d6e6 results/star_indexes/homo_sapiens/75/STAR_index/Genome
c0d91c3af633d9439bfd0160d11efe4d results/star_indexes/homo_sapiens/75/STAR_index/SA
27884e419e42a7c8b3b2f49543de0260 results/star_indexes/homo_sapiens/75/STAR_index/SAindex
a8dfc49713c053a8a1a2cc2527f15186 results/star_indexes/homo_sapiens/75/STAR_index/SAindex
bae93882f9148a6c55816b733c32a3a2 results/star_indexes/homo_sapiens/75/STAR_index/sjdbInfo.txt
875030141343fca11f0b5aa1a37e1b66 results/star_indexes/homo_sapiens/75/STAR_index/sjdbList.fromGTF.out.tab
ea36f062eedc7f54ceffea2b635a25a8 results/star_indexes/homo_sapiens/75/STAR_index/sjdbList.out.tab
......@@ -26,39 +26,39 @@ e90e31db1ce51d930645eb74ff70d21b results/samples/synthetic_10_reads_paired_synt
c39fc9108e6f6c0df45acc9391daad9c results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/fastqc_data.txt
82c37e4cb9c1e167383d589ccb5c80b4 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/fastqc.fo
2029b1ecea0c5fb3c54238813cf02a26 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/summary.txt
310130cbb8bbb6517f37ea0ff6586d43 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/adapter_content.png
42741852cc110a151580bb3bb5180fc0 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/duplication_levels.png
8b34217d5fd931966d9007a658570e67 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_base_n_content.png
848396c145d2157f34bbf86757f51abe results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_base_quality.png
56bd6a5f95196121173609eb70618166 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_base_sequence_content.png
e4c1a39967ec9547a2e4c71c97982ee0 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_sequence_gc_content.png
69b70e3f561b749bf10b186dd2480a8a results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_sequence_quality.png
b28aac49f537b8cba364b6422458ad28 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_tile_quality.png
5b950b5dfe3c7407e9aac153db330a38 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/sequence_length_distribution.png
310130cbb8bbb6517f37ea0ff6586d43 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/adapter_content.png
42741852cc110a151580bb3bb5180fc0 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/duplication_levels.png
8b34217d5fd931966d9007a658570e67 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_base_n_content.png
848396c145d2157f34bbf86757f51abe results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_base_quality.png
73a907996c12a3c39bea535588e65658 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_base_sequence_content.png
3a5ef8cfdbab5c8987941fdd46145ca4 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_sequence_gc_content.png
69b70e3f561b749bf10b186dd2480a8a results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_sequence_quality.png
b28aac49f537b8cba364b6422458ad28 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_tile_quality.png
5b950b5dfe3c7407e9aac153db330a38 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/sequence_length_distribution.png
caf24c834f9f8aa31473c3d5826227ac results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/adapter_content.png
909c316306050c8f7dfb9ad72dfe0334 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/duplication_levels.png
3f7d7acd0b42a4e3642f3cc8f81e7b8d results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_base_n_content.png
5475f0266800b9febf00979b8dc561e6 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_base_quality.png
42462f1beeecb7820682284f7d5518cf results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_base_sequence_content.png
dc6b69c56474f492bbc9824631ac84d3 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_sequence_gc_content.png
b5a5a126e3f85478abdac1074aaf2fe1 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_sequence_quality.png
f399fa5792cdfb72fac7ae2226723122 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_tile_quality.png
e342ab7fae5112b9ebca5a04cc6230a2 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/sequence_length_distribution.png
caf24c834f9f8aa31473c3d5826227ac results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/adapter_content.png
909c316306050c8f7dfb9ad72dfe0334 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/duplication_levels.png
3f7d7acd0b42a4e3642f3cc8f81e7b8d results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_base_n_content.png
5475f0266800b9febf00979b8dc561e6 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_base_quality.png
1c5ddee8a651c196e1b0ecdd8b406e71 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_base_sequence_content.png
eedd2f45539f47e163c2b390ba6fbcfc results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_sequence_gc_content.png
b5a5a126e3f85478abdac1074aaf2fe1 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_sequence_quality.png
f399fa5792cdfb72fac7ae2226723122 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_tile_quality.png
e342ab7fae5112b9ebca5a04cc6230a2 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/sequence_length_distribution.png
d41d8cd98f00b204e9800998ecf8427e results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.kallisto.pseudo.sam
500dd49da40b16799aba62aa5cf239ba results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.remove_adapters_mate1.fastq
500dd49da40b16799aba62aa5cf239ba results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.remove_polya_mate1.fastq
fdb8c6ddd39b606414b2785d6ec2da8a results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/fastqc_data.txt
3cb70940acdcca512207bd8613085538 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/fastqc.fo
fc276a1711cc35f7a9d5328bdbbab810 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/summary.txt
310130cbb8bbb6517f37ea0ff6586d43 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/adapter_content.png
42741852cc110a151580bb3bb5180fc0 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/duplication_levels.png
8b34217d5fd931966d9007a658570e67 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_base_n_content.png
848396c145d2157f34bbf86757f51abe results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_base_quality.png
56bd6a5f95196121173609eb70618166 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_base_sequence_content.png
e4c1a39967ec9547a2e4c71c97982ee0 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_sequence_gc_content.png
69b70e3f561b749bf10b186dd2480a8a results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_sequence_quality.png
b28aac49f537b8cba364b6422458ad28 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_tile_quality.png
5b950b5dfe3c7407e9aac153db330a38 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/sequence_length_distribution.png
caf24c834f9f8aa31473c3d5826227ac results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/adapter_content.png
909c316306050c8f7dfb9ad72dfe0334 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/duplication_levels.png
3f7d7acd0b42a4e3642f3cc8f81e7b8d results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_base_n_content.png
5475f0266800b9febf00979b8dc561e6 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_base_quality.png
42462f1beeecb7820682284f7d5518cf results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_base_sequence_content.png
dc6b69c56474f492bbc9824631ac84d3 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_sequence_gc_content.png
b5a5a126e3f85478abdac1074aaf2fe1 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_sequence_quality.png
f399fa5792cdfb72fac7ae2226723122 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_tile_quality.png
e342ab7fae5112b9ebca5a04cc6230a2 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/sequence_length_distribution.png
d41d8cd98f00b204e9800998ecf8427e results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/quant_kallisto/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.kallisto.pseudo.sam
3ce47cb1d62482c5d62337751d7e8552 results/transcriptome/homo_sapiens/transcriptome.fa
6b44c507f0a1c9f7369db0bb1deef0fd results/alfa_indexes/homo_sapiens/75/ALFA/sorted_genes.stranded.ALFA_index
......
cbaebdb67aee4784b64aff7fec9fda42 results/kallisto_indexes/homo_sapiens/kallisto.idx
0ac1afd9a4f380afd70be75b21814c64 results/salmon_indexes/homo_sapiens/31/salmon.idx/versionInfo.json
204865f645102587c4953fccb256797c results/salmon_indexes/homo_sapiens/31/salmon.idx/versionInfo.json
51b5292e3a874119c0e1aa566e95d70c results/salmon_indexes/homo_sapiens/31/salmon.idx/duplicate_clusters.tsv
7f8679a6e6622e1b611642b5735f357c results/salmon_indexes/homo_sapiens/31/salmon.idx/info.json
4e10114bb8f9096d594776181424a302 results/salmon_indexes/homo_sapiens/31/salmon.idx/info.json
dee7cdc194d5d0617552b7a3b5ad8dfb results/star_indexes/homo_sapiens/75/STAR_index/chrLength.txt
8e2e96e2d6b7f29940ad5de40662b7cb results/star_indexes/homo_sapiens/75/STAR_index/chrNameLength.txt
d0826904b8afa45352906ad9591f2bfb results/star_indexes/homo_sapiens/75/STAR_index/chrName.txt
......@@ -11,7 +11,7 @@ bad9d837f9a988694cc7080ee6d2997a results/star_indexes/homo_sapiens/75/STAR_inde
0c0b013fb8cbb8f3cb7a7bf92f3b1544 results/star_indexes/homo_sapiens/75/STAR_index/geneInfo.tab
00dda17b3c3983873d1474e9a758d6e6 results/star_indexes/homo_sapiens/75/STAR_index/Genome
c0d91c3af633d9439bfd0160d11efe4d results/star_indexes/homo_sapiens/75/STAR_index/SA
27884e419e42a7c8b3b2f49543de0260 results/star_indexes/homo_sapiens/75/STAR_index/SAindex
a8dfc49713c053a8a1a2cc2527f15186 results/star_indexes/homo_sapiens/75/STAR_index/SAindex
bae93882f9148a6c55816b733c32a3a2 results/star_indexes/homo_sapiens/75/STAR_index/sjdbInfo.txt
875030141343fca11f0b5aa1a37e1b66 results/star_indexes/homo_sapiens/75/STAR_index/sjdbList.fromGTF.out.tab
ea36f062eedc7f54ceffea2b635a25a8 results/star_indexes/homo_sapiens/75/STAR_index/sjdbList.out.tab
......@@ -26,39 +26,39 @@ e90e31db1ce51d930645eb74ff70d21b results/samples/synthetic_10_reads_paired_synt
c39fc9108e6f6c0df45acc9391daad9c results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/fastqc_data.txt
82c37e4cb9c1e167383d589ccb5c80b4 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/fastqc.fo
2029b1ecea0c5fb3c54238813cf02a26 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/summary.txt
310130cbb8bbb6517f37ea0ff6586d43 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/adapter_content.png
42741852cc110a151580bb3bb5180fc0 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/duplication_levels.png
8b34217d5fd931966d9007a658570e67 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_base_n_content.png
848396c145d2157f34bbf86757f51abe results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_base_quality.png
56bd6a5f95196121173609eb70618166 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_base_sequence_content.png
e4c1a39967ec9547a2e4c71c97982ee0 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_sequence_gc_content.png
69b70e3f561b749bf10b186dd2480a8a results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_sequence_quality.png
b28aac49f537b8cba364b6422458ad28 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_tile_quality.png
5b950b5dfe3c7407e9aac153db330a38 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/sequence_length_distribution.png
310130cbb8bbb6517f37ea0ff6586d43 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/adapter_content.png
42741852cc110a151580bb3bb5180fc0 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/duplication_levels.png
8b34217d5fd931966d9007a658570e67 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_base_n_content.png
848396c145d2157f34bbf86757f51abe results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_base_quality.png
73a907996c12a3c39bea535588e65658 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_base_sequence_content.png
3a5ef8cfdbab5c8987941fdd46145ca4 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_sequence_gc_content.png
69b70e3f561b749bf10b186dd2480a8a results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_sequence_quality.png
b28aac49f537b8cba364b6422458ad28 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_tile_quality.png
5b950b5dfe3c7407e9aac153db330a38 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/sequence_length_distribution.png
caf24c834f9f8aa31473c3d5826227ac results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/adapter_content.png
909c316306050c8f7dfb9ad72dfe0334 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/duplication_levels.png
3f7d7acd0b42a4e3642f3cc8f81e7b8d results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_base_n_content.png
5475f0266800b9febf00979b8dc561e6 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_base_quality.png
42462f1beeecb7820682284f7d5518cf results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_base_sequence_content.png
dc6b69c56474f492bbc9824631ac84d3 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_sequence_gc_content.png
b5a5a126e3f85478abdac1074aaf2fe1 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_sequence_quality.png
f399fa5792cdfb72fac7ae2226723122 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/per_tile_quality.png
e342ab7fae5112b9ebca5a04cc6230a2 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq1/synthetic_10_reads_paired_synthetic_10_reads_paired.fq1_fastqc/Images/sequence_length_distribution.png
caf24c834f9f8aa31473c3d5826227ac results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/adapter_content.png
909c316306050c8f7dfb9ad72dfe0334 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/duplication_levels.png
3f7d7acd0b42a4e3642f3cc8f81e7b8d results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_base_n_content.png
5475f0266800b9febf00979b8dc561e6 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_base_quality.png
1c5ddee8a651c196e1b0ecdd8b406e71 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_base_sequence_content.png
eedd2f45539f47e163c2b390ba6fbcfc results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_sequence_gc_content.png
b5a5a126e3f85478abdac1074aaf2fe1 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_sequence_quality.png
f399fa5792cdfb72fac7ae2226723122 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/per_tile_quality.png
e342ab7fae5112b9ebca5a04cc6230a2 results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/fastqc/fq2/synthetic_10_reads_paired_synthetic_10_reads_paired.fq2_fastqc/Images/sequence_length_distribution.png
d41d8cd98f00b204e9800998ecf8427e results/samples/synthetic_10_reads_paired_synthetic_10_reads_paired/quant_kallisto/synthetic_10_reads_paired_synthetic_10_reads_paired.pe.kallisto.pseudo.sam
500dd49da40b16799aba62aa5cf239ba results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.remove_adapters_mate1.fastq
500dd49da40b16799aba62aa5cf239ba results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.remove_polya_mate1.fastq
fdb8c6ddd39b606414b2785d6ec2da8a results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/fastqc_data.txt
3cb70940acdcca512207bd8613085538 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/fastqc.fo
fc276a1711cc35f7a9d5328bdbbab810 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/summary.txt
310130cbb8bbb6517f37ea0ff6586d43 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/adapter_content.png
42741852cc110a151580bb3bb5180fc0 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/duplication_levels.png
8b34217d5fd931966d9007a658570e67 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_base_n_content.png
848396c145d2157f34bbf86757f51abe results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_base_quality.png
56bd6a5f95196121173609eb70618166 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_base_sequence_content.png
e4c1a39967ec9547a2e4c71c97982ee0 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_sequence_gc_content.png
69b70e3f561b749bf10b186dd2480a8a results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_sequence_quality.png
b28aac49f537b8cba364b6422458ad28 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_tile_quality.png
5b950b5dfe3c7407e9aac153db330a38 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/sequence_length_distribution.png
caf24c834f9f8aa31473c3d5826227ac results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/adapter_content.png
909c316306050c8f7dfb9ad72dfe0334 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/duplication_levels.png
3f7d7acd0b42a4e3642f3cc8f81e7b8d results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_base_n_content.png
5475f0266800b9febf00979b8dc561e6 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_base_quality.png
42462f1beeecb7820682284f7d5518cf results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_base_sequence_content.png
dc6b69c56474f492bbc9824631ac84d3 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_sequence_gc_content.png
b5a5a126e3f85478abdac1074aaf2fe1 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_sequence_quality.png
f399fa5792cdfb72fac7ae2226723122 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/per_tile_quality.png
e342ab7fae5112b9ebca5a04cc6230a2 results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/fastqc/fq1/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.fq1_fastqc/Images/sequence_length_distribution.png
d41d8cd98f00b204e9800998ecf8427e results/samples/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1/quant_kallisto/synthetic_10_reads_mate_1_synthetic_10_reads_mate_1.se.kallisto.pseudo.sam
3ce47cb1d62482c5d62337751d7e8552 results/transcriptome/homo_sapiens/transcriptome.fa
6b44c507f0a1c9f7369db0bb1deef0fd results/alfa_indexes/homo_sapiens/75/ALFA/sorted_genes.stranded.ALFA_index
......
current_rule = 'pe_remove_adapters_cutadapt'
rule pe_remove_adapters_cutadapt:
'''
Remove adapters
......@@ -37,10 +38,22 @@ rule pe_remove_adapters_cutadapt:
adapter_3_mate2 = lambda wildcards:
get_sample('fq2_3p', search_id='index', search_value=wildcards.sample),
adapter_5_mate2 = lambda wildcards:
get_sample('fq2_5p', search_id='index', search_value=wildcards.sample)
get_sample('fq2_5p', search_id='index', search_value=wildcards.sample),
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'-a',
'-A',
'-g',
'-G',
'-o',
'-p',
)
)
singularity:
"docker://zavolab/cutadapt:1.16-slim"
"docker://quay.io/biocontainers/cutadapt:3.4--py37h73a75cf_1"
threads: 8
......@@ -49,22 +62,21 @@ rule pe_remove_adapters_cutadapt:
config["log_dir"],
"samples",
"{sample}",
"remove_adapters_cutadapt.pe.stderr.log"),
current_rule + ".stderr.log"),
stdout = os.path.join(
config["log_dir"],
"samples",
"{sample}",
"remove_adapters_cutadapt.pe.stdout.log")
current_rule + ".stdout.log")
shell:
"(cutadapt \
-j {threads} \
-m 10 \
-n 2 \
-a {params.adapter_3_mate1} \
-g {params.adapter_5_mate1} \
-A {params.adapter_3_mate2} \
-G {params.adapter_5_mate2} \
{params.additional_params} \
-o {output.reads1} \
-p {output.reads2} \
{input.reads1} \
......@@ -72,6 +84,7 @@ rule pe_remove_adapters_cutadapt:
1> {log.stdout} 2>{log.stderr}"
current_rule = 'pe_remove_polya_cutadapt'
rule pe_remove_polya_cutadapt:
'''
Remove polyA tails
......@@ -120,10 +133,22 @@ rule pe_remove_polya_cutadapt:
get_sample(
'fq2_polya_5p',
search_id='index',
search_value=wildcards.sample)
search_value=wildcards.sample),
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'-a',
'-A',
'-g',
'-G',
'-o',
'-p',
)
)
singularity:
"docker://zavolab/cutadapt:1.16-slim"
"docker://quay.io/biocontainers/cutadapt:3.4--py37h73a75cf_1"
threads: 8
......@@ -132,22 +157,21 @@ rule pe_remove_polya_cutadapt:
config["log_dir"],
"samples",
"{sample}",
"remove_polya_cutadapt.pe.stderr.log"),
current_rule + ".stderr.log"),
stdout = os.path.join(
config["log_dir"],
"samples",
"{sample}",
"remove_polya_cutadapt.pe.stdout.log")
current_rule + ".stdout.log")
shell:
"(cutadapt \
-j {threads} \
-m 10 \
-O 1 \
-a {params.polya_3_mate1} \
-g {params.polya_5_mate1} \
-A {params.polya_3_mate2} \
-G {params.polya_5_mate2} \
{params.additional_params} \
-o {output.reads1} \
-p {output.reads2} \
{input.reads1} \
......@@ -155,6 +179,7 @@ rule pe_remove_polya_cutadapt:
1> {log.stdout} 2>{log.stderr}"
current_rule = 'pe_map_genome_star'
rule pe_map_genome_star:
'''
Map to genome using STAR
......@@ -171,8 +196,8 @@ rule pe_map_genome_star:
'index_size',
search_id='index',
search_value=wildcards.sample),
"STAR_index",
"chrNameLength.txt"),
"STAR_index",
"chrNameLength.txt"),
reads1 = os.path.join(
config["output_dir"],
"samples",
......@@ -213,7 +238,7 @@ rule pe_map_genome_star:
'index_size',
search_id='index',
search_value=wildcards.sample),
"STAR_index")),
"STAR_index")),
outFileNamePrefix = os.path.join(
config["output_dir"],
"samples",
......@@ -235,9 +260,26 @@ rule pe_map_genome_star:
'pass_mode',
search_id='index',
search_value=wildcards.sample),
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'--twopassMode',
'--genomeDir',
'--readFilesIn',
'--readFilesCommand',
'--outFilterMultimapNmax',
'--outFileNamePrefix',
'--outSAMattributes',
'--outStd',
'--outSAMtype',
'--outSAMattrRGline',
'--alignEndsType',
)
)
singularity:
"docker://zavolab/star:2.7.3a-slim"
"docker://quay.io/biocontainers/star:2.7.8a--h9ee0642_1"
threads: 12
......@@ -246,7 +288,7 @@ rule pe_map_genome_star:
config["log_dir"],
"samples",
"{sample}",
"map_genome_star.pe.stderr.log")
current_rule + ".stderr.log")
shell:
"(STAR \
......@@ -256,17 +298,18 @@ rule pe_map_genome_star:
--readFilesIn {input.reads1} {input.reads2} \
--readFilesCommand zcat \
--outFilterMultimapNmax {params.multimappers} \
--outFilterMultimapScoreRange 0 \
--outFileNamePrefix {params.outFileNamePrefix} \
--outSAMattributes All \
--outStd BAM_SortedByCoordinate \
--outSAMtype BAM SortedByCoordinate \
--outFilterType BySJout \
--outSAMattrRGline ID:rnaseq_pipeline SM:{params.sample_id} \
--alignEndsType {params.soft_clip} > {output.bam};) \
--alignEndsType {params.soft_clip} \
{params.additional_params} \
> {output.bam};) \
2> {log.stderr}"
current_rule = 'pe_quantification_salmon'
rule pe_quantification_salmon:
'''
Quantification at transcript and gene level using Salmon
......@@ -298,7 +341,7 @@ rule pe_quantification_salmon:
'kmer',
search_id='index',
search_value=wildcards.sample),
"salmon.idx")
"salmon.idx")
output:
gn_estimates = os.path.join(
......@@ -340,32 +383,44 @@ rule pe_quantification_salmon:
get_sample(
'libtype',
search_id='index',
search_value=wildcards.sample)
search_value=wildcards.sample),
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'--libType',
'--fldMean',
'--fldSD',
'--index',
'--geneMap',
'-1',
'-2',
'-o',
)
)
log:
stderr = os.path.join(
config["log_dir"],
"samples",
"{sample}",
"genome_quantification_salmon.pe.stderr.log"),
current_rule + ".stderr.log"),
stdout = os.path.join(
config["log_dir"],
"samples",
"{sample}",
"genome_quantification_salmon.pe.stdout.log"),
current_rule + ".stdout.log"),
threads: 6
singularity:
"docker://zavolab/salmon:1.1.0-slim"
"docker://quay.io/biocontainers/salmon:1.4.0--h84f40af_1"
shell:
"(salmon quant \
--libType {params.libType} \
--seqBias \
--validateMappings \
--threads {threads} \
--writeUnmappedNames \
{params.additional_params} \
--index {input.index} \
--geneMap {input.gtf} \
-1 {input.reads1} \
......@@ -374,6 +429,7 @@ rule pe_quantification_salmon:
) 1> {log.stdout} 2> {log.stderr}"
current_rule = 'pe_genome_quantification_kallisto'
rule pe_genome_quantification_kallisto:
'''
Quantification at transcript and gene level using Kallisto
......@@ -396,7 +452,7 @@ rule pe_genome_quantification_kallisto:
'organism',
search_id='index',
search_value=wildcards.sample),
"kallisto.idx")
"kallisto.idx")
output:
pseudoalignment = os.path.join(
......@@ -424,10 +480,25 @@ rule pe_genome_quantification_kallisto:
get_sample(
'kallisto_directionality',
search_id='index',
search_value=wildcards.sample)
search_value=wildcards.sample),
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'--single',
'-i',
'-o',
'-l',
'-s',
'--pseudobam',
'--fr-stranded',
'--rf-stranded',
)
)
singularity:
"docker://zavolab/kallisto:0.46.1-slim"
"docker://quay.io/biocontainers/kallisto:0.46.2--h60f4f9f_2"
threads: 8
......@@ -436,15 +507,16 @@ rule pe_genome_quantification_kallisto:
config["log_dir"],
"samples",
"{sample}",
"genome_quantification_kallisto.pe.stderr.log")
current_rule + ".stderr.log")
shell:
"(kallisto quant \
-i {input.index} \
-o {params.output_dir} \
--pseudobam \
-t {threads} \
{params.directionality}-stranded \
{params.additional_params} \
--pseudobam \
{input.reads1} {input.reads2} > {output.pseudoalignment}) \
2> {log.stderr}"
current_rule = 'remove_adapters_cutadapt'
rule remove_adapters_cutadapt:
'''
Remove adapters
......@@ -27,10 +28,22 @@ rule remove_adapters_cutadapt:
get_sample(
'fq1_5p',
search_id='index',
search_value=wildcards.sample)
search_value=wildcards.sample),
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'-a',
'-A',
'-g',
'-G',
'-o',
'-p',
)
)
singularity:
"docker://zavolab/cutadapt:1.16-slim"
"docker://quay.io/biocontainers/cutadapt:3.4--py37h73a75cf_1"
threads: 8
......@@ -39,24 +52,24 @@ rule remove_adapters_cutadapt:
config["log_dir"],
"samples",
"{sample}",
"remove_adapters_cutadapt.se.stderr.log"),
current_rule + ".se.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"samples",
"{sample}",
"remove_adapters_cutadapt.se.stdout.log")
current_rule + ".se.stdout.log")
shell:
"(cutadapt \
-j {threads} \
-m 10 \
-n 2 \
-a {params.adapters_3} \
-g {params.adapters_5} \
{params.additional_params} \
-o {output.reads} \
{input.reads}) \
1> {log.stdout} 2> {log.stderr}"
current_rule = 'remove_polya_cutadapt'
rule remove_polya_cutadapt:
'''
Remove polyA tails
......@@ -85,10 +98,22 @@ rule remove_polya_cutadapt:
get_sample(
'fq1_polya_5p',
search_id='index',
search_value=wildcards.sample)
search_value=wildcards.sample),
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'-a',
'-A',
'-g',
'-G',
'-o',
'-p',
)
)
singularity:
"docker://zavolab/cutadapt:1.16-slim"
"docker://quay.io/biocontainers/cutadapt:3.4--py37h73a75cf_1"
threads: 8
......@@ -97,25 +122,25 @@ rule remove_polya_cutadapt:
config["log_dir"],
"samples",
"{sample}",
"remove_polya_cutadapt.se.stderr.log"),
current_rule + ".se.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"samples",
"{sample}",
"remove_polya_cutadapt.se.stdout.log")
current_rule + ".se.stdout.log")
shell:
"(cutadapt \
-j {threads} \
-O 1 \
-m 10 \
-a {params.polya_3} \
-g {params.polya_5} \
{params.additional_params} \
-o {output.reads} \
{input.reads};) \
1> {log.stdout} 2> {log.stderr}"
current_rule = 'map_genome_star'
rule map_genome_star:
'''
Map to genome using STAR
......@@ -178,10 +203,27 @@ rule map_genome_star:
get_sample(
'pass_mode',
search_id='index',
search_value=wildcards.sample)
search_value=wildcards.sample),
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'--twopassMode',
'--genomeDir',
'--readFilesIn',
'--readFilesCommand',
'--outFilterMultimapNmax',
'--outFileNamePrefix',
'--outSAMattributes',
'--outStd',
'--outSAMtype',
'--outSAMattrRGline',
'--alignEndsType',
)
)
singularity:
"docker://zavolab/star:2.7.3a-slim"
"docker://quay.io/biocontainers/star:2.7.8a--h9ee0642_1"
threads: 12
......@@ -190,27 +232,28 @@ rule map_genome_star:
config["log_dir"],
"samples",
"{sample}",
"map_genome_star.se.stderr.log")
current_rule + ".se.stderr.log")
shell:
"(STAR \
-- twopassMode {params.pass_mode} \
--twopassMode {params.pass_mode} \
--runThreadN {threads} \
--genomeDir {params.index} \
--readFilesIn {input.reads} \
--readFilesCommand zcat \
--outFilterMultimapNmax {params.multimappers} \
--outFilterMultimapScoreRange 0 \
--outFileNamePrefix {params.outFileNamePrefix} \
--outSAMattributes All \
--outStd BAM_SortedByCoordinate \
--outSAMtype BAM SortedByCoordinate \
--outFilterType BySJout \
--outSAMattrRGline ID:rnaseq_pipeline SM:{params.sample_id} \
--alignEndsType {params.soft_clip} > {output.bam};) \
--alignEndsType {params.soft_clip} \
{params.additional_params} \
> {output.bam};) \
2> {log.stderr}"
current_rule = 'quantification_salmon'
rule quantification_salmon:
'''
Quantification at transcript and gene level using Salmon
......@@ -289,32 +332,44 @@ rule quantification_salmon:
get_sample(
'sd',
search_id='index',
search_value=wildcards.sample)
search_value=wildcards.sample),
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'--libType',
'--fldMean',
'--fldSD',
'--index',
'--geneMap',
'--unmatedReads',
'-o',
)
)
log:
stderr = os.path.join(
config["log_dir"],
"samples",
"{sample}",
"quantification_salmon.se.stderr.log"),
current_rule + ".se.stderr.log"),
stdout = os.path.join(
config["log_dir"],
"samples",
"{sample}",
"quantification_salmon.se.stdout.log")
current_rule + ".se.stdout.log")
threads: 12
singularity:
"docker://zavolab/salmon:1.1.0-slim"
"docker://quay.io/biocontainers/salmon:1.4.0--h84f40af_1"
shell:
"(salmon quant \
--libType {params.libType} \
--seqBias \
--validateMappings \
--threads {threads} \
--fldMean {params.fraglen} \
--fldSD {params.fragsd} \
{params.additional_params} \
--index {input.index} \
--geneMap {input.gtf} \
--unmatedReads {input.reads} \
......@@ -322,6 +377,7 @@ rule quantification_salmon:
1> {log.stdout} 2> {log.stderr}"
current_rule = 'genome_quantification_kallisto'
rule genome_quantification_kallisto:
'''
Quantification at transcript and gene level using Kallisto
......@@ -377,7 +433,21 @@ rule genome_quantification_kallisto:
get_sample(
'kallisto_directionality',
search_id='index',
search_value=wildcards.sample)
search_value=wildcards.sample),
additional_params = parse_rule_config(
rule_config,
current_rule=current_rule,
immutable=(
'--single',
'-i',
'-o',
'-l',
'-s',
'--pseudobam',
'--fr-stranded',
'--rf-stranded',
)
)
threads: 8
......@@ -386,21 +456,22 @@ rule genome_quantification_kallisto:
config["log_dir"],
"samples",
"{sample}",
"genome_quantification_kallisto.se.stderr.log")
current_rule + ".se.stderr.log")
singularity:
"docker://zavolab/kallisto:0.46.1-slim"
"docker://quay.io/biocontainers/kallisto:0.46.2--h60f4f9f_2"
shell:
"(kallisto quant \
--single \
-i {input.index} \
-o {params.output_dir} \
--single \
-l {params.fraglen} \
-s {params.fragsd} \
--pseudobam \
-t {threads} \
{params.directionality}-stranded \
{params.additional_params} \
--pseudobam \
{input.reads} > {output.pseudoalignment};) \
2> {log.stderr}"
......@@ -98,12 +98,12 @@ module_order:
- cutadapt:
name: "Cutadapt: adapter removal"
path_filters:
- "*/*/remove_adapters_cutadapt*.stdout.log"
- "*/*/*remove_adapters_cutadapt*.stdout.log"
- cutadapt:
name: "Cutadapt: polyA tails removal"
path_filters:
- "*/*/remove_polya_cutadapt*.stdout.log"
- "*/*/*remove_polya_cutadapt*.stdout.log"
- star:
path_filters:
......@@ -123,7 +123,27 @@ module_order:
- kallisto:
path_filters:
- "*/*/genome_quantification_kallisto*.stderr.log"
- "*/*/*genome_quantification_kallisto*.stderr.log"
- zpca:
name: "zpca: salmon | gene expression"
path_filters:
- "*/zpca/pca_salmon_genes/*"
- zpca:
name: "zpca: salmon | transcript expression"
path_filters:
- "*/zpca/pca_salmon_transcripts/*"
- zpca:
name: "zpca: kallisto | gene expression"
path_filters:
- "*/zpca/pca_kallisto_genes/*"
- zpca:
name: "zpca: kallisto | transcript expression"
path_filters:
- "*/zpca/pca_kallisto_transcripts/*"
fn_clean_exts:
- '.fq1'
......