diff --git a/tests/input_files/samples.multiple_lanes.tsv b/tests/input_files/samples.multiple_lanes.tsv index 7968fb46b8331974de83555f0ea6d01b0fe08821..13aad1b1f4a034ba0490218aca28bea780b37f01 100644 --- a/tests/input_files/samples.multiple_lanes.tsv +++ b/tests/input_files/samples.multiple_lanes.tsv @@ -1,5 +1,5 @@ -sample seqmode fq1 fq2 index_size kmer fq1_3p fq1_5p fq2_3p fq2_5p organism gtf genome sd mean kallisto_directionality alfa_directionality alfa_plus alfa_minus multimappers soft_clip pass_mode libtype fq1_polya_3p fq1_polya_5p fq2_polya_3p fq2_polya_5p -synthetic_10_reads_paired_synthetic_10_reads_paired pe ../input_files/pe_lane1/synthetic_split_lane1.mate_1.fastq.gz ../input_files/pe_lane1/synthetic_split_lane1.mate_2.fastq.gz 75 31 AGATCGGAAGAGCACA XXXXXXXXXXXXXXX AGATCGGAAGAGCGT XXXXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa 100 250 --fr fr-firststrand str1 str2 10 EndToEnd None A AAAAAAAAAAAAAAA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX TTTTTTTTTTTTTTT -synthetic_10_reads_paired_synthetic_10_reads_paired pe ../input_files/pe_lane2/synthetic_split_lane2.mate_1.fastq.gz ../input_files/pe_lane2/synthetic_split_lane2.mate_2.fastq.gz 75 31 AGATCGGAAGAGCACA XXXXXXXXXXXXXXX AGATCGGAAGAGCGT XXXXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa 100 250 --fr fr-firststrand str1 str2 10 EndToEnd None A AAAAAAAAAAAAAAA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX TTTTTTTTTTTTTTT -synthetic_10_reads_mate_1_synthetic_10_reads_mate_1 se ../input_files/se_lane1/synthetic_split_lane1.mate_1.fastq.gz XXXXXXXXXXXXXXX 75 31 AGATCGGAAGAGCACA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa 100 250 --fr fr-firststrand str1 str2 10 EndToEnd None A AAAAAAAAAAAAAAA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX -synthetic_10_reads_mate_1_synthetic_10_reads_mate_1 se ../input_files/se_lane2/synthetic_split_lane2.mate_1.fastq.gz XXXXXXXXXXXXXXX 75 31 AGATCGGAAGAGCACA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa 100 250 --fr fr-firststrand str1 str2 10 EndToEnd None A AAAAAAAAAAAAAAA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX +sample seqmode fq1 fq2 index_size kmer fq1_3p fq1_5p fq2_3p fq2_5p organism gtf genome sd mean multimappers soft_clip pass_mode salmon_code fq1_polya_3p fq1_polya_5p fq2_polya_3p fq2_polya_5p +synthetic_10_reads_paired_synthetic_10_reads_paired pe ../input_files/pe_lane1/synthetic_split_lane1.mate_1.fastq.gz ../input_files/pe_lane1/synthetic_split_lane1.mate_2.fastq.gz 75 31 AGATCGGAAGAGCACA XXXXXXXXXXXXXXX AGATCGGAAGAGCGT XXXXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa 100 250 10 EndToEnd None ISF AAAAAAAAAAAAAAA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX TTTTTTTTTTTTTTT +synthetic_10_reads_paired_synthetic_10_reads_paired pe ../input_files/pe_lane2/synthetic_split_lane2.mate_1.fastq.gz ../input_files/pe_lane2/synthetic_split_lane2.mate_2.fastq.gz 75 31 AGATCGGAAGAGCACA XXXXXXXXXXXXXXX AGATCGGAAGAGCGT XXXXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa 100 250 10 EndToEnd None ISF AAAAAAAAAAAAAAA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX TTTTTTTTTTTTTTT +synthetic_10_reads_mate_1_synthetic_10_reads_mate_1 se ../input_files/se_lane1/synthetic_split_lane1.mate_1.fastq.gz XXXXXXXXXXXXXXX 75 31 AGATCGGAAGAGCACA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa 100 250 10 EndToEnd None SF AAAAAAAAAAAAAAA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX +synthetic_10_reads_mate_1_synthetic_10_reads_mate_1 se ../input_files/se_lane2/synthetic_split_lane2.mate_1.fastq.gz XXXXXXXXXXXXXXX 75 31 AGATCGGAAGAGCACA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa 100 250 10 EndToEnd None SF AAAAAAAAAAAAAAA XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX XXXXXXXXXXXXXXX diff --git a/tests/input_files/samples.tsv b/tests/input_files/samples.tsv index 4b2630287454e8a7d4dbc7652f1897410a905ef5..db8af5b7930ac839ce395b638b7266e178712a60 100644 --- a/tests/input_files/samples.tsv +++ b/tests/input_files/samples.tsv @@ -1,3 +1,3 @@ -sample seqmode fq1 index_size kmer fq1_3p fq1_5p organism gtf genome sd mean multimappers soft_clip pass_mode libtype fq1_polya_3p fq1_polya_5p kallisto_directionality alfa_directionality alfa_plus alfa_minus fq2 fq2_3p fq2_5p fq2_polya_3p fq2_polya_5p -synthetic_10_reads_paired_synthetic_10_reads_paired pe ../input_files/project1/synthetic.mate_1.fastq.gz 75 31 AGATCGGAAGAGCACA XXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa 100 250 10 EndToEnd None A AAAAAAAAAAAAAAAAA XXXXXXXXXXXXXXXXX --fr fr-firststrand str1 str2 ../input_files/project1/synthetic.mate_2.fastq.gz AGATCGGAAGAGCGT XXXXXXXXXXXXX XXXXXXXXXXXXXXXXX TTTTTTTTTTTTTTTTT -synthetic_10_reads_mate_1_synthetic_10_reads_mate_1 se ../input_files/project2/synthetic.mate_1.fastq.gz 75 31 AGATCGGAAGAGCACA XXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa 100 250 10 EndToEnd None A AAAAAAAAAAAAAAAAA XXXXXXXXXXXXXXXXX --fr fr-firststrand str1 str2 XXXXXXXXXXXXX XXXXXXXXXXXXX XXXXXXXXXXXXX XXXXXXXXXXXXX XXXXXXXXXXXXX +sample seqmode fq1 index_size kmer fq1_3p fq1_5p organism gtf genome sd mean multimappers soft_clip pass_mode salmon_code fq1_polya_3p fq1_polya_5p fq2 fq2_3p fq2_5p fq2_polya_3p fq2_polya_5p +synthetic_10_reads_paired_synthetic_10_reads_paired pe ../input_files/project1/synthetic.mate_1.fastq.gz 75 31 AGATCGGAAGAGCACA XXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa 100 250 10 EndToEnd None ISF AAAAAAAAAAAAAAAAA XXXXXXXXXXXXXXXXX ../input_files/project1/synthetic.mate_2.fastq.gz AGATCGGAAGAGCGT XXXXXXXXXXXXX XXXXXXXXXXXXXXXXX TTTTTTTTTTTTTTTTT +synthetic_10_reads_mate_1_synthetic_10_reads_mate_1 se ../input_files/project2/synthetic.mate_1.fastq.gz 75 31 AGATCGGAAGAGCACA XXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/annotation.gtf ../input_files/homo_sapiens/genome.fa 100 250 10 EndToEnd None SF AAAAAAAAAAAAAAAAA XXXXXXXXXXXXXXXXX XXXXXXXXXXXXX XXXXXXXXXXXXX XXXXXXXXXXXXX XXXXXXXXXXXXX XXXXXXXXXXXXX diff --git a/tests/input_files/samples_alfa.tsv b/tests/input_files/samples_alfa.tsv index af1835c28f16f31fe571797e3ae07355d1710e50..3fa91e6247704e3e8430b42c467dbbba208c32e2 100644 --- a/tests/input_files/samples_alfa.tsv +++ b/tests/input_files/samples_alfa.tsv @@ -1,5 +1,5 @@ -sample seqmode fq1 index_size kmer fq2 fq1_3p fq1_5p fq2_3p fq2_5p organism gtf genome sd mean multimappers soft_clip pass_mode libtype kallisto_directionality fq1_polya fq2_polya alfa_directionality alfa_plus alfa_minus -paired_end_R1_on_plus_sense pe XXXXXXXXXXXXX 75 31 XXXXXXXXXXXXX GATCGGAAGAGCACA XXXXXXXXXXXXX AGATCGGAAGAGCGT XXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/quick_start.gtf XXXXXXXXXXXXX 100 250 10 EndToEnd None A --fr AAAAAAAAAAAAAAAAA TTTTTTTTTTTTTTTTT fr-firststrand str1 str2 -paired_end_R1_on_plus_antisense pe XXXXXXXXXXXXX 75 31 XXXXXXXXXXXXX AGATCGGAAGAGCACA XXXXXXXXXXXXX AGATCGGAAGAGCGT XXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/quick_start.gtf XXXXXXXXXXXXX 100 250 10 EndToEnd None A --rf AAAAAAAAAAAAAAAAA TTTTTTTTTTTTTTTTT fr-secondstrand str2 str1 -paired_end_R1_on_minus_sense pe XXXXXXXXXXXXX 75 31 XXXXXXXXXXXXX AGATCGGAAGAGCACA XXXXXXXXXXXXX AGATCGGAAGAGCGT XXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/quick_start.gtf XXXXXXXXXXXXX 100 250 10 EndToEnd None A --fr AAAAAAAAAAAAAAAAA TTTTTTTTTTTTTTTTT fr-firststrand str1 str2 -paired_end_R1_on_minus_antisense pe XXXXXXXXXXXXX 75 31 XXXXXXXXXXXXX AGATCGGAAGAGCACA XXXXXXXXXXXXX AGATCGGAAGAGCGT XXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/quick_start.gtf XXXXXXXXXXXXX 100 250 10 EndToEnd None A --rf AAAAAAAAAAAAAAAAA TTTTTTTTTTTTTTTTT fr-secondstrand str2 str1 +sample seqmode fq1 index_size kmer fq2 fq1_3p fq1_5p fq2_3p fq2_5p organism gtf genome sd mean multimappers soft_clip pass_mode salmon_code fq1_polya fq2_polya +paired_end_R1_on_plus_sense pe XXXXXXXXXXXXX 75 31 XXXXXXXXXXXXX GATCGGAAGAGCACA XXXXXXXXXXXXX AGATCGGAAGAGCGT XXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/quick_start.gtf XXXXXXXXXXXXX 100 250 10 EndToEnd None ISF AAAAAAAAAAAAAAAAA TTTTTTTTTTTTTTTTT +paired_end_R1_on_plus_antisense pe XXXXXXXXXXXXX 75 31 XXXXXXXXXXXXX AGATCGGAAGAGCACA XXXXXXXXXXXXX AGATCGGAAGAGCGT XXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/quick_start.gtf XXXXXXXXXXXXX 100 250 10 EndToEnd None ISR AAAAAAAAAAAAAAAAA TTTTTTTTTTTTTTTTT +paired_end_R1_on_minus_sense pe XXXXXXXXXXXXX 75 31 XXXXXXXXXXXXX AGATCGGAAGAGCACA XXXXXXXXXXXXX AGATCGGAAGAGCGT XXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/quick_start.gtf XXXXXXXXXXXXX 100 250 10 EndToEnd None ISF AAAAAAAAAAAAAAAAA TTTTTTTTTTTTTTTTT +paired_end_R1_on_minus_antisense pe XXXXXXXXXXXXX 75 31 XXXXXXXXXXXXX AGATCGGAAGAGCACA XXXXXXXXXXXXX AGATCGGAAGAGCGT XXXXXXXXXXXXX homo_sapiens ../input_files/homo_sapiens/quick_start.gtf XXXXXXXXXXXXX 100 250 10 EndToEnd None ISR AAAAAAAAAAAAAAAAA TTTTTTTTTTTTTTTTT diff --git a/workflow/Snakefile b/workflow/Snakefile index d46c511b58c1152403ff0b05749cab4a796e34f8..efa402ba99f5fd9ae20d51acca46324188173ca5 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -17,6 +17,20 @@ samples_table = pd.read_csv( sep="\t", ) +# dict for translation of "directionality parameters" +directionality_dict = { + "SF": + {"kallisto":"--fr-stranded", + "alfa": "forward", + "alfa_plus": "str1", + "alfa_minus": "str2"}, + "SR": + {"kallisto":"--rf-stranded", + "alfa": "reverse", + "alfa_plus": "str2", + "alfa_minus": "str1"}, +} + # Parse YAML rule config file if 'rule_config' in config and config['rule_config']: try: @@ -52,6 +66,12 @@ def get_sample(column_id, search_id=None, search_value=None): else: return str(samples_table[column_id][0]) +def get_directionality(salmon_code, tool): + """ Get directionality value for different tools""" + for key in directionality_dict.keys(): + if key in salmon_code: + return directionality_dict[key][tool] + def parse_rule_config(rule_config: dict, current_rule: str, immutable: Tuple[str, ...] = ()): """Get rule specific parameters from rule_config file""" @@ -1295,10 +1315,10 @@ rule rename_star_rpm_for_alfa: "{sample}_Signal.{unique}.{plus}.out.bg"), sample=wildcards.sample, unique=wildcards.unique, - plus=get_sample( - 'alfa_plus', + plus=get_directionality(get_sample( + 'salmon_code', search_id='index', - search_value=wildcards.sample)), + search_value=wildcards.sample),"alfa_plus")), minus = lambda wildcards: expand( os.path.join( @@ -1309,10 +1329,10 @@ rule rename_star_rpm_for_alfa: "{sample}_Signal.{unique}.{minus}.out.bg"), sample=wildcards.sample, unique=wildcards.unique, - minus=get_sample( - 'alfa_minus', + minus=get_directionality(get_sample( + 'salmon_code', search_id='index', - search_value=wildcards.sample)) + search_value=wildcards.sample), "alfa_minus")) output: plus = temp(os.path.join( @@ -1490,10 +1510,10 @@ rule alfa_qc: os.path.basename(input.minus), name = "{sample}", alfa_orientation = lambda wildcards: - get_sample( - 'alfa_directionality', - search_id='index', - search_value=wildcards.sample), + get_directionality(get_sample( + 'salmon_code', + search_id='index', + search_value=wildcards.sample),"alfa"), additional_params = parse_rule_config( rule_config, current_rule=current_rule, diff --git a/workflow/rules/paired_end.snakefile.smk b/workflow/rules/paired_end.snakefile.smk index 37c55460935a5bc1c86716f3ad69efc96e7e5fde..11428959b83b839a7562983aec5da88c9cc7c0db 100644 --- a/workflow/rules/paired_end.snakefile.smk +++ b/workflow/rules/paired_end.snakefile.smk @@ -390,7 +390,7 @@ rule pe_quantification_salmon: "{sample}.salmon.pe"), libType = lambda wildcards: get_sample( - 'libtype', + 'salmon_code', search_id='index', search_value=wildcards.sample), additional_params = parse_rule_config( @@ -489,10 +489,10 @@ rule pe_genome_quantification_kallisto: "{sample}", "quant_kallisto"), directionality = lambda wildcards: - get_sample( - 'kallisto_directionality', - search_id='index', - search_value=wildcards.sample), + get_directionality(get_sample( + 'salmon_code', + search_id='index', + search_value=wildcards.sample),"kallisto"), additional_params = parse_rule_config( rule_config, current_rule=current_rule, @@ -529,7 +529,7 @@ rule pe_genome_quantification_kallisto: -i {input.index} \ -o {params.output_dir} \ -t {threads} \ - {params.directionality}-stranded \ + {params.directionality} \ {params.additional_params} \ --pseudobam \ {input.reads1} {input.reads2} > {output.pseudoalignment}) \ diff --git a/workflow/rules/single_end.snakefile.smk b/workflow/rules/single_end.snakefile.smk index e270aba84b8a70d69780aa5f31891b991dc8f54f..fd210337cbd6686614c89c412f89e4719ec242c4 100644 --- a/workflow/rules/single_end.snakefile.smk +++ b/workflow/rules/single_end.snakefile.smk @@ -329,7 +329,7 @@ rule quantification_salmon: "{sample}.salmon.se"), libType = lambda wildcards: get_sample( - 'libtype', + 'salmon_code', search_id='index', search_value=wildcards.sample), fraglen = lambda wildcards: @@ -442,10 +442,10 @@ rule genome_quantification_kallisto: search_id='index', search_value=wildcards.sample), directionality = lambda wildcards: - get_sample( - 'kallisto_directionality', - search_id='index', - search_value=wildcards.sample), + get_directionality(get_sample( + 'salmon_code', + search_id='index', + search_value=wildcards.sample),"kallisto"), additional_params = parse_rule_config( rule_config, current_rule=current_rule, @@ -484,7 +484,7 @@ rule genome_quantification_kallisto: -l {params.fraglen} \ -s {params.fragsd} \ -t {threads} \ - {params.directionality}-stranded \ + {params.directionality} \ {params.additional_params} \ --pseudobam \ {input.reads} > {output.pseudoalignment};) \