Skip to content
Snippets Groups Projects
Commit d04df571 authored by BIOPZ-Bak Maciej
Browse files

updated salmon transcriptome index generation

parent 4b1b5941
No related branches found
No related tags found
1 merge request: !59 "Update salmon transcriptome index generation"
Pipeline #10578 passed
......@@ -121,6 +121,7 @@ rule create_index_star:
--sjdbGTFfile {input.gtf}) \
1> {log.stdout} 2> {log.stderr}"
rule extract_transcriptome:
""" Create transcriptome from genome and gene annotations """
input:
......@@ -154,9 +155,49 @@ rule extract_transcriptome:
-g {input.genome} {input.gtf}) \
1> {log.stdout} 2> {log.stderr}"
rule create_index_salmon:
rule extract_decoys_salmon:
    """
        Extract names of the genome targets

        Writes one sequence name per line to decoys.txt: the first
        space-delimited token of every FASTA header line of the genome,
        with the '>' marker removed. The resulting file is consumed by
        rule create_index_salmon via `--decoys`.
    """
    input:
        # Genome FASTA of the first samples_table row whose organism matches
        # the wildcard. `.iloc[0]` selects by position; a bare `[0]` is a
        # label lookup that only falls back to positional access (deprecated
        # in recent pandas) when the index is not integer-typed.
        genome = lambda wildcards:
            samples_table['genome']
            [samples_table['organism'] == wildcards.organism]
            .iloc[0],

    output:
        decoys = os.path.join(
            config['output_dir'],
            "transcriptome",
            "{organism}",
            "decoys.txt")

    singularity:
        "docker://bash:5.0.16"

    log:
        stderr = os.path.join(
            config['log_dir'],
            "{organism}_extract_decoys_salmon.stderr.log"),
        stdout = os.path.join(
            config['log_dir'],
            "{organism}_extract_decoys_salmon.stdout.log")

    threads: 1

    # Single pipeline instead of `sed -i.bak` on the finished file: the
    # in-place edit left an undeclared decoys.txt.bak next to the output.
    # The sed expression is unchanged, so decoys.txt has the same content.
    shell:
        """
        (grep "^>" <{input.genome} \
        | cut -d " " -f 1 \
        | sed -e 's/>//g' > {output.decoys}) \
        1> {log.stdout} 2> {log.stderr}
        """
rule concatenate_transcriptome_and_genome:
"""
Concatenate genome and transcriptome
"""
input:
transcriptome = os.path.join(
......@@ -164,36 +205,78 @@ rule create_index_salmon:
"transcriptome",
"{organism}",
"transcriptome.fa",
),
genome = lambda wildcards:
samples_table['genome']
[samples_table['organism'] == wildcards.organism]
[0],
output:
genome_transcriptome = os.path.join(
config['output_dir'],
"transcriptome",
"{organism}",
"genome_transcriptome.fa",
)
singularity:
"docker://bash:5.0.16"
log:
stderr = os.path.join(
config['log_dir'],
"{organism}_concatenate_transcriptome_and_genome.stderr.log"),
stdout = os.path.join(
config['log_dir'],
"{organism}_concatenate_transcriptome_and_genome.stdout.log")
shell:
"(cat {input.transcriptome} {input.genome} \
1> {output.genome_transcriptome}) \
1> {log.stdout} 2> {log.stderr}"
rule create_index_salmon:
    """
        Create index for Salmon quantification

        Indexes the concatenated transcriptome + genome FASTA and passes the
        genome sequence names as decoys (`--decoys`).
    """
    input:
        genome_transcriptome = os.path.join(
            config['output_dir'],
            "transcriptome",
            "{organism}",
            "genome_transcriptome.fa",
        ),
        decoys = os.path.join(
            config['output_dir'],
            "transcriptome",
            "{organism}",
            "decoys.txt")

    output:
        # No "{kmer}" path component: the log files below carry only the
        # {organism} wildcard, and Snakemake requires output and log files of
        # a rule to share the same wildcards. The quantification rules look
        # the index up at <salmon_indexes>/<organism>/salmon.idx as well.
        index = directory(
            os.path.join(
                config['salmon_indexes'],
                "{organism}",
                "salmon.idx"))

    singularity:
        "docker://zavolab/salmon:1.1.0-slim"

    log:
        stderr = os.path.join(
            config['log_dir'],
            "{organism}_create_index_salmon.stderr.log"),
        stdout = os.path.join(
            config['log_dir'],
            "{organism}_create_index_salmon.stdout.log")

    threads: 8

    # `--transcripts` is given exactly once and points at the declared input
    # (this rule has no `input.transcriptome`).
    # NOTE(review): without the {kmer} wildcard no `--kmerLen` is passed, so
    # salmon's built-in default k-mer length applies -- confirm this is the
    # intended value.
    shell:
        "(salmon index \
        --transcripts {input.genome_transcriptome} \
        --decoys {input.decoys} \
        --index {output.index} \
        --threads {threads}) \
        1> {log.stdout} 2> {log.stderr}"
......
This diff is collapsed.
This diff is collapsed.
......@@ -26,6 +26,18 @@
"threads":"1",
"mem":"1G"
},
"extract_decoys_salmon":
{
"time": "00:30:00",
"threads":"1",
"mem":"10G"
},
"concatenate_transcriptome_and_genome":
{
"time": "00:30:00",
"threads":"1",
"mem":"10G"
},
"create_index_salmon":
{
"time": "03:00:00",
......
results/kallisto_indexes/homo_sapiens/kallisto.idx
results/salmon_indexes/homo_sapiens/31/salmon.idx/versionInfo.json
results/salmon_indexes/homo_sapiens/31/salmon.idx/duplicate_clusters.tsv
results/salmon_indexes/homo_sapiens/31/salmon.idx/info.json
results/salmon_indexes/homo_sapiens/salmon.idx/versionInfo.json
results/salmon_indexes/homo_sapiens/salmon.idx/duplicate_clusters.tsv
results/salmon_indexes/homo_sapiens/salmon.idx/info.json
results/star_indexes/homo_sapiens/75/STAR_index/chrLength.txt
results/star_indexes/homo_sapiens/75/STAR_index/chrNameLength.txt
results/star_indexes/homo_sapiens/75/STAR_index/chrName.txt
......
cbaebdb67aee4784b64aff7fec9fda42 results/kallisto_indexes/homo_sapiens/kallisto.idx
0ac1afd9a4f380afd70be75b21814c64 results/salmon_indexes/homo_sapiens/31/salmon.idx/versionInfo.json
51b5292e3a874119c0e1aa566e95d70c results/salmon_indexes/homo_sapiens/31/salmon.idx/duplicate_clusters.tsv
4c1ab7841bbd1a1e8e3b15e7750ecc38 results/salmon_indexes/homo_sapiens/31/salmon.idx/info.json
0ac1afd9a4f380afd70be75b21814c64 results/salmon_indexes/homo_sapiens/salmon.idx/versionInfo.json
51b5292e3a874119c0e1aa566e95d70c results/salmon_indexes/homo_sapiens/salmon.idx/duplicate_clusters.tsv
92549565e504901d09216347f0b407f0 results/salmon_indexes/homo_sapiens/salmon.idx/info.json
dee7cdc194d5d0617552b7a3b5ad8dfb results/star_indexes/homo_sapiens/75/STAR_index/chrLength.txt
8e2e96e2d6b7f29940ad5de40662b7cb results/star_indexes/homo_sapiens/75/STAR_index/chrNameLength.txt
d0826904b8afa45352906ad9591f2bfb results/star_indexes/homo_sapiens/75/STAR_index/chrName.txt
......
......@@ -303,7 +303,6 @@ rule pe_quantification_salmon:
os.path.join(
config["salmon_indexes"],
str(samples_table.loc[wildcards.sample, "organism"]),
str(samples_table.loc[wildcards.sample, "kmer"]),
"salmon.idx")
output:
......
......@@ -247,7 +247,6 @@ rule quantification_salmon:
os.path.join(
config["salmon_indexes"],
str(samples_table.loc[wildcards.sample, "organism"]),
str(samples_table.loc[wildcards.sample, "kmer"]),
"salmon.idx"),
gtf = lambda wildcards:
samples_table.loc[wildcards.sample, "gtf_filtered"]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment