diff --git a/envs/env_alfa.yaml b/envs/env_alfa.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0faa646d7cd22384c8b7facee776cbb49c067ba6
--- /dev/null
+++ b/envs/env_alfa.yaml
@@ -0,0 +1,8 @@
+name: alfa
+channels:
+  - biocomp
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - alfa=1.1.1
diff --git a/snakemake/Snakefile b/snakemake/Snakefile
index b541c1e992f2c1a127eeff1956832ef93ae03eed..187a19adf60b480f61b69907ac3ac85928e7915f 100644
--- a/snakemake/Snakefile
+++ b/snakemake/Snakefile
@@ -130,3 +130,93 @@ rule create_index_kallisto:
         chmod -R 777 {params.output_dir}; \
         kallisto index -i {output.index} {input.transcriptome}) &> {log}"
 
+rule generate_alfa_index:
+    ''' Generate stranded and unstranded ALFA index files from a sorted GTF file '''
+    input:
+        gtf = "/Users/dominikburri/Server_worker/polyasite/datasets/ref_transcriptome/refdata-cellranger-GRCh38-3.0.0/genes/genes.sorted.gtf"
+    output:
+        index_stranded = "alfa_index/sorted_genes.stranded.ALFA_index",
+        index_unstranded = "alfa_index/sorted_genes.unstranded.ALFA_index"
+    params:
+        # NOTE(review): placeholder value that is not passed to alfa below
+        chr_len = "CHR_LENGTH_FILE",
+        genome_index = "sorted_genes",
+        # Plain string: directory() is an output flag and does not belong in params
+        out_dir = "alfa_index/"
+    threads:
+        1
+    conda:
+        "../envs/env_alfa.yaml"
+    log:
+        "generate_alfa_index.log"
+    shell:
+        "alfa -a {input.gtf} \
+        -g {params.genome_index} \
+        -p {threads} \
+        -o {params.out_dir} &> {log}"
+
+rule star_rpm:
+    ''' Create bedgraph coverage with STARs RPM normalisation '''
+    input:
+        "/scicore/home/zavolan/burri0000/test_alfa/sample/chr21.bam"
+    output:
+        "/scicore/home/zavolan/burri0000/test_alfa/sample/star_rpm/STAR_Signal.UniqueMultiple.str1.out.bg",
+        "/scicore/home/zavolan/burri0000/test_alfa/sample/star_rpm/STAR_Signal.UniqueMultiple.str2.out.bg"
+    params:
+        out_dir = "/scicore/home/zavolan/burri0000/test_alfa/sample/star_rpm/",
+        # Prefix must stay in sync with the "STAR_" part of the output file names above
+        filenameprefix = "/scicore/home/zavolan/burri0000/test_alfa/sample/star_rpm/STAR_"
+    singularity:
+        "docker://zavolab/star:2.6.0a"
+    threads: 4
+    log: "logs/star_rpm.log"
+    shell:
+        """
+        (mkdir -p {params.out_dir}; \
+        chmod -R 777 {params.out_dir}; \
+        STAR \
+        --runMode inputAlignmentsFromBAM \
+        --runThreadN {threads} \
+        --inputBAMfile {input} \
+        --outWigType "bedGraph" \
+        --outWigStrand "Stranded" \
+        --outWigNorm "RPM" \
+        --outFileNamePrefix {params.filenameprefix}) &> {log}
+        """
+
+rule run_alfa_bg_stranded:
+    ''' Run ALFA from stranded bedgraph files '''
+    input:
+        "/scicore/home/zavolan/burri0000/test_alfa/sample/star_rpm/STAR_Signal.UniqueMultiple.str1.out.bg",
+        "/scicore/home/zavolan/burri0000/test_alfa/sample/star_rpm/STAR_Signal.UniqueMultiple.str2.out.bg"
+    output:
+        directory("output/")
+    params:
+        genome_index = "sorted_genes"
+    # The bedgraph option is spelled --bedgraph (long option, two dashes).
+    # NOTE(review): only the second (reverse) call passes -o {output}; the
+    # forward call has no -o -- confirm this is intended.
+    shell:
+        """
+        alfa -g {params.genome_index} \
+        --bedgraph {input} \
+        -s forward; \
+        alfa -g {params.genome_index} \
+        --bedgraph {input} \
+        -s reverse \
+        -o {output}
+        """
+
+rule run_alfa_bg_unstranded:
+    ''' Run ALFA from unstranded bedgraph files '''
+    input:
+        "/scicore/home/zavolan/burri0000/test_alfa/sample/star_rpm/STAR_Signal.UniqueMultiple.str1.out.bg",
+        "/scicore/home/zavolan/burri0000/test_alfa/sample/star_rpm/STAR_Signal.UniqueMultiple.str2.out.bg"
+    # NOTE(review): no outputs are declared for this rule yet (empty output: directive removed)
+    params:
+        genome_index = "sorted_genes"
+    shell:
+        """
+        alfa -g {params.genome_index} \
+        --bedgraph {input}
+        """
\ No newline at end of file