From cb46aa34af6b6a728528e8793d317287bacb54c9 Mon Sep 17 00:00:00 2001 From: fgypas <fgypas@gmail.com> Date: Fri, 20 Dec 2019 15:56:48 +0100 Subject: [PATCH] Remove prepare_annotation directory. Rename process_data to snakemake. --- .gitignore | 2 +- prepare_annotation/.gitignore | 6 - prepare_annotation/Snakefile | 177 ---------------- prepare_annotation/cluster.json | 23 --- prepare_annotation/config.yaml | 43 ---- prepare_annotation/other.fa | 195 ------------------ process_data/.gitignore | 7 - process_data/create_snakemake_flowchart.sh | 1 - process_data/run_snakefile.sh | 13 -- .../fg_extract_transcripts.py | 0 {process_data => snakemake}/Snakefile | 0 {process_data => snakemake}/cluster.json | 0 {process_data => snakemake}/config.yaml | 0 .../create_snakemake_flowchart.sh | 0 .../paired_end.snakemake | 0 .../preprocessing.snakefile | 0 .../run_snakefile.sh | 0 .../single_end.snakefile | 0 18 files changed, 1 insertion(+), 466 deletions(-) delete mode 100644 prepare_annotation/.gitignore delete mode 100644 prepare_annotation/Snakefile delete mode 100644 prepare_annotation/cluster.json delete mode 100644 prepare_annotation/config.yaml delete mode 100644 prepare_annotation/other.fa delete mode 100644 process_data/.gitignore delete mode 100755 process_data/create_snakemake_flowchart.sh delete mode 100755 process_data/run_snakefile.sh rename {prepare_annotation/scripts => scripts}/fg_extract_transcripts.py (100%) rename {process_data => snakemake}/Snakefile (100%) rename {process_data => snakemake}/cluster.json (100%) rename {process_data => snakemake}/config.yaml (100%) rename {prepare_annotation => snakemake}/create_snakemake_flowchart.sh (100%) rename {process_data => snakemake}/paired_end.snakemake (100%) rename {process_data => snakemake}/preprocessing.snakefile (100%) rename {prepare_annotation => snakemake}/run_snakefile.sh (100%) rename {process_data => snakemake}/single_end.snakefile (100%) diff --git a/.gitignore b/.gitignore index 8b13789..e43b0f9 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ - +.DS_Store diff --git a/prepare_annotation/.gitignore b/prepare_annotation/.gitignore deleted file mode 100644 index 06cd409..0000000 --- a/prepare_annotation/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -.* -Log.out -results -logs -dag.png -nohup.out diff --git a/prepare_annotation/Snakefile b/prepare_annotation/Snakefile deleted file mode 100644 index 94c9db7..0000000 --- a/prepare_annotation/Snakefile +++ /dev/null @@ -1,177 +0,0 @@ -configfile: "config.yaml" - -localrules: download_genome, assemble_genome, download_annotation, assemble_annotation, finish - -################################################################################# -### Final rule -################################################################################# - -rule finish: - input: - STAR_index = os.path.join(config["output_dir"], "STAR_index"), - other_RNA_idx = os.path.join(config["output_dir"], "other_RNAs_sequence.idx"), - salmon_index = os.path.join(config["output_dir"], "filtered_transcripts_salmon.idx") - -################################################################################# -### Download genome -################################################################################# - -rule download_genome: - params: - sequences = expand(config["genome"]), - output: - genome_dir = os.path.join(config["output_dir"], "genome") - singularity: - "docker://zavolab/ubuntu:18.04" - log: - os.path.join(config["local_log"], "download_genome.log") - shell: - "(wget --directory-prefix {output.genome_dir} {params.sequences}) &> {log}" - -################################################################################# -### Assemble genome -################################################################################# - -rule assemble_genome: - input: - genome_dir = os.path.join(config["output_dir"], "genome") - output: - genome = os.path.join(config["output_dir"], "genome.fa") - params: - genome = os.path.join(config["output_dir"], "genome.fa.gz") - singularity: - "docker://zavolab/ubuntu:18.04" - log: - os.path.join(config["local_log"], "assemble_genome.log") - shell: - "(cat {input.genome_dir}/* > {params.genome}; \ - zcat {params.genome} | sed \'s/\s.*//\' > {output.genome}; \ - rm {params.genome}; \ - ) &>{log}" - -################################################################################# -### Download annotation -################################################################################# - -rule download_annotation: - params: - annotation = expand(config["annotation"]), - output: - annotation_dir = os.path.join(config["output_dir"], "annotation") - singularity: - "docker://zavolab/ubuntu:18.04" - log: - os.path.join(config["local_log"], "download_annotation.log") - shell: - "(wget --directory-prefix {output.annotation_dir} {params.annotation}) &> {log}" - -################################################################################# -### Assemble annotation -################################################################################# - -rule assemble_annotation: - input: - annotation_dir = os.path.join(config["output_dir"], "annotation") - output: - annotation = os.path.join(config["output_dir"], "annotation.gtf") - params: - annotation = os.path.join(config["output_dir"], "annotation.gtf.gz") - singularity: - "docker://zavolab/ubuntu:18.04" - log: - os.path.join(config["local_log"], "assemble_annotation.log") - shell: - "(cat {input.annotation_dir}/* > {params.annotation}; \ - zcat {params.annotation} > {output.annotation}; \ - rm {params.annotation}; \ - ) &>{log}" - -################################################################################# -### ToDo: Download other RNA -################################################################################# - -################################################################################# -### Generate segemehl index for other RNAs -################################################################################# - -rule generate_segemehl_index_other_RNAs: - input: - sequence = config["other_RNA"] - output: - other_RNA_idx = os.path.join(config["output_dir"], "other_RNAs_sequence.idx") - log: - os.path.join(config["local_log"], "generate_segemehl_index_other_RNAs.log") - singularity: - "docker://zavolab/segemehl:0.2.0" - shell: - "(segemehl.x -x {output.other_RNA_idx} -d {input.sequence}) &> {log}" - -################################################################################# -### Index genome STAR -################################################################################# - - - -################################################################################## -### Filter protein coding and lncRNA transcripts -################################################################################## - -rule filter_transcripts: - input: - script = "scripts/fg_extract_transcripts.py", - annotation = os.path.join(config["output_dir"], "annotation.gtf") - output: - annotation = os.path.join(config["output_dir"], "filtered_transcripts.gtf") - params: - transcript_biotype = "\"protein_coding,lincRNA,antisense_RNA,retained_intron,sense_intronic\"" - singularity: - "docker://zavolab/python_htseq:3.6.5_0.10.0" - log: - os.path.join(config["local_log"], "filter_transcripts.log") - shell: - "(python {input.script} \ - --gtf {input.annotation} \ - --out {output.annotation} \ - --transcript_biotype {params.transcript_biotype}) &> {log}" - -################################################################################## -### Extract transcript sequences -################################################################################## - -rule extract_sequences: - input: - annotation = os.path.join(config["output_dir"], "filtered_transcripts.gtf"), - genome = os.path.join(config["output_dir"], "genome.fa") - output: - transcripts = os.path.join(config["output_dir"], "filtered_transcripts.fa") - singularity: - "docker://zavolab/cufflinks:2.2.1" - log: - os.path.join(config["local_log"],"extract_sequences.log") - shell: - "(gffread {input.annotation} \ - -g {input.genome} \ - -w {output.transcripts}) &> {log}" - -################################################################################## -### Index salmon -################################################################################## - -rule index_salmon: - input: - transcripts = os.path.join(config["output_dir"], "filtered_transcripts.fa") - output: - index = os.path.join(config["output_dir"], "filtered_transcripts_salmon.idx") - params: - kmerLen = config["kmerLen"], - singularity: - "docker://zavolab/salmon:0.11.0" - log: - os.path.join(config["local_log"],"index_salmon.log") - threads: 8 - shell: - "(salmon index \ - --transcripts {input.transcripts} \ - --index {output.index} \ - --kmerLen {params.kmerLen} \ - --threads {threads}) &> {log}" diff --git a/prepare_annotation/cluster.json b/prepare_annotation/cluster.json deleted file mode 100644 index a6bfdd5..0000000 --- a/prepare_annotation/cluster.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "__default__" : - { - "queue": "6hours", - "time": "05:00:00", - "threads": "1", - "mem": "4G", - "name": "{rule}.{wildcards}", - "out": "$PWD/logs/cluster_log/{rule}.{wildcards}-%j-%N.out" - }, - "generate_segemehl_index_other_RNAs": - { - "time": "06:00:00", - "threads":"8", - "mem":"50G" - }, - "index_genome_STAR": - { - "time": "06:00:00", - "threads":"8", - "mem":"75G" - } -} diff --git a/prepare_annotation/config.yaml b/prepare_annotation/config.yaml deleted file mode 100644 index 92249c9..0000000 --- a/prepare_annotation/config.yaml +++ /dev/null @@ -1,43 +0,0 @@ ---- - ############################################################################## - ### Annotation - ############################################################################## - genome: ["ftp://ftp.ensembl.org/pub/release-89/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.1.fa.gz", - "ftp://ftp.ensembl.org/pub/release-89/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.2.fa.gz", - "ftp://ftp.ensembl.org/pub/release-89/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.3.fa.gz", - "ftp://ftp.ensembl.org/pub/release-89/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.4.fa.gz", - "ftp://ftp.ensembl.org/pub/release-89/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.5.fa.gz", - "ftp://ftp.ensembl.org/pub/release-89/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.6.fa.gz", - "ftp://ftp.ensembl.org/pub/release-89/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.7.fa.gz", - "ftp://ftp.ensembl.org/pub/release-89/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.8.fa.gz", - "ftp://ftp.ensembl.org/pub/release-89/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.9.fa.gz", - "ftp://ftp.ensembl.org/pub/release-89/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.10.fa.gz", - "ftp://ftp.ensembl.org/pub/release-89/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.11.fa.gz", - "ftp://ftp.ensembl.org/pub/release-89/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.12.fa.gz", - "ftp://ftp.ensembl.org/pub/release-89/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.13.fa.gz", - "ftp://ftp.ensembl.org/pub/release-89/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.14.fa.gz", - "ftp://ftp.ensembl.org/pub/release-89/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.15.fa.gz", - "ftp://ftp.ensembl.org/pub/release-89/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.16.fa.gz", - "ftp://ftp.ensembl.org/pub/release-89/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.17.fa.gz", - "ftp://ftp.ensembl.org/pub/release-89/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.18.fa.gz", - "ftp://ftp.ensembl.org/pub/release-89/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.19.fa.gz", - "ftp://ftp.ensembl.org/pub/release-89/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.20.fa.gz", - "ftp://ftp.ensembl.org/pub/release-89/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.21.fa.gz", - "ftp://ftp.ensembl.org/pub/release-89/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.22.fa.gz", - "ftp://ftp.ensembl.org/pub/release-89/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.X.fa.gz", - "ftp://ftp.ensembl.org/pub/release-89/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.Y.fa.gz", - "ftp://ftp.ensembl.org/pub/release-89/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna_sm.chromosome.MT.fa.gz"] - annotation: ["ftp://ftp.ensembl.org/pub/release-89/gtf/homo_sapiens/Homo_sapiens.GRCh38.89.chr.gtf.gz"] - other_RNA: "other.fa" - ############################################################################## - ### Output and log directories - ############################################################################## - output_dir: "results" - local_log: "logs/local_log" - cluster_log: "logs/cluster_log" - ############################################################################## - ### Options - ############################################################################## - sjdbOverhang: 100 - kmerLen: 31 -... diff --git a/prepare_annotation/other.fa b/prepare_annotation/other.fa deleted file mode 100644 index 819e246..0000000 --- a/prepare_annotation/other.fa +++ /dev/null @@ -1,195 +0,0 @@ ->RNA45SN1 -GCTGACACGCTGTCCTCTGGCGACCTGTCGCTGGAGAGGTTGGGCCTCCGGATGCGCGCGGGGCTCTGGC -CTACCGGTGACCCGGCTAGCCGGCCGCGCTCCTGCTTGAGCCGCCTGCCGGGGCCCGCGGGCCTGCTGTT -CTCTCGCGCGTCCGAGCGTCCCGACTCCCGGTGCCGGCCCGGGTCCGGGTCTCTGACCCACCCGGGGGCG -GCGGGGAAGGCGGCGAGGGCCACCGTGCCCCCGTGCGCTCTCCGCTGCGGGCGCCCGGGGCGGCCGCGAC -AACCCCACCCCGCTGGCTCCGTGCCGTGCGTGTCAGGCGTTCTCGTCTCCGCGGGGTTGTCCGCCGCCCC -TTCCCCGGAGTGGGGGGTTGGCCGGAGCCGATCGGCTCGCTGGCCGGCCGGCCGGCCTCCGCTCCCGGGG -GGCTCTTCGTGATCGATGTGGTGACGTCGTGCTCTCCCGGGCCGGGTCCGAGCCGCGACGGGCGAGGGGC -GGACGTTCGTGGCGAACGGGACCGTCCTTCTCGCTCCGCCCCGCGGGGGTCCCCTCGTCTCTCCTCTCCC -CGCCCGCCGGCGGTGCGTGTGGGAAGGCGTGGGGTGCGGACCCCGGCCCGACCTCGCCGTCCCGCCCGCC -GCCTTCTGCGTCGCGGGGCGGGCCGGCGGGGTCCTCTGACGCGGCAGACAGCCCTCGCTGTCGCCTCCAG -TGGTTGTCGACTTGCGGGCGGCCCCCCTCCGCGGCGGTGGGGGTGCCGTCCCGCCGGCCCGTCGTGCTGC -CCTCTCGGGGGGTTTGCGCGAGCGTCGGCTCCGCCTGGGCCCTTGCGGTGCTCCTGGAGCGCTCCGGGTT -GTCCCTCAGGTGCCCGAGGCCGAACGGTGGTGTGTCGTTCCCGCCCCCGGCGCCCCCTCCTCCGGTCGCC -GCCGCGGTGTCCGCGCGTGGGTCCTGAGGGAGCTCGTCGGTGTGGGGTTCGAGGCGGTTTGAGTGAGACG -AGACGAGACGCGCCCCTCCCACGCGGGGAAGGGCGCCCGCCTGCTCTCGGTGAGCGCACGTCCCGTGCTC -CCCTCTGGCGGGTGCGCGCGGGCCGTGTGAGCGATCGCGGTGGGTTCGGGCCGGTGTGACGCGTGCGCCG -GCCGGCCGCCGAGGGGCTGCCGTTCTGCCTCCGACCGGTCGTGTGTGGGTTGACTTCGGAGGCGCTCTGC -CTCGGAAGGAAGGAGGTGGGTGGACGGGGGGGCCTGGTGGGGTTGCGCGCACGCGCGCACCGGCCGGGCC -CCCGCCCTGAACGCGAACGCTCGAGGTGGCCGCGCGCAGGTGTTTCCTCGTACCGCAGGGCCCCCTCCCT -TCCCCAGGCGTCCCTCGGCGCCTCTGCGGGCCCGAGGAGGAGCGGCTGGCGGGTGGGGGGAGTGTGACCC -ACCCTCGGTGAGAAAAGCCTTCTCTAGCGATCTGAGAGGCGTGCCTTGGGGGTACCGGATCCCCCGGGCC -GCCGCCTCTGTCTCTGCCTCCGTTATGGTAGCGCTGCCGTAGCGACCCGCTCGCAGAGGACCCTCCTCCG -CTTCCCCCTCGACGGGGTTGGGGGGGAGAAGCGAGGGTTCCGCCGGCCACCGCGGTGGTGGCCGAGTGCG -GCTCGTCGCCTACTGTGGCCCGCGCCTCCCCCTTCCGAGTCGGGGGAGGATCCCGCCGGGCCGGGCCCGG -CGTTCCCAGCGGGTTGGGACGCGGCGGCCGGCGGGCGGTGGGTGTGCGCGCCCGGCGCTCTGTCCGGCGC -GTGACCCCCTCCGCCGCGAGTCGGCTCTCCGCCCGCTCCCGTGCCGAGTCGTGACCGGTGCCGACGACCG -CGTTTGCGTGGCACGGGGTCGGGCCCGCCTGGCCCTGGGAAAGCGTCCCACGGTGGGGGCGCGCCGGTCT -CCCGGAGCGGGACCGGGTCGGAGGATGGACGAGAATCACGAGCGACGGTGGTGCGGGCGTGTCGGGTTCG -TGGCTGCGGTCGCTCCGGGGCCCCCGGTGGCGGGGCCCCGGGGCTCGCGAGGCGGTTCTCGGTGGGGGCC -GAGGGCCGTCCGGCGTCCCAGGCGGGGCGCCGCGGGACCGCCCTCGTGTCTGTGGCGGTGGGATCCCGCG -GCCGTGTTTTCCTGGTGGCCCGGCCGTGCCTGAGGTTTCTCCCCGAGCCGCCGCCTCTGCGGGCTCCCGG -GTGCCCTTGCCCTCGCGGTCCCCGGCCCTCGCCCGTCTGTGCCCTCTTCCCCGCCCGCCGCCCGCCGATC -CTCTTCTTCCCCCCGAGCGGCTCACCGGCTTCACGTCCGTTGGTGGCCCCGCCTGGGACCGAACCCGGCA -CCGCCTCGTGGGGCGCCGCCGCCGGCCACTGATCGGCCCGGCGTCCGCGTCCCCCGGCGCGCGCCTTGGG -GACCGGGTCGGTGGCGCCCCGCGTGGGGCCCGGTGGGCTTCCCGGAGGGTTCCGGGGGTCGGCCTGCGGC -GCGTGCGGGGGAGGAGACGGTTCCGGGGGACCGGCCGCGACTGCGGCGGCGGTGGTGGGGGCAGCCGCGG -GGATCGCCGAGGGCCGGTCGGCCGCCCCGGGTGCCGCGCGGTGCCGCCGGCGGCGGTGAGGCCCCGCGCG -TGTGTCCCGGCCGCGGTCGGCCGCGCTCGAGGGGTCCCCGTGGCGTCCCCTTCCCCGCCGGCCGCCTTTC -TCGCGCCTTCCCCGTCGCCCCGGCCTCGCCCGTGGTCTCTCGTCTTCTCCCGGCCCGCTCTTCCGAACCG -GGTCGGCGCGTCCCCCGGGTGCGCCTCGCTTCCCGGGCCTGCCGCGGCCCTTCCCCGAGGCGTCCGTCCC -GGGCGTCGGCGTCGGGGAGAGCCCGTCCTCCCCGCGTGGCGTCGCCCCGTTCGGCGCGCGCGTGCGCCCG -AGCGCGGCCCGGTGGTCCCTGCCGGACAGGCGTTCGTGCGACGTGTGGCGTGGGTCGACCTCCGCCTTGC -CGGTCGCTCGCCCTTTCCCCGGGTCGGGGGGTGGGGCCCGGGCCGGGGCCTCGGCCCCGGTCGCGGTCCC -CCGTCCCGGGCGGGGGCGGGCGCGCCGGCCGGCCTCGGTCGGCCCTCCCTTGGCCGTCGTGTGGCGTGTG -CCACCCCTGCGCCCGCGCCCGCCGGCGGGGCTCGGAGCCGGGCTTCGGCCGGGCCCCGGGCCCTCGACCG -GACCGGTGCGCGGGCGCTGCGGCCGCACGGCGCGACTGTCCCCGGGCCGGGCACCGCGGTCCGCCTCTCG -CTCGCCGCCCGGACGTCGGGGCCGCCCCGCGGGGCGGGCGGAGCGCCGTCCCCGCCTCGCCGCCGCCCGC -GGGCGCCGGCCGCGCGCGCGCGCGCGTGGCCGCCGGTCCCTCCCGGCCGCCGGGCGCGGGTCGGGCCGTC -CGCCTCCTCGCGGGCGGGCGCGACGAAGAAGCGTCGCGGGTCTGTGGCGCGGGGCCCCGGTGGTCGTGTC -GCGTGGGGGGCGGGTGGTTGGGGCGTCCGGTTCGCCGCGCCCCGCCCCGGCCCCACCGGTCCCGGCCGCC -GCCCCCGCGCCCGCTCGCTCCCTCCCGTCCGCCCGTCCGCGGCCCGTCCGTCCGTCCGTCGTCCTCCTCG -CTTGCGGGGCGCCGGGCCCGTCCTCGCGAGGCCCCCCGGCCGGCCGTCCGGCCGCGTCGGGGCCTCGCCG -CGCTCTACCTTACCTACCTGGTTGATCCTGCCAGTAGCATATGCTTGTCTCAAAGATTAAGCCATGCATG -TCTGAGTACGCACGGCCGGTACAGTGAAACTGCGAATGGCTCATTAAATCAGTTATGGTTCCTTTGGTCG -CTCGCTCCTCTCCTACTTGGATAACTGTGGTAATTCTAGAGCTAATACATGCCGACGGGCGCTGACCCCC -TTCGCGGGGGGGATGCGTGCATTTATCAGATCAAAACCAACCCGGTCAGCCCCTCTCCGGCCCCGGCCGG -GGGGCGGGCGCCGGCGGCTTTGGTGACTCTAGATAACCTCGGGCCGATCGCACGCCCCCCGTGGCGGCGA -CGACCCATTCGAACGTCTGCCCTATCAACTTTCGATGGTAGTCGCCGTGCCTACCATGGTGACCACGGGT -GACGGGGAATCAGGGTTCGATTCCGGAGAGGGAGCCTGAGAAACGGCTACCACATCCAAGGAAGGCAGCA -GGCGCGCAAATTACCCACTCCCGACCCGGGGAGGTAGTGACGAAAAATAACAATACAGGACTCTTTCGAG -GCCCTGTAATTGGAATGAGTCCACTTTAAATCCTTTAACGAGGATCCATTGGAGGGCAAGTCTGGTGCCA -GCAGCCGCGGTAATTCCAGCTCCAATAGCGTATATTAAAGTTGCTGCAGTTAAAAAGCTCGTAGTTGGAT -CTTGGGAGCGGGCGGGCGGTCCGCCGCGAGGCGAGCCACCGCCCGTCCCCGCCCCTTGCCTCTCGGCGCC -CCCTCGATGCTCTTAGCTGAGTGTCCCGCGGGGCCCGAAGCGTTTACTTTGAAAAAATTAGAGTGTTCAA -AGCAGGCCCGAGCCGCCTGGATACCGCAGCTAGGAATAATGGAATAGGACCGCGGTTCTATTTTGTTGGT -TTTCGGAACTGAGGCCATGATTAAGAGGGACGGCCGGGGGCATTCGTATTGCGCCGCTAGAGGTGAAATT -CTTGGACCGGCGCAAGACGGACCAGAGCGAAAGCATTTGCCAAGAATGTTTTCATTAATCAAGAACGAAA -GTCGGAGGTTCGAAGACGATCAGATACCGTCGTAGTTCCGACCATAAACGATGCCGACCGGCGATGCGGC -GGCGTTATTCCCATGACCCGCCGGGCAGCTTCCGGGAAACCAAAGTCTTTGGGTTCCGGGGGGAGTATGG -TTGCAAAGCTGAAACTTAAAGGAATTGACGGAAGGGCACCACCAGGAGTGGAGCCTGCGGCTTAATTTGA -CTCAACACGGGAAACCTCACCCGGCCCGGACACGGACAGGATTGACAGATTGATAGCTCTTTCTCGATTC -CGTGGGTGGTGGTGCATGGCCGTTCTTAGTTGGTGGAGCGATTTGTCTGGTTAATTCCGATAACGAACGA -GACTCTGGCATGCTAACTAGTTACGCGACCCCCGAGCGGTCGGCGTCCCCCAACTTCTTAGAGGGACAAG -TGGCGTTCAGCCACCCGAGATTGAGCAATAACAGGTCTGTGATGCCCTTAGATGTCCGGGGCTGCACGCG -CGCTACACTGACTGGCTCAGCGTGTGCCTACCCTACGCCGGCAGGCGCGGGTAACCCGTTGAACCCCATT -CGTGATGGGGATCGGGGATTGCAATTATTCCCCATGAACGAGGAATTCCCAGTAAGTGCGGGTCATAAGC -TTGCGTTGATTAAGTCCCTGCCCTTTGTACACACCGCCCGTCGCTACTACCGATTGGATGGTTTAGTGAG -GCCCTCGGATCGGCCCCGCCGGGGTCGGCCCACGGCCCTGGCGGAGCGCTGAGAAGACGGTCGAACTTGA -CTATCTAGAGGAAGTAAAAGTCGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTAACGGAGC -CCGGAGGGCGAGGCCCGCGGCGGCGCCGCCGCCGCCGCGCGCTTCCCTCCGCACACCCACCCCCCCACCG -CGACGCGGCGCGTGCGCGGGCGGGGCCCGCGTGCCCGTTCGTTCGCTCGCTCGTTCGTTCGCCGCCCGGC -CCCGCCGGCCGCGAGAGCCGGAGAACTCGGGAGGGAGACGGGGGAGAGAGAGAGAGAGAGAGAAAGAGAA -AGAAGGGCGTGTCGTTGGTGTGCGCGTGTCGTGGGGCCGGCGGGCGGCGGGGAGCGGTCCCCGGCCGCGG -CCCCGACGACGTGGGTGTCGGCGGGCGCGGGGGCGGTTCTCGGCGGCGTCGCGGCGGGTCTGGGGGGGTC -TCGGTGCCCTCCTCCCCGCCGGGGCCCGTCGTCCGGCCCCGCCGCGCCGGCTCCCCGTCTTCGGGGCCGG -CCGGATTCCCGTCGCCTCCGCCGCGCCGCTCCGCGCCGCCGGGCACGGCCCCGCTCGCTCTCCCCGGCCT -TCCCGCTAGGGCGTCTCGAGGGTCGGGGGCCGGACGCCGGTCCCCTCCCCCGCCTCCTCGTCCGCCCCCC -CGCCGTCCAGGTACCTAGCGCGTTCCGGCGCGGAGGTTTAAAGACCCCTTGGGGGGATCGCCCGTCCGCC -CGTGGGTCGGGGGCGGTGGTGGGCCCGCGGGGGAGTCCCGTCGGGAGGGGCCCGGCCCCTCCCGCGCCTC -CACCGCGGACTCCGCTCCCCGGCCGGGGCCGCGCCGCCGCCGCCGCCGCGGCGGCCGTCGGGTGGGGGCT -TTACCCGGCGGCCGTCGCGCGCCTGCCGCGCGTGTGGCGTGCGCCCCGCGCCGTGGGGGCGGGAACCCCC -GGGCGCCTGTGGGGTGGTGTCCGCGCTCGCCCCCGCGTGGGCGGCGCGCGCCTCCCCGTGGTGTGAAACC -TTCCGACCCCTCTCCGGAGTCCGGTCCCGTTTGCTGTCTCGTCTGGCCGGCCTGAGGCAACCCCCTCTCC -TCTTGGGCGGGGGGGGGGGGGACGTGCCGCGCCAGGAAGGGCCTCCTCCCGGTGCGTCGTCGGGAGCGCC -CTCGCCAAATCGACCTCGTACGACTCTTAGCGGTGGATCACTCGGCTCGTGCGTCGATGAAGAACGCAGC -TAGCTGCGAGAATTAATGTGAATTGCAGGACACATTGATCATCGACACTTCGAACGCACTTGCGGCCCCG -GGTTCCTCCCGGGGCTACGCCTGTCTGAGCGTCGCTTGCCGATCAATCGCCCCCGGGGGTGCCTCCGGGC -TCCTCGGGGTGCGCGGCTGGGGGTTCCCTCGCAGGGCCCGCCGGGGGCCCTCCGTCCCCCTAAGCGCAGA -CCCGGCGGCGTCCGCCCTCCTCTTGCCGCCGCGCCCGCCCCTTCCCCCTCCCCCCGCGGGCCCTGCGTGG -TCACGCGTCGGGTGGCGGGGGGGAGAGGGGGGCGCGCCCGGCTGAGAGAGACGGGGAGGGCGGCGCCGCC -GCCGCCCGCGAAGACGGAGAGGGAAAGAGAGAGCCGGCTCGGGCCGAGTTCCCGTGGCCGCCGCCTGCGG -TCCGGGTTCCTCCCTCGGGGGGCTCCCTCGCGCCGCGCGCGGCTCGGGGTTCGGGGTTCGTCGGCCCCGG -CCGGGTGGAAGGTCCCGTGCCCGTCGTCGTCGTCGTCGTCGCGCGTCGTCGGCGGTGGGGGCGTGTTGCG -TGCGGTGTGGTGGTGGGGGAGGAGGAAGGCGGGTCCGGAAGGGGAAGGGTGCCGGCGGGGAGAGAGGGTC -GGGGGAGCGCGTCCCGGTCGCCGCGGTTCGCCGCCCGCCCCCGGTGGCGGCCCGGCGTCCGGCCGACCGC -CGCTCCCGCGCCCCTCCTCCTCCCCGCCGCCCCTCCTCCGAGGCCCCGCCCGTCCTCCTCGCCCTCCCCG -CGCGTACGCGCGCCCGCCCGCCCGGCTCGCCTCGCGGCGCGTCGGCCGGGGCCGGGAGCCCGCCCCGCGG -CCCGCCCGGCCGCGCCCGTGGCCGCGGCGCCGGGGTTCGCGTGTCCCCGGCGGCGACCCGCGGGACGCCG -CGGTGTCGTCCGCCGTCGCGCGCCCGCCTCCGGCTCGCGGCCGCGCCGCGCCGCGCCGGGGCCCCGTCCC -GAGCTTCCGCGTCGGGGCGGGGCGGCTCCGCCGCCGCGTCCTCGGACCCGTCCCCCCGACCTCCGCGGGG -GAGACGGGTCGGGGCGTGCGGCGCCCGTCCCGCCCCCGGCCCGTGCCCCTCCCTCCGGTCGTCCCGCTCC -GGCGGGGCGGCGCGGGGGTGCCGCCGGCCGCGCGCTCTCTCTCCCGTCGCCTCTCCCCCTCGCCGGGCCC -GTCTCCCGACGGAGCGTCGGGCGGGCGGTCGGGCCGGCGCGATTCCGTCCGTCCGTCCGCCGAGCGGCCC -GTCCCCCTCCGAGACGCGACCTCAGATCAGACGTGGCGACCCGCTGAATTTAAGCATATTAGTCAGCGGA -GGAGAAGAAACTAACCAGGATTCCCTCAGTAACGGCGAGTGAACAGGGAAGAGCCCAGCGCCGAATCCCC -GCCCCGCGGCGGGGCGCGGGACATGTGGCGTACGGAAGACCCGCTCCCCGGCGCCGCTCGTGGGGGGCCC -AAGTCCTTCTGATCGAGGCCCAGCCCGTGGACGGTGTGAGGCCGGTAGCGGCCCCCGGCGCGCCGGGCCC -GGGTCTTCCCGGAGTCGGGTTGCTTGGGAATGCAGCCCAAAGCGGGTGGTAAACTCCATCTAAGGCTAAA -TACCGGCACGAGACCGATAGTCAACAAGTACCGTAAGGGAAAGTTGAAAAGAACTTTGAAGAGAGAGTTC -AAGAGGGCGTGAAACCGTTAAGAGGTAAACGGGTGGGGTCCGCGCAGTCCGCCCGGAGGATTCAACCCGG -CGGCGGGTCCGGCCGTGTCGGCGGCCCGGCGGATCTTTCCCGCCCCCCGTTCCTCCCGACCCCTCCACCC -GCCCTCCCTTCCCCCGCCGCCCCTCCTCCTCCTCCCCGGAGGGGGCGGGCTCCGGCGGGTGCGGGGGTGG -GCGGGCGGGGCCGGGGGTGGGGTCGGCGGGGGACCGTCCCCCGACCGGCGACCGGCCGCCGCCGGGCGCA -TTTCCACCGCGGCGGTGCGCCGCGACCGGCTCCGGGACGGCTGGGAAGGCCCGGCGGGGAAGGTGGCTCG -GGGGGCCCCGTCCGTCCGTCCGTCCGTCCTCCTCCTCCCCCGTCTCCGCCCCCCGGCCCCGCGTCCTCCC -TCGGGAGGGCGCGCGGGTCGGGGCGGCGGCGGCGGCGGCGGTGGCGGCGGCGGCGGCGGCGGCGGGACCG -AAACCCCCCCCGAGTGTTACAGCCCCCCCGGCAGCAGCACTCGCCGAATCCCGGGGCCGAGGGAGCGAGA -CCCGTCGCCGCGCTCTCCCCCCTCCCGGCGCCCACCCCCGCGGGGAATCCCCCGCGAGGGGGGTCTCCCC -CGCGGGGGCGCGCCGGCGTCTCCTCGTGGGGGGGCCGGGCCACCCCTCCCACGGCGCGACCGCTCTCCCA -CCCCTCCTCCCCGCGCCCCCGCCCCGGCGACGGGGGGGGTGCCGCGCGCGGGTCGGGGGGCGGGGCGGAC -TGTCCCCAGTGCGCCCCGGGCGGGTCGCGCCGTCGGGCCCGGGGGAGGTTCTCTCGGGGCCACGCGCGCG -TCCCCCGAAGAGGGGGACGGCGGAGCGAGCGCACGGGGTCGGCGGCGACGTCGGCTACCCACCCGACCCG -TCTTGAAACACGGACCAAGGAGTCTAACACGTGCGCGAGTCGGGGGCTCGCACGAAAGCCGCCGTGGCGC -AATGAAGGTGAAGGCCGGCGCGCTCGCCGGCCGAGGTGGGATCCCGAGGCCTCTCCAGTCCGCCGAGGGC -GCACCACCGGCCCGTCTCGCCCGCCGCGCCGGGGAGGTGGAGCACGAGCGCACGTGTTAGGACCCGAAAG -ATGGTGAACTATGCCTGGGCAGGGCGAAGCCAGAGGAAACTCTGGTGGAGGTCCGTAGCGGTCCTGACGT -GCAAATCGGTCGTCCGACCTGGGTATAGGGGCGAAAGACTAATCGAACCATCTAGTAGCTGGTTCCCTCC -GAAGTTTCCCTCAGGATAGCTGGCGCTCTCGCAGACCCGACGCACCCCCGCCACGCAGTTTTATCCGGTA -AAGCGAATGATTAGAGGTCTTGGGGCCGAAACGATCTCAACCTATTCTCAAACTTTAAATGGGTAAGAAG -CCCGGCTCGCTGGCGTGGAGCCGGGCGTGGAATGCGAGTGCCTAGTGGGCCACTTTTGGTAAGCAGAACT -GGCGCTGCGGGATGAACCGAACGCCGGGTTAAGGCGCCCGATGCCGACGCTCATCAGACCCCAGAAAAGG -TGTTGGTTGATATAGACAGCAGGACGGTGGCCATGGAAGTCGGAATCCGCTAAGGAGTGTGTAACAACTC -ACCTGCCGAATCAACTAGCCCTGAAAATGGATGGCGCTGGAGCGTCGGGCCCATACCCGGCCGTCGCCGG -CAGTCGAGAGTGGACGGGAGCGGCGGGGGCGGCGCGCGCGCGCGCGCGTGTGGTGTGCGTCGGAGGGCGG -CGGCGGCGGCGGCGGCGGGGGTGTGGGGTCCTTCCCCCGCCCCCCCCCCCACGCCTCCTCCCCTCCTCCC -GCCCACGCCCCGCTCCCCGCCCCCGGAGCCCCGCGGACGCTACGCCGCGACGAGTAGGAGGGCCGCTGCG -GTGAGCCTTGAAGCCTAGGGCGCGGGCCCGGGTGGAGCCGCCGCAGGTGCAGATCTTGGTGGTAGTAGCA -AATATTCAAACGAGAACTTTGAAGGCCGAAGTGGAGAAGGGTTCCATGTGAACAGCAGTTGAACATGGGT -CAGTCGGTCCTGAGAGATGGGCGAGCGCCGTTCCGAAGGGACGGGCGATGGCCTCCGTTGCCCTCGGCCG -ATCGAAAGGGAGTCGGGTTCAGATCCCCGAATCCGGAGTGGCGGAGATGGGCGCCGCGAGGCGTCCAGTG -CGGTAACGCGACCGATCCCGGAGAAGCCGGCGGGAGCCCCGGGGAGAGTTCTCTTTTCTTTGTGAAGGGC -AGGGCGCCCTGGAATGGGTTCGCCCCGAGAGAGGGGCCCGTGCCTTGGAAAGCGTCGCGGTTCCGGCGGC -GTCCGGTGAGCTCTCGCTGGCCCTTGAAAATCCGGGGGAGAGGGTGTAAATCTCGCGCCGGGCCGTACCC -ATATCCGCAGCAGGTCTCCAAGGTGAACAGCCTCTGGCATGTTGGAACAATGTAGGTAAGGGAAGTCGGC -AAGCCGGATCCGTAACTTCGGGATAAGGATTGGCTCTAAGGGCTGGGTCGGTCGGGCTGGGGCGCGAAGC -GGGGCTGGGCGCGCGCCGCGGCTGGACGAGGCGCCGCCGCCCCCCCCACGCCCGGGGCACCCCCCTCGCG -GCCCTCCCCCGCCCCACCCCGCGCGCGCCGCTCGCTCCCTCCCCGCCCCGCGCCCTCTCTCTCTCTCTCT -CCCCCGCTCCCCGTCCTCCCCCCTCCCCGGGGGAGCGCCGCGTGGGGGCGGCGGCGGGGGGAGAAGGGTC -GGGGCGGCAGGGGCCGGCGGCGGCCCGCCGCGGGGCCCCGGCGGCGGGGGCACGGTCCCCCGCGAGGGGG -GCCCGGGCACCCGGGGGGCCGGCGGCGGCGGCGACTCTGGACGCGAGCCGGGCCCTTCCCGTGGATCGCC -CCAGCTGCGGCGGGCGTCGCGGCCGCCCCCGGGGAGCCCGGCGGGCGCCGGCGCGCCCCCCCCCCCACCC -CACGTCTCGTCGCGCGCGCGTCCGCTGGGGGCGGGGAGCGGTCGGGCGGCGGCGGTCGGCGGGCGGCGGG -GCGGGGCGGTTCGTCCCCCCGCCCTACCCCCCCGGCCCCGTCCGCCCCCCGTTCCCCCCTCCTCCTCGGC -GCGCGGCGGCGGCGGCGGGCGGCGGAGGGGCCGCGGGCCGGTCCCCCCCGCCGGGTCCGCCCCCGGGGCC -GCGGTTCCGCGCGGCGCCTCGCCTCGGCCGGCGCCTAGCAGCCGACTTAGAACTGGTGCGGACCAGGGGA -ATCCGACTGTTTAATTAAAACAAAGCATCGCGAAGGCCCGCGGCGGGTGTTGACGCGATGTGATTTCTGC -CCAGTGCTCTGAATGTCAAAGTGAAGAAATTCAATGAAGCGCGGGTAAACGGCGGGAGTAACTATGACTC -TCTTAAGGTAGCCAAATGCCTCGTCATCTAATTAGTGACGCGCATGAATGGATGAACGAGATTCCCACTG -TCCCTACCTACTATCCAGCGAAACCACAGCCAAGGGAACGGGCTTGGCGGAATCAGCGGGGAAAGAAGAC -CCTGTTGAGCTTGACTCTAGTCTGGCACGGTGAAGAGACATGAGAGGTGTAGAATAAGTGGGAGGCCCCC -GGCGCCCCCCCGGTGTCCCCGCGAGGGGCCCGGGGCGGGGTCCGCCGGCCCTGCGGGCCGCCGGTGAAAT -ACCACTACTCTGATCGTTTTTTCACTGACCCGGTGAGGCGGGGGGGCGAGCCCCGAGGGGCTCTCGCTTC -TGGCGCCAAGCGCCCGGCCGCGCGCCGGCCGGGCGCGACCCGCTCCGGGGACAGTGCCAGGTGGGGAGTT -TGACTGGGGCGGTACACCTGTCAAACGGTAACGCAGGTGTCCTAAGGCGAGCTCAGGGAGGACAGAAACC -TCCCGTGGAGCAGAAGGGCAAAAGCTCGCTTGATCTTGATTTTCAGTACGAATACAGACCGTGAAAGCGG -GGCCTCACGATCCTTCTGACCTTTTGGGTTTTAAGCAGGAGGTGTCAGAAAAGTTACCACAGGGATAACT -GGCTTGTGGCGGCCAAGCGTTCATAGCGACGTCGCTTTTTGATCCTTCGATGTCGGCTCTTCCTATCATT -GTGAAGCAGAATTCACCAAGCGTTGGATTGTTCACCCACTAATAGGGAACGTGAGCTGGGTTTAGACCGT -CGTGAGACAGGTTAGTTTTACCCTACTGATGATGTGTTGTTGCCATGGTAATCCTGCTCAGTACGAGAGG -AACCGCAGGTTCAGACATTTGGTGTATGTGCTTGGCTGAGGAGCCAATGGGGCGAAGCTACCATCTGTGG -GATTATGACTGAACGCCTCTAAGTCAGAATCCCGCCCAGGCGGAACGATACGGCAGCGCCGCGGAGCCTC -GGTTGGCCTCGGATAGCCGGTCCCCCGCCTGTCCCCGCCGGCGGGCCGCCCCCCCCTCCACGCGCCCCGC -GCGCGCGGGAGGGCGCGTGCCCCGCCGCGCGCCGGGACCGGGGTCCGGTGCGGAGTGCCCTTCGTCCTGG -GAAACGGGGCGCGGCTGGAAAGGCGGCCGCCCCCTCGCCCGTCACGCACCGCACGTTCGTGGGGAACCTG -GCGCTAAACCATTCGTAGACGACCTGCTTCTGGGTCGGGGTTTCGTACGTAGCAGAGCAGCTCCCTCGCT -GCGATCTATTGAAAGTCAGCCCTCGACACAAGGGTTTGTCCGCGCGCGCGCGCGCGCGTGCGTGCGGGGG -GCCCGGCGGGGCGTGCGCGTCCGGCGCCGTCCGTCCTTCCGTTCGTCTTCCTCCCTCCCGGCCTCTCCCG -CCGACCGCGGGCGTGGTGGTGGGGGTGGGGGGGAGGGCGCGCGACCCCGGTCGGCGCGCCCCGCTTCTTC -GGTTCCCGCCTCCTCCCCGTTCACCGCCGGGGCGGCTCGTCCGCTCCGGGCCGGGACGGGGTCCGGGGAG -CGTGGTTTGGGAGCCGCGGAGGCGGCCGCGCCGAGCCGGGCCCGTGGCCCGCCGGTCCCCGTCCCGGGGG -TTGGCCGCGCGGGCCCCGGTGGGGCGGCCACCCGGGGTCCCGGCCCTCGCG ->RNA5S1 -GTCTACGGCCATACCACCCTGAACGCGCCCGATCTCGTCTGATCTCGGAAGCTAAGCAGGGTCGGGCCTG -GTTAGTACTTGGATGGGAGACCGCCTGGGAATACCGGGTGCTGTAGGCTTT diff --git a/process_data/.gitignore b/process_data/.gitignore deleted file mode 100644 index 58c3d7f..0000000 --- a/process_data/.gitignore +++ /dev/null @@ -1,7 +0,0 @@ -.* -Log.out -results -logs -dag.png -nohup.out -samples diff --git a/process_data/create_snakemake_flowchart.sh b/process_data/create_snakemake_flowchart.sh deleted file mode 100755 index ce71a9a..0000000 --- a/process_data/create_snakemake_flowchart.sh +++ /dev/null @@ -1 +0,0 @@ -snakemake --dag -np | dot -Tpng > dag.png diff --git a/process_data/run_snakefile.sh b/process_data/run_snakefile.sh deleted file mode 100755 index ceec8a8..0000000 --- a/process_data/run_snakefile.sh +++ /dev/null @@ -1,13 +0,0 @@ -# set -e - -mkdir -p logs/cluster_log -mkdir -p logs/local_log - -snakemake \ ---cluster-config cluster.json \ ---cluster "sbatch --cpus-per-task={cluster.threads} --mem={cluster.mem} --qos={cluster.queue} --time={cluster.time} --job-name={cluster.name} -o {cluster.out} -p scicore" \ ---cores 256 \ --p \ ---rerun-incomplete \ ---use-singularity \ ---singularity-args "--bind ${PWD}" diff --git a/prepare_annotation/scripts/fg_extract_transcripts.py b/scripts/fg_extract_transcripts.py similarity index 100% rename from prepare_annotation/scripts/fg_extract_transcripts.py rename to scripts/fg_extract_transcripts.py diff --git a/process_data/Snakefile b/snakemake/Snakefile similarity index 100% rename from process_data/Snakefile rename to snakemake/Snakefile diff --git a/process_data/cluster.json b/snakemake/cluster.json similarity index 100% rename from process_data/cluster.json rename to snakemake/cluster.json diff --git a/process_data/config.yaml b/snakemake/config.yaml similarity index 100% rename from process_data/config.yaml rename to snakemake/config.yaml diff --git a/prepare_annotation/create_snakemake_flowchart.sh b/snakemake/create_snakemake_flowchart.sh similarity index 100% rename from prepare_annotation/create_snakemake_flowchart.sh rename to snakemake/create_snakemake_flowchart.sh diff --git a/process_data/paired_end.snakemake b/snakemake/paired_end.snakemake similarity index 100% rename from process_data/paired_end.snakemake rename to snakemake/paired_end.snakemake diff --git a/process_data/preprocessing.snakefile b/snakemake/preprocessing.snakefile similarity index 100% rename from process_data/preprocessing.snakefile rename to snakemake/preprocessing.snakefile diff --git a/prepare_annotation/run_snakefile.sh b/snakemake/run_snakefile.sh similarity index 100% rename from prepare_annotation/run_snakefile.sh rename to snakemake/run_snakefile.sh diff --git a/process_data/single_end.snakefile b/snakemake/single_end.snakefile similarity index 100% rename from process_data/single_end.snakefile rename to snakemake/single_end.snakefile -- GitLab