diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..5f220b6e8a975e88d5885f47558ce460c5ae5f3f
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,13 @@
+##### BASE #####
+FROM python:3.10-slim-buster
+
+##### VARIABLES #####
+WORKDIR /Users/terminal-fragment-selector
+
+COPY requirements.txt /Users/terminal-fragment-selector/requirements.txt
+COPY requirements_dev.txt /Users/terminal-fragment-selector/requirements_dev.txt
+
+##### INSTALL #####
+# --no-cache-dir keeps pip's download cache out of the image layers.
+RUN pip install --no-cache-dir -r /Users/terminal-fragment-selector/requirements.txt
+RUN pip install --no-cache-dir -r /Users/terminal-fragment-selector/requirements_dev.txt
diff --git a/frag_selec.nf b/frag_selec.nf
new file mode 100644
index 0000000000000000000000000000000000000000..1fc9bdc7290f3a18d3aa28d66a6f82a962110b5a
--- /dev/null
+++ b/frag_selec.nf
@@ -0,0 +1,143 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl=2
+
+/*
+ * Terminal fragment selector pipeline.
+ *
+ * Takes as input a FASTA file of cDNA sequences, a CSV/TSV with sequence
+ * counts, and the mean and std. dev. of fragment lengths and 4 nucleotide
+ * probabilities for the cuts. Outputs the most terminal fragment (within
+ * the desired length range) for each sequence.
+ */
+
+/* Define the input parameters */
+params.fasta_file = "$projectDir/tests/test_files/test.fasta"
+params.counts_file = "$projectDir/tests/test_files/test.csv"
+params.sep = "$projectDir/data/yeast/sep/sep.csv"
+params.outdir = "results"
+
+
+/* Log some information for the user */
+
+log.info """\
+    R N A S E Q - N F P I P E L I N E
+    ===================================
+    fasta_file : ${params.fasta_file}
+    counts_file : ${params.counts_file}
+    outdir : ${params.outdir}
+    """
+    .stripIndent()
+
+/*
+ * Define the `file_validation` process:
+ * Validate input files exist and are the correct format
+ */
+
+process file_validation {
+
+    input:
+    path fasta_file
+    path counts_file
+    path sep
+
+    // TODO: declare the outputs. The draft described them as "fasta dict and
+    // sequence for counts file", which is not a valid output declaration.
+
+    // NOTE(review): placeholder command -- `transcriptome` is not an input of
+    // this process; replace with the real validation command.
+    script:
+    """
+    salmon index --threads $task.cpus -t $transcriptome -i index
+    """
+}
+
+
+/*
+ * Define the get_cut_number process:
+ * Get the number of cuts for a particular sequence
+ */
+
+process get_cut_number {
+
+    tag "get_cut_number on $n_cuts"
+    publishDir "${params.outdir}/get_cut_number", mode:'copy'
+
+    // NOTE(review): the workflow passes (seq_len, mean) to this process, which
+    // does not line up with the inputs declared here -- confirm the intent.
+    input:
+    path index
+    tuple val(n_cuts), path(seq_len, mean)
+
+    output:
+    path(n_cuts)
+
+    script:
+    """
+
+    """
+}
+
+
+/*
+ * Define the fragmentation process:
+ * Fragment cDNA sequences and select terminal fragment
+ */
+
+process fragmentation {
+
+    tag "fragmentation on $fasta, seq_counts, nuc_probs, mu_length, std"
+
+    // The draft annotated these inputs with Python types (shown on the right),
+    // which Nextflow cannot parse; they are declared as plain values instead.
+    input:
+    val fasta       // dict
+    val seq_counts  // pd.DataFrame
+    val nuc_probs   // dict
+    val mu_length   // int
+    val std         // int
+
+    output:
+    path("term_frags")
+
+    // NOTE(review): placeholder command -- `sample_id` is not an input of this
+    // process and nothing here creates `term_frags`; replace with the real command.
+    script:
+    """
+    mkdir fastqc_${sample_id}_logs
+
+    """
+}
+
+
+
+/* Start the job:
+ * initialize variables
+ */
+
+// NOTE(review): `params.reads` is not defined in this script and
+// `read_pairs_ch` is not used by the workflow below -- likely a leftover.
+Channel
+    .fromFilePairs( params.reads, checkIfExists:true )
+    .set { read_pairs_ch }
+
+
+/* The "main" function:
+ * Use CLI arguments to fragment sequences and output text file with selected terminal fragments
+ */
+
+workflow {
+    file_validation_ch = file_validation(params.fasta_file, params.counts_file, params.sep)
+    // NOTE(review): seq_len, mean, fasta, seq_counts, nuc_probs, mu_length and
+    // std are not defined in this script; wire them from params or from
+    // upstream process outputs.
+    get_cut_number_ch = get_cut_number(seq_len, mean)
+    fragmentation_ch = fragmentation(fasta, seq_counts, nuc_probs, mu_length, std)
+}
+
+
+/* Book keeping upon workflow completion */
+workflow.onComplete {
+    // NOTE(review): no step in this script writes a multiqc report; confirm the success message.
+    log.info (workflow.success ? "\nDone! Open the following report in your browser --> $params.outdir/multiqc/multiqc_report.html\n" : "Oops .. something went wrong")
+}
diff --git a/requirements.txt b/requirements.txt
index 70de6ebc12f5dc4d32809ef34959efe77fbf47ea..8a9674c77d5ed060f30942792d2e3f14540bd1a0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
 argparse
-biopython >= 1.78
+biopython
 numpy >= 1.23.3
 pandas >= 1.4.4
\ No newline at end of file