Skip to content
Snippets Groups Projects
Commit 061f425f authored by Boris Jurič's avatar Boris Jurič
Browse files

Make src/poly_a.py runnable, added fasta processing. Initial transcript generation workflow.

parent 557be05b
No related branches found
No related tags found
No related merge requests found
Pipeline #14073 failed
#!/usr/bin/env nextflow
params.fasta = "/inputs/ref_genome.fa"
params.gtf = "/inputs/ref_annotation.gtf"
params.outdir = "/tests"
params.tail_length = "100"
params.weights = "1.0 0.0 0.0 0.0"
project_dir = projectDir
process getTranscriptGTF {
input:
path gtf from params.gtf
output:
file "transcripts.gtf" into transcripts_gtf
"""
awk '\$3 == "transcript"' ${gtf} > transcripts.gtf
"""
}
process getTranscripts {
input:
path fasta from params.fasta
file gtf from transcripts_gtf
output:
file "transcripts.fa" into transcripts_fa
"""
bedtools getfasta -fi ${fasta} -bed ${gtf} -fo transcripts.fa
"""
}
process addPolyA {
publishDir params.outdir, mode: 'copy', pattern: 'transcripts_with_polyA.fa'
input:
file transcripts from transcripts_fa
output:
file "transcripts_with_polyA.fa" into transcripts_polyA
script:
"""
python3 $project_dir/poly_a.py -i ${transcripts} -o transcripts_with_polyA.fa --length ${params.tail_length} --weights ${params.weights}
"""
}
"""Generate a poly(A) tail."""
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq
from random import choices
from typing import (List, Tuple)
import argparse
def generate_poly_a(
......@@ -65,3 +69,25 @@ def generate_poly_a(
norm_weights: List[float] = [freq/s for freq in weights]
tail_bases: List[str] = choices(bases, weights=norm_weights, k=length)
return "".join(tail_bases)
def main():
parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('-i', '--input', required=True,
help='Fasta file to add tails to')
parser.add_argument('-o', '--output', default="out.fa",
help='Name of the output Fasta file (default = out.fa)')
parser.add_argument('--length', default=100, type=int,
help='Length of the desired tail (max value = 200)')
parser.add_argument('--weights', default=(0.914, 0.028, 0.025, 0.033), type=float, nargs='+',
help="Tuple of relative A, C, G and U frequencies in the tail")
args = parser.parse_args()
new_records = []
for fasta in SeqIO.parse(open(args.input),'fasta'):
new_seq = fasta.seq + Seq(generate_poly_a(args.length, args.weights))
new_records.append(SeqRecord(new_seq, id=fasta.id, description=fasta.description))
print(new_records[0].seq)
SeqIO.write(new_records, args.output, "fasta")
if __name__ == '__main__':
main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment