def main():
    """Append poly(A) tails to Fasta sequences and write them to a file.

    Command-line options (read by ``argparse`` from ``sys.argv``):

    * ``-i`` / ``--input`` (required): Fasta file to read.
    * ``-o`` / ``--output``: Fasta file to write (default ``out.fa``).
    * ``--length``: tail length handed to ``generate_poly_a``
      (default 100).
    * ``--weights``: relative A, C, G and U frequencies handed to
      ``generate_poly_a``.

    ``generate_poly_a`` is called once per input record, and the result is
    appended to that record's sequence. Each record's id and description
    are carried over unchanged.
    """
    parser = argparse.ArgumentParser(
        description='Append poly(A) tails to Fasta sequences.')
    parser.add_argument('-i', '--input', required=True,
                        help='Fasta file to add tails to')
    parser.add_argument('-o', '--output', default="out.fa",
                        help='Name of the output Fasta file (default = out.fa)')
    # NOTE(review): the help text advertises a maximum of 200, but nothing
    # in this function enforces it -- presumably generate_poly_a validates
    # the length; confirm.
    parser.add_argument('--length', default=100, type=int,
                        help='Length of the desired tail (max value = 200)')
    parser.add_argument('--weights', default=(0.914, 0.028, 0.025, 0.033),
                        type=float, nargs='+',
                        help="Tuple of relative A, C, G and U frequencies in the tail")
    args = parser.parse_args()

    # Use a context manager so the input handle is closed even if parsing
    # or tail generation raises. Every record is materialised before any
    # output is written, so the input is fully read before the output file
    # is opened.
    with open(args.input) as handle:
        new_records = [
            SeqRecord(
                record.seq + Seq(generate_poly_a(args.length, args.weights)),
                id=record.id,
                description=record.description,
            )
            for record in SeqIO.parse(handle, 'fasta')
        ]

    SeqIO.write(new_records, args.output, "fasta")


if __name__ == '__main__':
    main()