minor changes based on pylint

3d754385 · Samuel Mondal · 6f5e54c4 · 3d754385 · 3d754385 · 3d754385
Commit 3d754385 authored Dec 13, 2022 by Samuel Mondal
--- a/gtf_processing/pre_bedtools.py
+++ b/gtf_processing/pre_bedtools.py
-import pandas as pd
-import argparse
-from gtfparse import read_gtf
-
 """This script defines a BED from exon annotation in a GTF, to get exon coordinates for use in bedtools. It also ensures that the concatenation happens in the correct order, regardless of the strandedness of the transcript.

    Args:
@@ -11,6 +7,10 @@ from gtfparse import read_gtf
        BED file with the format: chr, start, end, transcript_id, score, strand, gene_id
 """

+import argparse
+import pandas as pd
+from gtfparse import read_gtf
+
 parser = argparse.ArgumentParser(
    prog = 'pre_bedtools',
    description = 'extracts ordered information from gtf file and for transcripts in the negative strand, flips the order in which exons are ordered.')
@@ -31,4 +31,3 @@ gtf_df_pos = gtf_exons[gtf_exons["strand"] == "+"]
 gtf_df_pos = gtf_df_pos.sort_values(['transcript_id','start'],ascending=True).groupby('transcript_id').head(len(gtf_df_pos. transcript_id))

 pd.concat([gtf_df_pos, gtf_df_neg]).to_csv(args.output_bed_file,sep="\t",index=False) #gtf_df_pos and gtf_df_neg must be dataframes
-
--- a/sequence_extractor/cli.py
+++ b/sequence_extractor/cli.py
+""" command line script to be run on output fasta file from bedtools getfasta """
 import argparse
 import logging
 from exon_concatenation import exon_concatenation
@@ -14,10 +15,18 @@ parser.add_argument('--output_file_name',
 args = parser.parse_args()

 def main():
+    """Concatenate the exons in the bedtools output, add a polyA tail to each sequence, and write the result to a fasta file.
+
+    Args:
+        None: the input and output file paths are taken from the parsed argparse arguments.
+
+    Returns:
+        None. Writes a fasta file with a single entry for each transcript ID, with a polyA tail added to the 3' end of each sequence.
+    """
    LOG.info("sequence_extractor begins")
    fasta_list = exon_concatenation(args.input_fasta_file)
    final_list = poly_a_addition_to_fasta_list(fasta_list)
-    with open(args.output_file_name, 'w') as fasta_out:
+    with open(args.output_file_name, 'w', encoding="utf-8") as fasta_out:
        fasta_out.write('\n'.join('%s\n%s' % x for x in final_list))
    LOG.info("sequence_extractor ends")


--- a/sequence_extractor/exon_concatenation.py
+++ b/sequence_extractor/exon_concatenation.py
+"""Script containing the function to concatenate exons and output the results in a list of tuples"""
 def exon_concatenation(
-	post_bedtools_fasta: str
+    post_bedtools_fasta: str,
 ) -> list:
    """Concatenate all sequences starting with an identical transcript ID and output them as a list of tuples, each holding the sequence header (transcript ID) and the concatenated sequence.

@@ -9,10 +10,10 @@ def exon_concatenation(
    Returns:
        A list containing transcript ID and concatenated exons in tuples.
    """
-    with open(post_bedtools_fasta,'r') as fa:
+    with open(post_bedtools_fasta,'r', encoding="utf-8") as fasta:
        annotation = []
        fasta_format_list = []
-        for line1,line2 in zip(fa,fa):
+        for line1,line2 in zip(fasta,fasta):
            if len(annotation) == 0:
                annotation.append(line1[0:16])
                read = line2[:-1]

--- a/sequence_extractor/poly_a.py
+++ b/sequence_extractor/poly_a.py
+"""Two functions for adding a poly A tail to the concatenated exons; the first function is called by the second."""
 import numpy as np
 # To do: Taking probabilities of nucleotides from user and raising error if sum != 1
 def poly_a_generator(
@@ -11,9 +12,9 @@ def poly_a_generator(
 	Returns:
 		RNA with polyA tail added to its 3' end.
 	"""
-	listA = ['A','T','G','C']
-	polyA = ''.join(np.random.choice(listA,250,p=[0.914,0.028,0.025,0.033]))
-	return (exon+polyA)
+    list_of_nucleotides = ['A','T','G','C']
+    poly_a_string = ''.join(np.random.choice(list_of_nucleotides,250,p=[0.914,0.028,0.025,0.033]))
+    return exon+poly_a_string

 def poly_a_addition_to_fasta_list(
 	fasta_list: list,