diff --git a/sequence_extractor/pre_bedtools.py b/sequence_extractor/pre_bedtools.py
index 13433b47c70e8a9215ebc1e4fc65952f223dc531..0f3a48d29f2ef171d2bcd544962cb27c25ea5970 100755
--- a/sequence_extractor/pre_bedtools.py
+++ b/sequence_extractor/pre_bedtools.py
@@ -14,6 +14,46 @@ for x in range(len(feat)):
 
 bed = {"chr":exons[0],"start":exons[3],"end":exons[4],"transcript_id":superlist,"score":exons[5],"strand":exons[6],"gene_id":idlist}
 
+
+class Bed:
+    """Hold the BED-style columns derived from exon records.
+
+    The attributes mirror the keys of the ``bed`` dictionary built above.
+    The class is named ``Bed`` (not ``bed``) so that defining it does not
+    rebind that dictionary, which is passed to ``pd.DataFrame`` below.
+
+    Parameters
+    ----------
+    exons
+        Exon records indexed by column position; presumably GTF columns
+        (0 = chromosome, 3 = start, 4 = end, 5 = score, 6 = strand) --
+        TODO confirm against the code that loads them.
+    chr, start, end, score, strand
+        Optional overrides; when ``None`` the value is read from the
+        matching ``exons`` column.
+    transcript_id, gene_id
+        Identifiers to store as given (``None`` when omitted).
+
+    Notes
+    -----
+    ``start`` is stored unchanged. GTF starts are 1-based and BED starts
+    are 0-based, so a conversion may be needed -- TODO confirm.
+    """
+
+    def __init__(self, exons, chr=None, start=None, end=None,
+                 transcript_id=None, score=None, strand=None, gene_id=None):
+        # ``chr`` shadows the builtin inside this method only; the name is
+        # kept so the constructor signature stays unchanged.
+        self.exons = exons
+        self.chr = exons[0] if chr is None else chr
+        self.start = exons[3] if start is None else start
+        self.end = exons[4] if end is None else end
+        self.transcript_id = transcript_id
+        self.score = exons[5] if score is None else score
+        self.strand = exons[6] if strand is None else strand
+        self.gene_id = gene_id
+
+
 bed = pd.DataFrame(bed)
 bed.to_csv("bed_file.bed",sep="\t",index=False)
 bed[(bed["gene_id"]=="ENSG00000160072")|(bed["gene_id"]== "ENSG00000142611")|(bed["gene_id"]=="ENSG00000232596")].to_csv("test.bed",sep="\t",index=False,header=None)