Skip to content
Snippets Groups Projects
Commit 10c1f29f authored by Gina's avatar Gina
Browse files

adding class

parent 96e83ffc
No related branches found
No related tags found
1 merge request!25Class
import pandas as pd
def exon_extraction_from_gtf(gtf_filename, output_filename):
    """Write the exon records of a GTF file to a tab-separated, BED-like file.

    The GTF is read without a header row, skipping its first five lines
    (presumably the ``#!`` metadata header of Ensembl GTF files -- confirm
    for other sources). Rows whose feature column (index 2) equals
    ``"exon"`` are kept and written with the columns
    ``chr, start, end, transcript_id, score, strand, gene_id``.

    Args:
        gtf_filename: Path to the (optionally compressed) GTF file to read.
        output_filename: Path of the tab-separated file to write. A header
            row is written; the DataFrame index is not.

    Returns:
        None. The result is written to ``output_filename``.
    """
    gtf = pd.read_table(gtf_filename, skiprows=5, header=None)
    exons = gtf[gtf[2] == "exon"]

    # Look the identifiers up by attribute key instead of by fixed position
    # and slice offsets, so a different attribute order (or a missing
    # gene_version field) cannot silently produce truncated/garbled IDs.
    # The "(?:^|;\s*)" prefix anchors the key at the start of an attribute,
    # so keys that merely end in "gene_id" are not matched by mistake.
    attributes = exons[8].astype(str)
    transcript_ids = attributes.str.extract(
        r'(?:^|;\s*)transcript_id "([^"]+)"', expand=False
    )
    gene_ids = attributes.str.extract(
        r'(?:^|;\s*)gene_id "([^"]+)"', expand=False
    )

    # NOTE(review): start/end are copied unchanged from the GTF columns;
    # if strict BED (0-based start) output is required, confirm whether the
    # start coordinate should be shifted by one.
    bed = pd.DataFrame(
        {
            "chr": exons[0],
            "start": exons[3],
            "end": exons[4],
            "transcript_id": transcript_ids,
            "score": exons[5],
            "strand": exons[6],
            "gene_id": gene_ids,
        }
    )
    bed.to_csv(output_filename, sep="\t", index=False)
# Legacy script path: keep only the exon rows of the module-level ``gtf``
# table and pull the transcript/gene identifiers out of the attribute column.
exons = gtf[gtf[2] == "exon"]
feat = list(exons[8])

# Each attribute string is split on ";"; the identifiers are then taken from
# fixed positions (third and first field) with fixed slice offsets.
split_attributes = [entry.split(";") for entry in feat]
superlist = [str(fields[2])[16:-1] for fields in split_attributes]
idlist = [str(fields[0])[9:-1] for fields in split_attributes]
<<<<<<< HEAD
bed = {"chr":exons[0],"start":exons[3],"end":exons[4],"transcript_id":superlist,"score":exons[5],"strand":exons[6],"gene_id":idlist}
class bed:
......@@ -52,7 +47,3 @@ class bed:
bed = pd.DataFrame(bed)
bed.to_csv("bed_file.bed",sep="\t",index=False)
bed[(bed["gene_id"]=="ENSG00000160072")|(bed["gene_id"]== "ENSG00000142611")|(bed["gene_id"]=="ENSG00000232596")].to_csv("test.bed",sep="\t",index=False,header=None)
=======
# This line generates a test file from a few manually selected gene_ids for now (the plan is to choose them randomly in the future)
bed[(bed["gene_id"]=="ENSG00000160072")|(bed["gene_id"]== "ENSG00000142611")|(bed["gene_id"]=="ENSG00000232596")].to_csv("test.bed",sep="\t",index=False,header=None)
>>>>>>> f35b4d0acdb49bce8ccda3f322fb4b5486737b75
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment