diff --git a/Untitled.ipynb b/Untitled.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..14c4f123ed3e8ce2c38692cb7fbea30d09590c5b --- /dev/null +++ b/Untitled.ipynb @@ -0,0 +1,481 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "96ccbf23-4a46-4bae-bef2-fdf4c2466ad3", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "3157488c-08cf-4f10-95f6-745613382000", + "metadata": {}, + "outputs": [], + "source": [ + "from cdna.cdna import cDNA_Gen" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6f5bc24-e454-49ad-b1e3-1f5b63c66ab2", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ceb225a6-3a63-49e7-a0d3-a92256012261", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:root:Extracted GTF attributes: ['Accessibility_Energy', 'Hybridization_Energy', 'Interaction_Energy', 'Number_of_binding_sites', 'Binding_Probability']\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[SeqRecord(seq=Seq('ATCCATAAAAAAAAA'), id='Transcript_1', name='Transcript copy number: 8.0', description='', dbxrefs=[]), SeqRecord(seq=Seq('CATCTCAAAAAGTCT'), id='Transcript_1', name='Transcript copy number: 4.0', description='', dbxrefs=[]), SeqRecord(seq=Seq('AAAAAAAAAAAAAAA'), id='Transcript_2', name='Transcript copy number: 11.0', description='', dbxrefs=[])]\n" + ] + } + ], + "source": [ + "test_path = \"/Users/ericboittier/Documents/github/cdna-generator/test_files/\"\n", + "gtf = test_path+\"Example_GTF_Input.gtf\"\n", + "cpn = test_path+\"copy_number_input.csv\"\n", + "fasta = test_path+\"yeast_example.fa\"\n", + "\n", + "G = cDNA_Gen(fasta, gtf, cpn, output_fasta=\"test_files/cDNA.fasta\", output_csv=\"test_files/cDNA.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": 
"24976988-4a28-4932-8f53-d8ac7bb018f8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>index</th>\n", + " <th>ID of transcript</th>\n", + " <th>ID of parent transcript</th>\n", + " <th>Transcript copy number</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>0</td>\n", + " <td>1</td>\n", + " <td>1</td>\n", + " <td>12</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>1</td>\n", + " <td>2</td>\n", + " <td>1</td>\n", + " <td>11</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>2</td>\n", + " <td>3</td>\n", + " <td>2</td>\n", + " <td>33</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>3</td>\n", + " <td>4</td>\n", + " <td>3</td>\n", + " <td>11</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>4</td>\n", + " <td>5</td>\n", + " <td>4</td>\n", + " <td>55</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " index ID of transcript ID of parent transcript Transcript copy number\n", + "0 0 1 1 12\n", + "1 1 2 1 11\n", + "2 2 3 2 33\n", + "3 3 4 3 11\n", + "4 4 5 4 55" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "G.df_input_CSV" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "f7fdfef3-58b5-45c5-bd0f-e215a3b13636", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " 
.dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>seqname</th>\n", + " <th>source</th>\n", + " <th>feature</th>\n", + " <th>start</th>\n", + " <th>end</th>\n", + " <th>score</th>\n", + " <th>strand</th>\n", + " <th>frame</th>\n", + " <th>Accessibility_Energy</th>\n", + " <th>Hybridization_Energy</th>\n", + " <th>Interaction_Energy</th>\n", + " <th>Number_of_binding_sites</th>\n", + " <th>Binding_Probability</th>\n", + " <th>Normalized_Binding_Probability</th>\n", + " <th>Transcript_Copy_Number</th>\n", + " <th>priming_site</th>\n", + " <th>compliment</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>Transcript_1</td>\n", + " <td>RIBlast</td>\n", + " <td>Priming_site</td>\n", + " <td>10</td>\n", + " <td>25</td>\n", + " <td>NaN</td>\n", + " <td>+</td>\n", + " <td>0</td>\n", + " <td>1.49</td>\n", + " <td>-9.76</td>\n", + " <td>-8.74</td>\n", + " <td>2</td>\n", + " <td>0.12</td>\n", + " <td>0.705882</td>\n", + " <td>8.0</td>\n", + " <td>(T, T, T, T, T, T, T, T, T, A, T, G, G, A, T)</td>\n", + " <td>ATCCATAAAAAAAAA</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>Transcript_1</td>\n", + " <td>RIBlast</td>\n", + " <td>Priming_site</td>\n", + " <td>640</td>\n", + " <td>655</td>\n", + " <td>NaN</td>\n", + " <td>+</td>\n", + " <td>0</td>\n", + " <td>1.71</td>\n", + " <td>-9.12</td>\n", + " <td>-8.34</td>\n", + " <td>2</td>\n", + " <td>0.05</td>\n", + " <td>0.294118</td>\n", + " <td>4.0</td>\n", + " <td>(A, G, A, C, T, T, T, T, T, G, A, G, A, T, G)</td>\n", + " <td>CATCTCAAAAAGTCT</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>Transcript_2</td>\n", + " <td>RIBlast</td>\n", + " <td>Priming_site</td>\n", + " <td>3</td>\n", + " <td>18</td>\n", + " <td>NaN</td>\n", + 
" <td>+</td>\n", + " <td>0</td>\n", + " <td>1.21</td>\n", + " <td>-5.12</td>\n", + " <td>-2.34</td>\n", + " <td>1</td>\n", + " <td>0.15</td>\n", + " <td>1.000000</td>\n", + " <td>11.0</td>\n", + " <td>(T, T, T, T, T, T, T, T, T, T, T, T, T, T, T)</td>\n", + " <td>AAAAAAAAAAAAAAA</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>Transcript_3</td>\n", + " <td>RIBlast</td>\n", + " <td>Priming_site</td>\n", + " <td>5</td>\n", + " <td>35</td>\n", + " <td>NaN</td>\n", + " <td>+</td>\n", + " <td>0</td>\n", + " <td>1.21</td>\n", + " <td>-5.12</td>\n", + " <td>-2.34</td>\n", + " <td>1</td>\n", + " <td>0.25</td>\n", + " <td>1.000000</td>\n", + " <td>33.0</td>\n", + " <td>None</td>\n", + " <td>None</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>Transcript_4</td>\n", + " <td>RIBlast</td>\n", + " <td>Priming_site</td>\n", + " <td>5</td>\n", + " <td>35</td>\n", + " <td>NaN</td>\n", + " <td>+</td>\n", + " <td>0</td>\n", + " <td>1.21</td>\n", + " <td>-5.12</td>\n", + " <td>-2.34</td>\n", + " <td>1</td>\n", + " <td>0.15</td>\n", + " <td>1.000000</td>\n", + " <td>11.0</td>\n", + " <td>None</td>\n", + " <td>None</td>\n", + " </tr>\n", + " <tr>\n", + " <th>5</th>\n", + " <td>Transcript_5</td>\n", + " <td>RIBlast</td>\n", + " <td>Priming_site</td>\n", + " <td>5</td>\n", + " <td>35</td>\n", + " <td>NaN</td>\n", + " <td>+</td>\n", + " <td>0</td>\n", + " <td>1.21</td>\n", + " <td>-5.12</td>\n", + " <td>-2.34</td>\n", + " <td>1</td>\n", + " <td>0.15</td>\n", + " <td>1.000000</td>\n", + " <td>55.0</td>\n", + " <td>None</td>\n", + " <td>None</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " seqname source feature start end score strand frame \\\n", + "0 Transcript_1 RIBlast Priming_site 10 25 NaN + 0 \n", + "1 Transcript_1 RIBlast Priming_site 640 655 NaN + 0 \n", + "2 Transcript_2 RIBlast Priming_site 3 18 NaN + 0 \n", + "3 Transcript_3 RIBlast Priming_site 5 35 NaN + 0 \n", + "4 Transcript_4 RIBlast Priming_site 5 35 
NaN + 0 \n", + "5 Transcript_5 RIBlast Priming_site 5 35 NaN + 0 \n", + "\n", + " Accessibility_Energy Hybridization_Energy Interaction_Energy \\\n", + "0 1.49 -9.76 -8.74 \n", + "1 1.71 -9.12 -8.34 \n", + "2 1.21 -5.12 -2.34 \n", + "3 1.21 -5.12 -2.34 \n", + "4 1.21 -5.12 -2.34 \n", + "5 1.21 -5.12 -2.34 \n", + "\n", + " Number_of_binding_sites Binding_Probability \\\n", + "0 2 0.12 \n", + "1 2 0.05 \n", + "2 1 0.15 \n", + "3 1 0.25 \n", + "4 1 0.15 \n", + "5 1 0.15 \n", + "\n", + " Normalized_Binding_Probability Transcript_Copy_Number \\\n", + "0 0.705882 8.0 \n", + "1 0.294118 4.0 \n", + "2 1.000000 11.0 \n", + "3 1.000000 33.0 \n", + "4 1.000000 11.0 \n", + "5 1.000000 55.0 \n", + "\n", + " priming_site compliment \n", + "0 (T, T, T, T, T, T, T, T, T, A, T, G, G, A, T) ATCCATAAAAAAAAA \n", + "1 (A, G, A, C, T, T, T, T, T, G, A, G, A, T, G) CATCTCAAAAAGTCT \n", + "2 (T, T, T, T, T, T, T, T, T, T, T, T, T, T, T) AAAAAAAAAAAAAAA \n", + "3 None None \n", + "4 None None \n", + "5 None None " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "G.df_input_GTF" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e7fe7be-d134-4c1e-8eea-55273c79e39d", + "metadata": {}, + "outputs": [], + "source": [ + "G.add_sequences()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "2eb8f224-0292-4bb0-9e27-ef3bc3676f1d", + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'G' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [3]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mG\u001b[49m\u001b[38;5;241m.\u001b[39mdf_input_GTF\n", + "\u001b[0;31mNameError\u001b[0m: name 'G' is not defined" + ] + } + ], + 
"source": [ + "G.df_input_GTF" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4a482960-0e05-4355-b91c-fc7f51c138c2", + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'G' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [4]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m G\u001b[38;5;241m.\u001b[39mdf_input_GTF[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mA\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mG\u001b[49m\u001b[38;5;241m.\u001b[39mdf_input_GTF\u001b[38;5;241m.\u001b[39mapply(\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mlambda\u001b[39;00m row: foo(row[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mseqname\u001b[39m\u001b[38;5;124m\"\u001b[39m], \n\u001b[1;32m 3\u001b[0m row[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstart\u001b[39m\u001b[38;5;124m\"\u001b[39m], \n\u001b[1;32m 4\u001b[0m row[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mend\u001b[39m\u001b[38;5;124m\"\u001b[39m]), \n\u001b[1;32m 5\u001b[0m axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 6\u001b[0m )\n", + "\u001b[0;31mNameError\u001b[0m: name 'G' is not defined" + ] + } + ], + "source": [ + "G.df_input_GTF[\"A\"] = G.df_input_GTF.apply(\n", + " lambda row: foo(row[\"seqname\"], \n", + " row[\"start\"], \n", + " row[\"end\"]), \n", + " axis=1\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "304fd031-524a-4fc3-9322-9b3dd55268fb", + "metadata": {}, + "outputs": [], + "source": [ + "# G.df_input_GTF[\"A\"] = G.df_input_GTF.apply(lambda row: foo(row), axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a8f7a36d-d768-4daf-8705-4cab5e06b562", + "metadata": {}, + "outputs": [], + "source": [ 
+ "G.df_input_GTF[G.df_input_GTF[\"seqname\"]==\"Transcript_1\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b0d7f2e4-c590-49a9-89eb-b53144dba9e8", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/cdna/cdna.py b/cdna/cdna.py index 1aaa2262b5cadfde403169af50ca6c47a97143e2..edd15c4432d0f1c1dae902715126c4b3d95376dd 100644 --- a/cdna/cdna.py +++ b/cdna/cdna.py @@ -1,44 +1,28 @@ import sys -import pandas as pd +import warnings +import pandas as pd +from Bio import SeqIO +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord from gtfparse import read_gtf +# ignore warnings from read_gtf +warnings.filterwarnings(action='ignore', category=FutureWarning) -# returns GTF with essential columns such as "feature", "seqname", "start", "end" -# alongside the names of any optional keys which appeared in the attribute column -df_input_GTF = read_gtf("Example_GTF_Input.gtf") -df_input_CSV = pd.read_csv("copy_number_input.csv") - -df_input_CSV = df_input_CSV.reset_index() # make sure indexes pair with number of rows - -df_input_GTF['Binding_Probability'] = pd.to_numeric(df_input_GTF['Binding_Probability']) # convert to numeric -df_normalization_bind_probablility = df_input_GTF.groupby('seqname')['Binding_Probability'].sum() # extract binding probablility - -# Add New columns to the existing DataFrame -df_input_GTF["Normalized_Binding_Probability"] = '' -df_input_GTF["Transcript_Copy_Number"] = '' - - -# Adds Normalized_Binding_Probability and Transcript_Copy_Number to each transcript in the dataframe -for 
index, row in df_input_GTF.iterrows(): - # GTF transcript ID - id_GTF = str(row['seqname']) - # CVS transcript ID - id_CSV = str(row['seqname']).split('_')[1] - # Calculate Normalized_Binding_Probability and add to GTF dataframe - df_input_GTF.loc[index, 'Normalized_Binding_Probability'] = row['Binding_Probability'] / df_normalization_bind_probablility[id_GTF] - # Calculate Normalized_Binding_Probability and add to GTF dataframe - csv_transcript_copy_number = df_input_CSV.loc[df_input_CSV['ID of transcript'] == int(id_CSV), 'Transcript copy number'].iloc[0] - df_input_GTF.loc[index,'Transcript_Copy_Number'] = round(csv_transcript_copy_number * df_input_GTF.loc[index,'Normalized_Binding_Probability']) - - -def translate(res): - translate_dict = {"A": "T", "U": "A", "G": "C", "C": "G"} +def compliment(res): + translate_dict = {"A": "T", "T": "A", "U": "A", "G": "C", "C": "G"} if res not in translate_dict.keys(): - print("cDNA residue not A,T,U or G ") - sys.exit(1) + print(f"Unknown character, {res}") + sys.exit(1) return translate_dict[res] +def seq_compliment(sequence): + if sequence is None: + return None + _ = "".join([compliment(char) for char in str(sequence)])[::-1] # reverse string + return _ + class cDNA_Gen: def __init__( @@ -50,52 +34,93 @@ class cDNA_Gen: self.cpn = cpn self.output_fasta = output_fasta self.output_csv = output_csv + # variables - self.prime_sites = [] - self.fasta_seq = "" - self.fasta_id = "" - self.copy_numbers = {} + self.fasta_dict = None + self.fasta_records = None self.run() def run(self): + self.read_csv() self.read_fasta() self.read_gtf() + self.add_sequences() + self.add_compliment() + self.add_records() + self.write_fasta() + + def add_records(self): + self.fasta_records = [] + for index, row in self.df_input_GTF.iterrows(): + if row["compliment"] is not None: + copy_number = row["Transcript_Copy_Number"] + record = SeqRecord( + Seq(row["compliment"]), + row["seqname"], + f"Transcript copy number: {copy_number}", + "") + 
self.fasta_records.append(record) + + def add_sequences(self): + self.df_input_GTF["priming_site"] = self.df_input_GTF.apply( + lambda row: self.read_primingsite(row["seqname"], + row["start"], + row["end"]), + axis=1) + + def add_compliment(self): + self.df_input_GTF["compliment"] = self.df_input_GTF["priming_site"].apply( + lambda x: seq_compliment(x)) + + def read_primingsite(self, sequence, start, end): + if sequence not in self.fasta_dict.keys(): + return None + _ = self.fasta_dict[sequence].seq[start:end] + return _ - def order_priming_sites(self): - pass + def read_fasta(self): + record = SeqIO.parse(self.fasta, "fasta") + records = list(record) + self.fasta_dict = {x.name: x for x in records} - def generate_cdna(self): - pass + def read_csv(self): + df_input_CSV = pd.read_csv(self.cpn, index_col=False) + df_input_CSV = df_input_CSV.reset_index() # make sure indexes pair with number of rows + self.df_input_CSV = df_input_CSV - def read_fasta(self): - fasta = open(self.fasta).readlines() - self.fasta_id = fasta[0] - print(fasta[0]) - self.fasta_seq = "".join([_.rstrip() for _ in fasta[1:]]) def read_gtf(self): - with open(self.gtf) as gtf_file: - gtf_lines = gtf_file.readlines() - for line in gtf_lines[:1000]: - if not line.startswith("#"): - temp_gtf = GTF_entry(line) - temp_gtf.set_sequence(self.fasta_seq) - self.prime_sites.append(temp_gtf) + # returns GTF with essential columns such as "feature", "seqname", "start", "end" + # alongside the names of any optional keys which appeared in the attribute column + df_input_GTF = read_gtf(self.gtf) + df_input_GTF['Binding_Probability'] = pd.to_numeric(df_input_GTF['Binding_Probability']) # convert to numeric + df_normalization_bind_probablility = df_input_GTF.groupby('seqname')['Binding_Probability'].sum() # extract binding probability + + # # Add New columns to the existing DataFrame + # df_input_GTF["Normalized_Binding_Probability"] = '' + # df_input_GTF["Transcript_Copy_Number"] = '' + + # Adds 
Normalized_Binding_Probability and Transcript_Copy_Number to each transcript in the dataframe + for index, row in df_input_GTF.iterrows(): + # GTF transcript ID + id_GTF = str(row['seqname']) + # CSV transcript ID + id_CSV = str(row['seqname']).split('_')[1] + # Calculate Normalized_Binding_Probability and add to GTF dataframe + df_input_GTF.loc[index, 'Normalized_Binding_Probability'] = row['Binding_Probability'] / df_normalization_bind_probablility[id_GTF] + # Calculate Transcript_Copy_Number and add to GTF dataframe + csv_transcript_copy_number = self.df_input_CSV.loc[self.df_input_CSV['ID of transcript'] == int(id_CSV), 'Transcript copy number'].iloc[0] + df_input_GTF.loc[index,'Transcript_Copy_Number'] = round(csv_transcript_copy_number * df_input_GTF.loc[index,'Normalized_Binding_Probability']) + + self.df_input_GTF = df_input_GTF def write_fasta(self): - pass + print(self.fasta_records) + SeqIO.write(self.fasta_records, self.output_fasta, "fasta") - def read_copy_numbers(self): - with open(self.cpn) as cpn_file: - cpn_lines = cpn_file.readlines() - for line in cpn_lines: - csv = line.split(",") - trans_id = csv[0] - if trans_id: - gene_id = csv[1] - count = csv[2] - self.copy_numbers[gene_id] = count + def write_csv(self): + pass def return_output(self): return self.output_fasta, self.output_csv @@ -104,5 +129,4 @@ class cDNA_Gen: if __name__ == "__main__": import argparse - pass diff --git a/setup.py b/setup.py index eca847b3176505320c62244c884b1c4878234d3b..96ecc09a8223aebbcef30db4096998562b1f0976 100644 --- a/setup.py +++ b/setup.py @@ -10,4 +10,4 @@ setup( version='1.0.0', packages=find_packages(), # this will autodetect Python packages from the directory tree, e.g., in `code/` install_requires=[], # add here packages that are required for your package to run, including version or range of versions - +) diff --git a/test_files/cDNA.fasta b/test_files/cDNA.fasta new file mode 100644 index 
0000000000000000000000000000000000000000..1b8d3989aede7948effa7afb4c43a10eac63ebb1 --- /dev/null +++ b/test_files/cDNA.fasta @@ -0,0 +1,6 @@ +>Transcript_1 +ATCCATAAAAAAAAA +>Transcript_1 +CATCTCAAAAAGTCT +>Transcript_2 +AAAAAAAAAAAAAAA