diff --git a/Untitled.ipynb b/Untitled.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..14c4f123ed3e8ce2c38692cb7fbea30d09590c5b
--- /dev/null
+++ b/Untitled.ipynb
@@ -0,0 +1,481 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "96ccbf23-4a46-4bae-bef2-fdf4c2466ad3",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "3157488c-08cf-4f10-95f6-745613382000",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from cdna.cdna import cDNA_Gen"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a6f5bc24-e454-49ad-b1e3-1f5b63c66ab2",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "ceb225a6-3a63-49e7-a0d3-a92256012261",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:root:Extracted GTF attributes: ['Accessibility_Energy', 'Hybridization_Energy', 'Interaction_Energy', 'Number_of_binding_sites', 'Binding_Probability']\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[SeqRecord(seq=Seq('ATCCATAAAAAAAAA'), id='Transcript_1', name='Transcript copy number: 8.0', description='', dbxrefs=[]), SeqRecord(seq=Seq('CATCTCAAAAAGTCT'), id='Transcript_1', name='Transcript copy number: 4.0', description='', dbxrefs=[]), SeqRecord(seq=Seq('AAAAAAAAAAAAAAA'), id='Transcript_2', name='Transcript copy number: 11.0', description='', dbxrefs=[])]\n"
+     ]
+    }
+   ],
+   "source": [
+    "test_path = \"/Users/ericboittier/Documents/github/cdna-generator/test_files/\"\n",
+    "gtf = test_path+\"Example_GTF_Input.gtf\"\n",
+    "cpn = test_path+\"copy_number_input.csv\"\n",
+    "fasta = test_path+\"yeast_example.fa\"\n",
+    "\n",
+    "G = cDNA_Gen(fasta, gtf, cpn, output_fasta=\"test_files/cDNA.fasta\", output_csv=\"test_files/cDNA.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "24976988-4a28-4932-8f53-d8ac7bb018f8",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>index</th>\n",
+       "      <th>ID of transcript</th>\n",
+       "      <th>ID of parent transcript</th>\n",
+       "      <th>Transcript copy number</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>12</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>11</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>2</td>\n",
+       "      <td>33</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>3</td>\n",
+       "      <td>4</td>\n",
+       "      <td>3</td>\n",
+       "      <td>11</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>4</td>\n",
+       "      <td>5</td>\n",
+       "      <td>4</td>\n",
+       "      <td>55</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   index  ID of transcript  ID of parent transcript  Transcript copy number\n",
+       "0      0                 1                        1                      12\n",
+       "1      1                 2                        1                      11\n",
+       "2      2                 3                        2                      33\n",
+       "3      3                 4                        3                      11\n",
+       "4      4                 5                        4                      55"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "G.df_input_CSV"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "f7fdfef3-58b5-45c5-bd0f-e215a3b13636",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>seqname</th>\n",
+       "      <th>source</th>\n",
+       "      <th>feature</th>\n",
+       "      <th>start</th>\n",
+       "      <th>end</th>\n",
+       "      <th>score</th>\n",
+       "      <th>strand</th>\n",
+       "      <th>frame</th>\n",
+       "      <th>Accessibility_Energy</th>\n",
+       "      <th>Hybridization_Energy</th>\n",
+       "      <th>Interaction_Energy</th>\n",
+       "      <th>Number_of_binding_sites</th>\n",
+       "      <th>Binding_Probability</th>\n",
+       "      <th>Normalized_Binding_Probability</th>\n",
+       "      <th>Transcript_Copy_Number</th>\n",
+       "      <th>priming_site</th>\n",
+       "      <th>compliment</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Transcript_1</td>\n",
+       "      <td>RIBlast</td>\n",
+       "      <td>Priming_site</td>\n",
+       "      <td>10</td>\n",
+       "      <td>25</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>+</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1.49</td>\n",
+       "      <td>-9.76</td>\n",
+       "      <td>-8.74</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0.12</td>\n",
+       "      <td>0.705882</td>\n",
+       "      <td>8.0</td>\n",
+       "      <td>(T, T, T, T, T, T, T, T, T, A, T, G, G, A, T)</td>\n",
+       "      <td>ATCCATAAAAAAAAA</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>Transcript_1</td>\n",
+       "      <td>RIBlast</td>\n",
+       "      <td>Priming_site</td>\n",
+       "      <td>640</td>\n",
+       "      <td>655</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>+</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1.71</td>\n",
+       "      <td>-9.12</td>\n",
+       "      <td>-8.34</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0.05</td>\n",
+       "      <td>0.294118</td>\n",
+       "      <td>4.0</td>\n",
+       "      <td>(A, G, A, C, T, T, T, T, T, G, A, G, A, T, G)</td>\n",
+       "      <td>CATCTCAAAAAGTCT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>Transcript_2</td>\n",
+       "      <td>RIBlast</td>\n",
+       "      <td>Priming_site</td>\n",
+       "      <td>3</td>\n",
+       "      <td>18</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>+</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1.21</td>\n",
+       "      <td>-5.12</td>\n",
+       "      <td>-2.34</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.15</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>11.0</td>\n",
+       "      <td>(T, T, T, T, T, T, T, T, T, T, T, T, T, T, T)</td>\n",
+       "      <td>AAAAAAAAAAAAAAA</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>Transcript_3</td>\n",
+       "      <td>RIBlast</td>\n",
+       "      <td>Priming_site</td>\n",
+       "      <td>5</td>\n",
+       "      <td>35</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>+</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1.21</td>\n",
+       "      <td>-5.12</td>\n",
+       "      <td>-2.34</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.25</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>33.0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>Transcript_4</td>\n",
+       "      <td>RIBlast</td>\n",
+       "      <td>Priming_site</td>\n",
+       "      <td>5</td>\n",
+       "      <td>35</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>+</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1.21</td>\n",
+       "      <td>-5.12</td>\n",
+       "      <td>-2.34</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.15</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>11.0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>Transcript_5</td>\n",
+       "      <td>RIBlast</td>\n",
+       "      <td>Priming_site</td>\n",
+       "      <td>5</td>\n",
+       "      <td>35</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>+</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1.21</td>\n",
+       "      <td>-5.12</td>\n",
+       "      <td>-2.34</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.15</td>\n",
+       "      <td>1.000000</td>\n",
+       "      <td>55.0</td>\n",
+       "      <td>None</td>\n",
+       "      <td>None</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "        seqname   source       feature  start  end  score strand frame  \\\n",
+       "0  Transcript_1  RIBlast  Priming_site     10   25    NaN      +     0   \n",
+       "1  Transcript_1  RIBlast  Priming_site    640  655    NaN      +     0   \n",
+       "2  Transcript_2  RIBlast  Priming_site      3   18    NaN      +     0   \n",
+       "3  Transcript_3  RIBlast  Priming_site      5   35    NaN      +     0   \n",
+       "4  Transcript_4  RIBlast  Priming_site      5   35    NaN      +     0   \n",
+       "5  Transcript_5  RIBlast  Priming_site      5   35    NaN      +     0   \n",
+       "\n",
+       "  Accessibility_Energy Hybridization_Energy Interaction_Energy  \\\n",
+       "0                 1.49                -9.76              -8.74   \n",
+       "1                 1.71                -9.12              -8.34   \n",
+       "2                 1.21                -5.12              -2.34   \n",
+       "3                 1.21                -5.12              -2.34   \n",
+       "4                 1.21                -5.12              -2.34   \n",
+       "5                 1.21                -5.12              -2.34   \n",
+       "\n",
+       "  Number_of_binding_sites  Binding_Probability  \\\n",
+       "0                       2                 0.12   \n",
+       "1                       2                 0.05   \n",
+       "2                       1                 0.15   \n",
+       "3                       1                 0.25   \n",
+       "4                       1                 0.15   \n",
+       "5                       1                 0.15   \n",
+       "\n",
+       "   Normalized_Binding_Probability  Transcript_Copy_Number  \\\n",
+       "0                        0.705882                     8.0   \n",
+       "1                        0.294118                     4.0   \n",
+       "2                        1.000000                    11.0   \n",
+       "3                        1.000000                    33.0   \n",
+       "4                        1.000000                    11.0   \n",
+       "5                        1.000000                    55.0   \n",
+       "\n",
+       "                                    priming_site       compliment  \n",
+       "0  (T, T, T, T, T, T, T, T, T, A, T, G, G, A, T)  ATCCATAAAAAAAAA  \n",
+       "1  (A, G, A, C, T, T, T, T, T, G, A, G, A, T, G)  CATCTCAAAAAGTCT  \n",
+       "2  (T, T, T, T, T, T, T, T, T, T, T, T, T, T, T)  AAAAAAAAAAAAAAA  \n",
+       "3                                           None             None  \n",
+       "4                                           None             None  \n",
+       "5                                           None             None  "
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "G.df_input_GTF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9e7fe7be-d134-4c1e-8eea-55273c79e39d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "G.add_sequences()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "2eb8f224-0292-4bb0-9e27-ef3bc3676f1d",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'G' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "Input \u001b[0;32mIn [3]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mG\u001b[49m\u001b[38;5;241m.\u001b[39mdf_input_GTF\n",
+      "\u001b[0;31mNameError\u001b[0m: name 'G' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "G.df_input_GTF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "4a482960-0e05-4355-b91c-fc7f51c138c2",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "NameError",
+     "evalue": "name 'G' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "Input \u001b[0;32mIn [4]\u001b[0m, in \u001b[0;36m<cell line: 1>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m G\u001b[38;5;241m.\u001b[39mdf_input_GTF[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mA\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mG\u001b[49m\u001b[38;5;241m.\u001b[39mdf_input_GTF\u001b[38;5;241m.\u001b[39mapply(\n\u001b[1;32m      2\u001b[0m     \u001b[38;5;28;01mlambda\u001b[39;00m row: foo(row[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mseqname\u001b[39m\u001b[38;5;124m\"\u001b[39m], \n\u001b[1;32m      3\u001b[0m                     row[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstart\u001b[39m\u001b[38;5;124m\"\u001b[39m], \n\u001b[1;32m      4\u001b[0m                     row[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mend\u001b[39m\u001b[38;5;124m\"\u001b[39m]), \n\u001b[1;32m      5\u001b[0m     axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m\n\u001b[1;32m      6\u001b[0m )\n",
+      "\u001b[0;31mNameError\u001b[0m: name 'G' is not defined"
+     ]
+    }
+   ],
+   "source": [
+    "G.df_input_GTF[\"A\"] = G.df_input_GTF.apply(\n",
+    "    lambda row: foo(row[\"seqname\"], \n",
+    "                    row[\"start\"], \n",
+    "                    row[\"end\"]), \n",
+    "    axis=1\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "304fd031-524a-4fc3-9322-9b3dd55268fb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# G.df_input_GTF[\"A\"] = G.df_input_GTF.apply(lambda row: foo(row), axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a8f7a36d-d768-4daf-8705-4cab5e06b562",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "G.df_input_GTF[G.df_input_GTF[\"seqname\"]==\"Transcript_1\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b0d7f2e4-c590-49a9-89eb-b53144dba9e8",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/cdna/cdna.py b/cdna/cdna.py
index 1aaa2262b5cadfde403169af50ca6c47a97143e2..edd15c4432d0f1c1dae902715126c4b3d95376dd 100644
--- a/cdna/cdna.py
+++ b/cdna/cdna.py
@@ -1,44 +1,28 @@
 import sys
-import pandas as pd
+import warnings
 
+import pandas as pd
+from Bio import SeqIO
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
 from gtfparse import read_gtf
 
+# Suppress every FutureWarning process-wide (intended to silence the ones triggered by read_gtf)
+warnings.filterwarnings(action='ignore', category=FutureWarning) 
 
-# returns GTF with essential columns such as "feature", "seqname", "start", "end"
-# alongside the names of any optional keys which appeared in the attribute column
-df_input_GTF = read_gtf("Example_GTF_Input.gtf")
-df_input_CSV = pd.read_csv("copy_number_input.csv")
-
-df_input_CSV = df_input_CSV.reset_index()  # make sure indexes pair with number of rows
-
-df_input_GTF['Binding_Probability'] = pd.to_numeric(df_input_GTF['Binding_Probability']) # convert to numeric
-df_normalization_bind_probablility = df_input_GTF.groupby('seqname')['Binding_Probability'].sum() # extract binding probablility
-
-# Add New columns to the existing DataFrame
-df_input_GTF["Normalized_Binding_Probability"] = ''
-df_input_GTF["Transcript_Copy_Number"] = ''
-
-
-# Adds Normalized_Binding_Probability and Transcript_Copy_Number to each transcript in the dataframe
-for index, row in df_input_GTF.iterrows():
-    # GTF transcript ID 
-    id_GTF = str(row['seqname'])                
-    # CVS transcript ID 
-    id_CSV = str(row['seqname']).split('_')[1]  
-    # Calculate Normalized_Binding_Probability and add to GTF dataframe
-    df_input_GTF.loc[index, 'Normalized_Binding_Probability'] = row['Binding_Probability'] / df_normalization_bind_probablility[id_GTF]
-    # Calculate Normalized_Binding_Probability and add to GTF dataframe
-    csv_transcript_copy_number = df_input_CSV.loc[df_input_CSV['ID of transcript'] == int(id_CSV), 'Transcript copy number'].iloc[0]
-    df_input_GTF.loc[index,'Transcript_Copy_Number'] = round(csv_transcript_copy_number * df_input_GTF.loc[index,'Normalized_Binding_Probability'])
-    
-
-def translate(res):
-    translate_dict = {"A": "T", "U": "A", "G": "C", "C": "G"}
+def compliment(res):
+    translate_dict = {"A": "T", "T": "A", "U": "A", "G": "C", "C": "G"}
     if res not in translate_dict.keys():
-        print("cDNA residue not A,T,U or G ")
-        sys.exit(1)
+        print(f"Unknown character, {res}")
+        sys.exit(1) 
     return translate_dict[res]
 
+def seq_compliment(sequence):
+    if sequence is None:
+        return None
+    _ = "".join([compliment(char) for char in str(sequence)])[::-1] #  reverse string
+    return _
+
 
 class cDNA_Gen:
     def __init__(
@@ -50,52 +34,93 @@ class cDNA_Gen:
         self.cpn = cpn
         self.output_fasta = output_fasta
         self.output_csv = output_csv
+
         # variables
-        self.prime_sites = []
-        self.fasta_seq = ""
-        self.fasta_id = ""
-        self.copy_numbers = {}
+        self.fasta_dict = None
+        self.fasta_records = None
 
         self.run()
 
     def run(self):
+        self.read_csv()
         self.read_fasta()
         self.read_gtf()
+        self.add_sequences()
+        self.add_compliment()
+        self.add_records()
+        self.write_fasta()
+
+    def add_records(self):
+        self.fasta_records = []
+        for index, row in self.df_input_GTF.iterrows():
+            if row["compliment"] is not None:
+                copy_number = row["Transcript_Copy_Number"]
+                record = SeqRecord(
+                    Seq(row["compliment"]), 
+                    row["seqname"], 
+                    f"Transcript copy number: {copy_number}",
+                     "")
+                self.fasta_records.append(record)
+
+    def add_sequences(self):
+        self.df_input_GTF["priming_site"] = self.df_input_GTF.apply(
+            lambda row: self.read_primingsite(row["seqname"], 
+                            row["start"], 
+                            row["end"]), 
+            axis=1)
+
+    def add_compliment(self):
+        self.df_input_GTF["compliment"] = self.df_input_GTF["priming_site"].apply(
+            lambda x: seq_compliment(x))
+
+    def read_primingsite(self, sequence, start, end):
+        if sequence not in self.fasta_dict.keys():
+            return None
+        _ = self.fasta_dict[sequence].seq[start:end]
+        return _
 
-    def order_priming_sites(self):
-        pass
+    def read_fasta(self):
+        record = SeqIO.parse(self.fasta, "fasta")
+        records = list(record)
+        self.fasta_dict = {x.name: x for x in records}
 
-    def generate_cdna(self):
-        pass
+    def read_csv(self):
+        df_input_CSV = pd.read_csv(self.cpn, index_col=False)
+        df_input_CSV = df_input_CSV.reset_index()  # make sure indexes pair with number of rows
+        self.df_input_CSV = df_input_CSV
 
-    def read_fasta(self):
-        fasta = open(self.fasta).readlines()
-        self.fasta_id = fasta[0]
-        print(fasta[0])
-        self.fasta_seq = "".join([_.rstrip() for _ in fasta[1:]])
 
     def read_gtf(self):
-        with open(self.gtf) as gtf_file:
-            gtf_lines = gtf_file.readlines()
-            for line in gtf_lines[:1000]:
-                if not line.startswith("#"):
-                    temp_gtf = GTF_entry(line)
-                    temp_gtf.set_sequence(self.fasta_seq)
-                    self.prime_sites.append(temp_gtf)
+        # read_gtf returns a DataFrame with the essential GTF columns such as "feature", "seqname", "start", "end",
+        # plus one column for each optional key that appeared in the attribute column
+        df_input_GTF = read_gtf(self.gtf)
+        df_input_GTF['Binding_Probability'] = pd.to_numeric(df_input_GTF['Binding_Probability']) # convert to numeric
+        df_normalization_bind_probablility = df_input_GTF.groupby('seqname')['Binding_Probability'].sum() # extract binding probablility
+
+        # # Add New columns to the existing DataFrame
+        # df_input_GTF["Normalized_Binding_Probability"] = ''
+        # df_input_GTF["Transcript_Copy_Number"] = ''
+
+        # Adds Normalized_Binding_Probability and Transcript_Copy_Number to each row (one priming site per row) of the dataframe
+        for index, row in df_input_GTF.iterrows():
+            # GTF transcript ID 
+            id_GTF = str(row['seqname'])                
+            # CSV transcript ID
+            id_CSV = str(row['seqname']).split('_')[1]  
+            # Calculate Normalized_Binding_Probability and add to GTF dataframe
+            df_input_GTF.loc[index, 'Normalized_Binding_Probability'] = row['Binding_Probability'] / df_normalization_bind_probablility[id_GTF]
+            # Calculate Transcript_Copy_Number (CSV copy number scaled by Normalized_Binding_Probability, rounded) and add to GTF dataframe
+            csv_transcript_copy_number = self.df_input_CSV.loc[self.df_input_CSV['ID of transcript'] == int(id_CSV), 'Transcript copy number'].iloc[0]
+            df_input_GTF.loc[index,'Transcript_Copy_Number'] = round(csv_transcript_copy_number * df_input_GTF.loc[index,'Normalized_Binding_Probability'])
+            
+        self.df_input_GTF = df_input_GTF
 
     def write_fasta(self):
-        pass
+        print(self.fasta_records)
+        SeqIO.write(self.fasta_records, self.output_fasta, "fasta")
 
-    def read_copy_numbers(self):
-        with open(self.cpn) as cpn_file:
-            cpn_lines = cpn_file.readlines()
-            for line in cpn_lines:
-                csv = line.split(",")
-                trans_id = csv[0]
-                if trans_id:
-                    gene_id = csv[1]
-                    count = csv[2]
-                    self.copy_numbers[gene_id] = count
+    def write_csv(self):
+        pass
 
     def return_output(self):
         return self.output_fasta, self.output_csv
@@ -104,5 +129,4 @@ class cDNA_Gen:
 
 if __name__ == "__main__":
     import argparse
-
     pass
diff --git a/setup.py b/setup.py
index eca847b3176505320c62244c884b1c4878234d3b..96ecc09a8223aebbcef30db4096998562b1f0976 100644
--- a/setup.py
+++ b/setup.py
@@ -10,4 +10,4 @@ setup(
     version='1.0.0',
     packages=find_packages(),  # this will autodetect Python packages from the directory tree, e.g., in `code/`
     install_requires=[],  # add here packages that are required for your package to run, including version or range of versions
-
+)
diff --git a/test_files/cDNA.fasta b/test_files/cDNA.fasta
new file mode 100644
index 0000000000000000000000000000000000000000..1b8d3989aede7948effa7afb4c43a10eac63ebb1
--- /dev/null
+++ b/test_files/cDNA.fasta
@@ -0,0 +1,6 @@
+>Transcript_1
+ATCCATAAAAAAAAA
+>Transcript_1
+CATCTCAAAAAGTCT
+>Transcript_2
+AAAAAAAAAAAAAAA