diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..0be791f97a0ff41cfcb9194d1ac6fbdea9665f90
Binary files /dev/null and b/.DS_Store differ
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..26d33521af10bcc7fd8cea344038eaaeb78d0ef5
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/.idea/Generate_cDNA.iml b/.idea/Generate_cDNA.iml
new file mode 100644
index 0000000000000000000000000000000000000000..d0876a78d06ac03b5d78c8dcdb95570281c6f1d6
--- /dev/null
+++ b/.idea/Generate_cDNA.iml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000000000000000000000000000000000000..105ce2da2d6447d11dfe32bfb846c3d5b199fc99
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000000000000000000000000000000000000..7ba73c25da2261de84065478d9e30daadbe7ae3c
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000000000000000000000000000000000000..2859f751a6abdb8dc90e753aac87246408d80272
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/Generate_cDNA.iml" filepath="$PROJECT_DIR$/.idea/Generate_cDNA.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000000000000000000000000000000000000..94a25f7f4cb416c083d265558da75d457237d671
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/Generate_cDNA.py b/Generate_cDNA.py
deleted file mode 100644
index c06fddb82546d7cc2ef25002b7828e10fab78f42..0000000000000000000000000000000000000000
--- a/Generate_cDNA.py
+++ /dev/null
@@ -1,237 +0,0 @@
-#!/usr/bin/env python3
-#Author: Suvarnan Selliah
-
-class GeneratecDNA:
-
-    def generatecDNA(fasta, gtf, cp_nr, my_output_fasta="cDNA.fasta", my_output_csv="cDNA.csv", placeholder = "ph-file.csv"):
-
-        cDNA_transcript_id = 1
-        #READING INPUT FILES / PART I
-        #open files
-        with open(fasta, 'r') as fa, open(gtf, 'r') as gt, open(cp_nr, 'r') as cp:
-
-            #read fasta-file
-            for myFastaline in fa:
-
-                #search for transcript id and transcript sequence in fasta-file
-                fasta_id = ""
-                fasta_seq = ""
-                fasta_id_found = False
-                fasta_seq_found = False
-                currentFastaString = myFastaline
-                if fasta_id_found == False:
-                    position_of_start = currentFastaString.find('>')
-                    if position_of_start != 0:
-                        continue
-                    elif position_of_start == 0:
-                        fasta_id = myFastaline
-                        fasta_id = fasta_id.replace(">", "")
-                        fasta_id_found = True
-                        continue
-                    else:
-                        print("FASTA: Start position in fasta file not found")
-                        break
-                if fasta_id_found == True and fasta_seq_found == False:
-                    while fasta_seq_found == False:
-                        currentFastaString = fa.readline()
-                        zero_position = currentFastaString[0]
-                        if zero_position == ";":
-                            continue
-                        elif zero_position == ">":
-                            print("FASTA: No Sequence after headline")
-                            break
-                        else:
-                            fasta_seq = currentFastaString
-                            fasta_seq_found = True
-
-                #starting to work with gtf-file
-                #defining variables for gtf-file
-                gtf_seqname = ''
-                gtf_start = 0
-                gtf_end = 0
-                gtf_score = 0.0
-                gtf_info_found = False
-                gtf_entries = 0
-                gtf_list_of_lines = []
-                gtf_prob_list = []
-
-                #defining variables for csv-file
-                csv_trans_id = ""
-                csv_gene_id = ""
-                csv_count = 0
-                csv_info_found = False
-
-                if fasta_id_found == True and fasta_seq_found == True:
-                    # search for transcript id from fasta-file in gtf-file
-                    for myGTFline in gt:
-                        currentGTFString = myGTFline
-                        gtf_list = currentGTFString.split('\t')
-                        gtf_seqname = gtf_list[0]
-                        if gtf_seqname == fasta_id:
-                            gtf_entries += 1
-                            gtf_start = gtf_list[3]
-                            gtf_end = gtf_list[4]
-                            gtf_score = gtf_list[5]
-                            gtf_temp_list = []
-                            gtf_temp_list.append(gtf_start)
-                            gtf_temp_list.append(gtf_end)
-                            gtf_temp_list.append(gtf_score)
-                            gtf_list_of_lines.append(gtf_temp_list)
-                            gtf_prob_list.append(gtf_score)
-                        else:
-                            continue
-                    if gtf_entries != 0:
-                        gtf_info_found = True
-                        fasta_id_found = False
-                        fasta_seq_found = False
-                    assert gtf_info_found, "Sequence ID from fasta-file not found in gtf-file"
-
-                    #search copy number of transcript in 3. file/csv-file
-                    for myCSVline in cp:
-                        currentCSVString = myCSVline
-                        csv_list = currentCSVString.split(',')
-                        csv_trans_id = csv_list[0]
-                        if csv_trans_id == fasta_id:
-                            csv_gene_id = csv_list[1]
-                            csv_count = csv_list[2]
-                            csv_info_found = True
-                            gtf_info_found = False
-                            break
-                        else:
-                            continue
-                    assert csv_info_found, "Data (TranscriptID,GeneID,Count) from csv-file/3.file not found"
-
-                    #COMPUTATION & OUTPUT / PART II
-                    #set score to 0 for primimg sites close to the end of sequence
-                    seq_len = len(fasta_seq)
-                    seq_len -= 22
-                    gtf_list_of_lines_len = len(gtf_list_of_lines)
-                    for i in range(gtf_list_of_lines_len):
-                        if gtf_list_of_lines[i][1] > seq_len:
-                            gtf_list_of_lines[i][2] = 0.0
-                            gtf_prob_list[i] = 0.0
-
-                    #assign priming sites (according to score) to copy number
-                    sum_of_score = 0.0
-                    for i in gtf_prob_list:
-                        sum_of_score += i
-                    one_score = 100/sum_of_score
-                    norm_list = []
-                    gtf_prob_list_len = len(gtf_prob_list)
-                    for i in range(gtf_prob_list_len):
-                        norm_list.append((one_score * gtf_prob_list[i]))
-                    one_norm = csv_count / 100
-                    distr_RNA_to_prim_sites = []
-                    norm_list_len = len(norm_list)
-                    for i in range(norm_list_len):
-                        distr_RNA_to_prim_sites.append((one_norm * norm_list[i]))
-                    total_RNA_number = 0
-                    distr_RNA_to_prim_sites_len = len(distr_RNA_to_prim_sites)
-                    for i in range(distr_RNA_to_prim_sites_len):
-                        total_RNA_number += distr_RNA_to_prim_sites[i]
-                    new_distr_RNA_to_prim_sites = []
-                    if total_RNA_number != csv_count:
-                        for i in range(distr_RNA_to_prim_sites_len):
-                            new_distr_RNA_to_prim_sites.append(int(distr_RNA_to_prim_sites[i]))
-                    new_distr_RNA_to_prim_sites_len = len(new_distr_RNA_to_prim_sites)
-                    counter = 0
-                    while total_RNA_number != csv_count:
-                        new_distr_RNA_to_prim_sites[counter] = round(distr_RNA_to_prim_sites[counter])
-                        counter += 1
-                        assert counter <= new_distr_RNA_to_prim_sites_len, "Calculated RNA transcripts (assigned to priming sites) are more than initial count"
-
-                    #order the priming sites
-                    prim_sites_ordered = []
-                    for i in range(gtf_list_of_lines_len):
-                        prim_sites_ordered.append(gtf_list_of_lines[i][0])
-                    prim_sites_ordered.sort()
-
-                    #searching for 2 priming sites
-                    prim_sites_ordered_len = len(prim_sites_ordered)
-                    ph_1 = 0
-                    ph_2 = 0
-                    for i in range(0, (prim_sites_ordered_len-1), 2):
-                        if prim_sites_ordered[i] == 0.0:
-                            continue
-                        else:
-                            search_for_1 = prim_sites_ordered[i]
-                            search_for_2 = prim_sites_ordered[i+1]
-                            for j in range(gtf_list_of_lines_len):
-                                if gtf_list_of_lines[j][0] == search_for_1:
-                                    ph_1 = j
-                                if gtf_list_of_lines[j][0] == search_for_2:
-                                    ph_2 = j
-                            #making cDNA and comparing in library
-                            start_1 = gtf_list_of_lines[ph_1][0]
-                            start_2 = gtf_list_of_lines[ph_2][0]
-                            start_1 -= 1
-                            start_2 -= 1
-                            trans_between_prim_sites = fasta_seq[start_1:start_2]
-                            cDNA = ''
-                            for element in range(0, len(trans_between_prim_sites)):
-                                if trans_between_prim_sites[element] == 'A':
-                                    cDNA[element] = 'T'
-                                elif trans_between_prim_sites[element] == 'U':
-                                    cDNA[element] = 'A'
-                                elif trans_between_prim_sites[element] == 'G':
-                                    cDNA[element] = 'C'
-                                elif trans_between_prim_sites[element] == 'C':
-                                    cDNA[element] = 'G'
-                                else:
-                                    assert False, "cDNA synthesis failed, position is not A,U,G or C in transcript"
-                            # open output files
-                            if i == 0:
-                                with open(my_output_fasta, 'a') as myfasta, open(my_output_csv, 'a') as mycsv:
-                                    myfasta.write(">" + string(cDNA_transcript_id))
-                                    myfasta.write("\n")
-                                    myfasta.write(cDNA)
-                                    mycsv.write(",".join([cDNA_transcript_id, csv_gene_id, new_distr_RNA_to_prim_sites[ph_1]]))
-                            else:
-                                found_cDNA_id = ''
-                                found_cDNA_id_bool = False
-                                with open(my_output_fasta, 'r') as myfasta, open(my_output_csv, 'r') as mycsv, open(placeholder, 'w') as phf:
-                                    for myline in myfasta:
-                                        pos = myline.find('>')
-                                        if pos != 0:
-                                            continue
-                                        if pos == 0:
-                                            fid = myline
-                                            fid = fid.replace(">", "")
-                                            fbool = False
-                                            while fbool == False:
-                                                myline = myfasta.readline()
-                                                zer = myline[0]
-                                                if zer == ";":
-                                                    continue
-                                                elif zer == ">":
-                                                    assert False, "Error in searching cDNA in output fasta-file"
-                                                else:
-                                                    fseq = myline
-                                                    fbool = True
-                                            if fseq ==cDNA:
-                                                found_cDNA_id = fid
-                                                found_cDNA_id_bool = True
-                                                break
-                                    if found_cDNA_id_bool == True:
-                                        for myline_csv_out in mycsv:
-                                            csvlist = myline_csv_out.split(',')
-                                            csvcDNAid = csvlist[0]
-                                            if csvcDNAid == found_cDNA_id:
-                                                csvgeneid = csvlist[1]
-                                                csvcDNAcount = csvlist[2]
-                                                csvcDNAcount += new_distr_RNA_to_prim_sites[ph_1]
-                                                phf.write(",".join([csvcDNAid, csvgeneid, csvcDNAcount]))
-                                            else:
-                                                phf.write(myline_csv_out)
-                                if found_cDNA_id_bool == True:
-                                    with open(my_output_csv, 'w') as mycsv, open(placeholder, 'r') as phf:
-                                        for myplaceholder in phf:
-                                            mycsv.write(myplaceholder)
-                                else:
-                                    cDNA_transcript_id += 1
-                                    with open(my_output_fasta, 'a') as myfasta, open(my_output_csv, 'a') as mycsv:
-                                        myfasta.write(">" + string(cDNA_transcript_id))
-                                        myfasta.write("\n")
-                                        myfasta.write(cDNA)
-                                        mycsv.write(",".join(
-                                            [cDNA_transcript_id, csv_gene_id, new_distr_RNA_to_prim_sites[ph_1]]))
\ No newline at end of file
diff --git a/LICENSE.md b/LICENSE.md
new file mode 100644
index 0000000000000000000000000000000000000000..d5499e763f555733f046b040fdaa0e7b30afd2c7
--- /dev/null
+++ b/LICENSE.md
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021 Suvarnan Selliah and Ruth Eneida Montano Crespo
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6354e709f6f3684d9fc0a62279323b5244ea0399
--- /dev/null
+++ b/README.md
@@ -0,0 +1,15 @@
+
+## cDNA generator
+
+This script generates cDNA copies of transcripts allowing for the priming of DNA synthesis at transcript-internal sites.
+
+Input:
+
+* fasta-formatted file of transcript sequences
+* gtf-formatted file with potential priming sites for individual transcripts, with associated probabilities
+* file with the copy number of each unique transcript subjected to the cDNA synthesis
+
+Output:
+
+* fasta-formatted file with DNA copies of the transcripts, each ending at one of the possible priming sites for that transcript. Priming sites are sampled in proportion to their probability. Each copy of a unique transcript is independently sampled, but only unique DNA sequences are saved to the output file.
+* csv-formatted file with the copy number of each unique DNA copy.
\ No newline at end of file
diff --git a/generatecDNA-nf.nf b/generatecDNA-nf.nf
new file mode 100644
index 0000000000000000000000000000000000000000..100a08a0acc3c4b1e0ce454a4ec674fd48e05658
--- /dev/null
+++ b/generatecDNA-nf.nf
@@ -0,0 +1,25 @@
+#!/usr/bin/env nextflow
+
+/* Path to transcript sequences sampled (fasta formatted) */
+params.sampledTranscriptSeq = "$baseDir/tests/*.fasta"
+/* Path to potential priming sites for individual transcripts (gtf formatted) */
+params.internalPrimingSites = "$baseDir/tests/*.gtf"
+/* Path to copy number of each unique transcript subjected to the cDNA synthesis (csv; generatecDNA.py reads the transcript ID from column 1 and the count from column 3) */
+params.transcriptCounts = "$baseDir/tests/*.csv"
+
+process generatecDNA {
+
+    input:
+	path sampledTranscriptSeq from params.sampledTranscriptSeq
+	path internalPrimingSites from params.internalPrimingSites
+    path transcriptCounts from params.transcriptCounts
+
+    output:
+/* O5. Path to unique cDNA sequences */
+	path "cDNA_Seq"	into cDNAseq_ch
+/* O6. Path to cDNA count table */
+	path "cDNA_Count" into cDNA_Count_ch
+
+    """
+    python $baseDir/generatecDNA/generatecDNA.py -rna ${sampledTranscriptSeq}  -gtf ${internalPrimingSites} -cnr ${transcriptCounts}
+	"""
\ No newline at end of file
diff --git a/generatecDNA.egg-info/PKG-INFO b/generatecDNA.egg-info/PKG-INFO
new file mode 100644
index 0000000000000000000000000000000000000000..ccabcbf3f3825340e7447fb6861d5f584ba273ce
--- /dev/null
+++ b/generatecDNA.egg-info/PKG-INFO
@@ -0,0 +1,13 @@
+Metadata-Version: 2.1
+Name: generatecDNA
+Version: 0.1.0
+Summary: Generates cDNA copies of RNA transcript from internal priming sites
+Home-page: https://git.scicore.unibas.ch/zavolan_group/pipelines/scrna-seq-simulation.git
+Author: Suvarnan Selliah and Ruth Eneida Montano Crespo
+Author-email: r.montanocrespo@unibas.ch
+License: MIT
+Platform: UNKNOWN
+License-File: LICENSE.md
+
+UNKNOWN
+
diff --git a/generatecDNA.egg-info/SOURCES.txt b/generatecDNA.egg-info/SOURCES.txt
new file mode 100644
index 0000000000000000000000000000000000000000..94fdf5c7097bd4461c3adc36d54f2506c4401897
--- /dev/null
+++ b/generatecDNA.egg-info/SOURCES.txt
@@ -0,0 +1,11 @@
+LICENSE.md
+README.md
+setup.py
+generatecDNA/__init__.py
+generatecDNA/generatecDNA-cli.py
+generatecDNA/generatecDNA.py
+generatecDNA.egg-info/PKG-INFO
+generatecDNA.egg-info/SOURCES.txt
+generatecDNA.egg-info/dependency_links.txt
+generatecDNA.egg-info/entry_points.txt
+generatecDNA.egg-info/top_level.txt
\ No newline at end of file
diff --git a/generatecDNA.egg-info/dependency_links.txt b/generatecDNA.egg-info/dependency_links.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc
--- /dev/null
+++ b/generatecDNA.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/generatecDNA.egg-info/entry_points.txt b/generatecDNA.egg-info/entry_points.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7f34db363c7308d02bd1d1edacc7a0fb77c52283
--- /dev/null
+++ b/generatecDNA.egg-info/entry_points.txt
@@ -0,0 +1,3 @@
+[console_scripts]
+generatecDNA = generatecDNA.__main__:main
+
diff --git a/generatecDNA.egg-info/top_level.txt b/generatecDNA.egg-info/top_level.txt
new file mode 100644
index 0000000000000000000000000000000000000000..06a294c32bfe9755537b48a4f1f835a1052d80ad
--- /dev/null
+++ b/generatecDNA.egg-info/top_level.txt
@@ -0,0 +1 @@
+generatecDNA
diff --git a/generatecDNA/.DS_Store b/generatecDNA/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..d26fe4d4827233bf0a8017f5f274df73be301a99
Binary files /dev/null and b/generatecDNA/.DS_Store differ
diff --git a/generatecDNA/__init__.py b/generatecDNA/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..dde8fb9c9bece6a973113aaa11f2aad7aef1925e
--- /dev/null
+++ b/generatecDNA/__init__.py
@@ -0,0 +1,3 @@
+"""Package initializer for generatecDNA; defines the package version."""
+
+__version__ = "0.1.0"
diff --git a/generatecDNA/__pycache__/__init__.cpython-310.pyc b/generatecDNA/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f05da8021066e73461eaab38e654999c8cde7044
Binary files /dev/null and b/generatecDNA/__pycache__/__init__.cpython-310.pyc differ
diff --git a/generatecDNA/__pycache__/generatecDNA.cpython-310.pyc b/generatecDNA/__pycache__/generatecDNA.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3fa119fa372e6138678c2cb12a3cb877b3da94ba
Binary files /dev/null and b/generatecDNA/__pycache__/generatecDNA.cpython-310.pyc differ
diff --git a/generatecDNA/__pycache__/generatecDNA.cpython-39.pyc b/generatecDNA/__pycache__/generatecDNA.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f4322b4d9bb4b2f764b84c23b40cd483e44a35fc
Binary files /dev/null and b/generatecDNA/__pycache__/generatecDNA.cpython-39.pyc differ
diff --git a/generatecDNA/generatecDNA-cli.py b/generatecDNA/generatecDNA-cli.py
new file mode 100644
index 0000000000000000000000000000000000000000..46009bfbc33f17e2589eec56dcecd31c8c65022b
--- /dev/null
+++ b/generatecDNA/generatecDNA-cli.py
@@ -0,0 +1,37 @@
+"""Command-line interface client."""
+
+import argparse
+import generatecDNA as gn
+
+
+def main() -> None:
+    """Parse the -rna, -gtf and -cnr options and run the cDNA generator."""
+    parser = argparse.ArgumentParser(description="cDNA generator")
+
+    parser.add_argument(
+        "-rna",
+        type=str,
+        metavar="",
+        help="Path file to fasta file with RNA sequence")
+    parser.add_argument(
+        "-gtf",
+        type=str,
+        metavar="",
+        help="Path file to gtf file")
+    parser.add_argument(
+        "-cnr",
+        type=str,
+        metavar="",
+        help="Path file to copy number file")
+
+    args = parser.parse_args()  # options not given on the CLI are None
+
+    Generator = gn.GeneratecDNA(  # constructor only stores the three paths
+        fastaFile=args.rna, gtf=args.gtf, cp_nr=args.cnr)
+    Generator.generatecDNA(  # the method takes the same paths as arguments
+        fastaFile=args.rna, gtf=args.gtf, cp_nr=args.cnr)
+    print("Done")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/generatecDNA/generatecDNA.py b/generatecDNA/generatecDNA.py
new file mode 100644
index 0000000000000000000000000000000000000000..0f470ab53458d2cf6d823df859ec6d7009683592
--- /dev/null
+++ b/generatecDNA/generatecDNA.py
@@ -0,0 +1,258 @@
+"""Package contains utilities to generate cDNA
+ as part of the workflow to simulate scRNAseq.
+
+Class:
+    GeneratecDNA: contains one method
+        generatecDNA: takes as input fasta-formatted file &
+        gtf-formatted-file & csv-formatted file,
+        outputs fasta-formatted file with cDNA ID and unique cDNA sequence &
+        csv-formatted file with cDNA ID and copy number
+"""
+
+import random
+
+
+class GeneratecDNA:
+    """Contains function to generate cDNA.
+
+    Args:
+        input files: path to fasta-file (RNA_ID & RNA_Seq),
+        gtf-file (RNA_ID & Priming sites & Probability),
+        csv-file (RNA_ID & copy number)
+
+    Attributes:
+        fastaFile: RNA_ID & RNA_Seq
+        gtf: RNA_ID & Priming sites & Probability
+        cp_nr: RNA_ID & copy number
+    """
+
+    def __init__(self, fastaFile, gtf, cp_nr) -> None:
+        """Store the fasta, gtf and copy-number csv inputs on the instance."""
+        self.fastaFile = fastaFile
+        self.gtf = gtf
+        self.cp_nr = cp_nr
+
+    def generatecDNA(self, fastaFile, gtf, cp_nr):
+        """Generate cDNA.
+
+        Args:
+            fastaFile (str): RNA_ID & RNA_Seq
+            gtf (str): RNA_ID & Priming sites & Probability
+            cp_nr (str): RNA_ID & copy number
+
+        Returns:
+            cDNA.fasta: cDNA_ID & cDNA sequence
+            cDNA.csv: cDNA_ID & copy number
+        """
+        # defining global variables
+        gtfFileInputDict = {}
+        csvFileInputDict = {}
+        fastaInputDict = {}
+        # READING INPUT FILES / PART I
+        # open gtf file
+        with open(gtf, 'r') as gt:
+            # read gtf file
+            for mygtfline in gt:
+                currentGTFString = mygtfline
+                gtf_list = currentGTFString.split('\t')
+                gtf_seqname = gtf_list[0]
+                gtf_start = gtf_list[3]
+                gtf_end = gtf_list[4]
+                gtf_score = gtf_list[5]
+                my_temp_list_1 = [int(gtf_start),
+                                  int(gtf_end), float(gtf_score)]
+                if gtf_seqname in gtfFileInputDict:
+                    my_temp_list_2 = gtfFileInputDict[gtf_seqname]
+                    my_temp_list_2.append(my_temp_list_1)
+                    gtfFileInputDict[gtf_seqname] = my_temp_list_2
+                else:
+                    gtfFileInputDict[gtf_seqname] = [my_temp_list_1]
+        print(gtfFileInputDict)
+        # open csv file
+        with open(cp_nr, 'r') as cp:
+            # read csv file
+            for mycsvline in cp:
+                currentcsvstring = mycsvline
+                csv_list = currentcsvstring.split(',')
+                csv_trans_id = csv_list[0]
+                csv_count = csv_list[2]
+                csv_count = csv_count.replace("\n", "")
+                """ trans id should be always new,
+                    otherwise unhash csv_current_count
+                    in defining variables section.
+                if csv_trans_id in csvFileInputDict:
+                    csv_current_count = csvFileInputDict[csv_trans_id]
+                    csv_current_count += csv_count
+                    csvFileInputDict[csv_trans_id] = csv_current_count
+                else:
+                    csvFileInputDict[csv_trans_id] = csv_count
+                """
+                csvFileInputDict[csv_trans_id] = int(csv_count)
+        print(csvFileInputDict)
+        # open fasta file
+        with open(fastaFile, 'r') as fa:
+            # defining variables
+            fasta_id = ""
+            fasta_seq = ""
+            fasta_id_found = False
+            fasta_seq_found = False
+            # read fasta file
+            for myfastaline in fa:
+                currentfastastring = myfastaline
+                # find fasta ID
+                if not fasta_id_found and not fasta_seq_found:
+                    position_of_start = currentfastastring.find('>')
+                    if position_of_start != 0:
+                        continue
+                    elif position_of_start == 0:
+                        fasta_id = myfastaline
+                        fasta_id = fasta_id.replace(">", "")
+                        fasta_id = fasta_id.replace("\n", "")
+                        # I don't know, how the sequence id is formatted and
+                        # which part thereof is equal to the transcript ID
+                        # in the csv-formatted file and gtf-formatted file
+                        # temp_fasta_list_1 = fasta_id.split('\t')
+                        # fasta_id = temp_fasta_list_1[0]
+                        fasta_id_found = True
+                        continue
+                    else:
+                        print("FASTA: Start position in fasta file not found")
+                        break
+                # find fasta sequence
+                if fasta_id_found and not fasta_seq_found:
+                    while not fasta_seq_found:
+                        zero_position = currentfastastring[0]
+                        if zero_position == ";":
+                            currentfastastring = fa.readline()
+                        elif zero_position == ">":
+                            assert False, "FASTA: No Sequence after headline"
+                        else:
+                            fasta_seq = currentfastastring
+                            fasta_seq_found = True
+                if fasta_id_found and fasta_seq_found:
+                    fastaInputDict[fasta_id] = fasta_seq
+                    fasta_id_found = False
+                    fasta_seq_found = False
+                    fasta_id = ""
+                    fasta_seq = ""
+        print(fastaInputDict)
+        # COMPUTATION OF INPUT FILES / PART II
+        outputFastaDict = {}
+        outputCSVDict = {}
+        # starting Loop1: read fasta dict
+        for (k, v) in fastaInputDict.items():
+            rna_seq = v
+            # search for transcript ID in gtf-file to get
+            # priming sites and scores
+            if k in gtfFileInputDict:
+                gtfList = gtfFileInputDict[k]
+            else:
+                assert False, "Fasta-ID from fasta-file not found in gtf-file"
+            # Excluding priming sites within 40 bases
+            # at the beginning of the transcript and
+            # ordering priming sites on the RNA sequence in gtf-dict
+            # sorting
+            gtfList.sort(key=lambda x: x[0])
+            # elimination
+            for i in gtfList:
+                if i[0] <= 40:
+                    gtfList.remove(i)
+            # search for transcript ID in csv-file
+            # to get copy number of transcript
+            if k in csvFileInputDict:
+                actual_count = csvFileInputDict[k]
+            else:
+                assert False, "Fasta-ID from fasta-file not found in csv-file"
+            # random choosing
+            scores = []
+            for i in gtfList:
+                scores.append(i[2])
+            print("gtfList: ", gtfList)
+            print("scores: ", scores)
+            my_weighted_list = random.choices(
+                gtfList, weights=scores, k=actual_count)
+            # counts per priming site
+            counts_per_priming_site = []
+            for i in range(0, len(gtfList)):
+                counts_per_priming_site.append(0)
+            for i in range(0, len(gtfList)):
+                counts_per_priming_site[i] = my_weighted_list.count(gtfList[i])
+            print("counts: ", counts_per_priming_site)
+            # Loop2: through gtfList to create cDNA starting on priming sites
+            # according to counts per priming sites
+            counter_cDNA = 0
+            for i in gtfList:
+                cDNA_3_5 = ""
+                counter_cDNA += 1
+                cDNA_ID = "-".join([k, "cDNA", str(counter_cDNA)])
+                if counter_cDNA == 1:
+                    end = i[1]
+                    # create 3' to 5' cDNA
+                    for j in range(0, int(end)):
+                        if rna_seq[j] == "A":
+                            cDNA_3_5 = cDNA_3_5 + "T"
+                        elif rna_seq[j] == "U":
+                            cDNA_3_5 = cDNA_3_5 + "A"
+                        elif rna_seq[j] == "G":
+                            cDNA_3_5 = cDNA_3_5 + "C"
+                        elif rna_seq[j] == "C":
+                            cDNA_3_5 = cDNA_3_5 + "G"
+                        else:
+                            print(
+                                k, rna_seq, gtfList, i,
+                                cDNA_ID, counts_per_priming_site)
+                            assert False, "cDNA synthesis failed, position " \
+                                          "is not A,U,G or C in transcript"
+                else:
+                    previous_end = end + 1
+                    this_end = i[1]
+                    # create 3' to 5' cDNA
+                    for j in range(int(previous_end), int(this_end)):
+                        if rna_seq[j] == "A":
+                            cDNA_3_5 = cDNA_3_5 + "T"
+                        elif rna_seq[j] == "U":
+                            cDNA_3_5 = cDNA_3_5 + "A"
+                        elif rna_seq[j] == "G":
+                            cDNA_3_5 = cDNA_3_5 + "C"
+                        elif rna_seq[j] == "C":
+                            cDNA_3_5 = cDNA_3_5 + "G"
+                        else:
+                            print(
+                                k, rna_seq, gtfList, i,
+                                cDNA_ID, counts_per_priming_site)
+                            assert False, "cDNA synthesis failed, " \
+                                          "position is not A,U,G or C " \
+                                          "in transcript"
+                # reverse sequence to 5' to 3'
+                cDNA_5_3 = cDNA_3_5[::-1]
+                if counts_per_priming_site[(counter_cDNA - 1)] == 0:
+                    continue
+                elif cDNA_5_3 in outputCSVDict:
+                    new_count = outputCSVDict[cDNA_5_3]
+                    new_count += counts_per_priming_site[(counter_cDNA - 1)]
+                    outputCSVDict[cDNA_5_3] = new_count
+                else:
+                    outputFastaDict[cDNA_5_3] = cDNA_ID
+                    outputCSVDict[cDNA_5_3] = \
+                        counts_per_priming_site[(counter_cDNA - 1)]
+        # WRITING OUTPUT FILES / PART III
+        # write fasta-file and csv-formatted file
+        with open("cDNA.fasta", 'w') as myFa, open("cDNA.csv", 'w') as myCO:
+            firstLine = True
+            for (k, v) in outputFastaDict.items():
+                headline = "".join([">", v])
+                csvLine = ",".join([v, str(outputCSVDict[k])])
+                if firstLine:
+                    myFa.write(headline)
+                    myFa.write("\n")
+                    myFa.write(k)
+                    myCO.write(csvLine)
+                    firstLine = False
+                else:
+                    myFa.write("\n")
+                    myFa.write(headline)
+                    myFa.write("\n")
+                    myFa.write(k)
+                    myCO.write("\n")
+                    myCO.write(csvLine)
+        return myFa, myCO
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..335dd934586c5ecf0dcf68d61b13e5914f9a398d
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,28 @@
+"""cDNA Generator.
+
+@author: Suvarnan Selliah & Ruth Montano
+"""
+
+from setuptools import setup, find_packages
+
+with open("README.md", "r") as f:
+    long_description = f.read()
+
+setup(
+    name='generatecDNA',
+    url=('https://git.scicore.unibas.ch/'
+         'zavolan_group/pipelines/scrna-seq-simulation.git'),
+    author='Suvarnan Selliah and Ruth Eneida Montano Crespo',
+    author_email='s.selliah@unibas.ch,r.montanocrespo@unibas.ch',
+    description=('Generates cDNA copies of RNA transcript'
+                 'from internal priming sites'),
+    license='MIT',
+    version='0.1.0',
+    packages=find_packages(),
+    install_requires=[],
+
+    entry_points={
+        'console_scripts': [
+            'generatecDNA = generatecDNA.__main__:main'
+        ]
+    })
diff --git a/tests/.DS_Store b/tests/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..9f52ee6408fed3d2e48fd284a905747184f46c18
Binary files /dev/null and b/tests/.DS_Store differ
diff --git a/tests/__pycache__/test.cpython-310-pytest-6.2.5.pyc b/tests/__pycache__/test.cpython-310-pytest-6.2.5.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..eb8b9d2fd21bd6ecbf4ff82a5f60b0e6ef2056f4
Binary files /dev/null and b/tests/__pycache__/test.cpython-310-pytest-6.2.5.pyc differ
diff --git a/tests/fasta_example.fasta b/tests/fasta_example.fasta
new file mode 100644
index 0000000000000000000000000000000000000000..1a25b150101d1ca5424abce4593aa908d9469ebd
--- /dev/null
+++ b/tests/fasta_example.fasta
@@ -0,0 +1,21 @@
+>RNA_1
+;some comment from RNA_1
+CCCGGGGAUAACAACCCCGUGGUCUUUGAAGCCCUAGGCAAUGUGUGACAUUCCACCCGAACACCAUGCAUCCUUUGAUCGCAUUGGGCAGGGGACGUCGCUCCACUGGCGUAACACGAAGAGCAUUGGGUGUAACUUCCGAGAGGAGAUUGAGGGUGUGGAGCGACCUGGACCUCACUCCCCCUCAUGCACAGCGUGGACGGAAUCUGAUUUUCGUAUCGAGAAUAUAAAGUACUGUAGCGGCUCCCAUCGGUUGUGGUGUGGUCCAGCCGUGGGUGGACAUCUACCGUGAUUGCAGGGACUUUUCCUGAGAAAUGGCUACUGCAUUUGCACACGUCGCACUGUGACCCCAGGAUGUAUGACCAACCGCCUAGGCGCGGAUCAGGUCUUGACCCUGAUAUGAUUGGGUGUUGGCCCUGCCUGCCUCGAUUAGGGCACUACCACCACGCUCUCACGUCUCUGUUACCCUGGCAAUAAUGCUGGUGAUCUGGGGUGGCUAAUCGAGGGUAUUUACUAUGUGGGCUCAUCGAUGACGGGGCAGUACUCGUUUAAAGCAACCGCGCAGAAGUAGGACGUCCAAGAAUCCUCGCAAGAUAUAACAGUGUUCAUUAGAGUGUUGCUAUUUGCAAUUUUGAAGGGUGUGUUACGUACGUCUCAGGCGUCCACGGCUCCUUCCUACACUACACGCAGUCCCUAGCAACACAGGGUCUUAUCCACUCAACUAAUAGAACGCGUAUAAGACAUAGACCUUCUAGAACUACGAGUAAAUGGCCCAUGUUAUGCAUCAAUCGACUUCAUCCUCGAUGAGAUUGUAGUGCUCAAGCUUCUUAGGCACUAGCACGAUUGACCUGCCCAGAAUGGUGAAUUGCUGCGCUACAAACAAGGGUGACUCCGGCACAAGCUUGCUCCUGUAGAGUCACGGGUUACGCAAAGCGGUACGGUCAUGGACUGUCAGGCUGGUUUUUUGGCACGCUCCGGGACCCGAUACUGGCAUCGGGGA
+>RNA_2
+GGCUAGACUCACCAGGGAGCUUGGGUUGUACAUCGUACGGCUGGCCAUGGGGUCAGCAAUAUUAGUUAGGCUCAGCUCGUUUUUGGUCCCCAUGUUCGUCCCCCCUGGUUACAGUAUCCACGGUUUCUUCUGCGCAAACAAGGUAGCCGGACAAACAAGCUGGUGGCCUGGGGGUCGACAUGUUACACUCUGAAGGGUUAAGUCACUUCCCAUCGACUGCGUGGGCAUCUUUAUACAGCAGCGCUUGAAACCCCAAGAACGUAGGCUGCGCCCCUCCCUGCUCAAGGCCGUGGCUCUGUCAUCUGCUAAGUGAGCGAGGAGUGUGAUACGUUGCCAUUAAGCGUCUUAAGUUUUUCAUAGAUAGUGUAACGUCGCUGCUGAACUAUAAUAAGGAACCGCUAGGAAUCCACCGAUAACGUAGAUCUCCCUCGGAUGAUUUCCGGCUUAACGCGCACUAGCUGAUUCAUAUCAUGAACAAUAAGAUACAGACGUACUCUGCAGCGUGGACCUCACGGAACUUGUGAGUGCUGGUAGUUGCAGCAUGCGGCCGCUAUACCCGCCUGUUCUUAUUGAUCAUGAUCGUUCUUUCUGAGCGCUACACUGCUGGUGGUCCUGAAGCGGCGUAAUAAUUCAGGCAGUUUACAUUGCUUGGGAACGUAGCAACUAUGAUCGAAUUCGUACCCCCAGCACUAAGUACGGAUUAGCGCCAGUCGCGAAUUCUAUGAUCGUGCGAUGAUACCACUAUACGCGACAAUUAAGGUAAGCGUGGAACGCUAGGAAAGAGUGAUAUCAAAUGCGCACUACUGAGUCCCAGGUGUACAGUUACCGAAUGAUUUCAGAGUACGUAUUGCUUGUGGUGGAUCUCCUUGCAGAGAAGCUCACGAGUCCCGGACUGCCCAUGUGCACUGACUUGUUAGAAUAGAGAAUAGUAGAGGCUACUGUCCCGCACAGCUACCGUCCAUAGCAGAUUCUGUUCGCGUUUUAGAGAGGCAACUAGCAC
+>RNA_3
+CGGCUCCAAGACGUCGUAUAGUUACAGACACUAGGCGGUCUAGGGUCGUACUUUGAGCAACAUUGAAUCUGUUCAGACAUUUUACUGCUGAGCAUUUAGACCGGAUCGGGUGCGAGUGAGGGAUGGACGUGCCCGAUCACUGCGGUAACGGUCAUCACCACUUUGGGGAGGCCCAUUUAUUAGUCAGAGUAGGUUGCGAAGGUAAACACCGGCCUUAACAAGACUCACGAGGUCCCGAUAGGCAUGGACUCAGUACCAUUGGCUGGCGUGCAUAAACCAUAAGCUAGCCUGCUAGCUUCUUGCAGAAACGUAAACAAUAAAGUUUAGUAAGUAAGCCGCCGGAAAUUAUGUGGUUACCAACGUUAGCGUCUAUGAGUAACUCGCCAUCGGAUAAAAUUUCUCCCUACUUUACUCCCGAACGCCUUGGGGCAUGACUUGCAUAUACUCUUACACGCCUUCAAAAGCGGAGGGGAGAAUGACCAUCAUCAUGGUGCCAGCCUGGCUAAACUGCUGCCGGUGCGAAUUUUUCCCAGUACACCACAAAUACUGGCUCACAAAGUGUAGUGGGAUUACAUGUGAAGCAUGAUACGGAUAGGCGGGUCACGACAGCUUGGUGCUACUUGUUGGGGAAUAUAAAAUCGACUAAAGUGACCCCCACGGCUAAGUCUGUCAGCGAUGUAUUCUGUUAACCGGUCGUCUUUGACGGCGAGUGUCAUAUUCCUCUUAUAAUUCAAAGUCAGUGGGGCCUGGUAUUAUGCACAGCGCGGCCGCAAGCAUAGCGGAUACGUAUACUCAGAAGUAUAAUGUUUUCGUACCCUGACGCCAGAAGCAACUAGAUAUCGUCUUCGUGCAUCACGGAAUAUACGGCUACUGGCGGUAACCGUUGCUAGUGGUUAGGGGUUUAUAGGCGGCAGGUCUAGGUUCAUUGGCAAACGCACACGCGAUCACUCUCGUUUAUCUAGUCACGAUGGCUAGCCCGCGGGCACGAGCGCCUCAGGUC
+>RNA_4
+GAUGCGCGAAUCAACACCUAGGUCAUUAUACUGGGGUGGCAAAGCUUAUCGUGACUCGACGGGCUUUUUCCACCACCCCUACUCGGACCAUUGCUGGUCGAUCUACUAUAAGCGGCCACAAUGUGAAGUCCGGCUCCCGACGUGAGUAUCUAGAUAAUUAUGAGCAAGACACCAGUUAUGGCAUCUAGAAAUCAGCCACGGCCGCGCGCGGGGGGUACUGUACACUGAAACAGCAAACUUCUAUGACAGCCCGAAGUUGUUCCGGCGUGCGGUACAGAAAAGACUAAACAGGGCCUCAUUCCGCAUGUCACGUCACCGUCACCUUCGGACCCCACCUAGUUUUAGGAAACUCGCUCGUUUGAUAAGCAUAAAAACUUGUAUUGCAGCAAAGGGGUUCGCACGACUAAAAAGAUCCGCGUGUUGUGUAACAAGAGAUAACGUGGGGUCACACAAAAUGCCGACAGCCUACCGUUGUAAUGGCGAGCGAGUUCUGCCCGGGACGACAGCUCUAACAUUUUUGGCGCGUAGAAUAUGAAACUUCAUUAAAACAGGACGCACAACAUCUAUGGUGGUGAUUCAACCUUAGGAUGACCACAUCGAUUACUUGGAUUCUAAUGGUAGAAGCUAAGGGCUCCAUGAGAGACCACUGCGAGCGACUUGACAUGGCCUUUGCGCCAUUGUAUUGCCAUGCAAGAACAUCUAUGAUGGUUAUGGUCUCGUGUUGGUCCCCCACAGAGCAUCGUUAGGGUCGUCCAGGUUCAGGAGACCCUCUAAUUACGAGUCCUCUCAUACGGAGGAACUAGCGCACCCAGCCGCCGAUUCGUAGGGAUAUUAUAAUCGUACCGAGGAGCGCAGAUGAACUCGUGGUGUCUCUGUCAGUCCUCACCGCUAGUCCCGACCAUCGUGCGUACUGUCUUCUAACAUCAACGCCAUCUUCGCCUCGGCCACGUCUCAUAAUUCUUUUCUAAUGGCACAUUCUAGCGACUCAUAAUUUUGUCAG
+>RNA_5
+CGCGGAAGCCUCCGGCGAAGCGAUGGUGAGCCCACUGACCCUCACAGUUAACCCCAUCCGCAUCAUCAUUAAGAUCGCACCCGAAGUACGGUCAUCCGAGGGAAACUGCAGCAGCCUAUUGGGAACAGCGUUCACGACCUCCGCCGUCCGCCGUUACUCCCAUCUUGCGGGUGCCGACAGUAACGCCCUCCCGUGCUCCCCGCCGGUAGUCUGACAAUUUAUCCUAUAGGGACUAGCGAUCAGAUGGGAACCCGCCUCUCCCAUUGCUACCGCUCCGCCGGCCGCUAGCAACGGCCCAGCACAUUCGAAGAUUACUUUUCGCUGGCCCAUACUAGAACCAAUCCAUUCUACGGACAUACGGAUUGGCGCAAGUCCCUCAAAUCCCCUGCCUAAGCACGUUCUGCAGCGGGAGACACUUCAAGAGGGUAGGGGGAUUUAGCAAUGCGAUUGUGGUGUCACGAGAGUACGGUCCAUAAUUUAAAGUGGAGCUAUCCCGCUUAGUGUCUCCUCGUAUGGGAGAGCGAUUUAUCGGAGCCUGAACCACGCAACCAAUGCAAGGAUUGGACUACACGGAUACAAGGUGUGCAUGGCGCGAAUCCCGUGCUUCAAAGAGGCGCCCACUACAUCGACGCAUAGGUAGUAACUUGCUUUCUACAAGUAACCUUUCAGAUACUCGUUAACAUUCCCAUGGUUUCGGCACUUCCGUAACUCGAAACUACAUGAGCAGUAUUUGCGGGUCCGGUGCGCUGAUUUCGAACCUACUAGAGCUCUAGGAGCAACUGUGCAGCGGGGGUGGAGCCUUUGCCACCUAUCCGGUUAAGCUACAAGACACUAUUGUGGCCUCGCUCGCUAACGAUGUCAGUCUUAAUAAGUGGUCAGUGCUCCUCGUAUGCUAUGGGGUGCUUCAACGCCCGGGAAGUGAGACAAUGGGUACGAACAACGCCCAUCAUUAUGGAAAUAACGAAUCUGCCGACCUGUCCGACGACUGUUUCCAAUGUCA
+>RNA_6
+CGCGGAAGCCUCCGGCGAAGCGAUGGUGAGCCCACUGACCCUCACAGUUAACCCCAUCCGCAUCAUCAUUAAGAUCGCACCCGAAGUACGGUCAUCCGAGGGAAACUGCAGCAGCCUAUUGGGAACAGCGUUCACGACCUCCGCCGUCCGCCGUUACUCCCAUCUUGCGGGUGCCGACAGUAACGCCCUCCCGUGCUCCCCGCCGGUAGUCUGACAAUUUAUCCUAUAGGGACUAGCGAUCAGAUGGGAACCCGCCUCUCCCAUUGCUACCGCUCCGCCGGCCGCUAGCAACGGCCCAGCACAUUCGAAGAUUACUUUUCGCUGGCCCAUACUAGAACCAAUCCAUUCUACGGACAUACGGAUUGGCGCAAGUCCCUCAAAUCCCCUGCCUAAGCACGUUCUGCAGCGGGAGACACUUCAAGAGGGUAGGGGGAUUUAGCAAUGCGAUUGUGGUGUCACGAGAGUACGGUCCAUAAUUUAAAGUGGAGCUAUCCCGCUUAGUGUCUCCUCGUAUGGGAGAGCGAUUUAUCGGAGCCUGAACCACGCAACCAAUGCAAGGAUUGGACUACACGGAUACAAGGUGUGCAUGGCGCGAAUCCCGUGCUUCAAAGAGGCGCCCACUACAUCGACGCAUAGGUAGUAACUUGCUUUCUACAAGUAACCUUUCAGAUACUCGUUAACAUUCCCAUGGUUUCGGCACUUCCGUAACUCGAAACUACAUGAGCAGUAUUUGCGGGUCCGGUGCGCUGAUUUCGAACCUACUAGAGCUCUAGGAGCAACUGUGCAGCGGGGGUGGAGCCUUUGCCACCUAUCCGGUUAAGCUACAAGACACUAUUGUGGCCUCGCUCGCUAACGAUGUCAGUCUUAAUAAGUGGUCAGUGCUCCUCGUAUGCUAUGGGGUGCUUCAACGCCCGGGAAGUGAGACAAUGGGUACGAACAACGCCCAUCAUUAUGGAAAUAACGAAUCUGCCGACCUGUCCGACGACUGUUUCCAAUGUCA
+>RNA_7
+UUGUCUCCACGAACACCUAGUCUCACUCAUGUCCCAGCCAGGCAGUCCUUGCUGAGUGGCAACACAUCACCUCAGCUAUGGUGAGGUCCCUACGACGCACCGUUCCGCCGACCCUUUUCGGUUUAGAACUGCUUUGUCAUCGAACGUGAGGGCACCGGAGGUAGGCUUCGAAGCGCGAUCCUGAUAAACUCGACUCAGACGCUCCCAAGCCAUGUUCAACAUCGACCCGUAGUUGAUGGUAUCGGAAACGCGACAAGAGCCGUCCCGCAGACAGAUAUUCCGCCGCCCCAGACGAUACGUGGGGCCGCAGGCCUGCCGAAUGACACAGGGUCUUUAUCAGUCCUCCCGUGCGCUUGUUCCACAUCUGCACUUAAAUGUGGCCUUCUGGGAUAUGCCCGUCCAGCCUGGUUCUAGGGUAGCCCUGCAUCUUCAAGGUUGUACCGGCAAACUGGCAAGGGACAAUGGGAAUAGAGGUCAGCUCACGUCCGGGCCGCUAGCAU
+>RNA_8
+AAUCGAGACAGGCGAGACUCUUCGGACCGGGCCGGCCGCAUUGAGGAAAGGGGUCAGAGCGACCGGUGCCCCAUUCCGGAAUCGCCCAGGCAUGAUGUAAACACACCAACCGUCCUUGCCUGAUGAACUCGCACGUUUCGUGCCUCAUGAAGAGUAGACUCCCGGUUGUAUUUGUCCCCCGUGUGUACUGAUAGAACCUAGGAACGGCAAUAAUACCACUGAGGAACGGGUGGCCUAUCCAAGGGGGGCUACGGAUAGCGGGUAGCCAUACGCCGUAGGGAGUCUUAUUCGGUAAGUUUGAGCUAUAGAACGUAAAAUUAGCUGCUCCGUGUUCCUCUAUAUGCGGCUGCAUUUCCACGUACCUACUCCCGUGAGUUAGCUCGACACUUUAUAACCAUCUCAGUUGCUACGGUGAUAAAGAAUUACGUGCCGCCGACGGGUAGUCCCGCGCAAAGAUGUGGUCGUUUAGUGGGUUCUCCUACUCUAGAGACUUGCUCGUAGCAUGCGCUUAUUACAAUAUCAGUCUGGUGGUGCCGAUUUCUAGCUCAUGACCGUAAGCAACCCAAAGCAUCGUAGAAUCGGAUACAUACAGCUGAAUGAUUACAAUAACUUUGUAGAGCCCACAGUAAAGCCUAAUGAGAGCCGUUCGUUGCCUGUAAUAUCUCACUUGGGUGAUGCAAGCCCCGCUUGAUUGGAAAUCCGGGGCUCGGGUGUCCCGUGUCGUGGGAUGGUACUUGCUCACUGCCAGGAUAAAUAAAUCCCACCUCACGCGGGCAAUUACAGUGAUGACAACAAACGCC
+>RNA_9
+CCCUGCCUGAGUCAUCCGGGAGUUCCCUGAGUUAUGCAGUGUAAAAAGAAUAUAUGAGAGCUCUUUUAGGGACUCAGUGAAGCUAUAACGAAGCCUUCCUCCAUCCCUCAUUUUAUAUAAAGUUGUCGGUAGUCGGGGCUACAGUGCAGUGUCCUGAUGAGCCCGCUGCUGCGAACCUUAGGGAUCUCUUAAGCUCACCGAAGCCAUUAUGUGAGAUCCCCCGCCUUGCUGGCGAUUAAGUGGGUCUAAAGCGUGUAAUUCGCCUCCUCGUAUAACUUUCUUCGGUAAUUAUGGCCGUUCAAUACUUGGAUAGACUUCGUUAUAUACGCGGUGAUUCUAUUACCACGGAGCGUUUAAAAAAAUUACCAGGAUCUCAAGGGCACCCCGCCUGAUGUGUUGAGUCUCUCGAUUGUAGGUCAGUUAGCUAGCUGACUCACAAUUGCUCUCAGCCCAGGCGUCCUAUGGCUUACCCUACAAGUAUGUUGCUCUCUGUACGGACA
+>RNA_10
+CCCUGCCUGAGUCAUCCGGGAGUUCCCUGAGUUAUGCAGUGUAAAAAGAAUAUAUGAGAGCUCUUUUAGGGACUCAGUGAAGCUAUAACGAAGCCUUCCUCCAUCCCUCAUUUUAUAUAAAGUUGUCGGUAGUCGGGGCUACAGUGCAGUGUCCUGAUGAGCCCGCUGCUGCGAACCUUAGGGAUCUCUUAAGCUCACCGAAGCCAUUAUGUGAGAUCCCCCGCCUUGCUGGCGAUUAAGUGGGUCUAAAGCGUGUAAUUCGCCUCCUCGUAUAACUUUCUUCGGUAAUUAUGGCCGUUCAAUACUUGGAUAGACUUCGUUAUAUACGCGGUGAUUCUAUUACCACGGAGCGUUUAAAAAAAUUACCAGGAUCUCAAGGGCACCCCGCCUGAUGUGUUGAGUCUCUCGAUUGUAGGUCAGUUAGCUAGCUGACUCACAAUUGCUCUCAGCCCAGGCGUCCUAUGGCUUACCCUACAAGUAUGUUGCUCUCUGUACGGACA
\ No newline at end of file
diff --git a/tests/gtf_example.gtf b/tests/gtf_example.gtf
new file mode 100644
index 0000000000000000000000000000000000000000..cc1d19daae0e06cbe1588eb0edfc1f9748d61262
--- /dev/null
+++ b/tests/gtf_example.gtf
@@ -0,0 +1,21 @@
+RNA_1	.	priming_site	50	68	89	.	.	.
+RNA_2	.	priming_site	500	518	50	.	.	.
+RNA_2	.	priming_site	980	1000	90	.	.	.
+RNA_3	.	priming_site	500	522	34	.	.	.
+RNA_3	.	priming_site	255	270	89	.	.	.
+RNA_3	.	priming_site	678	698	34	.	.	.
+RNA_3	.	priming_site	990	1000	34	.	.	.
+RNA_4	.	priming_site	400	422	23	.	.	.
+RNA_4	.	priming_site	20	40	60	.	.	.
+RNA_5	.	priming_site	90	112	12	.	.	.
+RNA_5	.	priming_site	40	58	34.9	.	.	.
+RNA_5	.	priming_site	800	812	23.9	.	.	.
+RNA_5	.	priming_site	456	474	56.9	.	.	.
+RNA_6	.	priming_site	978	1000	100	.	.	.
+RNA_7	.	priming_site	100	122	0.1	.	.	.
+RNA_7	.	priming_site	50	70	40	.	.	.
+RNA_8	.	priming_site	20	42	20	.	.	.
+RNA_8	.	priming_site	80	96	98.9	.	.	.
+RNA_9	.	priming_site	400	418	40	.	.	.
+RNA_9	.	priming_site	10	32	60	.	.	.
+RNA_10	.	priming_site	200	218	10	.	.	.
\ No newline at end of file
diff --git a/tests/nrcopies_example.csv b/tests/nrcopies_example.csv
new file mode 100644
index 0000000000000000000000000000000000000000..debd32888f00d3f82e8bfa7418ef8a3d7234ab59
--- /dev/null
+++ b/tests/nrcopies_example.csv
@@ -0,0 +1,10 @@
+RNA_1,Gene_1,2
+RNA_2,Gene_2,90
+RNA_3,Gene_3,6
+RNA_4,Gene_4,8
+RNA_5,Gene_5,25
+RNA_6,Gene_6,23
+RNA_7,Gene_7,5
+RNA_8,Gene_8,5
+RNA_9,Gene_9,1
+RNA_10,Gene_10,7
\ No newline at end of file