From 55588f1d3fe6c307a0ebd102927202ff8490a36f Mon Sep 17 00:00:00 2001
From: Suvi <s.selliah@unibas.ch>
Date: Tue, 4 Jan 2022 09:58:20 +0100
Subject: [PATCH] Final Version 1

---
 .DS_Store                                     | Bin 6148 -> 8196 bytes
 build/lib/generatecDNA/__init__.py            |   3 +
 build/lib/generatecDNA/generatecDNA-cli.py    |  37 +++
 build/lib/generatecDNA/generatecDNA.py        | 257 ++++++++++++++++++
 generatecDNA.egg-info/PKG-INFO                |   4 +-
 .../__pycache__/__init__.cpython-39.pyc       | Bin 0 -> 262 bytes
 .../generatecDNA.cpython-39-pytest-6.2.5.pyc  | Bin 0 -> 5647 bytes
 .../__pycache__/generatecDNA.cpython-39.pyc   | Bin 5093 -> 5205 bytes
 generatecDNA/generatecDNA.py                  |   3 +-
 requirements-dev.txt                          |   4 +
 requirements.txt                              |   4 +
 tests/.DS_Store                               | Bin 6148 -> 6148 bytes
 tests/.coverage                               | Bin 0 -> 53248 bytes
 ...t_generatecDNA.cpython-39-pytest-6.2.5.pyc | Bin 0 -> 1557 bytes
 tests/test_generatecDNA.py                    |  23 ++
 15 files changed, 331 insertions(+), 4 deletions(-)
 create mode 100644 build/lib/generatecDNA/__init__.py
 create mode 100644 build/lib/generatecDNA/generatecDNA-cli.py
 create mode 100644 build/lib/generatecDNA/generatecDNA.py
 create mode 100644 generatecDNA/__pycache__/__init__.cpython-39.pyc
 create mode 100644 generatecDNA/__pycache__/generatecDNA.cpython-39-pytest-6.2.5.pyc
 create mode 100644 requirements-dev.txt
 create mode 100644 requirements.txt
 create mode 100644 tests/.coverage
 create mode 100644 tests/__pycache__/test_generatecDNA.cpython-39-pytest-6.2.5.pyc
 create mode 100644 tests/test_generatecDNA.py

diff --git a/.DS_Store b/.DS_Store
index 0be791f97a0ff41cfcb9194d1ac6fbdea9665f90..258757630b3ea2ae538ac895182db1eba76b1f92 100644
GIT binary patch
literal 8196
zcmZQzU|@7AO)+F(kYHe7;9!8z0^AH(0Z1N%F(jFwBK#op7#IW?7*afQ@{^Nt@{^!4
zqts{!jE2By2#kinXb6mkz+ejjMu>wraMb${*->&d1V%$(M1}yUd{BV2?HL@PbOVG2
zNii@oFo3%Nj0_AcumEBN_X8L}av-fB8l)9OgS0X*f>>ZPz*-p?p;{Ti-4KvI0kBbw
z3}EdHj9{BV;$ZCzj9{A?7#Jbi85p59GeUbPj1cV%j1cV%j9}ZrMvfArAut*OXdwXV
zyR$MRF_bc7GUTB2-v$1o0R~Xkc4tUs$YV%lC}K!tC}Buth-XM<aAELcaAeS9C}5~W
z>i-KcFfht8Ffh!67z4JKhanwJKU6oeK8Sh*$-uyXQ(sbfaRGF^VP{f7PG)h5fx$IK
zCT12^Hg*nn4sMRv;Eeq8;F83W(qgB?qG%8=BtJhV3C2!L3d>9_j~5Ve&d)1J%*;zI
z0x1d3Oi4{jEQ$%w%uC5Hcgio#ODP8Hg-UR6a&X2ANLE+tT9}#WC>R<V)aocyn;RME
zD3}<S*Vb}!h$`z_2gPUS<mTmfgS`$fxfpmMd^p*I&x2*bMR_^-dFkLHlYv19R{%2T
zGUPF&G3bFv0)Z)t(+oX^REBhhbOv39OprMY`2<YjV(?+`WN>D1W$<GNW^iTDW5^|#
zgg6<37+e_~8C)2AiPRy&P{feRPzX+<AXY9o36(GuGw3p;Fr+e+G3YUrFjO#<5OA#k
zPIHLU&%sd4kjhZPP|8q%nb;A@6~ba*U|<DjkYa`shGGIPLsB_h5;T&RP?GRy{SVLg
zqm0oI7|;-4hS)9v>i@ekFyI>hho~AQM?+vV1cqe@FtWG=yEs8xnAm&>>T84g(+N;X
zP#+xB9cKjf%OUzeN<dwEaCe*$GN}kw&By@h$ulxQdh(163?Qwe0|DUB86`$TU|>Q3
E04L~;NB{r;

delta 485
zcmZp1XfcprU|?W$DortDU=UznVBlbY&@5aN3&ketDT;D~<iTP^42cXS45<vs3@#fB
zmoqbRPwp3#5%J8)Pfp6oPlD=mWME*>o17=0GWnDMxA3l{f}G6a5(9&4j7-cdtZeKY
z9FunnN9%HO#tTSQSL+&@o15t<7#W+_>L^rOn40P+n3$W?)^c))D(hPZ#b@W_=H>TH
zb`X(g?48^%7z1+PdXNSG!GM8b@^3*a0R{#}SrkE6Ar+8dH;Q1TkWv-{1LIF*L9l;|
z8B!Ta7)lum81xtl7%H*&XCBzL{|q3&3J!u|h7yKiG?gHS!BtM~6_%u`?-N9%Cl=K#
uX6N7#WCpo|fk6NiGp-B_EL<B4zcWwfm+=Gz4+9e<4nfh%usNP*4l@A$pMA{$

diff --git a/build/lib/generatecDNA/__init__.py b/build/lib/generatecDNA/__init__.py
new file mode 100644
index 0000000..dde8fb9
--- /dev/null
+++ b/build/lib/generatecDNA/__init__.py
@@ -0,0 +1,3 @@
+"""Package initializer for generatecDNA; defines the package version."""
+
+__version__ = "0.1.0"
diff --git a/build/lib/generatecDNA/generatecDNA-cli.py b/build/lib/generatecDNA/generatecDNA-cli.py
new file mode 100644
index 0000000..46009bf
--- /dev/null
+++ b/build/lib/generatecDNA/generatecDNA-cli.py
@@ -0,0 +1,37 @@
+"""Command-line interface client."""
+
+import argparse
+import generatecDNA as gn
+
+
+def main() -> None:
+    """Entry point for CLI executable."""
+    parser = argparse.ArgumentParser(description="cDNA generator")
+
+    parser.add_argument(
+        "-rna",
+        type=str,
+        metavar="",
+        help="Path file to fasta file with RNA sequence")
+    parser.add_argument(
+        "-gtf",
+        type=str,
+        metavar="",
+        help="Path file to gtf file")
+    parser.add_argument(
+        "-cnr",
+        type=str,
+        metavar="",
+        help="Path file to copy number file")
+
+    args = parser.parse_args()
+
+    Generator = gn.GeneratecDNA(
+        fastaFile=args.rna, gtf=args.gtf, cp_nr=args.cnr)
+    Generator.generatecDNA(
+        fastaFile=args.rna, gtf=args.gtf, cp_nr=args.cnr)
+    print("Done")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/build/lib/generatecDNA/generatecDNA.py b/build/lib/generatecDNA/generatecDNA.py
new file mode 100644
index 0000000..c134b51
--- /dev/null
+++ b/build/lib/generatecDNA/generatecDNA.py
@@ -0,0 +1,257 @@
+"""Module to generate cDNA copies.
+
+Class:
+    GeneratecDNA: contains one method
+        generatecDNA: takes as input fasta-formatted file &
+        gtf-formatted-file & csv-formatted file,
+        outputs fasta-formatted file with cDNA ID and unique cDNA sequence &
+        csv-formatted file with cDNA ID and copy number
+"""
+
+import random
+
+
+class GeneratecDNA:
+    """Contains function to generate cDNA.
+
+    Args:
+        input files: path to fasta-file (RNA_ID & RNA_Seq),
+        gtf-file (RNA_ID & Priming sites & Probability),
+        csv-file (RNA_ID & copy number)
+
+    Attributes:
+        fastaFile: RNA_ID & RNA_Seq
+        gtf: RNA_ID & Priming sites & Probability
+        cp_nr: RNA_ID & copy number
+    """
+
+    def __init__(self, fastaFile, gtf, cp_nr) -> str:
+        """Class constructor."""
+        self.fastaFile = fastaFile
+        self.gtf = gtf
+        self.cp_nr = cp_nr
+
+    def generatecDNA(self, fastaFile, gtf, cp_nr):
+        """Generate cDNA.
+
+        Args:
+            fastaFile (str): RNA_ID & RNA_Seq
+            gtf (str): RNA_ID & Priming sites & Probability
+            cp_nr (str): RNA_ID & copy number
+
+        Returns:
+            cDNA.fasta: cDNA_ID & cDNA sequence
+            cDNA.csv: cDNA_ID & copy number
+        """
+        # dictionaries holding the parsed contents of the input files
+        gtfFileInputDict = {}
+        csvFileInputDict = {}
+        fastaInputDict = {}
+        # READING INPUT FILES / PART I
+        # open gtf file
+        with open(gtf, 'r') as gt:
+            # read gtf file
+            for mygtfline in gt:
+                currentGTFString = mygtfline
+                gtf_list = currentGTFString.split('\t')
+                gtf_seqname = gtf_list[0]
+                gtf_start = gtf_list[3]
+                gtf_end = gtf_list[4]
+                gtf_score = gtf_list[5]
+                my_temp_list_1 = [int(gtf_start),
+                                  int(gtf_end), float(gtf_score)]
+                if gtf_seqname in gtfFileInputDict:
+                    my_temp_list_2 = gtfFileInputDict[gtf_seqname]
+                    my_temp_list_2.append(my_temp_list_1)
+                    gtfFileInputDict[gtf_seqname] = my_temp_list_2
+                else:
+                    gtfFileInputDict[gtf_seqname] = [my_temp_list_1]
+        print(gtfFileInputDict)
+        # open csv file
+        with open(cp_nr, 'r') as cp:
+            # read csv file
+            for mycsvline in cp:
+                currentcsvstring = mycsvline
+                csv_list = currentcsvstring.split(',')
+                csv_trans_id = csv_list[0]
+                csv_count = csv_list[2]
+                csv_count = csv_count.replace("\n", "")
+                """ trans id should be always new,
+                    otherwise unhash csv_current_count
+                    in defining variables section.
+                if csv_trans_id in csvFileInputDict:
+                    csv_current_count = csvFileInputDict[csv_trans_id]
+                    csv_current_count += csv_count
+                    csvFileInputDict[csv_trans_id] = csv_current_count
+                else:
+                    csvFileInputDict[csv_trans_id] = csv_count
+                """
+                csvFileInputDict[csv_trans_id] = int(csv_count)
+        print(csvFileInputDict)
+        # open fasta file
+        with open(fastaFile, 'r') as fa:
+            # defining variables
+            fasta_id = ""
+            fasta_seq = ""
+            fasta_id_found = False
+            fasta_seq_found = False
+            # read fasta file
+            for myfastaline in fa:
+                currentfastastring = myfastaline
+                # find fasta ID
+                if not fasta_id_found and not fasta_seq_found:
+                    position_of_start = currentfastastring.find('>')
+                    if position_of_start != 0:
+                        continue
+                    elif position_of_start == 0:
+                        fasta_id = myfastaline
+                        fasta_id = fasta_id.replace(">", "")
+                        fasta_id = fasta_id.replace("\n", "")
+                        # NOTE: it is unclear how the sequence ID is formatted
+                        # and which part of it matches the transcript ID used
+                        # in the csv- and gtf-formatted files.
+                        # temp_fasta_list_1 = fasta_id.split('\t')
+                        # fasta_id = temp_fasta_list_1[0]
+                        fasta_id_found = True
+                        continue
+                    else:
+                        print("FASTA: Start position in fasta file not found")
+                        break
+                # find fasta sequence
+                if fasta_id_found and not fasta_seq_found:
+                    while not fasta_seq_found:
+                        zero_position = currentfastastring[0]
+                        if zero_position == ";":
+                            currentfastastring = fa.readline()
+                        elif zero_position == ">":
+                            assert False, "FASTA: No Sequence after headline"
+                        else:
+                            fasta_seq = currentfastastring
+                            fasta_seq_found = True
+                if fasta_id_found and fasta_seq_found:
+                    fastaInputDict[fasta_id] = fasta_seq
+                    fasta_id_found = False
+                    fasta_seq_found = False
+                    fasta_id = ""
+                    fasta_seq = ""
+        print(fastaInputDict)
+        # COMPUTATION OF INPUT FILES / PART II
+        outputFastaDict = {}
+        outputCSVDict = {}
+        # starting Loop1: read fasta dict
+        for (k, v) in fastaInputDict.items():
+            rna_seq = v
+            # search for transcript ID in gtf-file to get
+            # priming sites and scores
+            if k in gtfFileInputDict:
+                gtfList = gtfFileInputDict[k]
+            else:
+                assert False, "Fasta-ID from fasta-file not found in gtf-file"
+            # Excluding priming sites within 40 bases
+            # at the beginning of the transcript and
+            # ordering priming sites on the RNA sequence in gtf-dict
+            # sorting
+            gtfList.sort(key=lambda x: x[0])
+            # elimination
+            for i in gtfList:
+                if i[0] <= 40:
+                    gtfList.remove(i)
+            # search for transcript ID in csv-file
+            # to get copy number of transcript
+            if k in csvFileInputDict:
+                actual_count = csvFileInputDict[k]
+            else:
+                assert False, "Fasta-ID from fasta-file not found in csv-file"
+            # random choosing
+            scores = []
+            for i in gtfList:
+                scores.append(i[2])
+            print("gtfList: ", gtfList)
+            print("scores: ", scores)
+            my_weighted_list = random.choices(
+                gtfList, weights=scores, k=actual_count)
+            # counts per priming site
+            counts_per_priming_site = []
+            for i in range(0, len(gtfList)):
+                counts_per_priming_site.append(0)
+            for i in range(0, len(gtfList)):
+                counts_per_priming_site[i] = my_weighted_list.count(gtfList[i])
+            print("counts: ", counts_per_priming_site)
+            # Loop2: through gtfList to create cDNA starting on priming sites
+            # according to counts per priming sites
+            counter_cDNA = 0
+            for i in gtfList:
+                cDNA_3_5 = ""
+                counter_cDNA += 1
+                cDNA_ID = "-".join([k, "cDNA", str(counter_cDNA)])
+                if counter_cDNA == 1:
+                    end = i[1]
+                    # create 3' to 5' cDNA
+                    for j in range(0, int(end)):
+                        if rna_seq[j] == "A":
+                            cDNA_3_5 = cDNA_3_5 + "T"
+                        elif rna_seq[j] == "U":
+                            cDNA_3_5 = cDNA_3_5 + "A"
+                        elif rna_seq[j] == "G":
+                            cDNA_3_5 = cDNA_3_5 + "C"
+                        elif rna_seq[j] == "C":
+                            cDNA_3_5 = cDNA_3_5 + "G"
+                        else:
+                            print(
+                                k, rna_seq, gtfList, i,
+                                cDNA_ID, counts_per_priming_site)
+                            assert False, "cDNA synthesis failed, position " \
+                                          "is not A,U,G or C in transcript"
+                else:
+                    previous_end = end + 1
+                    this_end = i[1]
+                    # create 3' to 5' cDNA
+                    for j in range(int(previous_end), int(this_end)):
+                        if rna_seq[j] == "A":
+                            cDNA_3_5 = cDNA_3_5 + "T"
+                        elif rna_seq[j] == "U":
+                            cDNA_3_5 = cDNA_3_5 + "A"
+                        elif rna_seq[j] == "G":
+                            cDNA_3_5 = cDNA_3_5 + "C"
+                        elif rna_seq[j] == "C":
+                            cDNA_3_5 = cDNA_3_5 + "G"
+                        else:
+                            print(
+                                k, rna_seq, gtfList, i,
+                                cDNA_ID, counts_per_priming_site)
+                            assert False, "cDNA synthesis failed, " \
+                                          "position is not A,U,G or C " \
+                                          "in transcript"
+                # reverse sequence to 5' to 3'
+                cDNA_5_3 = cDNA_3_5[::-1]
+                if counts_per_priming_site[(counter_cDNA - 1)] == 0:
+                    continue
+                elif cDNA_5_3 in outputCSVDict:
+                    new_count = outputCSVDict[cDNA_5_3]
+                    new_count += counts_per_priming_site[(counter_cDNA - 1)]
+                    outputCSVDict[cDNA_5_3] = new_count
+                else:
+                    outputFastaDict[cDNA_5_3] = cDNA_ID
+                    outputCSVDict[cDNA_5_3] = \
+                        counts_per_priming_site[(counter_cDNA - 1)]
+        # WRITING OUTPUT FILES / PART III
+        # write fasta-file and csv-formatted file
+        with open("cDNA.fasta", 'w') as myFa, open("cDNA.csv", 'w') as myCO:
+            firstLine = True
+            for (k, v) in outputFastaDict.items():
+                headline = "".join([">", v])
+                csvLine = ",".join([v, str(outputCSVDict[k])])
+                if firstLine:
+                    myFa.write(headline)
+                    myFa.write("\n")
+                    myFa.write(k)
+                    myCO.write(csvLine)
+                    firstLine = False
+                else:
+                    myFa.write("\n")
+                    myFa.write(headline)
+                    myFa.write("\n")
+                    myFa.write(k)
+                    myCO.write("\n")
+                    myCO.write(csvLine)
+        return myFa, myCO
diff --git a/generatecDNA.egg-info/PKG-INFO b/generatecDNA.egg-info/PKG-INFO
index ccabcbf..fb1f429 100644
--- a/generatecDNA.egg-info/PKG-INFO
+++ b/generatecDNA.egg-info/PKG-INFO
@@ -1,10 +1,10 @@
 Metadata-Version: 2.1
 Name: generatecDNA
 Version: 0.1.0
-Summary: Generates cDNA copies of RNA transcript from internal priming sites
+Summary: Generates cDNA copies of RNA transcript from internal priming sites
 Home-page: https://git.scicore.unibas.ch/zavolan_group/pipelines/scrna-seq-simulation.git
 Author: Suvarnan Selliah and Ruth Eneida Montano Crespo
-Author-email: r.montanocrespo@unibas.ch
+Author-email: s.selliah@unibas.ch,r.montanocrespo@unibas.ch
 License: MIT
 Platform: UNKNOWN
 License-File: LICENSE.md
diff --git a/generatecDNA/__pycache__/__init__.cpython-39.pyc b/generatecDNA/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5645f9ca76b976ab3f1bb9645b08638f1aa6a6fa
GIT binary patch
literal 262
zcmYe~<>g{vU|`TbaWc_{fq~&Mh=Yt785kHG7#J9ec^DWNQW&BbQW&EcQ<#DoG?}a9
zLNYRo6*7wzN-|Ov;^Q;(GE3s)71B!cl1no4^Yp4%4fG844E!{iZn4M5r{pKc$KT?P
zk1tCtD$dN$i;rK)P{ho@03m**J6pws7N-^!$K>Ycl@yhhWW>1SCzs}?=9Lu3xM!C5
zBqqfK6y>KECFbU4=A|p7<rgXVWTvGm1Se;v<|U^V$0VoZCAz2Pr4}WY<QK)HLs+TF
hE`E+NFt_OyRNmsS$<0qG%}KQb`K*|Ufq{XA2>?}@OjrN_

literal 0
HcmV?d00001

diff --git a/generatecDNA/__pycache__/generatecDNA.cpython-39-pytest-6.2.5.pyc b/generatecDNA/__pycache__/generatecDNA.cpython-39-pytest-6.2.5.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a0d7794b293b572e92a4216f10ab30a25418c393
GIT binary patch
literal 5647
zcmYe~<>g{vU|=}ycsVgehk@ZSh=Yuo85kHG7#J9eEf^RWQW&BbQW#U1au}nSz%+9%
z3ljq)LoRC+8zV%9J&N6(A%!`GrG+7drI{&;BZVoLL6bGHnvsDaATc>RF+EiwIX|x?
zF*C1Np|m73C$l6owOFAfUm-m;FSRJKBvm2V#m|vTA+cDYAhD=KAwNx_BqLR!JijPA
zEhoPmq_Q|Ow=@T&vN$=&&#^eQP>+ktIVZ8W*osR*K|#SCY7odAD}-hFd8rDysU;ct
zDPUC~l`tzIDoYZxQ;R|NX66-?mMEkp7MCRIrsWsqCYF?>rYNLk=A<g9A@r7{AtZGn
zk_ySiWe7pA4js6L{L+$w(vo8A=9XucWPm-c;OU}}n3tkZnwMEv3JD8^;?%;@)VyS5
z+c7Otz%U{?zo1efuQWF)wTSB_C{F#7K_oJU$F3R!14Al96k`elC}yLWQ(01&<}iZe
zktK>Xg*An(g&~S9g*})-lj9Z-atJ3zGcqtZ!(ue8G%vX%Gd~X$O|Z}f`%8}t91xC0
z>CjjLg+DZ)GjmdltrQ9pOEN&(py3V*ZH*v5$9PW{1vLc_BRI8C6A_&0pzudk6i}3z
zo0*rcP@GwkTCAX^5Kxq#l$ZocVwFf5!9j<r5h<iJq3$dxDauSLElEXkE!bghnK`Le
zVCNva3+y+LkJ3xhkYw>V2(BZ!AU>}MNe_}+z-lxZZ?P4nmXsFdC9@z!9f-}uz`)=P
z%9~;g3=B1lH4O0#HB2=O@r*UhH4O1gDU87knoLy!&Y-NJkeOFfRGM6pU!>=!$$X0w
z?wMQ6=_P5mSi#QFWVyvsoSKtX1Twvdm4Shwh>d}PVI@Nm2Ll7cuWV<ln9$<XqT-m`
ze2{4+88I&T$)&lec_qa$?wKV%iAga5MfvGPiMil-NXsu$@X1U|RR~Vb1f}2NnB<ha
zL}=p4FN%RB5K#6*;pi1q-r|Ul&&<m#iI0~B`4Hp`1|~K}HpVJZr~#=UZF(?OD1l?k
zz`y`1*1&<YjGciYlcAO+lcAP1lcAQagrSBhg|V5bzo(YHhTVlBwyc(;hNG6VhI0X9
z3e!TyTCN(d6lO_=TJ9RI6c$N_TAmuN6jn)wTHYG26gEkQTD}sd8lGmx5~dp7X2ufc
z8op-6bmm(A8txjFGKQj#8kQPvNrqa18i55YHT(-185wE>Kw>rwg)%k#>5MRTI713M
z3j+&7GczMYo`4}k@d^V5Mlg(IU}Pv^tzl_qWMn8|s9^@XHlS9pM$m;J)~Z&hMyOV}
zMtA{Z3dcgmT9FzNuxmwYL{c~<8EVC9#1^onaHViBWU3V_VX6^pW~^bY5w&3`JcVMT
zkRiha#$p*XJ6WN2GJ@=!z}Rn5E1tq#E0MxoE1AMmE0w}iD_tY*!Z3j`R-jg<My6J_
zMrs0M@xB^qFe$r$J%x86W360`TnckG^907Ch6#*?0eN;{KGOuoq75}NwGuTF3)oV4
z!7hZbA+D83;j5JbtLH0WP2mT*gppwaQ(;Pt)C8s?n;Pi}OvNx#7VKyF8u=7~Y>>Hc
zYQz_CEM%ya&GV^|PvOmGn!r?Kkit`|P@^z`saUIoa{*V1;6la}A#sKj;S$aT+%*ad
z88aEh7-|(ucuF`I@YX0UWNc<^VoYJLRmkJ0k*$?X;j5LNz*u++<inx>kPnLjY9wpe
zY9wqJU||6Z8<49pLYJwy3N3WmKz_<(s8z~js8z0ETfn!F!G&Q0bL_SlrdpL+)f&}W
zwHg&rDw@Dlw6BJxMn#gLRz1%G?vGDke~1w4j~ewX{uI#^u@;6D@m^3mRCi&Bm8;dL
z(Ex>BibOW!1m+^H8ubMNH5#CFI)S;6t45`UwTxi`bCF#QE7*Qbxc$t<MPU0SiM2nS
zp;oH~WT!~2Mvc}2mKqIE3WeHKSHhYiRioa_1WM~E(luIOmSC1pje3n%jb@5WFLSMS
zIzz2aiEs*YiAarlGh;K83q!0xtwxQ`0+tkJs5z(5<Rxk~YP1)K)~MHLNHQ#BtkJ2_
zuwj_MT==SlHAS{Y2V_eMbFD7OT@#pMEowDVxNG%lbW`Na8ESP?<QIt5sH7;Ah}Y;g
zGo~{yWUSQ($z?N5U@a1;(MXY#WT-U&+sIU70CmxZTEiN(8be8j6vb?&2`ohwHF_yZ
z;taKVd6S^x%3$#cHF_zEVDT!bxC&T&MU7sHDp))TDy|9^KTxBWq5>B8Dq*crsZj&@
zsYbs>ry11Pf~0NMVm5G`s1X|{6Icpk@~+evq{xXg)EYs3WLzR#!c=1fF4K&$`iiy4
z538?Oi_)<AinXW?tFKs#rV-_<J0$vwwa}<WFNHmYJw?40RCb%x=z!u)f<clYg*lsP
z0$UN+1eQXvJeL}i8s##E32a4XHOe(6k_@$`HKqu)<`BMejfo9Io<xm)jS`53kO%3p
zVE~ylfvIp#Izz2lIzz2_2}6oTie@v@1jc@)T8k2f6fF?1|68qP3SX^NjS?iE$KHVC
z@)Yg`Vkx{es_CG*#yW*#fmn@diFl1ND7Q5;f<<j=tR}D*G1QnXkf^av;i^#qr9?)C
z8uJA#H8$ytpp;)@u|TrM8pcZDg0X5;z%0uJk~KCURtit8RbCXxq!ccwSu7xNbhD65
zz-ua09o+m0Y=tctB_Dfn33ADYMScphe2qnoWg4iPpq;40$iM)r-;vvmptcoi+YuxJ
zZ^S5Q6qgifBDJER?L}}43ZxQ*(@WAY)DhD{1L*}fk1%v2x7FZQ1%VrDNbN?jeiuJS
zJ+O1Fz}|wiYC$a+c<UZ+09<`?aT$tgr1m^W4G7<2Ec(UBS;WY|!0-~(n12asxxECn
z-28qq>bwNC5N|PZ-C}V2#b_5&rR(Mx9O7uD5L^Q4Srz0LXM)@6nR$>-3An?eke6Sg
zkd|MXm-36zx=InM!!KVU7}g6(Oe;w(QpiY6Ov%a2ORW;%vP&#3PAw`?P}L}?G}J7P
zVlJpOtkQD>_n$mn6w-?Fb0HQ$x=C;g6f*M^V9oeskQZR>TM!%M^k7ia57hLmVMt+=
zWB@g1{lI<&w?{P@Z!uPYSrCGgfq~%`hfPjmZc<93-E0O1hR+}atF&PW3KZOW$c;`t
zo1FaQ#GGO~J(w;{##_wUsg*A^7#J9c^#ZIzQN@{FlID|HTw<kA#ZjD`Uj*vaXfoYm
zFHg-(&nPLr#h6{ik(^(eS5j=H@Dk+5UyQm@EFf3iVsyO47;=j-^cJK0Ek@@mdyt?)
zab;dfMrv^;sLz<0lbWKVfQYhUa6CEcgzC5}<QFM8gTk_;C^4@%xhS)s<mLbW|NmEU
zflPs<x?7CpRUBY$a&eia)Ge0$g4DcQti=U6nI*TFGxJJrv8LtZCzjk|ODrfz%}cq(
zT2KTMWiLuC$Vp62y~UE2nU`{lqX-)7x40Y%Doav}OX3rYZVARiddcySbdXz|ev31{
zI591?D7B#I7M~+Hb!6t}xfT`W7u^zusZOmZ$Vtpg1SP>+teGXLxy83wit~#~K>eNE
z{Ib+rY(<HADfzj#*poBzGm}$`Z?P67=B20JV$K1%9UPLkShDgn^KLO0mlWM%ORXpY
zJH5Opvm{m1wul|nfdOR*L{}zCAiX3F)Q9r~_3>RYlS`rml8ehwMfkuO6|UkIQ+i1h
zXKrPBNg61@M+qdC78RxDmAHqv1(y_M=A}n*q?e?{=VTU_L~(;y#i@mPiMgp!p!5)5
z49aU!>>%;fycDPyC~-vb<yOX*q~;cYb;cW_@r`aVB^Q9KO)f4&u{OE5tQcx-a&Z}0
za}*DV1&;ps%#<ij5DyYOOlgTx9ANK&B_OQg)WRr;ALBDq;=y@6iXSco72wXT1Pg)f
z7J_;iECRJ%5SAF@^V6Wd<gH3A%8!SMM{zq8R6^5)0fYldCq_~Hkiige5dn?_UWkBm
za2SYNBm>IIjAc>mMR|$wAm@U@50tfTF=j^bBqo=XCg#M0eHg_C$!Spnxs~ydjFy@L
z2^4X#g5vms)S~zT$k0GMXlNjc2P~0V6c5S}QQ&M5ZyawL#SY@fd%E0W1|=iLtSFv>
zqSUg?{L<ohP}FghWMo2FV1=gf#<w{0Qp=$Z;mfUbON@t(ZKW20qSM(QiO&L(k7CKK
zboP(pOv@}PF7e6COO4`y6{Jz@$;D+LAxLQkD)x%NMH&xsk);eO65T+B2QLdF3nL3N
z4-*F?3nLdJ2Nbd~^89CE0`a&QxtO#VIhc@D@Nnxfaxn2Rf%P)7FmW-mF!3;QF>*1o
zF>*1oF!C_+u(2?*F>x@mG4U|6F>-+AxEQUNSQt4Nc^E-<f%IxHa)8WW5@KXw2H9i5
zD!|0S$j8VB;ek~Pf%UR5vi)acW?>X!<YN?K6l3IK2Duhwnh40%OdxSCMm|P3W?}-_
z@{fhtj+ujzgGmfo4#bAb+pvI41i71oQGk()iGzvjKOYOobyAEhi~@{&jAD#zi~@`h
z|AGC;B!tA{V)SFIlEhg#_-V3(1|dK>IX?atSA2YKeoARhYJB`Hp7{8}(!?C7410Wh
zN`7*Dd=Y5Wp$IhWP~-tBc)$^eAV5)B1gg~dK`{s#&){HW;pJlki)nHe`GAr<M^b5K
zPDy57@hu)u-huNZ;~}N39;AfL%+J#+N(C2lw>aTKx7eX#MfxE7%|QgnLZtKrb`HoT
Vw>WGd-mwEErD9Ma$-=F~3;<R;5WfHb

literal 0
HcmV?d00001

diff --git a/generatecDNA/__pycache__/generatecDNA.cpython-39.pyc b/generatecDNA/__pycache__/generatecDNA.cpython-39.pyc
index f4322b4d9bb4b2f764b84c23b40cd483e44a35fc..9866867732759db27f44e64bd5c8800e4213d946 100644
GIT binary patch
delta 618
zcmaE=epQ1nk(ZZ?fq{YHwBzN(6rGKHtSpS#lSNqkCl|7)G0IM!!D7T`&A`9_Qsd0P
zz)*Z?^9`0(*7|gYT9b5!TGJAS6txufW~K>@{YtfFB@8JVAYT8sTJsdXT8kQm1)K{R
zTo__+#4yz=)hefOFAz-Otx-;ATF6*ynZmI^utvE=s7A3yNs^(N5iDv|V=;lPNT<eR
zfpCpw3RjI%I`cwCMur;G1uQjI>5O2u*@AkJ8cQfsB!vscs!;;7%om8%Sb<n6Jhc{i
zQ6QsIxS)oyfW*-ZLpA}gsZg8D7l?2{ESSJrxM8v)w=nMl0|rJgjAUSB0Eq}6Ko+Sn
zt1(Yw+MLX7!NfRw@*Lg{##@tB`6e-nPTs-SCcu_jQIeXMa*MURD6=G0(|WQS|9-|>
zli38E8E;MY5}3;<Gx>$Ua_w83d8y^`$@!&uB~g62m2Qdg$@zIDiJ5t+MIgSjKY~Ab
zmY}<^GAO#-7#SECcv%=(7+IKkm?nP_RAgkDEG87n$TT@eXqqlaLW+@vQGk(;QH+s|
zQGk($kqZjhn1qmcT#SB`6@`7dEf^RWia@EM$a!+5u#7A}$QCvR1_lmB7G6HKA|?g~
U22JK7caSop$(w`?n7KKa0i|`8OaK4?

delta 493
zcmcbr@l>5Jk(ZZ?fq{Wxs`}|fFTIU?tSpR`lSNqkCx^0_Fe*>(Vlm>gWME(bsc~jt
zU?}d|e2S%&wZ4QQMJ+|WnW_Iptw{+(iUydyr`9xuuhy(aVFBkt1{a3dc`;12O0~)<
z+zSL#cx#l?nHDnEnx}9q5Uf!y5vozFQIce6W(12`)R;|RD`KlLSs+|vzL1fT0mMq-
zg0X6pz%0`R!Zj8kR!SsKty!K8nj*X!p@zYYn7~@NESw>QorQsgp_!SHAy3GVVFFum
z%;aotdHxy$21YQ9WME`SVc$HL+k%O4-Q;V$9gI<v<M}2ricJ2**T%tGUX)pqs%bg7
zgMU9`)MO6<XU3?>JpywXr6+3%F6WNo%u6kgPtGsRE1CR9&|O#)6n++r3=9l>EQ~CS
zEX+J?lf#4*8QCUR2!%4TO+F_yO<sbLg;9W!k5P<~jgboq*_dD~uE`UHeYwpU7#NB`
u5mIC~`K7RotSHD_HU<U;4n`JUKDHtz1_lOA<{~GMGQ-JAA_mMt983V;baL|m

diff --git a/generatecDNA/generatecDNA.py b/generatecDNA/generatecDNA.py
index 0f470ab..c134b51 100644
--- a/generatecDNA/generatecDNA.py
+++ b/generatecDNA/generatecDNA.py
@@ -1,5 +1,4 @@
-"""Package contains utilities to generate cDNA
- as part of the workflow to simulate scRNAseq.
+"""Module to generate cDNA copies.
 
 Class:
     GeneratecDNA: contains one method
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..24a31c7
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,4 @@
+coverage==6.2
+flake8-docstrings==1.6.0
+flake8==4.0.1
+pytest==6.2.5
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..65750d9
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+coverage==6.2
+flake8-docstrings==1.6.0
+flake8==4.0.1
+pytest==6.2.5
diff --git a/tests/.DS_Store b/tests/.DS_Store
index 9f52ee6408fed3d2e48fd284a905747184f46c18..84fa1862bbbd180beb9a94b147bfa4b03c799777 100644
GIT binary patch
delta 74
zcmZoMXfc@J&&a+pU^g=(`(#B{sm*FE9~d>+8A=#Z8HyQ781xtl7%Dw;@{^Nt@{<@C
b7z7v?7#tZG80P&4gN=nJ7&o(X{N)D#|JxUb

delta 32
ocmZoMXfc@J&&akhU^g=(+hj#nsm*FE9~d{5ePrCs&heKY0H>V_od5s;

diff --git a/tests/.coverage b/tests/.coverage
new file mode 100644
index 0000000000000000000000000000000000000000..929bab39819360bc383b3a3b737bcc57517146dd
GIT binary patch
literal 53248
zcmWFz^vNtqRY=P(%1ta$FlG>7U}R))P*7lCVBln6VBlpy0Colj1{MUDff0#~i^<8L
z7w*B!AI8ALHjRNljyHl|muD%jKKCu2GS11|%h|IybJ(Wg(mX0Y8UmvsFd71*Au!TH
zpfQk#U0hO<v8lEsF)1gtq$n{twFpA;ItRHrhPWz(I6C>bDnP^(G`JKL6cluI6&w|c
z^2-$pQi~MQGILTDGV@Y0lM_oa^U@VcGE!lxON&!eKsBC1Nk(D`SfL(RKS)b_W{N^)
zUP)?tYLP-gQD$ypQKdq5YNZZX0o1mV)QS>_Kw5rLYG!&KNQHt1RJ*1^QEFOhQEFat
zYBAVw1&z!UO)kymdM<WxVPVGhRIr~Ci;@$IlDScQlUS6D@QP=eLQ!gReo+d@^+`pE
zdC3{6#X1U!a33b;=Yd=T@oiClu6{|rLQZBL*qeF9nI)O|c?zi&smY}!sVP`Jr~?W#
z7}nJV1qs|YCHV>oAP*%#bVBVy3LS)IEIOgep_<c*@^ka@VZn;f2(nsN7vko;(%huf
zB89Bd+yYSK6y@haEH25%r4t<AVE>}(Ey;)IBtCAi#3j@N@tG+o(Fqj>$0$TsX<lYw
zX{rJ&@^uuTO5!t9bQB;S*HHj@LX%5Vm77i6Qkan;GcP5zqPQ?8vm`Y>v9u%~%#Tkj
zN{%;7EK25S5#(SOmzQU3F9atPkQd{VGE0i#3{I3p0~f}TbU^aB(+wz>pe7PfF3QPK
z$WH@j8IW!TNB|du5(pyC=tF&~kXQuDXbSmxsbGbfCB@JP$jnK_nqVMV6*+#Wnc6^(
z%`7QaNXp4iLQQMn6jPj<3icRGSz2jcGAOGQD`e&=B<Gi<7A2;q>Op0}xgOb}uy{sE
z6`*{Alq68pLj#GTgrv!(X~@YY?kbNf>3|{$oJ8Uc;Y_YZNj7$IX=%nLPH-}UIs{6w
zqa-1yAR+-Ficy$UZfat2X%Va-!B<Gabb!hx)I0)}(d5$9WoHw2l*Z*9sJeJVD1)Ps
zorPUoRFtt1k|RLj4Z^G_o&bqp^8r|#c;^=+mSiYkx*a5}$)%~z$|i0tip$|3tKtno
zBwKS36WGb^?BLMPO)W_TVP+I3gG3Nc_D-$TElbQPO;spJ%q%Jf6&uhRKmnvMC9xz?
zAu%bxv;>@fL0oY1%ucODaxcUHO!sG}R%(Kk>*^|ar&bnQ!8i))8HvT9!lzhWAt^sU
zCp9q-R4#+_if6GxNk(R|0?00?Q;;hodxROql^`?H%hbV6ffxYE<{=rW@NA`!oS&PU
zn3n=6^1-z)q>4*Tf%^$kcIfIVD5#gE78Pga=b@Pis^&q4LS;b(6+GJ(R6@$9+{6?}
zE(gU0l8NORsTjsVoavHSl3J3Po2md(0(J~Yt3r8Vu|jfDY9hD_)l_H37yF<{iZ=w2
zpk6WusLj?W&&V#Wt<Bh23Qh$ucg7bdXQbvPqH&o}(gvC&BANK-rGlCO$jJlLwm}LG
z5Eo(ts7!;zU=k!0V5*=^94<{z2cMDuHv|7~kb6h*Xb6mkz-S1JhQMeDjE2By2#kin
zXb6mkz-S1JhQMeDjE2B43jt<ECPsEp|DT0Fm4SaAe?NaQf9f#v<EVE=Ltr!nMnhmU
z1V%$(Gz3ONU^E0qLtr!nMnhmU1V%%Eej&ia!Ys=VTV`P(#mp>f3|eMjWME{ZYhb8r
zV60$hX=Q9`WoXFD#4IfgouoI@GfH4&k!>`EEv|@GceaX&2`x@7DvpWC&Ce?-DlN%~
ziP1~W&r3;+iAl{XD~^c?@bn3eiOI=Kiis)CEJ@WZNKDR7OiwM2i7BWoNi8mkiHVQT
zP0Y-TkJl@xRFCBXE$ZjL#K3=v{|o;M{`>R`lu?^TLtr!nMnhmU1V%$(Gz3ONU^E0q
zLtr!nMnhmU1V%$(Gz7>G0Ul;qM%Y*b3kS0(BWS3BnVVUf5jw8G#LX<r2^&)Y&Hpn9
zkZ;?l&e0GU4S~@R7!85Z5Eu=C(GVC7fzc2c4S~@R7!85Z5Eu=C!5;#k`Tx=W|KN|O
zQ5TMez-S1JhQMeDjE2By2#kinXb6mkz-S1JhQMeDjD`RuAuu}sPf6&F>KzS%(GVC7
zfzc2c4S~@R7!85Z5Eu=C(GVC7fzc2c4S}H%0;BW)LnEF>T{#*8qaiRF0;3@?8Umvs
zFd71*Aut*OqaiRF0;3@?8UmDr!07xxC80N}cQgb>Ltr!nMnhmU1V%$(Gz3ONU^E0q
zLtr!nMnhmU1cpWkjL!cLjd&V$<!A_uhQMeDjE2By2#kinXb6mkz-S1JhQMeDjE2By
z2v8CNqx1iigx;v$(GVC7fzc2c4S~@R7!85Z5Eu=C(GVC7fzc2c4S~@R7#blkI{!a3
z;%U^CqaiRF0;3@?8UmvsFd71*Aut*OqaiRF0;3@?8UmvsKuHLU&i_*qdZT(rLtr!n
jMnhmU1V%$(Gz3ONU^E0qLtr!nMnhmU1V%$(XoLU&Rl7>b

literal 0
HcmV?d00001

diff --git a/tests/__pycache__/test_generatecDNA.cpython-39-pytest-6.2.5.pyc b/tests/__pycache__/test_generatecDNA.cpython-39-pytest-6.2.5.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..53f3f8e42efcd93b67840bce64f6db4c5f79e007
GIT binary patch
literal 1557
zcmYe~<>g{vU|=u_yqx%jiGkrUh=Yt-7#J8F7#J9egBTbXQW#Pga~Pr+!8B7YGZO<N
zLoQ1cD<edPEs8CLA%!W2J(nYjgOMSHIf^rdC5kJBHHtfxBb76iGnKoUIf^HRErq>>
zA&NJJF_=M<<0Z&8KTXD4JnpG^sYQt;smU&Wj#a{GiNz&}@u?Mwxdl0?dSG4^e|kw8
zTqwOHtx6=XC^^3%Gqo5lnp|9#3^Eah*%=rZI2afhoI!q&U}9h>VaQ^vWk_MHWvpSS
zVQglsWlCYHWv*eWVa{f%WvO8)o>juMfH{S^h9!+Lg#{#@#<Y+ng|&ob0qa7BGKONG
z5~c-gB@7E0i^58n7O;a@c}yv+Aa)JILPjvn2&I{zH1k5n6t+36VhpvcDeNg6HLRJ8
zVhpuxB^)J83pi`o7BV(7HZi6!urSnuFe3{?EfWhvElVwP2^ZM4oVZ*&ff4K;O!soZ
z-TM#hUap0V6Bvs?_OX|6Eo97NO5p;#l=?2_2D_LWr;C}28L);h56s0A7>hT7-OLSk
z^B#~pOSr*q<^i#(AId2V!3>(bepPIcbQYq?_>0NbR+F*HPgSF!(oD0Ms~XBsuvGxD
zj9@}Wn#E8d1I=Q+)V!49^30M9u38O{Dih6OO|Du6TZkS@&Ei{};B@SknUku?8pT{t
zX#gUOK!gd1Far^mRs4y?#i>OlV4DmLHH)KI3Mvf^t622XOVVz!r<bIG45(t!OD-<E
z#hzD`4B~3?6oInpEe@C)ZgDvjRF<R`m&7L)-4cvXPRz-PFG?*aO3u$MNGwXd#p6&=
z8J|>|nNyOPSA2^tCqFqcr}&mcd~rs8X-*1QX?%K4eo|sid|qO1>MhRr;>5I6kO{Z=
z9Kr6(%+GTzD#|aqB^;lYUzD3z5}#U8kdv5~Sdy8aSH#M|z)-}-z`&r%9>oo^3*sq5
zWR5X1#}t`kfy^<F0y`FBiJ<`s&uAq>kpKe&!>>$dtC-N@)S}{;-2A+fqSBI#7?=Fy
z(%jU%lHwTm%o3l(q?mxB{Pd#4+}zB(bcMA1A_bq!v{Z%Q<jmB(<kaGr<dnQbXc?4W
z6axynVlWw>j#Mz|6;$351j(U@a)U}2P%*>6w2pBT;}+&3A&}4cZgIduxCm6t-ja-m
z#EBjxFf#M=^oml;i!w`6Z*jtfZm~nfia;jbVuNUjl0<O^GN*_M6xN{VM}&G5Q+giQ
o5#Z24A_N&27;bUc<mRW8=A_y&f*j7nz`&ry#KXkG2!cG!07>7p=>Px#

literal 0
HcmV?d00001

diff --git a/tests/test_generatecDNA.py b/tests/test_generatecDNA.py
new file mode 100644
index 0000000..76f848e
--- /dev/null
+++ b/tests/test_generatecDNA.py
@@ -0,0 +1,23 @@
+"""Testing generatecDNA module.
+
+Test script for generatecDNA module,
+from the workflow to simulate scRNAseq.
+"""
+
+from generatecDNA.generatecDNA import GeneratecDNA
+
+fastaFile = "fasta_example.fasta"
+gtfFile = "gtf_example.gtf"
+nrcFile = "nrcopies_example.csv"
+gn = GeneratecDNA(fastaFile, fastaFile, nrcFile)
+
+
+def test_generatecDNA():
+    """Test the input file names for the generatecDNA module.
+
+    Checks that the example fasta, gtf and csv file names
+    end with the expected extensions.
+    """
+    assert fastaFile.endswith(".fasta")
+    assert gtfFile.endswith(".gtf")
+    assert nrcFile.endswith(".csv")
-- 
GitLab