From 55588f1d3fe6c307a0ebd102927202ff8490a36f Mon Sep 17 00:00:00 2001 From: Suvi <s.selliah@unibas.ch> Date: Tue, 4 Jan 2022 09:58:20 +0100 Subject: [PATCH] Final Version 1 --- .DS_Store | Bin 6148 -> 8196 bytes build/lib/generatecDNA/__init__.py | 3 + build/lib/generatecDNA/generatecDNA-cli.py | 37 +++ build/lib/generatecDNA/generatecDNA.py | 257 ++++++++++++++++++ generatecDNA.egg-info/PKG-INFO | 4 +- .../__pycache__/__init__.cpython-39.pyc | Bin 0 -> 262 bytes .../generatecDNA.cpython-39-pytest-6.2.5.pyc | Bin 0 -> 5647 bytes .../__pycache__/generatecDNA.cpython-39.pyc | Bin 5093 -> 5205 bytes generatecDNA/generatecDNA.py | 3 +- requirements-dev.txt | 4 + requirements.txt | 4 + tests/.DS_Store | Bin 6148 -> 6148 bytes tests/.coverage | Bin 0 -> 53248 bytes ...t_generatecDNA.cpython-39-pytest-6.2.5.pyc | Bin 0 -> 1557 bytes tests/test_generatecDNA.py | 23 ++ 15 files changed, 331 insertions(+), 4 deletions(-) create mode 100644 build/lib/generatecDNA/__init__.py create mode 100644 build/lib/generatecDNA/generatecDNA-cli.py create mode 100644 build/lib/generatecDNA/generatecDNA.py create mode 100644 generatecDNA/__pycache__/__init__.cpython-39.pyc create mode 100644 generatecDNA/__pycache__/generatecDNA.cpython-39-pytest-6.2.5.pyc create mode 100644 requirements-dev.txt create mode 100644 requirements.txt create mode 100644 tests/.coverage create mode 100644 tests/__pycache__/test_generatecDNA.cpython-39-pytest-6.2.5.pyc create mode 100644 tests/test_generatecDNA.py diff --git a/.DS_Store b/.DS_Store index 0be791f97a0ff41cfcb9194d1ac6fbdea9665f90..258757630b3ea2ae538ac895182db1eba76b1f92 100644 GIT binary patch literal 8196 zcmZQzU|@7AO)+F(kYHe7;9!8z0^AH(0Z1N%F(jFwBK#op7#IW?7*afQ@{^Nt@{^!4 zqts{!jE2By2#kinXb6mkz+ejjMu>wraMb${*->&d1V%$(M1}yUd{BV2?HL@PbOVG2 zNii@oFo3%Nj0_AcumEBN_X8L}av-fB8l)9OgS0X*f>>ZPz*-p?p;{Ti-4KvI0kBbw z3}EdHj9{BV;$ZCzj9{A?7#Jbi85p59GeUbPj1cV%j1cV%j9}ZrMvfArAut*OXdwXV zyR$MRF_bc7GUTB2-v$1o0R~Xkc4tUs$YV%lC}K!tC}Buth-XM<aAELcaAeS9C}5~W z>i-KcFfht8Ffh!67z4JKhanwJKU6oeK8Sh*$-uyXQ(sbfaRGF^VP{f7PG)h5fx$IK zCT12^Hg*nn4sMRv;Eeq8;F83W(qgB?qG%8=BtJhV3C2!L3d>9_j~5Ve&d)1J%*;zI z0x1d3Oi4{jEQ$%w%uC5Hcgio#ODP8Hg-UR6a&X2ANLE+tT9}#WC>R<V)aocyn;RME zD3}<S*Vb}!h$`z_2gPUS<mTmfgS`$fxfpmMd^p*I&x2*bMR_^-dFkLHlYv19R{%2T zGUPF&G3bFv0)Z)t(+oX^REBhhbOv39OprMY`2<YjV(?+`WN>D1W$<GNW^iTDW5^|# zgg6<37+e_~8C)2AiPRy&P{feRPzX+<AXY9o36(GuGw3p;Fr+e+G3YUrFjO#<5OA#k zPIHLU&%sd4kjhZPP|8q%nb;A@6~ba*U|<DjkYa`shGGIPLsB_h5;T&RP?GRy{SVLg zqm0oI7|;-4hS)9v>i@ekFyI>hho~AQM?+vV1cqe@FtWG=yEs8xnAm&>>T84g(+N;X zP#+xB9cKjf%OUzeN<dwEaCe*$GN}kw&By@h$ulxQdh(163?Qwe0|DUB86`$TU|>Q3 E04L~;NB{r; delta 485 zcmZp1XfcprU|?W$DortDU=UznVBlbY&@5aN3&ketDT;D~<iTP^42cXS45<vs3@#fB zmoqbRPwp3#5%J8)Pfp6oPlD=mWME*>o17=0GWnDMxA3l{f}G6a5(9&4j7-cdtZeKY z9FunnN9%HO#tTSQSL+&@o15t<7#W+_>L^rOn40P+n3$W?)^c))D(hPZ#b@W_=H>TH zb`X(g?48^%7z1+PdXNSG!GM8b@^3*a0R{#}SrkE6Ar+8dH;Q1TkWv-{1LIF*L9l;| z8B!Ta7)lum81xtl7%H*&XCBzL{|q3&3J!u|h7yKiG?gHS!BtM~6_%u`?-N9%Cl=K# uX6N7#WCpo|fk6NiGp-B_EL<B4zcWwfm+=Gz4+9e<4nfh%usNP*4l@A$pMA{$ diff --git a/build/lib/generatecDNA/__init__.py b/build/lib/generatecDNA/__init__.py new file mode 100644 index 0000000..dde8fb9 --- /dev/null +++ b/build/lib/generatecDNA/__init__.py @@ -0,0 +1,3 @@ +"""This is the __init__ function.""" + +__version__ = "0.1.0" diff --git a/build/lib/generatecDNA/generatecDNA-cli.py b/build/lib/generatecDNA/generatecDNA-cli.py new file mode 100644 index 0000000..46009bf --- /dev/null +++ b/build/lib/generatecDNA/generatecDNA-cli.py @@ -0,0 +1,37 @@ +"""Command-line interface client.""" + +import argparse +import generatecDNA as gn + + +def main() -> None: + """Entry point for CLI executable.""" + parser = argparse.ArgumentParser(description="cDNA generator") + + parser.add_argument( + "-rna", + type=str, + metavar="", + help="Path file to fasta file with RNA sequence") + parser.add_argument( + "-gtf", + type=str, + metavar="", + help="Path file to gtf file") + parser.add_argument( + "-cnr", + type=str, + metavar="", + help="Path file to copy number file") + + args = parser.parse_args() + + Generator = gn.GeneratecDNA( + fastaFile=args.rna, gtf=args.gtf, cp_nr=args.cnr) + Generator.generatecDNA( + fastaFile=args.rna, gtf=args.gtf, cp_nr=args.cnr) + print("Done") + + +if __name__ == '__main__': + main() diff --git a/build/lib/generatecDNA/generatecDNA.py b/build/lib/generatecDNA/generatecDNA.py new file mode 100644 index 0000000..c134b51 --- /dev/null +++ b/build/lib/generatecDNA/generatecDNA.py @@ -0,0 +1,257 @@ +"""Module to generate cDNA copies. + +Class: + GeneratecDNA: contains one method + generatecDNA: takes as input fasta-formatted file & + gtf-formatted-file & csv-formatted file, + outputs fasta-formatted file with cDNA ID and unique cDNA sequence & + csv-formatted file with cDNA ID and copy number +""" + +import random + + +class GeneratecDNA: + """Contains function to generate cDNA. + + Args: + input files: path to fasta-file (RNA_ID & RNA_Seq), + gtf-file (RNA_ID & Priming sites & Probability), + csv-file (RNA_ID & copy number) + + Attributes: + fastaFile: RNA_ID & RNA_Seq + gtf: RNA_ID & Priming sites & Probability + cp_nr: RNA_ID & copy number + """ + + def __init__(self, fastaFile, gtf, cp_nr) -> str: + """Class intructor.""" + self.fastaFile = fastaFile + self.gtf = gtf + self.cp_nr = cp_nr + + def generatecDNA(self, fastaFile, gtf, cp_nr): + """Generate cDNA. + + Args: + fastaFile (str): RNA_ID & RNA_Seq + gtf (str): RNA_ID & Priming sites & Probability + cp_nr (str): RNA_ID & copy number + + Returns: + cDNA.fasta: cDNA_ID & cDNA sequence + cDNA.csv: cDNA_ID & copy number + """ + # defining global variables + gtfFileInputDict = {} + csvFileInputDict = {} + fastaInputDict = {} + # READING INPUT FILES / PART I + # open gtf file + with open(gtf, 'r') as gt: + # read gtf file + for mygtfline in gt: + currentGTFString = mygtfline + gtf_list = currentGTFString.split('\t') + gtf_seqname = gtf_list[0] + gtf_start = gtf_list[3] + gtf_end = gtf_list[4] + gtf_score = gtf_list[5] + my_temp_list_1 = [int(gtf_start), + int(gtf_end), float(gtf_score)] + if gtf_seqname in gtfFileInputDict: + my_temp_list_2 = gtfFileInputDict[gtf_seqname] + my_temp_list_2.append(my_temp_list_1) + gtfFileInputDict[gtf_seqname] = my_temp_list_2 + else: + gtfFileInputDict[gtf_seqname] = [my_temp_list_1] + print(gtfFileInputDict) + # open csv file + with open(cp_nr, 'r') as cp: + # read csv file + for mycsvline in cp: + currentcsvstring = mycsvline + csv_list = currentcsvstring.split(',') + csv_trans_id = csv_list[0] + csv_count = csv_list[2] + csv_count = csv_count.replace("\n", "") + """ trans id should be always new, + otherwise unhash csv_current_count + in defining variables section. + if csv_trans_id in csvFileInputDict: + csv_current_count = csvFileInputDict[csv_trans_id] + csv_current_count += csv_count + csvFileInputDict[csv_trans_id] = csv_current_count + else: + csvFileInputDict[csv_trans_id] = csv_count + """ + csvFileInputDict[csv_trans_id] = int(csv_count) + print(csvFileInputDict) + # open fasta file + with open(fastaFile, 'r') as fa: + # defining variables + fasta_id = "" + fasta_seq = "" + fasta_id_found = False + fasta_seq_found = False + # read fasta file + for myfastaline in fa: + currentfastastring = myfastaline + # find fasta ID + if not fasta_id_found and not fasta_seq_found: + position_of_start = currentfastastring.find('>') + if position_of_start != 0: + continue + elif position_of_start == 0: + fasta_id = myfastaline + fasta_id = fasta_id.replace(">", "") + fasta_id = fasta_id.replace("\n", "") + # I don't know, how the sequence id is formatted and + # which part thereof is equal to the transcript ID + # in the csv-formatted file and gtf-formatted file + # temp_fasta_list_1 = fasta_id.split('\t') + # fasta_id = temp_fasta_list_1[0] + fasta_id_found = True + continue + else: + print("FASTA: Start position in fasta file not found") + break + # find fasta sequence + if fasta_id_found and not fasta_seq_found: + while not fasta_seq_found: + zero_position = currentfastastring[0] + if zero_position == ";": + currentfastastring = fa.readline() + elif zero_position == ">": + assert False, "FASTA: No Sequence after headline" + else: + fasta_seq = currentfastastring + fasta_seq_found = True + if fasta_id_found and fasta_seq_found: + fastaInputDict[fasta_id] = fasta_seq + fasta_id_found = False + fasta_seq_found = False + fasta_id = "" + fasta_seq = "" + print(fastaInputDict) + # COMPUTATION OF INPUT FILES / PART II + outputFastaDict = {} + outputCSVDict = {} + # starting Loop1: read fasta dict + for (k, v) in fastaInputDict.items(): + rna_seq = v + # search for transcript ID in gtf-file to get + # priming sites and scores + if k in gtfFileInputDict: + gtfList = gtfFileInputDict[k] + else: + assert False, "Fasta-ID from fasta-file not found in gtf-file" + # Excluding priming sites within 40 bases + # at the beginning of the transcript and + # ordering priming sites on the RNA sequence in gtf-dict + # sorting + gtfList.sort(key=lambda x: x[0]) + # elimination + for i in gtfList: + if i[0] <= 40: + gtfList.remove(i) + # search for transcript ID in csv-file + # to get copy number of transcript + if k in csvFileInputDict: + actual_count = csvFileInputDict[k] + else: + assert False, "Fasta-ID from fasta-file not found in csv-file" + # random choosing + scores = [] + for i in gtfList: + scores.append(i[2]) + print("gtfList: ", gtfList) + print("scores: ", scores) + my_weighted_list = random.choices( + gtfList, weights=scores, k=actual_count) + # counts per priming site + counts_per_priming_site = [] + for i in range(0, len(gtfList)): + counts_per_priming_site.append(0) + for i in range(0, len(gtfList)): + counts_per_priming_site[i] = my_weighted_list.count(gtfList[i]) + print("counts: ", counts_per_priming_site) + # Loop2: through gtfList to create cDNA starting on priming sites + # according to counts per priming sites + counter_cDNA = 0 + for i in gtfList: + cDNA_3_5 = "" + counter_cDNA += 1 + cDNA_ID = "-".join([k, "cDNA", str(counter_cDNA)]) + if counter_cDNA == 1: + end = i[1] + # create 3' to 5' cDNA + for j in range(0, int(end)): + if rna_seq[j] == "A": + cDNA_3_5 = cDNA_3_5 + "T" + elif rna_seq[j] == "U": + cDNA_3_5 = cDNA_3_5 + "A" + elif rna_seq[j] == "G": + cDNA_3_5 = cDNA_3_5 + "C" + elif rna_seq[j] == "C": + cDNA_3_5 = cDNA_3_5 + "G" + else: + print( + k, rna_seq, gtfList, i, + cDNA_ID, counts_per_priming_site) + assert False, "cDNA synthesis failed, position " \ + "is not A,U,G or C in transcript" + else: + previous_end = end + 1 + this_end = i[1] + # create 3' to 5' cDNA + for j in range(int(previous_end), int(this_end)): + if rna_seq[j] == "A": + cDNA_3_5 = cDNA_3_5 + "T" + elif rna_seq[j] == "U": + cDNA_3_5 = cDNA_3_5 + "A" + elif rna_seq[j] == "G": + cDNA_3_5 = cDNA_3_5 + "C" + elif rna_seq[j] == "C": + cDNA_3_5 = cDNA_3_5 + "G" + else: + print( + k, rna_seq, gtfList, i, + cDNA_ID, counts_per_priming_site) + assert False, "cDNA synthesis failed, " \ + "position is not A,U,G or C " \ + "in transcript" + # reverse sequence to 5' to 3' + cDNA_5_3 = cDNA_3_5[::-1] + if counts_per_priming_site[(counter_cDNA - 1)] == 0: + continue + elif cDNA_5_3 in outputCSVDict: + new_count = outputCSVDict[cDNA_5_3] + new_count += counts_per_priming_site[(counter_cDNA - 1)] + outputCSVDict[cDNA_5_3] = new_count + else: + outputFastaDict[cDNA_5_3] = cDNA_ID + outputCSVDict[cDNA_5_3] = \ + counts_per_priming_site[(counter_cDNA - 1)] + # WRITING OUTPUT FILES / PART III + # write fasta-file and csv-formatted file + with open("cDNA.fasta", 'w') as myFa, open("cDNA.csv", 'w') as myCO: + firstLine = True + for (k, v) in outputFastaDict.items(): + headline = "".join([">", v]) + csvLine = ",".join([v, str(outputCSVDict[k])]) + if firstLine: + myFa.write(headline) + myFa.write("\n") + myFa.write(k) + myCO.write(csvLine) + firstLine = False + else: + myFa.write("\n") + myFa.write(headline) + myFa.write("\n") + myFa.write(k) + myCO.write("\n") + myCO.write(csvLine) + return myFa, myCO diff --git a/generatecDNA.egg-info/PKG-INFO b/generatecDNA.egg-info/PKG-INFO index ccabcbf..fb1f429 100644 --- a/generatecDNA.egg-info/PKG-INFO +++ b/generatecDNA.egg-info/PKG-INFO @@ -1,10 +1,10 @@ Metadata-Version: 2.1 Name: generatecDNA Version: 0.1.0 -Summary: Generates cDNA copies of RNA transcript from internal priming sites +Summary: Generates cDNA copies of RNA transcriptfrom internal priming sites Home-page: https://git.scicore.unibas.ch/zavolan_group/pipelines/scrna-seq-simulation.git Author: Suvarnan Selliah and Ruth Eneida Montano Crespo -Author-email: r.montanocrespo@unibas.ch +Author-email: s.selliah@unibas.ch,r.montanocrespo@unibas.ch License: MIT Platform: UNKNOWN License-File: LICENSE.md diff --git a/generatecDNA/__pycache__/__init__.cpython-39.pyc b/generatecDNA/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5645f9ca76b976ab3f1bb9645b08638f1aa6a6fa GIT binary patch literal 262 zcmYe~<>g{vU|`TbaWc_{fq~&Mh=Yt785kHG7#J9ec^DWNQW&BbQW&EcQ<#DoG?}a9 zLNYRo6*7wzN-|Ov;^Q;(GE3s)71B!cl1no4^Yp4%4fG844E!{iZn4M5r{pKc$KT?P zk1tCtD$dN$i;rK)P{ho@03m**J6pws7N-^!$K>Ycl@yhhWW>1SCzs}?=9Lu3xM!C5 zBqqfK6y>KECFbU4=A|p7<rgXVWTvGm1Se;v<|U^V$0VoZCAz2Pr4}WY<QK)HLs+TF hE`E+NFt_OyRNmsS$<0qG%}KQb`K*|Ufq{XA2>?}@OjrN_ literal 0 HcmV?d00001 diff --git a/generatecDNA/__pycache__/generatecDNA.cpython-39-pytest-6.2.5.pyc b/generatecDNA/__pycache__/generatecDNA.cpython-39-pytest-6.2.5.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a0d7794b293b572e92a4216f10ab30a25418c393 GIT binary patch literal 5647 zcmYe~<>g{vU|=}ycsVgehk@ZSh=Yuo85kHG7#J9eEf^RWQW&BbQW#U1au}nSz%+9% z3ljq)LoRC+8zV%9J&N6(A%!`GrG+7drI{&;BZVoLL6bGHnvsDaATc>RF+EiwIX|x? zF*C1Np|m73C$l6owOFAfUm-m;FSRJKBvm2V#m|vTA+cDYAhD=KAwNx_BqLR!JijPA zEhoPmq_Q|Ow=@T&vN$=&&#^eQP>+ktIVZ8W*osR*K|#SCY7odAD}-hFd8rDysU;ct zDPUC~l`tzIDoYZxQ;R|NX66-?mMEkp7MCRIrsWsqCYF?>rYNLk=A<g9A@r7{AtZGn zk_ySiWe7pA4js6L{L+$w(vo8A=9XucWPm-c;OU}}n3tkZnwMEv3JD8^;?%;@)VyS5 z+c7Otz%U{?zo1efuQWF)wTSB_C{F#7K_oJU$F3R!14Al96k`elC}yLWQ(01&<}iZe zktK>Xg*An(g&~S9g*})-lj9Z-atJ3zGcqtZ!(ue8G%vX%Gd~X$O|Z}f`%8}t91xC0 z>CjjLg+DZ)GjmdltrQ9pOEN&(py3V*ZH*v5$9PW{1vLc_BRI8C6A_&0pzudk6i}3z zo0*rcP@GwkTCAX^5Kxq#l$ZocVwFf5!9j<r5h<iJq3$dxDauSLElEXkE!bghnK`Le zVCNva3+y+LkJ3xhkYw>V2(BZ!AU>}MNe_}+z-lxZZ?P4nmXsFdC9@z!9f-}uz`)=P z%9~;g3=B1lH4O0#HB2=O@r*UhH4O1gDU87knoLy!&Y-NJkeOFfRGM6pU!>=!$$X0w z?wMQ6=_P5mSi#QFWVyvsoSKtX1Twvdm4Shwh>d}PVI@Nm2Ll7cuWV<ln9$<XqT-m` ze2{4+88I&T$)&lec_qa$?wKV%iAga5MfvGPiMil-NXsu$@X1U|RR~Vb1f}2NnB<ha zL}=p4FN%RB5K#6*;pi1q-r|Ul&&<m#iI0~B`4Hp`1|~K}HpVJZr~#=UZF(?OD1l?k zz`y`1*1&<YjGciYlcAO+lcAP1lcAQagrSBhg|V5bzo(YHhTVlBwyc(;hNG6VhI0X9 z3e!TyTCN(d6lO_=TJ9RI6c$N_TAmuN6jn)wTHYG26gEkQTD}sd8lGmx5~dp7X2ufc z8op-6bmm(A8txjFGKQj#8kQPvNrqa18i55YHT(-185wE>Kw>rwg)%k#>5MRTI713M z3j+&7GczMYo`4}k@d^V5Mlg(IU}Pv^tzl_qWMn8|s9^@XHlS9pM$m;J)~Z&hMyOV} zMtA{Z3dcgmT9FzNuxmwYL{c~<8EVC9#1^onaHViBWU3V_VX6^pW~^bY5w&3`JcVMT zkRiha#$p*XJ6WN2GJ@=!z}Rn5E1tq#E0MxoE1AMmE0w}iD_tY*!Z3j`R-jg<My6J_ zMrs0M@xB^qFe$r$J%x86W360`TnckG^907Ch6#*?0eN;{KGOuoq75}NwGuTF3)oV4 z!7hZbA+D83;j5JbtLH0WP2mT*gppwaQ(;Pt)C8s?n;Pi}OvNx#7VKyF8u=7~Y>>Hc zYQz_CEM%ya&GV^|PvOmGn!r?Kkit`|P@^z`saUIoa{*V1;6la}A#sKj;S$aT+%*ad z88aEh7-|(ucuF`I@YX0UWNc<^VoYJLRmkJ0k*$?X;j5LNz*u++<inx>kPnLjY9wpe zY9wqJU||6Z8<49pLYJwy3N3WmKz_<(s8z~js8z0ETfn!F!G&Q0bL_SlrdpL+)f&}W zwHg&rDw@Dlw6BJxMn#gLRz1%G?vGDke~1w4j~ewX{uI#^u@;6D@m^3mRCi&Bm8;dL z(Ex>BibOW!1m+^H8ubMNH5#CFI)S;6t45`UwTxi`bCF#QE7*Qbxc$t<MPU0SiM2nS zp;oH~WT!~2Mvc}2mKqIE3WeHKSHhYiRioa_1WM~E(luIOmSC1pje3n%jb@5WFLSMS zIzz2aiEs*YiAarlGh;K83q!0xtwxQ`0+tkJs5z(5<Rxk~YP1)K)~MHLNHQ#BtkJ2_ zuwj_MT==SlHAS{Y2V_eMbFD7OT@#pMEowDVxNG%lbW`Na8ESP?<QIt5sH7;Ah}Y;g zGo~{yWUSQ($z?N5U@a1;(MXY#WT-U&+sIU70CmxZTEiN(8be8j6vb?&2`ohwHF_yZ z;taKVd6S^x%3$#cHF_zEVDT!bxC&T&MU7sHDp))TDy|9^KTxBWq5>B8Dq*crsZj&@ zsYbs>ry11Pf~0NMVm5G`s1X|{6Icpk@~+evq{xXg)EYs3WLzR#!c=1fF4K&$`iiy4 z538?Oi_)<AinXW?tFKs#rV-_<J0$vwwa}<WFNHmYJw?40RCb%x=z!u)f<clYg*lsP z0$UN+1eQXvJeL}i8s##E32a4XHOe(6k_@$`HKqu)<`BMejfo9Io<xm)jS`53kO%3p zVE~ylfvIp#Izz2lIzz2_2}6oTie@v@1jc@)T8k2f6fF?1|68qP3SX^NjS?iE$KHVC z@)Yg`Vkx{es_CG*#yW*#fmn@diFl1ND7Q5;f<<j=tR}D*G1QnXkf^av;i^#qr9?)C z8uJA#H8$ytpp;)@u|TrM8pcZDg0X5;z%0uJk~KCURtit8RbCXxq!ccwSu7xNbhD65 zz-ua09o+m0Y=tctB_Dfn33ADYMScphe2qnoWg4iPpq;40$iM)r-;vvmptcoi+YuxJ zZ^S5Q6qgifBDJER?L}}43ZxQ*(@WAY)DhD{1L*}fk1%v2x7FZQ1%VrDNbN?jeiuJS zJ+O1Fz}|wiYC$a+c<UZ+09<`?aT$tgr1m^W4G7<2Ec(UBS;WY|!0-~(n12asxxECn z-28qq>bwNC5N|PZ-C}V2#b_5&rR(Mx9O7uD5L^Q4Srz0LXM)@6nR$>-3An?eke6Sg zkd|MXm-36zx=InM!!KVU7}g6(Oe;w(QpiY6Ov%a2ORW;%vPPAw`?P}L}?G}J7P zVlJpOtkQD>_n$mn6w-?Fb0HQ$x=C;g6f*M^V9oeskQZR>TM!%M^k7ia57hLmVMt+= zWB@g1{lI<&w?{P@Z!uPYSrCGgfq~%`hfPjmZc<93-E0O1hR+}atF&PW3KZOW$c;`t zo1FaQ#GGO~J(w;{##_wUsg*A^7#J9c^#ZIzQN@{FlID|HTw<kA#ZjD`Uj*vaXfoYm zFHg-(&nPLr#h6{ik(^(eS5j=H@Dk+5UyQm@EFf3iVsyO47;=j-^cJK0Ek@@mdyt?) zab;dfMrv^;sLz<0lbWKVfQYhUa6CEcgzC5}<QFM8gTk_;C^4@%xhS)s<mLbW|NmEU zflPs<x?7CpRUBY$a&eia)Ge0$g4DcQti=U6nI*TFGxJJrv8LtZCzjk|ODrfz%}cq( zT2KTMWiLuC$Vp62y~UE2nU`{lqX-)7x40Y%Doav}OX3rYZVARiddcySbdXz|ev31{ zI591?D7B#I7M~+Hb!6t}xfT`W7u^zusZOmZ$Vtpg1SP>+teGXLxy83wit~#~K>eNE z{Ib+rY(<HADfzj#*poBzGm}$`Z?P67=B20JV$K1%9UPLkShDgn^KLO0mlWM%ORXpY zJH5Opvm{m1wul|nfdOR*L{}zCAiX3F)Q9r~_3>RYlS`rml8ehwMfkuO6|UkIQ+i1h zXKrPBNg61@M+qdC78RxDmAHqv1(y_M=A}n*q?e?{=VTU_L~(;y#i@mPiMgp!p!5)5 z49aU!>>%;fycDPyC~-vb<yOX*q~;cYb;cW_@r`aVB^Q9KO)f4&u{OE5tQcx-a&Z}0 za}*DV1&;ps%#<ij5DyYOOlgTx9ANK&B_OQg)WRr;ALBDq;=y@6iXSco72wXT1Pg)f z7J_;iECRJ%5SAF@^V6Wd<gH3A%8!SMM{zq8R6^5)0fYldCq_~Hkiige5dn?_UWkBm za2SYNBm>IIjAc>mMR|$wAm@U@50tfTF=j^bBqo=XCg#M0eHg_C$!Spnxs~ydjFy@L z2^4X#g5vms)S~zT$k0GMXlNjc2P~0V6c5S}QQ&M5ZyawL#SY@fd%E0W1|=iLtSFv> zqSUg?{L<ohP}FghWMo2FV1=gf#<w{0Qp=$Z;mfUbON@t(ZKW20qSM(QiO&L(k7CKK zboP(pOv@}PF7e6COO4`y6{Jz@$;D+LAxLQkD)x%NMH&xsk);eO65T+B2QLdF3nL3N z4-*F?3nLdJ2Nbd~^89CE0`a&QxtO#VIhc@D@Nnxfaxn2Rf%P)7FmW-mF!3;QF>*1o zF>*1oF!C_+u(2?*F>x@mG4U|6F>-+AxEQUNSQt4Nc^E-<f%IxHa)8WW5@KXw2H9i5 zD!|0S$j8VB;ek~Pf%UR5vi)acW?>X!<YN?K6l3IK2Duhwnh40%OdxSCMm|P3W?}-_ z@{fhtj+ujzgGmfo4#bAb+pvI41i71oQGk()iGzvjKOYOobyAEhi~@{&jAD#zi~@`h z|AGC;B!tA{V)SFIlEhg#_-V3(1|dK>IX?atSA2YKeoARhYJB`Hp7{8}(!?C7410Wh zN`7*Dd=Y5Wp$IhWP~-tBc)$^eAV5)B1gg~dK`{s#&){HW;pJlki)nHe`GAr<M^b5K zPDy57@hu)u-huNZ;~}N39;AfL%+J#+N(C2lw>aTKx7eX#MfxE7%|QgnLZtKrb`HoT Vw>WGd-mwEErD9Ma$-=F~3;<R;5WfHb literal 0 HcmV?d00001 diff --git a/generatecDNA/__pycache__/generatecDNA.cpython-39.pyc b/generatecDNA/__pycache__/generatecDNA.cpython-39.pyc index f4322b4d9bb4b2f764b84c23b40cd483e44a35fc..9866867732759db27f44e64bd5c8800e4213d946 100644 GIT binary patch delta 618 zcmaE=epQ1nk(ZZ?fq{YHwBzN(6rGKHtSpS#lSNqkCl|7)G0IM!!D7T`&A`9_Qsd0P zz)*Z?^9`0(*7|gYT9b5!TGJAS6txufW~K>@{YtfFB@8JVAYT8sTJsdXT8kQm1)K{R zTo__+#4yz=)hefOFAz-Otx-;ATF6*ynZmI^utvE=s7A3yNs^(N5iDv|V=;lPNT<eR zfpCpw3RjI%I`cwCMur;G1uQjI>5O2u*@AkJ8cQfsB!vscs!;;7%om8%Sb<n6Jhc{i zQ6QsIxS)oyfW*-ZLpA}gsZg8D7l?2{ESSJrxM8v)w=nMl0|rJgjAUSB0Eq}6Ko+Sn zt1(Yw+MLX7!NfRw@*Lg{##@tB`6e-nPTs-SCcu_jQIeXMa*MURD6=G0(|WQS|9-|> zli38E8E;MY5}3;<Gx>$Ua_w83d8y^`$@!&uB~g62m2Qdg$@zIDiJ5t+MIgSjKY~Ab zmY}<^GAO#-7#SECcv%=(7+IKkm?nP_RAgkDEG87n$TT@eXqqlaLW+@vQGk(;QH+s| zQGk($kqZjhn1qmcT#SB`6@`7dEf^RWia@EM$a!+5u#7A}$QCvR1_lmB7G6HKA|?g~ U22JK7caSop$(w`?n7KKa0i|`8OaK4? delta 493 zcmcbr@l>5Jk(ZZ?fq{Wxs`}|fFTIU?tSpR`lSNqkCx^0_Fe*>(Vlm>gWME(bsc~jt zU?}d|e2S%&wZ4QQMJ+|WnW_Iptw{+(iUydyr`9xuuhy(aVFBkt1{a3dc`;12O0~)< z+zSL#cx#l?nHDnEnx}9q5Uf!y5vozFQIce6W(12`)R;|RD`KlLSs+|vzL1fT0mMq- zg0X6pz%0`R!Zj8kR!SsKty!K8nj*X!p@zYYn7~@NESw>QorQsgp_!SHAy3GVVFFum z%;aotdHxy$21YQ9WME`SVc$HL+k%O4-Q;V$9gI<v<M}2ricJ2**T%tGUX)pqs%bg7 zgMU9`)MO6<XU3?>JpywXr6+3%F6WNo%u6kgPtGsRE1CR9&|O#)6n++r3=9l>EQ~CS zEX+J?lf#4*8QCUR2!%4TO+F_yO<sbLg;9W!k5P<~jgboq*_dD~uE`UHeYwpU7#NB` u5mIC~`K7RotSHD_HU<U;4n`JUKDHtz1_lOA<{~GMGQ-JAA_mMt983V;baL|m diff --git a/generatecDNA/generatecDNA.py b/generatecDNA/generatecDNA.py index 0f470ab..c134b51 100644 --- a/generatecDNA/generatecDNA.py +++ b/generatecDNA/generatecDNA.py @@ -1,5 +1,4 @@ -"""Package contains utilities to generate cDNA - as part of the workflow to simulate scRNAseq. +"""Module to generate cDNA copies. Class: GeneratecDNA: contains one method diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..24a31c7 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,4 @@ +coverage==6.2 +flake8-docstrings==1.6.0 +flake8==4.0.1 +pytest==6.2.5 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..65750d9 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +coverage==6.2 +flake8-docstrings==1.6.0 +flake8==4.0.1 +pytest==6.2.5 diff --git a/tests/.DS_Store b/tests/.DS_Store index 9f52ee6408fed3d2e48fd284a905747184f46c18..84fa1862bbbd180beb9a94b147bfa4b03c799777 100644 GIT binary patch delta 74 zcmZoMXfc@J&&a+pU^g=(`(#B{sm*FE9~d>+8A=#Z8HyQ781xtl7%Dw;@{^Nt@{<@C b7z7v?7#tZG80P&4gN=nJ7&o(X{N)D#|JxUb delta 32 ocmZoMXfc@J&&akhU^g=(+hj#nsm*FE9~d{5ePrCs&heKY0H>V_od5s; diff --git a/tests/.coverage b/tests/.coverage new file mode 100644 index 0000000000000000000000000000000000000000..929bab39819360bc383b3a3b737bcc57517146dd GIT binary patch literal 53248 zcmWFz^vNtqRY=P(%1ta$FlG>7U}R))P*7lCVBln6VBlpy0Colj1{MUDff0#~i^<8L z7w*B!AI8ALHjRNljyHl|muD%jKKCu2GS11|%h|IybJ(Wg(mX0Y8UmvsFd71*Au!TH zpfQk#U0hO<v8lEsF)1gtq$n{twFpA;ItRHrhPWz(I6C>bDnP^(G`JKL6cluI6&w|c z^2-$pQi~MQGILTDGV@Y0lM_oa^U@VcGE!lxON&!eKsBC1Nk(D`SfL(RKS)b_W{N^) zUP)?tYLP-gQD$ypQKdq5YNZZX0o1mV)QS>_Kw5rLYG!&KNQHt1RJ*1^QEFOhQEFat zYBAVw1&z!UO)kymdM<WxVPVGhRIr~Ci;@$IlDScQlUS6D@QP=eLQ!gReo+d@^+`pE zdC3{6#X1U!a33b;=Yd=T@oiClu6{|rLQZBL*qeF9nI)O|c?zi&smY}!sVP`Jr~?W# z7}nJV1qs|YCHV>oAP*%#bVBVy3LS)IEIOgep_<c*@^ka@VZn;f2(nsN7vko;(%huf zB89Bd+yYSK6y@haEH25%r4t<AVE>}(Ey;)IBtCAi#3j@N@tG+o(Fqj>$0$TsX<lYw zX{rJ&@^uuTO5!t9bQB;S*HHj@LX%5Vm77i6Qkan;GcP5zqPQ?8vm`Y>v9u%~%#Tkj zN{%;7EK25S5#(SOmzQU3F9atPkQd{VGE0i#3{I3p0~f}TbU^aB(+wz>pe7PfF3QPK z$WH@j8IW!TNB|du5(pyC=tF&~kXQuDXbSmxsbGbfCB@JP$jnK_nqVMV6*+#Wnc6^( z%`7QaNXp4iLQQMn6jPj<3icRGSz2jcGAOGQD`e&=B<Gi<7A2;q>Op0}xgOb}uy{sE z6`*{Alq68pLj#GTgrv!(X~@YY?kbNf>3|{$oJ8Uc;Y_YZNj7$IX=%nLPH-}UIs{6w zqa-1yAR+-Ficy$UZfat2X%Va-!B<Gabb!hx)I0)}(d5$9WoHw2l*Z*9sJeJVD1)Ps zorPUoRFtt1k|RLj4Z^G_o&bqp^8r|#c;^=+mSiYkx*a5}$)%~z$|i0tip$|3tKtno zBwKS36WGb^?BLMPO)W_TVP+I3gG3Nc_D-$TElbQPO;spJ%q%Jf6&uhRKmnvMC9xz? zAu%bxv;>@fL0oY1%ucODaxcUHO!sG}R%(Kk>*^|ar&bnQ!8i))8HvT9!lzhWAt^sU zCp9q-R4#+_if6GxNk(R|0?00?Q;;hodxROql^`?H%hbV6ffxYE<{=rW@NA`!oS&PU zn3n=6^1-z)q>4*Tf%^$kcIfIVD5#gE78Pga=b@Pis^&q4LS;b(6+GJ(R6@$9+{6?} zE(gU0l8NORsTjsVoavHSl3J3Po2md(0(J~Yt3r8Vu|jfDY9hD_)l_H37yF<{iZ=w2 zpk6WusLj?W&&V#Wt<Bh23Qh$ucg7bdXQbvPqH&o}(gvC&BANK-rGlCO$jJlLwm}LG z5Eo(ts7!;zU=k!0V5*=^94<{z2cMDuHv|7~kb6h*Xb6mkz-S1JhQMeDjE2By2#kin zXb6mkz-S1JhQMeDjE2B43jt<ECPsEp|DT0Fm4SaAe?NaQf9f#v<EVE=Ltr!nMnhmU z1V%$(Gz3ONU^E0qLtr!nMnhmU1V%%Eej&ia!Ys=VTV`P(#mp>f3|eMjWME{ZYhb8r zV60$hX=Q9`WoXFD#4IfgouoI@GfH4&k!>`EEv|@GceaX&2`x@7DvpWC&Ce?-DlN%~ ziP1~W&r3;+iAl{XD~^c?@bn3eiOI=Kiis)CEJ@WZNKDR7OiwM2i7BWoNi8mkiHVQT zP0Y-TkJl@xRFCBXE$ZjL#K3=v{|o;M{`>R`lu?^TLtr!nMnhmU1V%$(Gz3ONU^E0q zLtr!nMnhmU1V%$(Gz7>G0Ul;qM%Y*b3kS0(BWS3BnVVUf5jw8G#LX<r2^&)Y&Hpn9 zkZ;?l&e0GU4S~@R7!85Z5Eu=C(GVC7fzc2c4S~@R7!85Z5Eu=C!5;#k`Tx=W|KN|O zQ5TMez-S1JhQMeDjE2By2#kinXb6mkz-S1JhQMeDjD`RuAuu}sPf6&F>KzS%(GVC7 zfzc2c4S~@R7!85Z5Eu=C(GVC7fzc2c4S}H%0;BW)LnEF>T{#*8qaiRF0;3@?8Umvs zFd71*Aut*OqaiRF0;3@?8UmDr!07xxC80N}cQgb>Ltr!nMnhmU1V%$(Gz3ONU^E0q zLtr!nMnhmU1cpWkjL!cLjd&V$<!A_uhQMeDjE2By2#kinXb6mkz-S1JhQMeDjE2By z2v8CNqx1iigx;v$(GVC7fzc2c4S~@R7!85Z5Eu=C(GVC7fzc2c4S~@R7#blkI{!a3 z;%U^CqaiRF0;3@?8UmvsFd71*Aut*OqaiRF0;3@?8UmvsKuHLU&i_*qdZT(rLtr!n jMnhmU1V%$(Gz3ONU^E0qLtr!nMnhmU1V%$(XoLU&Rl7>b literal 0 HcmV?d00001 diff --git a/tests/__pycache__/test_generatecDNA.cpython-39-pytest-6.2.5.pyc b/tests/__pycache__/test_generatecDNA.cpython-39-pytest-6.2.5.pyc new file mode 100644 index 0000000000000000000000000000000000000000..53f3f8e42efcd93b67840bce64f6db4c5f79e007 GIT binary patch literal 1557 zcmYe~<>g{vU|=u_yqx%jiGkrUh=Yt-7#J8F7#J9egBTbXQW#Pga~Pr+!8B7YGZO<N zLoQ1cD<edPEs8CLA%!W2J(nYjgOMSHIf^rdC5kJBHHtfxBb76iGnKoUIf^HRErq>> zA&NJJF_=M<<0Z&8KTXD4JnpG^sYQt;smU&Wj#a{GiNz&}@u?Mwxdl0?dSG4^e|kw8 zTqwOHtx6=XC^^3%Gqo5lnp|9#3^Eah*%=rZI2afhoI!q&U}9h>VaQ^vWk_MHWvpSS zVQglsWlCYHWv*eWVa{f%WvO8)o>juMfH{S^h9!+Lg#{#@#<Y+ng|&ob0qa7BGKONG z5~c-gB@7E0i^58n7O;a@c}yv+Aa)JILPjvn2&I{zH1k5n6t+36VhpvcDeNg6HLRJ8 zVhpuxB^)J83pi`o7BV(7HZi6!urSnuFe3{?EfWhvElVwP2^ZM4oVZ*&ff4K;O!soZ z-TM#hUap0V6Bvs?_OX|6Eo97NO5p;#l=?2_2D_LWr;C}28L);h56s0A7>hT7-OLSk z^B#~pOSr*q<^i#(AId2V!3>(bepPIcbQYq?_>0NbR+F*HPgSF!(oD0Ms~XBsuvGxD zj9@}Wn#E8d1I=Q+)V!49^30M9u38O{Dih6OO|Du6TZkS@&Ei{};B@SknUku?8pT{t zX#gUOK!gd1Far^mRs4y?#i>OlV4DmLHH)KI3Mvf^t622XOVVz!r<bIG45(t!OD-<E z#hzD`4B~3?6oInpEe@C)ZgDvjRF<R`m&7L)-4cvXPRz-PFG?*aO3u$MNGwXd#p6&= z8J|>|nNyOPSA2^tCqFqcr}&mcd~rs8X-*1QX?%K4eo|sid|qO1>MhRr;>5I6kO{Z= z9Kr6(%+GTzD#|aqB^;lYUzD3z5}#U8kdv5~Sdy8aSH#M|z)-}-z`&r%9>oo^3*sq5 zWR5X1#}t`kfy^<F0y`FBiJ<`s&uAq>kpKe&!>>$dtC-N@)S}{;-2A+fqSBI#7?=Fy z(%jU%lHwTm%o3l(q?mxB{Pd#4+}zB(bcMA1A_bq!v{Z%Q<jmB(<kaGr<dnQbXc?4W z6axynVlWw>j#Mz|6;$351j(U@a)U}2P%*>6w2pBT;}+&3A&}4cZgIduxCm6t-ja-m z#EBjxFf#M=^oml;i!w`6Z*jtfZm~nfia;jbVuNUjl0<O^GN*_M6xN{VM}&G5Q+giQ o5#Z24A_N&27;bUc<mRW8=A_y&f*j7nz`&ry#KXkG2!cG!07>7p=>Px# literal 0 HcmV?d00001 diff --git a/tests/test_generatecDNA.py b/tests/test_generatecDNA.py new file mode 100644 index 0000000..76f848e --- /dev/null +++ b/tests/test_generatecDNA.py @@ -0,0 +1,23 @@ +"""Testing generatecDNA module. + +Test script for generatecDNA module, +from the workflow to simulate scRNAseq. +""" + +from generatecDNA.generatecDNA import GeneratecDNA + +fastaFile = "fasta_example.fasta" +gtfFile = "gtf_example.gtf" +nrcFile = "nrcopies_example.csv" +gn = GeneratecDNA(fastaFile, fastaFile, nrcFile) + + +def test_generatecDNA(): + """Testing main function of generatecDNA module. + + Function to test type of input files required + to generate cDNA copies + """ + assert fastaFile.endswith(".fasta") + assert gtfFile.endswith(".gtf") + assert nrcFile.endswith(".csv") -- GitLab