diff --git a/.gitignore b/.gitignore
index 67cda8e87a9a2ed336794254c9a0039a2fbdcc35..20d1246b9039e9f35190e7656ec1a278c205281b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -60,4 +60,5 @@ __pycache__/
 *_cache
 *egg-info/
 .coverage
-build/
\ No newline at end of file
+build/
+*/play.py
\ No newline at end of file
diff --git a/term_frag_sel/cli.py b/term_frag_sel/cli.py
index a9b8e957a60f3a64c33e2e15cd10de951b887850..30c2535c219bf40e94ef441f075d99f5ab628884 100644
--- a/term_frag_sel/cli.py
+++ b/term_frag_sel/cli.py
@@ -10,6 +10,13 @@ import pandas as pd  # type: ignore
 from term_frag_sel.fragmentation import fragmentation
 from term_frag_sel.utils import check_positive, check_prob
 
+logging.basicConfig(
+    format='[%(asctime)s: %(levelname)s] %(message)s \
+        (module "%(module)s")',
+    level=logging.INFO,
+)
+logger = logging.getLogger("main")
+
 
 def main(args: argparse.Namespace):
     """Use CLI arguments to fragment sequences and output text file \
@@ -18,6 +25,9 @@ def main(args: argparse.Namespace):
     Args:
         args (parser): list of arguments from CLI.
     """
+    if not isinstance(args, argparse.Namespace):
+        raise TypeError("Input should be argparse.Namespace")
+
     # Create or wipe output file
     with open(args.output, "w", encoding="utf-8") as _:
         pass
@@ -26,17 +36,16 @@ def main(args: argparse.Namespace):
     fasta, seq_counts = file_validation(args.fasta, args.counts, args.sep)
 
     logger.info("Fragmentation of %s...", args.fasta)
-    fasta_parse = {}
-    for record in fasta:
-        fasta_parse[record.id] = record.seq
-    splits = np.arange(0, len(list(fasta_parse))+args.size, args.size)
+    splits = np.arange(0, len(list(fasta))+args.size, args.size)
 
+    nuc_probs = {'A': args.a_prob, 'T': args.t_prob,
+                 'G': args.g_prob, 'C': args.c_prob}
     for i, split in enumerate(splits):
-        fasta_dict = fasta_parse[split:splits[i+1]]
+        fasta_dict = fasta[split:splits[i+1]]
         term_frags = fragmentation(fasta_dict, seq_counts,
+                                   nuc_probs,
                                    args.mean, args.std,
-                                   args.A_prob, args.T_prob,
-                                   args.G_prob, args.C_prob)
+                                   )
 
         logger.info("Writing batch %s sequences to %s...", i, args.output)
         with open(args.output, 'a', encoding="utf-8") as out_file:
@@ -55,24 +64,32 @@ def file_validation(fasta_file: str,
         sep (str): Separator for counts file.
 
     Returns:
-        tuple: fasta and sequence counts variables
+        tuple: fasta dict and sequence counts pd.DataFrame
     """
+    fasta_dict = {}
     with open(fasta_file, "r", encoding="utf-8") as handle:
-        fasta = SeqIO.parse(handle, "fasta")
-    if not any(fasta):
-        raise ValueError("Input FASTA file is either empty or \
-            incorrect file type.")
+        fasta_sequences = SeqIO.parse(handle, "fasta")
+
+        if not any(fasta_sequences):
+            raise ValueError("Input FASTA file is either empty or \
+                incorrect file type.")
+
+        for record in fasta_sequences:
+            fasta_dict[record.id] = str(record.seq).upper()
 
     count_path = Path(counts_file)
     if not count_path.is_file():
-        logger.exception("Input counts file does not exist or isn't a file.")
+        raise FileNotFoundError("Input counts file does not exist or \
+            isn't a file.")
+
+    if sep == ",":
+        seq_counts = pd.read_csv(counts_file, names=["seqID", "count"])
+        seq_counts = seq_counts.astype({"seqID": str})
     else:
-        if sep == ",":
-            seq_counts = pd.read_csv(counts_file, names=["seqID", "count"])
-        else:
-            seq_counts = pd.read_table(counts_file, names=["seqID", "count"])
+        seq_counts = pd.read_table(counts_file, names=["seqID", "count"])
+        seq_counts = seq_counts.astype({"seqID": str})
 
-    return fasta, seq_counts
+    return fasta_dict, seq_counts
 
 
 def parse_arguments() -> argparse.Namespace:
@@ -97,21 +114,21 @@ def parse_arguments() -> argparse.Namespace:
                         help="output file path")
     parser.add_argument('--mean', required=False, default=300,
                         type=check_positive,
-                        help="Mean fragment length (default: 10)")
+                        help="Mean fragment length (default: 300)")
     parser.add_argument('--std', required=False, default=60,
                         type=check_positive,
                         help="Standard deviation fragment length \
-                            (defafult: 1)")
-    parser.add_argument('-a', '--A_prob', required=False, default=0.22,
+                            (defafult: 60)")
+    parser.add_argument('-a', '--a_prob', required=False, default=0.22,
                         type=check_prob,
                         help="Probability cut happens after nucleotide A")
-    parser.add_argument('-t', '--T_prob', required=False, default=0.25,
+    parser.add_argument('-t', '--t_prob', required=False, default=0.25,
                         type=check_prob,
                         help="Probability cut happens after nucleotide T")
-    parser.add_argument('-g', '--G_prob', required=False, default=0.25,
+    parser.add_argument('-g', '--g_prob', required=False, default=0.25,
                         type=check_prob,
                         help="Probability cut happens after nucleotide G")
-    parser.add_argument('-c', '--C_prob', required=False, default=0.28,
+    parser.add_argument('-c', '--c_prob', required=False, default=0.28,
                         type=check_prob,
                         help="Probability cut happens after nucleotide C")
     parser.add_argument('-s', '--size', required=False, default=10000,
diff --git a/term_frag_sel/fragmentation.py b/term_frag_sel/fragmentation.py
index 302fe6f41a8602688af4abcf8681ef830e91ba48..ef347cab82a652ede9f1ea3d2cd57f27920bb0cd 100644
--- a/term_frag_sel/fragmentation.py
+++ b/term_frag_sel/fragmentation.py
@@ -5,41 +5,81 @@ import numpy as np
 import pandas as pd  # type: ignore
 
 
+def get_cut_number(seq_len: int, mean: float) -> int:
+    """Get the number of cuts for a particular sequence.
+
+    Args:
+        seq_len (int): length of sequence/number of nucleotides in the sequence
+        mean (float): mean fragment length
+
+    Returns:
+        int: number of cuts
+    """
+    if not isinstance(seq_len, int):
+        raise ValueError(f"Sequence length must be numeric, \
+            not {type(seq_len)}")
+
+    if not isinstance(mean, int):
+        raise ValueError(f"Mean must be numeric, not {type(mean)}")
+
+    cuts_distribution = []  # distribution of cut numbers (n_cuts)
+
+    # 1000 iterations should not be too computationally inefficient
+    # given the nature of the code
+    for _ in range(1000):
+        n_cuts = 0
+        len_sum = 0
+        while True:
+            len_sum += np.random.exponential(scale=mean)
+            if len_sum < seq_len:
+                n_cuts += 1
+            else:
+                cuts_distribution.append(n_cuts)
+                break
+
+    cuts_distribution.sort()
+    cut_counts = {x: cuts_distribution.count(x) for x in cuts_distribution}
+    cut_probs = [x/1000 for x in cut_counts.values()]
+
+    # randomly ick no. of cut from cut distribution based on probability
+    # of cut numbers
+    n_cuts = np.random.choice(list(cut_counts.keys()), p=cut_probs)
+
+    return n_cuts
+
+
 def fragmentation(fasta: dict, seq_counts: pd.DataFrame,
-                  mean_length: int = 100, std: int = 10,
-                  a_prob: float = 0.22, t_prob: float = 0.25,
-                  g_prob: float = 0.25, c_prob: float = 0.28
+                  nuc_probs: dict,
+                  mu_length: int = 100, std: int = 10,
                   ) -> list:
     """Fragment cDNA sequences and select terminal fragment.
 
     Args:
         fasta_file (dict): dictionary of {transcript IDs: sequences}
         counts_file (pd.DataFrame): dataframe with sequence counts and IDs
-        mean_length (int): mean length of desired fragments
+        nuc_probs (dict): probability of cut occuring a certain nucleotide. \
+            Ordered as A, T, G, C. E.g: {'A': 0.22, 'T': 0.25, \
+                'G': 0.25, 'C': 0.28}.
+        mu_length (int): mean length of desired fragments
         std (int): standard deviation of desired fragment lengths
-        a_prob (float): probability of nucleotide A
-        t_prob (float): probability of nucleotide T
-        g_prob (float): probability of nucleotide G
-        c_prob (float): probability of nucleotide C
 
     Returns:
         list: list of selected terminal fragments
     """
     # calculated using https://www.nature.com/articles/srep04532#MOESM1
-    nuc_probs = {'A': a_prob, 'T': t_prob, 'G': g_prob, 'C': c_prob}
-
     term_frags = []
     for seq_id, seq in fasta.items():
         counts = seq_counts[seq_counts["seqID"] == seq_id]["count"]
-        for _ in range(counts):
-            n_cuts = int(len(seq)/mean_length)
-
+        for _ in range(counts.iloc[0]):
+            # pick no. of cuts from gauss fragment length distribution
             # non-uniformly random DNA fragmentation implementation based on
             # https://www.nature.com/articles/srep04532#Sec1
             # assume fragmentation by sonication for NGS workflow
             cuts = []
             cut_nucs = np.random.choice(list(nuc_probs.keys()),
-                                        n_cuts, p=list(nuc_probs.values()))
+                                        size=get_cut_number(len(seq),
+                                                            mu_length),
+                                        p=list(nuc_probs.values()))
             for nuc in cut_nucs:
                 nuc_pos = [x.start() for x in re.finditer(nuc, seq)]
                 pos = np.random.choice(nuc_pos)
@@ -50,13 +90,11 @@ def fragmentation(fasta: dict, seq_counts: pd.DataFrame,
             cuts.sort()
             cuts.insert(0, 0)
             term_frag = ""
-            for i, val in enumerate(cuts):
-                if i == len(cuts)-1:
-                    fragment = seq[val+1:cuts[-1]]
-                else:
-                    fragment = seq[val:cuts[i+1]]
-                if mean_length-std <= len(fragment) <= mean_length+std:
-                    term_frag = fragment
+
+            # check if 3' fragment is in the correct size range
+            if mu_length-std <= len(seq[cuts[-1]:len(seq)]) <= mu_length+std:
+                term_frag = seq[cuts[-1]:len(seq)]
+
             if term_frag != "":
                 term_frags.append(term_frag)
 
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 53a94c3f5011ee77266ceb8ee8c96951d3d1ad82..4ffe56aa4c4a9dfc43f195c305dc4ada3e89536d 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -1,13 +1,31 @@
 """Test cli.py functions."""
 import pytest
-# from Bio import SeqIO
 
-from term_frag_sel.cli import file_validation  # type: ignore
+from term_frag_sel.cli import file_validation, main  # type: ignore
 
 FASTA_FILE = "tests/test_files/test.fasta"
+CSV_FILE = "tests/test_files/test.csv"
+TAB_FILE = "tests/test_files/test_tab.csv"
+
+_, csv_counts = file_validation(FASTA_FILE, CSV_FILE, ",")
+_, tab_counts = file_validation(FASTA_FILE, TAB_FILE, "\t")
 
 
 def test_file():
-    """Test check_positive function."""
+    """Test file_validation function."""
+    assert isinstance(file_validation(FASTA_FILE, CSV_FILE, ","), tuple)
+    assert isinstance(file_validation(FASTA_FILE, TAB_FILE, "\t"), tuple)
+    assert csv_counts.equals(tab_counts)
+
+    with pytest.raises(FileNotFoundError):
+        file_validation(FASTA_FILE, "", ",")
     with pytest.raises(FileNotFoundError):
-        file_validation("", "", ",")
+        file_validation("", CSV_FILE, ",")
+    with pytest.raises(ValueError):
+        file_validation(CSV_FILE, CSV_FILE, ",")
+
+
+def test_main():
+    """Test main() function."""
+    with pytest.raises(TypeError):
+        main("")
diff --git a/tests/test_files/test.csv b/tests/test_files/test.csv
new file mode 100644
index 0000000000000000000000000000000000000000..d6ffadaa26aed64c3281302fc419a59031c57d71
--- /dev/null
+++ b/tests/test_files/test.csv
@@ -0,0 +1,50 @@
+0,1,99
+1,2,130
+2,3,144
+3,4,52
+4,5,98
+5,6,85
+6,7,119
+7,8,143
+8,9,112
+9,10,87
+10,11,53
+11,12,50
+12,13,129
+13,14,112
+14,15,56
+15,16,82
+16,17,130
+17,18,98
+18,19,87
+19,20,75
+20,21,140
+21,22,141
+22,23,74
+23,24,54
+24,25,56
+25,26,56
+26,27,56
+27,28,99
+28,29,101
+29,30,101
+30,31,62
+31,32,96
+32,33,131
+33,34,117
+34,35,53
+35,36,81
+36,37,114
+37,38,106
+38,39,67
+39,40,121
+40,41,134
+41,42,105
+42,43,91
+43,44,90
+44,45,145
+45,46,59
+46,47,84
+47,48,62
+48,49,50
+49,50,86
diff --git a/tests/test_files/test_tab.csv b/tests/test_files/test_tab.csv
new file mode 100644
index 0000000000000000000000000000000000000000..ece6e9e305df43d46df7f6f4f7a240585e279e4f
--- /dev/null
+++ b/tests/test_files/test_tab.csv
@@ -0,0 +1,50 @@
+0	1	99
+1	2	130
+2	3	144
+3	4	52
+4	5	98
+5	6	85
+6	7	119
+7	8	143
+8	9	112
+9	10	87
+10	11	53
+11	12	50
+12	13	129
+13	14	112
+14	15	56
+15	16	82
+16	17	130
+17	18	98
+18	19	87
+19	20	75
+20	21	140
+21	22	141
+22	23	74
+23	24	54
+24	25	56
+25	26	56
+26	27	56
+27	28	99
+28	29	101
+29	30	101
+30	31	62
+31	32	96
+32	33	131
+33	34	117
+34	35	53
+35	36	81
+36	37	114
+37	38	106
+38	39	67
+39	40	121
+40	41	134
+41	42	105
+42	43	91
+43	44	90
+44	45	145
+45	46	59
+46	47	84
+47	48	62
+48	49	50
+49	50	86
diff --git a/tests/test_frag.py b/tests/test_frag.py
index d97b176054f801a93796832ffc54c324b4076046..d6eaccd78626a6129fe4f1c4a05a9e66da43328e 100644
--- a/tests/test_frag.py
+++ b/tests/test_frag.py
@@ -1,23 +1,54 @@
 """Test utils.py functions."""
+import pandas as pd
 import pytest
 from Bio import SeqIO
 
-from term_frag_sel.fragmentation import fragmentation  # type: ignore
+from term_frag_sel.fragmentation import fragmentation, get_cut_number
 
-with open("tests/test_files/test.fasta", "r", encoding="utf-8") as handle:
-    fasta = SeqIO.parse(handle, "fasta")
+FASTA_FILE = "tests/test_files/test.fasta"
+CSV_FILE = "tests/test_files/test.csv"
 
-# have to create the counts file
+fasta_dict = {}
+with open(FASTA_FILE, "r", encoding="utf-8") as handle:
+    fasta_sequences = SeqIO.parse(handle, "fasta")
+    for record in fasta_sequences:
+        fasta_dict[record.id] = str(record.seq).upper()
 
-MU = 100
-STD = 10
-A_PROB = 0.22
-C_PROB = 0.28
-T_PROB = 0.25
-G_PROB = 0.25
+seq_counts = pd.read_csv(CSV_FILE, names=["seqID", "count"])
+seq_counts = seq_counts.astype({"seqID": str})
 
+NUC = {'A': 0.22, 'T': 0.25, 'G': 0.25, 'C': 0.28}
 
-def test_frag():
+
+def test_get_cut_number():
+    """Test get_cut_number function."""
+    assert get_cut_number(100, 20) <= 15 and get_cut_number(100, 20) >= 0
+
+    with pytest.raises(ValueError):
+        get_cut_number(seq_len='a', mean=4)
+    with pytest.raises(ValueError):
+        get_cut_number(seq_len=10, mean='a')
+
+
+def test_fragmentation():
     """Test fragmentation function."""
-    with pytest.raises(TypeError):
-        fragmentation()
+    assert isinstance(fragmentation(fasta_dict, seq_counts,
+                                    NUC), list)
+
+    # no need to check string mean or std since it's checked at CLI
+    with pytest.raises(ValueError):
+        nuc_probs = {'A': 'a', 'T': 0.25,
+                     'G': 0.25, 'C': 0.28}
+        fragmentation(fasta_dict, seq_counts, nuc_probs)
+    with pytest.raises(ValueError):
+        nuc_probs = {'A': 0.22, 'T': 'a',
+                     'G': 0.25, 'C': 0.28}
+        fragmentation(fasta_dict, seq_counts, nuc_probs)
+    with pytest.raises(ValueError):
+        nuc_probs = {'A': 0.22, 'T': 0.25,
+                     'G': 'a', 'C': 0.28}
+        fragmentation(fasta_dict, seq_counts, nuc_probs)
+    with pytest.raises(ValueError):
+        nuc_probs = {'A': 0.22, 'T': 0.25,
+                     'G': 0.25, 'C': 'a'}
+        fragmentation(fasta_dict, seq_counts, nuc_probs)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 36084a66eb91d452b0bfd525b3e8b91c786f2b81..c7d7be2d3fb43f5511322d1bdfe2d4107ecc38ae 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,9 +1,4 @@
 """Test utils.py functions."""
-<<<<<<< HEAD
-import pytest
-
-from 
-=======
 import argparse
 import pytest
 
@@ -40,4 +35,3 @@ def test_prob():
         check_prob("string")
     with pytest.raises(ValueError):
         check_prob("")
->>>>>>> hugo_new