diff --git a/.gitignore b/.gitignore deleted file mode 100644 index fe236d6d40038e6d13d64badc3a9c2370d9cdde1..0000000000000000000000000000000000000000 --- a/.gitignore +++ /dev/null @@ -1,56 +0,0 @@ -# Created by https://www.toptal.com/developers/gitignore/api/macos,visualstudiocode -# Edit at https://www.toptal.com/developers/gitignore?templates=macos,visualstudiocode - -### macOS ### -# General -.DS_Store -.AppleDouble -.LSOverride - -# Icon must end with two \r -Icon - - -# Thumbnails -._* - -# Files that might appear in the root of a volume -.DocumentRevisions-V100 -.fseventsd -.Spotlight-V100 -.TemporaryItems -.Trashes -.VolumeIcon.icns -.com.apple.timemachine.donotpresent - -# Directories potentially created on remote AFP share -.AppleDB -.AppleDesktop -Network Trash Folder -Temporary Items -.apdisk - -### macOS Patch ### -# iCloud generated files -*.icloud - -### VisualStudioCode ### -.vscode/* -!.vscode/settings.json -!.vscode/tasks.json -!.vscode/launch.json -!.vscode/extensions.json -!.vscode/*.code-snippets - -# Local History for Visual Studio Code -.history/ - -# Built Visual Studio Code Extensions -*.vsix - -### VisualStudioCode Patch ### -# Ignore all local history of files -.history -.ionide - -# End of https://www.toptal.com/developers/gitignore/api/macos,visualstudiocode \ No newline at end of file diff --git a/Images/.gitkeep b/Images/.gitkeep deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/Images/git_sunho.png b/Images/git_sunho.png deleted file mode 100644 index fd10c600ac93b038db96236a6dc1b9bb62ba9ec6..0000000000000000000000000000000000000000 Binary files a/Images/git_sunho.png and /dev/null differ diff --git a/Images/git_sunho2.png b/Images/git_sunho2.png deleted file mode 100644 index 8b70717c0ef12171dbcabf31a034775e65ca4a37..0000000000000000000000000000000000000000 Binary files a/Images/git_sunho2.png and /dev/null differ diff --git a/Images/markdown_sunho.png b/Images/markdown_sunho.png deleted file mode 100644 index a9a594b9e2c69cd7d1fc6ab229a2b141d0c466d7..0000000000000000000000000000000000000000 Binary files a/Images/markdown_sunho.png and /dev/null differ diff --git a/LICENSE b/LICENSE deleted file mode 100644 index edb874900e3f120cb97930894f9a0c54cbb340b1..0000000000000000000000000000000000000000 --- a/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2022 zavolan_group / tools - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/frag_package/fragmentation.py b/frag_package/fragmentation.py deleted file mode 100644 index 7dba6091ec1ee1973862c36c94b483729955b4ff..0000000000000000000000000000000000000000 --- a/frag_package/fragmentation.py +++ /dev/null @@ -1,36 +0,0 @@ -import random - - -dna_seq = { - "ATAACATGTGGATGGCCAGTGGTCGGTTGTTACACGCCTACCGCGATGCTGAATGACCCGGACTAGAGTGGCGAAATTTATGGCGTGTGACCCGTTATGC": 100, - "TCCATTTCGGTCAGTGGGTCATTGCTAGTAGTCGATTGCATTGCCATTCTCCGAGTGATTTAGCGTGACAGCCGCAGGGAACCCATAAAATGCAATCGTA": 100 -} - -mean_length = 12 -std = 1 - -term_frags = [] -for seq, counts in dna_seq.items(): - for _ in range(counts): - n_cuts = int(len(seq)/mean_length) - cuts = random.sample(range(1,len(seq)-1), n_cuts) - cuts.sort() - cuts.insert(0,0) - term_frag = "" - for i, val in enumerate(cuts): - if i == len(cuts)-1: - fragment = seq[val:cuts[-1]] - else: - fragment = seq[val:cuts[i+1]] - if mean_length-std <= len(fragment) <= mean_length+std: - term_frag = fragment - if term_frag == "": - continue - else: - term_frags.append(term_frag) - -with open('terminal_frags.txt', 'w') as f: - for line in term_frags: - f.write(line) - f.write('\n') - diff --git a/frag_package/fragmentation_v2.py b/frag_package/fragmentation_v2.py deleted file mode 100644 index bce9ca91cc627b90cdd85e53fb00519d7c3460a0..0000000000000000000000000000000000000000 --- a/frag_package/fragmentation_v2.py +++ /dev/null @@ -1,63 +0,0 @@ -import re - -import numpy as np -import pandas as pd - - -def fasta_process(fasta_file): - with open(fasta_file, "r") as f: - lines = f.readlines() - - ident_pattern = re.compile('>(\S+)') - seq_pattern = re.compile('^(\S+)$') - - genes = {} - for line in lines: - if ident_pattern.search(line): - seq_id = (ident_pattern.search(line)).group(1) - elif seq_id in genes.keys(): - genes[seq_id] += (seq_pattern.search(line)).group(1) - else: - genes[seq_id] = (seq_pattern.search(line)).group(1) - return genes - -def fragmentation(fasta_file, counts_file, mean_length, std): - fasta = fasta_process(fasta_file) - seq_counts = pd.read_csv(counts_file, names = ["seqID", "count"]) - - nucs = ['A','T','G','C'] - mononuc_freqs = [0.22, 0.25, 0.23, 0.30] - - term_frags = [] - for seq_id, seq in fasta.items(): - counts = seq_counts[seq_counts["seqID"] == seq_id]["count"] - for _ in range(counts): - n_cuts = int(len(seq)/mean_length) - - # non-uniformly random DNA fragmentation implementation based on https://www.nature.com/articles/srep04532#Sec1 - # assume fragmentation by sonication for NGS workflow - cuts = [] - cut_nucs = np.random.choice(nucs, n_cuts, p=mononuc_freqs) - for nuc in cut_nucs: - nuc_pos = [x.start() for x in re.finditer(nuc, seq)] - pos = np.random.choice(nuc_pos) - while pos in cuts: - pos = np.random.choice(nuc_pos) - cuts.append(pos) - - cuts.sort() - cuts.insert(0,0) - term_frag = "" - for i, val in enumerate(cuts): - if i == len(cuts)-1: - fragment = seq[val+1:cuts[-1]] - else: - fragment = seq[val:cuts[i+1]] - if mean_length-std <= len(fragment) <= mean_length+std: - term_frag = fragment - if term_frag == "": - continue - else: - term_frags.append(term_frag) - return term_frags - diff --git a/frag_package/main.py b/frag_package/main.py deleted file mode 100644 index 661c8f370c844f633823dc44ba22f6cb0a57c6a1..0000000000000000000000000000000000000000 --- a/frag_package/main.py +++ /dev/null @@ -1,31 +0,0 @@ -import argparse - -from fragmentation_v2 import fragmentation -from utils import check_positive, extant_file - - -def main(args): - fasta, seq_counts, mean_length, std = args - - term_frags = fragmentation(fasta, seq_counts, mean_length, std) - with open('terminal_frags.txt', 'w') as f: - for line in term_frags: - f.write(line) - f.write('\n') - -# Parse command-line arguments -def parse_arguments(): - parser = argparse.ArgumentParser(description="Takes as input FASTA file of cDNA sequences, a CSV with sequence counts, and mean and std. dev. of fragment lengths. Outputs most terminal fragment (within desired length range) for each sequence.") - - parser.add_argument('--fasta', required=True, type=extant_file, help="FASTA file with cDNA sequences") - parser.add_argument('--counts', required=True, type=extant_file, help="CSV file with sequence counts") - parser.add_argument('--mean', required = False, default = 10, type = check_positive, help="Mean fragment length (default: 10)") - parser.add_argument('--std', required = False, default = 1, type = check_positive, help="Standard deviation fragment length (defafult: 1)") - args = parser.parse_args() - - return args.fasta, args.counts, args.mean, args.std - - -if __name__ == '__main__': - arguments = parse_arguments() - main(arguments) \ No newline at end of file diff --git a/frag_package/utils.py b/frag_package/utils.py deleted file mode 100644 index 2212bbc61d3db2fd0c55f8d85f52866fb31c2d07..0000000000000000000000000000000000000000 --- a/frag_package/utils.py +++ /dev/null @@ -1,24 +0,0 @@ -import argparse -import os.path - - -# found on https://stackoverflow.com/questions/11540854/file-as-command-line-argument-for-argparse-error-message-if-argument-is-not-va -def extant_file(x): - """ - 'Type' for argparse - checks that file exists but does not open. - """ - if not os.path.exists(x): - # Argparse uses the ArgumentTypeError to give a rejection message like: - # error: argument input: x does not exist - raise argparse.ArgumentTypeError("{0} does not exist".format(x)) - elif not x.endswith((".fasta", ".fa", ".csv")): - raise argparse.ArgumentTypeError("{0} is not the correct file format".format(x)) - return x - -# found on https://stackoverflow.com/questions/14117415/in-python-using-argparse-allow-only-positive-integers -def check_positive(value): - ivalue = int(value) - if ivalue <= 0: - raise argparse.ArgumentTypeError("%s is an invalid positive int value" % value) - return ivalue - diff --git a/images/PushPull_Hugo.png b/images/PushPull_Hugo.png deleted file mode 100644 index 063021cba0bf6adf5bbd1afcbf6de594d014c536..0000000000000000000000000000000000000000 Binary files a/images/PushPull_Hugo.png and /dev/null differ diff --git a/images/gitIntro_Hugo.png b/images/gitIntro_Hugo.png deleted file mode 100644 index e1f812aaa5725d960ad2f7b69047b761801fe4dd..0000000000000000000000000000000000000000 Binary files a/images/gitIntro_Hugo.png and /dev/null differ diff --git a/images/gittutorial_Tanya.png b/images/gittutorial_Tanya.png deleted file mode 100644 index 23dff08d437f4dbefa04cc135a4d82222ce62035..0000000000000000000000000000000000000000 Binary files a/images/gittutorial_Tanya.png and /dev/null differ diff --git a/images/gittutorial_Tanya2.png b/images/gittutorial_Tanya2.png deleted file mode 100644 index aae38f8a62d15c890816e05e7d706d2813941b0a..0000000000000000000000000000000000000000 Binary files a/images/gittutorial_Tanya2.png and /dev/null differ diff --git a/images/markdownTutorial_Hugo.png b/images/markdownTutorial_Hugo.png deleted file mode 100644 index 3a2573fd278582d9cd9bd6402e15dd65eaa4e948..0000000000000000000000000000000000000000 Binary files a/images/markdownTutorial_Hugo.png and /dev/null differ diff --git a/images/markdown_Tanya.png b/images/markdown_Tanya.png deleted file mode 100644 index fcecf48dcb06f93fa6ec116901fd53f7b2f10aca..0000000000000000000000000000000000000000 Binary files a/images/markdown_Tanya.png and /dev/null differ