Skip to content
Snippets Groups Projects
Commit c279468e authored by Madan Mukundan's avatar Madan Mukundan
Browse files

build: unencapsulated working version with tests

parent 557be05b
Branches
No related tags found
1 merge request!26Issue14 prc
Pipeline #13844 failed
"""Plots read counts of gene expression data."""
import logging
import sys
from pathlib import Path
from tkinter import Tk, ttk, filedialog, StringVar
from typing import Union
import pandas
import matplotlib.pyplot as plt
"""Given a GeneID, construct the histogram of read counts per cell."""
def _read_gene_count(file_path: Path, gene_id: str) -> Union[int, float]:
    """Return the summed count of ``gene_id`` in one "Gene, Count" .csv file.

    A gene that is absent from the file, or whose count entry is missing
    or not numeric, contributes 0; each of these cases is logged as a
    warning.
    """
    logger = logging.getLogger(__name__)
    # read in csv here and transform to pandas dataframe
    df_read_counts = pandas.read_csv(
        str(file_path), header=None, names=['Gene', 'Count'])
    # exact (case sensitive) match of gene_id within dataframe
    raw_counts = df_read_counts.loc[df_read_counts.Gene == gene_id, 'Count']
    if raw_counts.empty:
        logger.warning(
            "%s.csv does not contain read of chosen gene, "
            "this is interpreted as 0",
            file_path.stem)
        return 0
    if len(raw_counts) > 1:
        logger.warning(
            "%s.csv contains more than one entry for chosen gene "
            "- these will be summed",
            file_path.stem)
    # A row such as "GENE7," is parsed as NaN; without this step the NaN
    # would propagate into the sum and defeat the "gene not found" check.
    counts = pandas.to_numeric(raw_counts, errors='coerce')
    if counts.isna().any():
        logger.warning(
            "File %s contains an entry for this gene but is missing a "
            "count number, this is interpreted as 0",
            file_path.stem)
        counts = counts.fillna(0)
    return sum(counts.tolist())


def plot_read_counts(
    dir_path: Union[str, Path] = None,
    gene_id: str = None
) -> None:
    """Plots a bar chart of read counts for one gene from .csv files.

    This function reads every ``*.csv`` file directly inside ``dir_path``
    as a header-less table formatted "GeneID, Counts", sums the counts of
    the chosen gene per file and shows one bar per file, labelled with
    the file stem.

    Args:
        dir_path: Path of directory containing .csv files of read counts.
        gene_id: Name of gene to plot (matched exactly, case sensitive).

    Raises:
        ValueError: No value is passed for path and/or gene ID.
        ValueError: Path passed in does not exist.
        ValueError: No .csv files in chosen directory.
        ValueError: Gene not found in selection of .csv files.
        TypeError: Gene ID parameter passed is not a String.
    """
    # Initialize logger
    logging.basicConfig(
        format='[%(asctime)s: %(levelname)s] "%(module)s" %(message)s',
        level=logging.INFO)
    logger = logging.getLogger(__name__)

    # Validate explicitly: Path(None) raises TypeError, which would
    # otherwise escape instead of the documented ValueError.
    if dir_path is None or gene_id is None:
        logger.critical("Invalid selection, please verify input")
        raise ValueError(
            "Either the path, gene ID, or both were not properly specified")
    dir_path = Path(dir_path)
    if not dir_path.exists():
        logger.critical("Invalid path")
        raise ValueError("Invalid Path")
    if not isinstance(gene_id, str):
        logger.critical("Invalid gene ID, please enter as string")
        raise TypeError("Invalid gene ID")

    # identify csv files from directory, formatted ['Gene', 'Count'];
    # sorted() materialises the glob generator so it can be reused
    file_paths = sorted(dir_path.glob("*.csv"))
    if not file_paths:
        logger.warning('No csv files found in current directory, exiting')
        raise ValueError("No .csv files found in chosen directory")

    logger.info("Run with %s and gene %s", dir_path, gene_id)
    gene_count_dict = {
        file_path.stem: _read_gene_count(file_path, gene_id)
        for file_path in file_paths
    }

    if sum(gene_count_dict.values()) == 0:
        logger.warning(
            "Gene not found -> Please check for correct spelling or ID")
        raise ValueError("Gene not found")
    logger.info('Read complete from %s files', len(file_paths))

    # create and show bar plot of read counts for chosen gene
    file_names = list(gene_count_dict)
    counts = list(gene_count_dict.values())
    plt.bar(range(len(counts)), counts, tick_label=file_names)
    plt.title('Counts for %s' % gene_id)
    plt.show()
def prc_dialog() -> None:
    """Creates a GUI to select directory and enter gene.

    Builds a fixed-size Tk window with a directory picker and a gene ID
    entry, then blocks in the Tk main loop until the window is destroyed.
    Afterwards, if both module globals ``gui_path`` and ``gui_gene_id``
    exist (they are set by the ``get_dir`` and ``get_gene_id``
    callbacks), ``plot_read_counts`` is called with them; otherwise the
    process exits via ``sys.exit()``.
    """
    # Create dialog window
    dialog = Tk()
    dialog.title("Plot Gene Read Counts")
    dialog.resizable(False, False)
    dialog.geometry("380x210+20+20")
    dialog.configure(background="black")

    # Create and attach welcome message to GUI
    wlcom_text = "Welcome to Plot Read Counts "
    wlcom_msg = ttk.Label(dialog,
                          text=wlcom_text,
                          font=("Helvetica", 12, "bold"),
                          foreground="white",
                          background="black",
                          padding=5)
    wlcom_msg.grid(column=1, row=0, columnspan=2, pady=5)

    # Create and attach instruction message to GUI
    inst_msg = ttk.Label(
        dialog,
        text="Select a directory of .csv files and a gene ID to plot",
        font=("Helvetica", 12),
        foreground="white",
        background="black",
        padding=5)
    inst_msg.grid(column=1, row=1, columnspan=2, padx=10, pady=5)

    # Create and attach directory selection elements
    # Either globals are needed or all functions
    # need to be encapsulated
    global dir_str_var
    dir_str_var = StringVar()
    dir_lbl = ttk.Label(dialog,
                        textvariable=dir_str_var,
                        font=("Courier", 10, "bold"),
                        background="black",
                        foreground="green")
    dir_lbl.grid(column=1, row=4, padx=5, pady=1, sticky="e")
    dir_btn = ttk.Button(dialog, text="Select Directory", command=get_dir)
    dir_btn.grid(column=1, row=6, padx=5, pady=1)

    # Create and attach gene selection elements
    global gene_lbl
    gene_lbl = StringVar()
    gene_entry = ttk.Entry(dialog)
    gene_entry.grid(column=2, row=5, pady=1)
    # Parent passed explicitly instead of relying on tkinter's implicit
    # default root, consistent with every other widget in this dialog.
    gene_entry_lbl = ttk.Label(dialog,
                               anchor="center",
                               textvariable=gene_lbl,
                               font=("Courier", 10, "bold"),
                               background="black",
                               foreground="green")
    gene_entry_lbl.grid(column=2, row=4, pady=1)
    # The lambda is required here: the callback needs the entry widget.
    gene_entry_btn = ttk.Button(dialog,
                                text="Confirm Gene (Case Sensitive)",
                                command=lambda: get_gene_id(gene_entry))
    gene_entry_btn.grid(column=2, row=6, padx=2, pady=2)

    # Create and attach confirm and close element
    kill_btn = ttk.Button(dialog, text="Exit and Plot",
                          command=dialog.destroy)
    kill_btn.grid(column=1, row=8, columnspan=2, padx=5, pady=15)

    dialog.mainloop()

    # Only plot when both selections were confirmed in the dialog.
    if 'gui_path' in globals() and 'gui_gene_id' in globals():
        plot_read_counts(gui_path, gui_gene_id)
    else:
        sys.exit()
def get_dir() -> Union[Path, None]:
    """Support function to retrieve dir from GUI.

    Opens a directory chooser. On selection, stores the directory in the
    module global ``gui_path`` and shows a shortened form of it through
    the module global ``dir_str_var``.

    Returns:
        The selected directory, or None if the chooser was dismissed
        without a selection (in which case nothing is changed).
    """
    global gui_path
    selection = filedialog.askdirectory(
        title='Please select the directory containing gene reads')
    if not selection:
        # An empty selection would otherwise become Path(''), i.e. the
        # current working directory, and be plotted by mistake.
        return None
    gui_path = Path(selection)
    short_dir = "%s/.../%s/" % (gui_path.drive, gui_path.name)
    dir_str_var.set(short_dir)
    return gui_path
def get_gene_id(gene_entry: Union[ttk.Entry, StringVar]) -> str:
    """Support function to retrieve gene ID from GUI.

    Reads the current text once, stores it in the module global
    ``gui_gene_id`` and mirrors it into the module global ``gene_lbl``.

    Args:
        gene_entry: Object whose ``get()`` returns the entered gene ID;
            the dialog passes its ``ttk.Entry`` widget.

    Returns:
        The gene ID that was read.
    """
    global gui_gene_id
    gui_gene_id = gene_entry.get()
    gene_lbl.set(gui_gene_id)
    return gui_gene_id
def main() -> None:
    """Entry point: launch the dialog for directory and gene ID selection."""
    return prc_dialog()
# Open the selection dialog only when this file is executed as a script.
if __name__ == "__main__":
    main()
GENE1,624
GENE2,895
GENE3,870
GENE4,428
GENE5,449
GENE6,696
GENE7,544
GENE8,338
GENE9,159
GENE10,267
GENE11,268
GENE12,879
GENE13,556
GENE14,613
GENE15,629
GENE16,116
GENE17,762
GENE18,67
GENE19,405
GENE20,3
GENE1,830
GENE2,602
GENE3,859
GENE4,174
GENE5,525
GENE6,606
,
GENE8,910
GENE9,194
GENE10,415
GENE11,953
GENE12,299
GENE13,127
GENE14,750
GENE15,968
GENE16,282
GENE17,868
GENE18,688
GENE19,650
GENE1,629
GENE2,803
GENE3,106
GENE4,378
GENE5,728
GENE6,783
GENE7,353
GENE8,747
GENE9,253
GENE10,883
GENE11,552
GENE12,655
GENE13,309
GENE14,133
GENE15,131
GENE16,626
GENE17,669
GENE18,493
GENE19,304
GENE1,830
GENE2,602
GENE3,859
GENE4,174
GENE5,525
GENE6,606
GENE7,851
GENE8,910
GENE9,194
GENE10,415
GENE11,953
GENE12,299
GENE13,127
GENE14,750
GENE15,968
GENE16,282
GENE17,868
GENE18,688
GENE19,650
GENE1,676
GENE2,787
GENE3,876
GENE4,368
GENE5,676
GENE6,896
GENE7,865
GENE8,585
GENE9,853
GENE10,447
GENE11,583
GENE12,573
GENE13,895
GENE14,207
GENE15,562
GENE16,411
GENE17,635
GENE18,469
GENE19,128
Random,Data,for
This,is,a
negative,control,test
some,numbers,
234,,
,432,
,,
32,,
"Test for plot_read_counts module"
import pytest
from pathlib import Path
from src.plot_read_counts import plot_read_counts
class TestPlotReadCounts:
    """Input-validation tests for ``plot_read_counts``."""

    @pytest.mark.parametrize(
        "dir_path, gene_id, expected",
        [
            # path does not exist
            ('a', 'GENE1', ValueError),
            # existing path, gene ID is not a string; Path.home() must be
            # called here - the literal 'Path.home()' is just a
            # non-existent relative path
            (Path.home(), 5, TypeError),
            # nothing supplied at all
            (None, None, ValueError),
        ]
    )
    def test_invalid_input(self, dir_path, gene_id, expected):
        """Each invalid argument combination raises the expected error."""
        with pytest.raises(expected):
            plot_read_counts(dir_path, gene_id)

    def test_directory_without_csv(self, tmp_path):
        """A directory with no .csv files is reported as a ValueError.

        Uses pytest's empty ``tmp_path`` so the result does not depend on
        the contents of the user's home directory.
        """
        with pytest.raises(ValueError):
            plot_read_counts(tmp_path, 'GENE1')
def validate_input():
    """Placeholder that currently performs no validation."""
    return None
#plot_dir = PurePath('H:/My Drive/PhD/PLS/PLS scRNAseq Repo/scrna-seq-simulation/tests/resources')
#plot_dir = 'H:/My Drive/PhD/PLS scRNAseq Repo/scrna-seq-simulation/'
# plot_dir = PurePath(filedialog.askdirectory(title='Please select the directory containing \
# gene reads'))
# gene_of_interest = input('\nPlease type a gene ID to sample: ').upper()
#gene_of_interest = 'GENE1'
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment