Issue14 prc
Build: function plot_read_counts and associated tests
Fails tests because of import error for Matplotlib
Merge request reports
Activity
requested review from @kanitz
- tests/resources/Transcript2.csv 0 → 100644
1 GENE1,624 changed this line in version 2 of the diff
3 3 flake8-docstrings 4 4 mypy 5 5 pandas 6 pytest changed this line in version 2 of the diff
- tests/test_plot_read_counts.py 0 → 100644
22 def test_invalid_input(self, dir_path, gene_id, expected): 23 """Tests invalid input.""" 24 with pytest.raises(expected): 25 plot_read_counts(dir_path, gene_id) 26 27 @pytest.mark.parametrize( 28 "dir_path, gene_id, expected", 29 [ 30 (Path(str(Path.parent())+"/tests/resources/"), 'GENE1', True), 31 (Path(str(Path.parent())+"/tests/resources/"), 'GENE1', True), 32 (Path(str(Path.parent())+"/tests/resources/"), 'GENE7', True), 33 ] 34 ) 35 def validate_input(self, dir_path, gene_id): 36 """Tests valid input with return value boolean.""" 37 assert plot_read_counts(dir_path, gene_id) is True changed this line in version 2 of the diff
- src/plot_read_counts.py 0 → 100644
1 """Plots read counts of gene expression data.""" 2 3 import logging 4 from pathlib import Path 5 from typing import Union 6 import pandas 7 import matplotlib.pyplot as plt 8 9 """Given a GeneID, construct the histogram of read counts per cell.""" Either remove it, incorporate it into the module- or function-level docstring, or turn it into a comment. Strings, even triple-quoted ones, that do not appear as the first statement in a module, class, function or method are not parsed by tools creating automatic documentation. They are, however, still parsed by the interpreter, slowing down the performance (very very very slightly, to be fair), without having any added benefits.
changed this line in version 2 of the diff
- src/plot_read_counts.py 0 → 100644
1 """Plots read counts of gene expression data.""" 2 3 import logging See this comment about the positioning, grouping and ordering of
import
statements: !17 (comment 24913)
- src/plot_read_counts.py 0 → 100644
31 AttributeError: No .csv files in chosen directory 32 """ 33 # Initialize logger 34 logging.basicConfig( 35 format='[%(asctime)s: %(levelname)s] "%(module)s" %(message)s', 36 level=logging.INFO) 37 LOGGER = logging.getLogger(__name__) 38 39 try: 40 dir_path = Path(dir_path) 41 42 if not dir_path.exists(): 43 LOGGER.critical("Invalid path") 44 raise ValueError("Invalid Path") 45 46 elif not isinstance(gene_id, str): changed this line in version 2 of the diff
- src/plot_read_counts.py 0 → 100644
29 ValueError: Gene not found in selection of .csv files. 30 TypeError: Gene ID parameter passed is not a String. 31 AttributeError: No .csv files in chosen directory 32 """ 33 # Initialize logger 34 logging.basicConfig( 35 format='[%(asctime)s: %(levelname)s] "%(module)s" %(message)s', 36 level=logging.INFO) 37 LOGGER = logging.getLogger(__name__) 38 39 try: 40 dir_path = Path(dir_path) 41 42 if not dir_path.exists(): 43 LOGGER.critical("Invalid path") 44 raise ValueError("Invalid Path") changed this line in version 2 of the diff
- src/plot_read_counts.py 0 → 100644
1 """Plots read counts of gene expression data.""" 2 3 import logging 4 from pathlib import Path 5 from typing import Union 6 import pandas 7 import matplotlib.pyplot as plt 8 9 """Given a GeneID, construct the histogram of read counts per cell.""" 10 11 12 def plot_read_counts( 13 dir_path: Union[str, Path] = None, changed this line in version 2 of the diff
- src/plot_read_counts.py 0 → 100644
12 def plot_read_counts( 13 dir_path: Union[str, Path] = None, 14 gene_id: str = None 15 ) -> bool: 16 """Plots histogram of gene ID and read counts from .csv files. 17 18 This function reads .csv files from a directory formatted 19 "GeneID, Counts" and plots a histogram of read counts for a 20 chosen geneID. 21 22 Args: 23 dir_path: Path of directory containing .csv files of read counts. 24 gene_id: Name of gene to plot. 25 26 Raises: 27 ValueError: No value is passed for path and/or gene ID. changed this line in version 2 of the diff
- src/plot_read_counts.py 0 → 100644
43 LOGGER.critical("Invalid path") 44 raise ValueError("Invalid Path") 45 46 elif not isinstance(gene_id, str): 47 LOGGER.critical("Invalid gene ID, please enter as string") 48 raise TypeError("Invalid gene ID") 49 50 # identify csv files from directory, formatted ['Gene', 'Count'] 51 # if this is not immediately cast as list, on some python versions 52 # it sets file_paths to null after a single call which is big sad 53 file_paths = list(dir_path.glob("*.csv")) 54 file_paths.sort() 55 56 if file_paths == 0: 57 LOGGER.warning('No csv files found in current directory, exiting') 58 raise AttributeError("No .csv files found in chosen directory") changed this line in version 2 of the diff
- src/plot_read_counts.py 0 → 100644
18 This function reads .csv files from a directory formatted 19 "GeneID, Counts" and plots a histogram of read counts for a 20 chosen geneID. 21 22 Args: 23 dir_path: Path of directory containing .csv files of read counts. 24 gene_id: Name of gene to plot. 25 26 Raises: 27 ValueError: No value is passed for path and/or gene ID. 28 ValueError: Path passed in does not exist. 29 ValueError: Gene not found in selection of .csv files. 30 TypeError: Gene ID parameter passed is not a String. 31 AttributeError: No .csv files in chosen directory 32 """ 33 # Initialize logger - src/plot_read_counts.py 0 → 100644
25 26 Raises: 27 ValueError: No value is passed for path and/or gene ID. 28 ValueError: Path passed in does not exist. 29 ValueError: Gene not found in selection of .csv files. 30 TypeError: Gene ID parameter passed is not a String. 31 AttributeError: No .csv files in chosen directory 32 """ 33 # Initialize logger 34 logging.basicConfig( 35 format='[%(asctime)s: %(levelname)s] "%(module)s" %(message)s', 36 level=logging.INFO) 37 LOGGER = logging.getLogger(__name__) 38 39 try: 40 dir_path = Path(dir_path) It's best not to have such big
try
blocks, because you will get very unspecific error handling: somewhere in all of those lines something went wrong... Best to catch errors as specifically as possible. Also, you don't need to catch and handle every possible error. For example, if dir_path
is a string, it will be very unlikely that Path(dir_path)
will fail - but if it does, an error will be raised anyway. If that happens more often than we think, we can still catch that error more specifically. changed this line in version 2 of the diff
- src/plot_read_counts.py 0 → 100644
3 import logging 4 from pathlib import Path 5 from typing import Union 6 import pandas 7 import matplotlib.pyplot as plt 8 9 """Given a GeneID, construct the histogram of read counts per cell.""" 10 11 12 def plot_read_counts( 13 dir_path: Union[str, Path] = None, 14 gene_id: str = None 15 ) -> bool: 16 """Plots histogram of gene ID and read counts from .csv files. 17 18 This function reads .csv files from a directory formatted changed this line in version 2 of the diff
- src/plot_read_counts.py 0 → 100644
83 % (file_path.stem)) 84 gene_count = [0] 85 86 # in case count entry is missing, interpreted as 0 87 if not gene_count: 88 gene_count = [0] 89 LOGGER.warning( 90 "File %s contains an entry for this gene but is missing a count number \ 91 this is interpreted as 0" % (file_path.stem) 92 ) 93 94 file_counter += 1 95 gene_count_dict[file_path.stem] = sum(gene_count) 96 97 except AttributeError: 98 LOGGER.fatal("Invalid selection, please verify input") changed this line in version 2 of the diff
- src/plot_read_counts.py 0 → 100644
88 gene_count = [0] 89 LOGGER.warning( 90 "File %s contains an entry for this gene but is missing a count number \ 91 this is interpreted as 0" % (file_path.stem) 92 ) 93 94 file_counter += 1 95 gene_count_dict[file_path.stem] = sum(gene_count) 96 97 except AttributeError: 98 LOGGER.fatal("Invalid selection, please verify input") 99 raise ValueError("Either the path, gene ID, or both were not properly specified") 100 101 if sum(gene_count_dict.values()) == 0: 102 LOGGER.warning( 103 "Gene not found -> Please check for correct spelling or ID" changed this line in version 2 of the diff
- src/plot_read_counts.py 0 → 100644
102 LOGGER.warning( 103 "Gene not found -> Please check for correct spelling or ID" 104 ) 105 raise ValueError("Gene not found") 106 107 else: 108 LOGGER.info('Read complete from %s files' % (file_counter-1)) 109 110 # read in dictionary from read_counts and plot counts on the same histogram 111 file_names = list(gene_count_dict.keys()) 112 counts = list(gene_count_dict.values()) 113 114 # create and show bar plot of read counts for chosen gene 115 plt.bar(range(len(gene_count_dict)), counts, tick_label=file_names) 116 plt.title('Counts for %s' % (gene_id)) 117 plt.show() changed this line in version 2 of the diff
removed review request for @kanitz
added 1 commit
- 79401641 - refactor: outputs to file in ./plots, updated cli and tests