Skip to content
Snippets Groups Projects

Issue14 prc

Closed Madan Mukundan requested to merge issue14_PRC into main
16 unresolved threads

Build: function plot_read_counts and associated tests

Fails tests because of import error for Matplotlib

Merge request reports

Loading
Loading

Activity

Filter activity
  • Approvals
  • Assignees & reviewers
  • Comments (from bots)
  • Comments (from users)
  • Commits & branches
  • Edits
  • Labels
  • Lock status
  • Mentions
  • Merge request status
  • Tracking
1 GENE1,624
  • 3 3 flake8-docstrings
    4 4 mypy
    5 5 pandas
    6 pytest
  • 22 def test_invalid_input(self, dir_path, gene_id, expected):
    23 """Tests invalid input."""
    24 with pytest.raises(expected):
    25 plot_read_counts(dir_path, gene_id)
    26
    27 @pytest.mark.parametrize(
    28 "dir_path, gene_id, expected",
    29 [
    30 (Path(str(Path.parent())+"/tests/resources/"), 'GENE1', True),
    31 (Path(str(Path.parent())+"/tests/resources/"), 'GENE1', True),
    32 (Path(str(Path.parent())+"/tests/resources/"), 'GENE7', True),
    33 ]
    34 )
    35 def validate_input(self, dir_path, gene_id):
    36 """Tests valid input with return value boolean."""
    37 assert plot_read_counts(dir_path, gene_id) is True
  • 1 """Plots read counts of gene expression data."""
    2
    3 import logging
    4 from pathlib import Path
    5 from typing import Union
    6 import pandas
    7 import matplotlib.pyplot as plt
    8
    9 """Given a GeneID, construct the histogram of read counts per cell."""
  • Alex Kanitz
  • 1 """Plots read counts of gene expression data."""
    2
    3 import logging
  • 31 AttributeError: No .csv files in chosen directory
    32 """
    33 # Initialize logger
    34 logging.basicConfig(
    35 format='[%(asctime)s: %(levelname)s] "%(module)s" %(message)s',
    36 level=logging.INFO)
    37 LOGGER = logging.getLogger(__name__)
    38
    39 try:
    40 dir_path = Path(dir_path)
    41
    42 if not dir_path.exists():
    43 LOGGER.critical("Invalid path")
    44 raise ValueError("Invalid Path")
    45
    46 elif not isinstance(gene_id, str):
  • 29 ValueError: Gene not found in selection of .csv files.
    30 TypeError: Gene ID parameter passed is not a String.
    31 AttributeError: No .csv files in chosen directory
    32 """
    33 # Initialize logger
    34 logging.basicConfig(
    35 format='[%(asctime)s: %(levelname)s] "%(module)s" %(message)s',
    36 level=logging.INFO)
    37 LOGGER = logging.getLogger(__name__)
    38
    39 try:
    40 dir_path = Path(dir_path)
    41
    42 if not dir_path.exists():
    43 LOGGER.critical("Invalid path")
    44 raise ValueError("Invalid Path")
  • 1 """Plots read counts of gene expression data."""
    2
    3 import logging
    4 from pathlib import Path
    5 from typing import Union
    6 import pandas
    7 import matplotlib.pyplot as plt
    8
    9 """Given a GeneID, construct the histogram of read counts per cell."""
    10
    11
    12 def plot_read_counts(
    13 dir_path: Union[str, Path] = None,
  • 12 def plot_read_counts(
    13 dir_path: Union[str, Path] = None,
    14 gene_id: str = None
    15 ) -> bool:
    16 """Plots histogram of gene ID and read counts from .csv files.
    17
    18 This function reads .csv files from a directory formatted
    19 "GeneID, Counts" and plots a histogram of read counts for a
    20 chosen geneID.
    21
    22 Args:
    23 dir_path: Path of directory containing .csv files of read counts.
    24 gene_id: Name of gene to plot.
    25
    26 Raises:
    27 ValueError: No value is passed for path and/or gene ID.
  • 43 LOGGER.critical("Invalid path")
    44 raise ValueError("Invalid Path")
    45
    46 elif not isinstance(gene_id, str):
    47 LOGGER.critical("Invalid gene ID, please enter as string")
    48 raise TypeError("Invalid gene ID")
    49
    50 # identify csv files from directory, formatted ['Gene', 'Count']
    51 # if this is not immediately cast as list, on some python versions
    52 # it sets file_paths to null after a single call which is big sad
    53 file_paths = list(dir_path.glob("*.csv"))
    54 file_paths.sort()
    55
    56 if file_paths == 0:
    57 LOGGER.warning('No csv files found in current directory, exiting')
    58 raise AttributeError("No .csv files found in chosen directory")
  • Alex Kanitz
  • 18 This function reads .csv files from a directory formatted
    19 "GeneID, Counts" and plots a histogram of read counts for a
    20 chosen geneID.
    21
    22 Args:
    23 dir_path: Path of directory containing .csv files of read counts.
    24 gene_id: Name of gene to plot.
    25
    26 Raises:
    27 ValueError: No value is passed for path and/or gene ID.
    28 ValueError: Path passed in does not exist.
    29 ValueError: Gene not found in selection of .csv files.
    30 TypeError: Gene ID parameter passed is not a String.
    31 AttributeError: No .csv files in chosen directory
    32 """
    33 # Initialize logger
    • The logging configuration should be set in the application-level code, not inside the function - e.g., in a module such as cli.py, which contains the command-line interface and the main() code that calls this function with the command-line parameters provided by the user.

    • Please register or sign in to reply
  • 25
    26 Raises:
    27 ValueError: No value is passed for path and/or gene ID.
    28 ValueError: Path passed in does not exist.
    29 ValueError: Gene not found in selection of .csv files.
    30 TypeError: Gene ID parameter passed is not a String.
    31 AttributeError: No .csv files in chosen directory
    32 """
    33 # Initialize logger
    34 logging.basicConfig(
    35 format='[%(asctime)s: %(levelname)s] "%(module)s" %(message)s',
    36 level=logging.INFO)
    37 LOGGER = logging.getLogger(__name__)
    38
    39 try:
    40 dir_path = Path(dir_path)
    • It's best not to have such big try blocks, because the error handling becomes very unspecific: all you learn is that something went wrong somewhere in all of those lines. It is best to catch errors as specifically as possible. Also, you don't need to catch and handle every possible error. For example, if dir_path is a string, it is very unlikely that Path(dir_path) will fail - but if it does, an error will be raised anyway. If that happens more often than we think, we can still catch that error more specifically.

    • Madan Mukundan changed this line in version 2 of the diff

      changed this line in version 2 of the diff

    • Please register or sign in to reply
  • 3 import logging
    4 from pathlib import Path
    5 from typing import Union
    6 import pandas
    7 import matplotlib.pyplot as plt
    8
    9 """Given a GeneID, construct the histogram of read counts per cell."""
    10
    11
    12 def plot_read_counts(
    13 dir_path: Union[str, Path] = None,
    14 gene_id: str = None
    15 ) -> bool:
    16 """Plots histogram of gene ID and read counts from .csv files.
    17
    18 This function reads .csv files from a directory formatted
  • 83 % (file_path.stem))
    84 gene_count = [0]
    85
    86 # in case count entry is missing, interpreted as 0
    87 if not gene_count:
    88 gene_count = [0]
    89 LOGGER.warning(
    90 "File %s contains an entry for this gene but is missing a count number \
    91 this is interpreted as 0" % (file_path.stem)
    92 )
    93
    94 file_counter += 1
    95 gene_count_dict[file_path.stem] = sum(gene_count)
    96
    97 except AttributeError:
    98 LOGGER.fatal("Invalid selection, please verify input")
  • 88 gene_count = [0]
    89 LOGGER.warning(
    90 "File %s contains an entry for this gene but is missing a count number \
    91 this is interpreted as 0" % (file_path.stem)
    92 )
    93
    94 file_counter += 1
    95 gene_count_dict[file_path.stem] = sum(gene_count)
    96
    97 except AttributeError:
    98 LOGGER.fatal("Invalid selection, please verify input")
    99 raise ValueError("Either the path, gene ID, or both were not properly specified")
    100
    101 if sum(gene_count_dict.values()) == 0:
    102 LOGGER.warning(
    103 "Gene not found -> Please check for correct spelling or ID"
  • 102 LOGGER.warning(
    103 "Gene not found -> Please check for correct spelling or ID"
    104 )
    105 raise ValueError("Gene not found")
    106
    107 else:
    108 LOGGER.info('Read complete from %s files' % (file_counter-1))
    109
    110 # read in dictionary from read_counts and plot counts on the same histogram
    111 file_names = list(gene_count_dict.keys())
    112 counts = list(gene_count_dict.values())
    113
    114 # create and show bar plot of read counts for chosen gene
    115 plt.bar(range(len(gene_count_dict)), counts, tick_label=file_names)
    116 plt.title('Counts for %s' % (gene_id))
    117 plt.show()
  • Alex Kanitz removed review request for @kanitz

    removed review request for @kanitz

  • Madan Mukundan added 1 commit

    added 1 commit

    • 79401641 - refactor: outputs to file in ./plots, updated cli and tests

    Compare with previous version

  • closed

  • Please register or sign in to reply
    Loading