Skip to content
Snippets Groups Projects
Commit 9e2c6cc8 authored by Flavio Lombardo's avatar Flavio Lombardo :goal:
Browse files

add more tests and inserted global variables, DOI fix

parent 7b377281
No related branches found
No related tags found
No related merge requests found
Showing
with 151 additions and 86 deletions
......@@ -5,3 +5,7 @@
^\.github$
^LICENSE\.md$
^cran-comments\.md$
^config_drugsens\.txt$
^script_for_qupath\.txt$
^assets$
^CRAN-SUBMISSION$
......@@ -8,3 +8,5 @@
*.html
figures/**
temp/**
.DS_Store
*.txt
Package: drugsens
Title: Automated Analysis of QuPath Output Data and Metadata Extraction
Title: Automated Analysis of 'QuPath' Output Data and Metadata Extraction
Description: A comprehensive toolkit for analyzing microscopy data output from
QuPath software. Provides functionality for automated data processing,
'QuPath' software. Provides functionality for automated data processing,
metadata extraction, and statistical analysis of imaging results.
Designed to complement the STAR Protocol Protocol for quantifying drug
sensitivity in 3D patient-derived ovarian cancer models
10.1016/j.xpro.2024.103274.
<doi:10.1016/j.xpro.2024.103274>.
Version: 0.1.0
BugReports: https://git.scicore.unibas.ch/ovca-research/drugsens/-/issues
SystemRequirements: QuPath™ 4.0.0 or higher
......
#' Reformat the counts data in longer format
#' @description
#' This function gets the count data data.frame, that has a wider format and it returns a longer-formatted data.frame
#' @title Data format changer
#' @importFrom tidyr pivot_longer
#' @importFrom dplyr select
#' @importFrom tidyselect any_of
......@@ -11,6 +12,7 @@
#' @param unique_name_row_identifier String that indicates the unique identifier for each image, defaults as "filter_image"
#' @export
#' @examples
#' \dontrun{
#' list_of_relabeling =list( "PathCellObject" = "onlyDAPIPositve",
#' "cCasp3" = "cCASP3", "E-Cadherin: cCASP3" = "E-Cadherin and cCASP3",
#' "EpCAM_E-Cadherin" = "E-Cadherin",
......@@ -20,8 +22,7 @@
#' counts_dataframe <- make_count_dataframe(bind_data)
#' plotting_ready_dataframe <-
#' change_data_format_to_longer(counts_dataframe)
# adding the image number so to identify the distribution
#' }
# pivot_longer
change_data_format_to_longer <- function(.data,
......
# Get a list of all the files that are in a user-specified folder and get a list of full paths
#' Internal utility functions for file handling
#' @name utils_internal
#' @description
#' This function lists the content of a selected folder either recursively or not
#' This file contains internal utility functions for file handling and processing
#' @keywords internal
#' @returns list
#' @name "Name", "list_of_relabeling", "marker_positivity","marker_positivity_ratio", "x", "y"
#' @importFrom utils read.csv
#' @importFrom stats setNames
#' @import roxygen2
#'
# important for the scripts
globalVariables(c(
"Name", "list_of_relabeling", "marker_positivity",
"marker_positivity_ratio", "x", "y"
))
# list all the files
list_all_files <- function(define_path, extension, recursive_search) {
......@@ -33,13 +26,14 @@ list_all_files <- function(define_path, extension, recursive_search) {
)
}
# Helper function to read and process a single file
#' @description
#' This function returns a processed single file
# Process a Single File
#' @title Process a Single File
#' @name process_file
#' @description This function returns a processed single file
#' @param file_path Path to the file
#' @param extension String File extension to filter
#' @keywords internal
#' @returns dataframe
#' @return dataframe
process_file <- function(file_path,
# relabeling_map,
extension) {
......@@ -85,27 +79,31 @@ process_file <- function(file_path,
}
#' Merge all the dataframes coming out from the QuPath
#' @name data_binding
#' @description
#' This function try to guess the string patterns that are in the dataset and then fill the dataframe
#' with that information. Finally the data is combined and combined them into one file
#' This function identifies string patterns in the dataset, fills the dataframe
#' with that information, and combines all data into a single file
#' @import knitr
#' @importFrom stringr str_extract
#' @return A `dataframe`/`tibble`.
#' @param path_to_the_projects_folder String/Path The path where the files coming out of QuPath are located
#' @param files_extension_to_look_for String The extension of the file outputted from QuPath, (default is "csv")
#' @param recursive_search Boolean, it defines the behavior of the file search, whether recursive or not (default is FALSE)
#' @returns Returns a concatenated dataframe from all the files within the indicated one
#' @param forcePath String defining an alternative path to the config file
#' @return A concatenated dataframe from all the files within the indicated path
#' @export
#' @examples
#' \dontrun{
#' bind_data <- data_binding(path_to_the_projects_folder = system.file("extdata/to_merge/",
#' package = "drugsens"))
#' #This will return the dataframe of all the data in the folder
#' package = "drugsens"))
#'}
# Main function to bind data from multiple files
data_binding <- function(path_to_the_projects_folder,
files_extension_to_look_for = "csv",
recursive_search = FALSE) {
recursive_search = FALSE,
forcePath = NULL) {
# run configuration file
make_run_config()
make_run_config(forcePath = forcePath)
# Validate input parameters
if (!dir.exists(path_to_the_projects_folder)) {
......
......@@ -2,12 +2,12 @@
#' @description
#' Generate a useful script to consistently save the output data from QuPath in .csv format following the naming conventions
#' followed during the package development.
#'
#' @return `script_for_qupath.txt` in local working directory.
#'
#' @export
#' @examples
#' generate_qupath_script()
#' \dontrun{
#' generate_qupath_script()
#' }
generate_qupath_script <- function() {
write(
x = paste0('
......
......@@ -8,13 +8,15 @@
#' @param folder_name A string indicating the name of the folder where to save the plots in case that save_plots = TRUE
#' @param isolate_a_specific_patient A string indicating the patient name to isolate for single plot case (default is NULL)
#' @param x_plot_var A string indicating the treatment's full name for the QC plots (default is "Treatment_complete")
#'
#' @import ggplot2
#' @import ggpubr
#' @importFrom dplyr filter
#' @return A `dataframe`/`tibble`.
#' @example
#' \dontrun{get_QC_plots(longer_format_dataframe, patient_column_name = "PID", save_plots = TRUE, folder_name = "figures")}
#' @examples
#' \dontrun{
#' get_QC_plots(longer_format_dataframe, patient_column_name = "PID",
#' save_plots = TRUE, folder_name = "figures")
#' }
#' @export
get_QC_plots <- function(.data,
patient_column_name = "PID",
......@@ -33,7 +35,9 @@ get_QC_plots <- function(.data,
QC_plot <- .data |>
dplyr::filter(.data[[patient_column_name]] == i) |>
ggplot(aes(x = !!as.name(x_plot_var), y = marker_positivity_ratio, col = marker_positivity)) +
ggplot(aes(x = .data[[x_plot_var]],
y = .data$marker_positivity_ratio,
col = .data$marker_positivity)) +
geom_boxplot(
position = position_dodge(width = 1.0),
) +
......
......@@ -16,14 +16,16 @@
#' @param p_height Integer, indicate the plot's height (default is 10 inches)
#' @param p_width Integer, indicate the plot's width (default is 10 inches)
#' @param drug_column_name String, indicate the column indicating the Drug/Treatment (default is "Treatment")
#'
#' @import ggplot2
#' @import ggpubr
#' @importFrom readr write_excel_csv
#' @importFrom dplyr filter
#' @return A `list`/`NULL`.
#' @example
#' \dontrun {qc <- get_QC_plots_parsed_merged_data(bind_data, save_plots = TRUE, save_list_of_plots = TRUE)}
#' @examples
#' \dontrun{
#' qc <- get_QC_plots_parsed_merged_data(bind_data, save_plots = TRUE,
#' save_list_of_plots = TRUE)
#' }
#' @export
get_QC_plots_parsed_merged_data <- function(.data,
......
......@@ -8,12 +8,22 @@
#' @param name_of_the_markers_column The name of the column of the .data where the marker names are expressed (i.e. E-Cadherin, DAPI), defaults to "Name"
#' @export
#' @examples
#' bind_data <- data_binding(path_to_the_projects_folder = system.file("extdata/to_merge/", package = "drugsens"), files_extension_to_look_for = "csv")
#' \dontrun{
#' pkg_path <- system.file("extdata/to_merge/", package = "drugsens")
#' bind_data <- data_binding(
#' path_to_the_projects_folder = pkg_path,
#' files_extension_to_look_for = "csv"
#' )
#' counts_dataframe <- make_count_dataframe(bind_data)
#' plotting_ready_dataframe <- change_data_format_to_longer(counts_dataframe)
#' @example
#' \dontrun{make_count_dataframe(data, name_of_the_markers_column = "Name", unique_name_row_identifier = "filter_image")}
#' @return text file
#' plotting_ready_dataframe <- change_data_format_to_longer(
#' counts_dataframe
#' )
##' make_count_dataframe(
#' data,
#' name_of_the_markers_column = "Name",
#' unique_name_row_identifier = "filter_image"
#' )
#' }
# adding the image number so to identify the distribution
make_count_dataframe <- function(.data, unique_name_row_identifier = "filter_image",
......
# In make_run_config.R:
#' Generates and use a config txt file
#' @description
#' When this function runs for the first time, it will generate a config.txt file in the user working directory.
......@@ -7,16 +9,18 @@
#' @param forcePath String, Define a custom path for the config file
#' @export
#' @return A `dataframe`/`tibble`.
#' @example
#' \dontrun {make_run_config()}
#' @examples
#' \dontrun{
#' make_run_config()
#' }
make_run_config <- function(overwrite_config = FALSE, forcePath = NULL) {
currentPath <- if (is.null(forcePath)) getwd() else forcePath
config_file <- file.path(currentPath, "config_drugsens.txt")
if (is.null(forcePath)) currentPath <- getwd() else currentPath <- forcePath
if (file.exists("config_drugsens.txt")) {
if (file.exists(config_file)) {
tryCatch(
expr = {
source("config_drugsens.txt", local = FALSE)
source(config_file, local = FALSE)
},
error = function(error) {
message("drugsens could not load the 'config.txt' file.
......@@ -26,13 +30,9 @@ make_run_config <- function(overwrite_config = FALSE, forcePath = NULL) {
run_config to veryfy that the data was correctly read")
}
)
} else if (overwrite_config){
message("Overwriting config_drugsens.txt")
} else if (overwrite_config) {
write(
x =
(
'
# List of markers to relabel
x = '# List of markers to relabel
list_of_relabeling =
list(
"PathCellObject" = "onlyDAPIPositve",
......@@ -40,17 +40,12 @@ make_run_config <- function(overwrite_config = FALSE, forcePath = NULL) {
"E-Cadherin: cCASP3" = "E-Cadherin and cCASP3",
"EpCAM_E-Cadherin" = "E-Cadherin",
"EpCAM_E-Cadherin and cCASP3" = "E-Cadherin and cCASP3"
)'
),
file = paste0(path.expand(currentPath), "/config_drugsens.txt")
)',
file = config_file
)
message("config_drugsens.txt has been overwritten correctly.")
} else {
write(
x =
(
'
# List of markers to relabel
x = '# List of markers to relabel
list_of_relabeling =
list(
"PathCellObject" = "onlyDAPIPositve",
......@@ -58,9 +53,8 @@ make_run_config <- function(overwrite_config = FALSE, forcePath = NULL) {
"E-Cadherin: cCASP3" = "E-Cadherin and cCASP3",
"EpCAM_E-Cadherin" = "E-Cadherin",
"EpCAM_E-Cadherin and cCASP3" = "E-Cadherin and cCASP3"
)'
),
file = paste0(path.expand(currentPath), "/config_drugsens.txt")
)',
file = config_file
)
}
}
......@@ -9,10 +9,13 @@
#' @return A `dataframe`/`tibble`.
#' @param .data dataframe with parsed metadata
#' @examples
#' input_data <- data.frame(Image = "B516_Ascites_2023-11-25_DOC2020-12-14_dmso_rep_Ecad_cCasp3_(series 01).tif")
#' test <- drugsens:::string_parsing(input_data)
#' @example
#' \dontrun {data.parsed <- string_parsing(.data)}
#' input_data <- data.frame(
#' Image = "B516_Ascites_2023-11-25_DOC2020-12-14_dmso_rep_Ecad_cCasp3_(series 01).tif"
#' )
#' test <- drugsens:::string_parsing(input_data)
#' \dontrun{
#' data.parsed <- string_parsing(.data)
#' }
# Main function to bind data from multiple files
string_parsing <- function(.data) {
......
R/zzz.R 0 → 100644
# Package load hook.
# Declares the names below as known global variables via
# utils::globalVariables(), so that code checking does not flag them as
# undefined when they are referenced without a visible binding.
# `libname` and `pkgname` are the standard hook arguments; neither is used.
.onLoad <- function(libname, pkgname) {
  declared_globals <- c(
    "list_of_relabeling",
    "marker_positivity_ratio",
    "marker_positivity",
    "x",
    "y",
    "Name"
  )
  # Called directly from the hook body so the enclosing top-level
  # environment is resolved exactly as in a plain inline call.
  utils::globalVariables(declared_globals)
}
......@@ -208,7 +208,7 @@ In this code snippets we show an example of mock data `unique(bind_data$PID)` wi
> ⚠️ **WARNING**: As long as you keep the formatting as the above examples.
The dates should also be in the format **yyyy-mm-dd**. For combinations of two drugs, the names should be written together, with the first letter of each drug capitalized (**C**arboplatin**P**aclitaxel) and the remaining letters in lowercase.
For example **CarboplatinPaclitaxel_100_uM_10_nM**. This indicates a drug combination of Carboplatin 100_uM and Paclitaxel 10_nM. Each drug amount and each unit should always be separated by `_`. The first 100_uM belongs to the Carboplatin and the 10_nM belongs to the Paclitaxel. Those constrains are due to the parsing of the strings into useful metadata. If some of the data is not present, you can use a `.` separated by `_`. If you need additional data parsing, please let us know by filing an issue on GitLab [GitLab Issue]("https://git.scicore.unibas.ch/ovca-research/drugsens/-/issues").
For example **CarboplatinPaclitaxel_100_uM_10_nM**. This indicates a drug combination of Carboplatin 100_uM and Paclitaxel 10_nM. Each drug amount and each unit should always be separated by `_`. The first 100_uM belongs to the Carboplatin and the 10_nM belongs to the Paclitaxel. These constraints are due to the parsing of the strings into useful metadata. If some of the data is not present, you can use a `.` separated by `_`. If you need additional data parsing, please let us know by filing an issue on GitLab https://git.scicore.unibas.ch/ovca-research/drugsens/-/issues.
### Counting the number of positive cells for each marker in every image
This function will take the dataframe generated in the previous step and count, image by image (summing the markers of every stack), the number of marker occurrences for every sample.
......@@ -386,6 +386,6 @@ Renv will automatically activate and install the necessary packages as specified
</details>
### Reporting Issues
If you encounter any bugs or have suggestions for improvements, please file an issue using our [GitLab Issue]("https://git.scicore.unibas.ch/ovca-research/drugsens/-/issues"). Be sure to include as much information as possible to help us understand and address the issue.
If you encounter any bugs or have suggestions for improvements, please file an issue using our **GitLab Issue:** https://git.scicore.unibas.ch/ovca-research/drugsens/-/issues. Be sure to include as much information as possible to help us understand and address the issue.
**BugReports:** https://git.scicore.unibas.ch/ovca-research/drugsens/-/issues
Please make sure to file the issue in the GitLab repo as this one in GitHub is a forward-only mirror repo.
......@@ -2,7 +2,7 @@
% Please edit documentation in R/change_data_format_to_longer.R
\name{change_data_format_to_longer}
\alias{change_data_format_to_longer}
\title{Reformat the counts data in longer format}
\title{Data format changer}
\usage{
change_data_format_to_longer(
.data,
......@@ -26,7 +26,11 @@ A \code{dataframe}/\code{tibble}.
\description{
This function gets the count data data.frame, that has a wider format and it returns a longer-formatted data.frame
}
\details{
Reformat the counts data in longer format
}
\examples{
\dontrun{
list_of_relabeling =list( "PathCellObject" = "onlyDAPIPositve",
"cCasp3" = "cCASP3", "E-Cadherin: cCASP3" = "E-Cadherin and cCASP3",
"EpCAM_E-Cadherin" = "E-Cadherin",
......@@ -37,3 +41,4 @@ counts_dataframe <- make_count_dataframe(bind_data)
plotting_ready_dataframe <-
change_data_format_to_longer(counts_dataframe)
}
}
......@@ -7,7 +7,8 @@
data_binding(
path_to_the_projects_folder,
files_extension_to_look_for = "csv",
recursive_search = FALSE
recursive_search = FALSE,
forcePath = NULL
)
}
\arguments{
......@@ -16,18 +17,19 @@ data_binding(
\item{files_extension_to_look_for}{String The extension of the file outputted from QuPath, (default is "csv")}
\item{recursive_search}{Boolean, it defines the behavior of the file search, whether recursive or not (default is FALSE)}
\item{forcePath}{String defining an alternative path to the config file}
}
\value{
A \code{dataframe}/\code{tibble}.
Returns a concatenated dataframe from all the files within the indicated one
A concatenated dataframe from all the files within the indicated path
}
\description{
This function try to guess the string patterns that are in the dataset and then fill the dataframe
with that information. Finally the data is combined and combined them into one file
This function identifies string patterns in the dataset, fills the dataframe
with that information, and combines all data into a single file
}
\examples{
\dontrun{
bind_data <- data_binding(path_to_the_projects_folder = system.file("extdata/to_merge/",
package = "drugsens"))
#This will return the dataframe of all the data in the folder
package = "drugsens"))
}
}
......@@ -14,5 +14,7 @@ Generate a useful script to consistently save the output data from QuPath in .cs
followed during the package development.
}
\examples{
generate_qupath_script()
\dontrun{
generate_qupath_script()
}
}
......@@ -35,3 +35,9 @@ A \code{dataframe}/\code{tibble}.
\description{
Plot data to visualize immediate trends
}
\examples{
\dontrun{
get_QC_plots(longer_format_dataframe, patient_column_name = "PID",
save_plots = TRUE, folder_name = "figures")
}
}
......@@ -53,3 +53,9 @@ A \code{list}/\code{NULL}.
\description{
This plot can show trends within the dataset and run some basic statistics.
}
\examples{
\dontrun{
qc <- get_QC_plots_parsed_merged_data(bind_data, save_plots = TRUE,
save_list_of_plots = TRUE)
}
}
......@@ -19,14 +19,25 @@ make_count_dataframe(
}
\value{
A \code{dataframe}/\code{tibble}.
text file
}
\description{
This function counts every single marker present in the "Name" column of the data.frame and return a dataframe of the counts per marker
}
\examples{
bind_data <- data_binding(path_to_the_projects_folder = system.file("extdata/to_merge/", package = "drugsens"), files_extension_to_look_for = "csv")
\dontrun{
pkg_path <- system.file("extdata/to_merge/", package = "drugsens")
bind_data <- data_binding(
path_to_the_projects_folder = pkg_path,
files_extension_to_look_for = "csv"
)
counts_dataframe <- make_count_dataframe(bind_data)
plotting_ready_dataframe <- change_data_format_to_longer(counts_dataframe)
plotting_ready_dataframe <- change_data_format_to_longer(
counts_dataframe
)
make_count_dataframe(
data,
name_of_the_markers_column = "Name",
unique_name_row_identifier = "filter_image"
)
}
}
......@@ -19,3 +19,8 @@ When this function run the first time, it will generated a config.txt file in th
It will import the data config file into the use environment. This data will be used to change the column names
of the imported dataset and change the name of the markers that is often incorrectly exported.
}
\examples{
\dontrun{
make_run_config()
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment