diff --git a/DESCRIPTION b/DESCRIPTION index de1cc47ca88da1aecff1e86bbbdc1d0406d43eba..fac926bb5a3ea04db89510c6035123c60265c151 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,7 +10,7 @@ Version: 0.1.0 BugReports: https://git.scicore.unibas.ch/ovca-research/drugsens/-/issues SystemRequirements: QuPath™ 4.0.0 or higher Authors@R: c( - person("Flavio", "Lombardo", , "flavio.lombardo@unibas.ch", role = c("aut", "cre", "cph")), + person("Flavio", "Lombardo", , "flavio.lombardo@unibas.ch", role = c("aut", "cre", "cph"), comment = c(ORCID = "0000-0002-4853-6838")), person("Ricardo Coelho", role = c("cph")), person("Ovarian Cancer Research", role = c("cph")), person("University of Basel and University Hospital Basel", role = c("cph")) @@ -19,16 +19,16 @@ URL: https://git.scicore.unibas.ch/ovca-research/drugsens/ Maintainer: Flavio Lombardo <flavio.lombardo@unibas.ch> License: MIT + file LICENSE Imports: - utils, dplyr, - tidyr, - readr, - stringr, - knitr, ggplot2, ggpubr, + knitr, roxygen2, + stats, + stringr, + tidyr, tidyselect, + utils, testthat (>= 3.0.0) Depends: R (>= 4.2) diff --git a/NAMESPACE b/NAMESPACE index 1bec287b8e48a624996bca6aaa64e8b52d92607d..b73a5960e5d797af505889204edec8d9b92ac989 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,7 +15,18 @@ import(roxygen2) import(testthat) importFrom(dplyr,filter) importFrom(dplyr,select) -importFrom(readr,write_excel_csv) +importFrom(ggplot2,aes) +importFrom(ggplot2,aes_string) +importFrom(ggplot2,element_text) +importFrom(ggplot2,facet_wrap) +importFrom(ggplot2,geom_boxplot) +importFrom(ggplot2,geom_jitter) +importFrom(ggplot2,geom_violin) +importFrom(ggplot2,ggplot) +importFrom(ggplot2,labs) +importFrom(ggplot2,position_jitter) +importFrom(ggplot2,stat_summary) +importFrom(ggplot2,theme) importFrom(stats,setNames) importFrom(stringr,str_count) importFrom(stringr,str_extract) diff --git a/R/generate_qu_path_script.R b/R/generate_qupath_script.R similarity index 83% rename from R/generate_qu_path_script.R rename to 
R/generate_qupath_script.R index f006a2c632230f281f2b216eb85e00c636564af6..487b2548bed216c39e8e3ec4fedc773c3d762e81 100644 --- a/R/generate_qu_path_script.R +++ b/R/generate_qupath_script.R @@ -2,13 +2,24 @@ #' @description #' Generate a useful script to consistently save the output data from QuPath in .csv format following the naming conventions #' followed during the package development. -#' @return `script_for_qupath.txt` in local working directory. +#' @param output_dir Directory where the script should be saved. If NULL, uses tempdir() +#' @return Invisibly returns the path to the generated script file. #' @export #' @examples #' \dontrun{ +#' # Generate script in a temporary directory #' generate_qupath_script() +#' +#' # Generate script in a specific directory +#' output_dir <- tempdir() +#' generate_qupath_script(output_dir = output_dir) #' } -generate_qupath_script <- function() { +generate_qupath_script <- function(output_dir = NULL) { + if(is.null(output_dir)) { + output_dir <- tempdir() + } + output_file <- file.path(output_dir, "script_for_qupath.txt") + write( x = paste0(' //This code script was tested with QuPath 4 @@ -53,9 +64,9 @@ def exporter = new MeasurementExporter() print "Done!" 
'), - file = paste0(path.expand(getwd()), "/script_for_qupath.txt") + file = output_file ) message("You can now take the script and personalize it to your needs") - message(paste0(Sys.time(), " The script file was generated here: ", getwd(), "/")) + message(paste0(Sys.time(), " The script file was generated here: ", output_file, "/")) message(paste0(Sys.time(), " Please make sure to follow the name convention here proposed, or it might fail to get all the information")) } diff --git a/R/get_QC_plots.R b/R/get_QC_plots.R index 8a5336f3235507a92e5e8bf075c99aa550c3f70e..7bd93af3305a15d7ead8ec4cb90d4037270fa4ad 100644 --- a/R/get_QC_plots.R +++ b/R/get_QC_plots.R @@ -1,85 +1,157 @@ #' Plot some QC plots to define that everything ran correctly #' @description -#' Plot data to visualize immediate trends -#' @param .data The preprocessed data (after running make_count_dataframe() and change_data_format_to_longer()) merged data.frame that should be visualized +#' Plot data to visualize immediate trends. This function expects data that has been processed +#' through make_count_dataframe() and change_data_format_to_longer() to ensure the correct +#' data structure for plotting. +#' @param .data The preprocessed data (after running make_count_dataframe() and change_data_format_to_longer()) +#' merged data.frame that should be visualized #' @param patient_column_name The PID's column name in the merged data.frame (defaults to "PID") -#' @param colors A list of colors to supply to personalize the plot, as default 4 colors c("dark green", "red", "orange", "pink") -#' @param save_plots A Boolean value indicating if the plots should be saved or not, TRUE for saving in the current working directory, FALSE to not. 
Default is FALSE -#' @param folder_name A string indicating the name of the folder where to save the plots in case that save_plots = TRUE +#' @param colors A list of colors to supply to personalize the plot, defaults to c("darkgreen", "red", "orange", "pink") +#' @param save_plots A Boolean value indicating if the plots should be saved or not (default is FALSE) +#' @param folder_name A string indicating the name of the folder where to save the plots if save_plots is TRUE #' @param isolate_a_specific_patient A string indicating the patient name to isolate for single plot case (default is NULL) #' @param x_plot_var A string indicating the treatment's full name for the QC plots (default is "Treatment_complete") #' @import ggplot2 #' @import ggpubr #' @importFrom dplyr filter -#' @return A `dataframe`/`tibble`. +#' @return Invisibly returns NULL, but saves plots to disk if save_plots is TRUE #' @examples #' \dontrun{ -#' get_QC_plots(longer_format_dataframe, patient_column_name = "PID", -#' save_plots = TRUE, folder_name = "figures") +#' # First process example data +#' example_path <- system.file("extdata/to_merge/", package = "drugsens") +#' raw_data <- data_binding(path_to_the_projects_folder = example_path) +#' count_data <- make_count_dataframe(raw_data) +#' processed_data <- change_data_format_to_longer(count_data) +#' +#' # Create and save plots to temporary directory +#' temp_dir <- file.path(tempdir(), "qc_plots") +#' get_QC_plots( +#' processed_data, +#' save_plots = TRUE, +#' folder_name = temp_dir +#' ) +#' +#' # Create plots for a specific patient +#' get_QC_plots( +#' processed_data, +#' isolate_a_specific_patient = "B39", +#' save_plots = TRUE, +#' folder_name = temp_dir +#' ) #' } #' @export get_QC_plots <- function(.data, patient_column_name = "PID", colors = c("darkgreen", "red", "orange", "pink"), save_plots = FALSE, - folder_name = "figures", + folder_name = NULL, x_plot_var = "Treatment_complete", isolate_a_specific_patient = NULL) { - if 
(!is.null(isolate_a_specific_patient)) .data <- .data[.data[[patient_column_name]] == isolate_a_specific_patient, ] - if (nrow(.data) < 1) stop("The data cannot be empty") + # Input validation + if (!is.data.frame(.data)) { + stop("Input must be a data frame") + } - # run for every unique PID the QC plot - for (i in unique(.data[patient_column_name])) { - message(paste0("Running the QC plot function for PID: ", i)) + # Check required columns exist + required_cols <- c(patient_column_name, x_plot_var, "marker_positivity", "marker_positivity_ratio") + missing_cols <- setdiff(required_cols, colnames(.data)) + if (length(missing_cols) > 0) { + stop("Missing required columns: ", paste(missing_cols, collapse = ", "), + ". Please ensure data has been processed with make_count_dataframe() and change_data_format_to_longer()") + } - QC_plot <- .data |> - dplyr::filter(.data[[patient_column_name]] == i) |> - ggplot(aes(x = .data[[x_plot_var]], - y = .data$marker_positivity_ratio, - col = .data$marker_positivity)) + - geom_boxplot( - position = position_dodge(width = 1.0), - ) + - facet_wrap(~marker_positivity) + - geom_jitter(width = 0.15) + - theme_light() + - labs(title = paste0("Cell marker ratios for PID: ", i), color = "Cell marker") + - ylab("Percentage of expression marker (marker-positive-cells/total_cell_count)") + - xlab("Drugs") + - theme(axis.text.x = element_text(angle = 45, hjust = 1.0)) + - scale_color_manual(values = colors) + - stat_summary( - fun = "median", geom = "pointrange", - mapping = aes(xend = after_stat(x) - 0.25, yend = after_stat(y)), - size = 1.5, alpha = 1.0, - position = position_dodge(width = 1) - ) + - stat_summary( - geom = "line", fun = "median", position = position_dodge(width = 1), - size = 1, alpha = 0.3, aes(group = marker_positivity) + # Filter for specific patient if requested + if (!is.null(isolate_a_specific_patient)) { + .data <- .data[.data[[patient_column_name]] == isolate_a_specific_patient, ] + if (nrow(.data) < 1) { + stop("No 
data found for patient: ", isolate_a_specific_patient) + } + } + + # Set up output directory if saving plots + if (save_plots) { + if (is.null(folder_name)) { + folder_name <- file.path(tempdir(), "figures") + } + dir.create(folder_name, showWarnings = FALSE, recursive = TRUE) + } + + # Process each patient + for (current_pid in unique(.data[[patient_column_name]])) { + message("Processing patient: ", current_pid) + + # Filter data for current patient + patient_data <- dplyr::filter(.data, .data[[patient_column_name]] == current_pid) + + # Create the plot + QC_plot <- ggplot2::ggplot(patient_data, + ggplot2::aes(x = .data[[x_plot_var]], + y = marker_positivity_ratio, + color = marker_positivity)) + + ggplot2::geom_boxplot(position = ggplot2::position_dodge(width = 1.0)) + + ggplot2::facet_wrap(~marker_positivity) + + ggplot2::geom_jitter(width = 0.15) + + ggplot2::theme_light() + + ggplot2::labs( + title = paste0("Cell marker ratios for PID: ", current_pid), + color = "Cell marker", + y = "Percentage of expression marker (marker-positive-cells/total_cell_count)", + x = "Drugs" ) + - theme( - axis.title.x = element_blank(), - plot.title = element_text(hjust = 0.5), - axis.ticks.x = element_blank(), - panel.grid = element_blank(), - strip.background = element_rect( + ggplot2::theme( + axis.text.x = ggplot2::element_text(angle = 45, hjust = 1.0), + axis.title.x = ggplot2::element_blank(), + plot.title = ggplot2::element_text(hjust = 0.5), + axis.ticks.x = ggplot2::element_blank(), + panel.grid = ggplot2::element_blank(), + strip.background = ggplot2::element_rect( colour = "black", fill = "grey1" ) + ) + + ggplot2::scale_color_manual(values = colors) + + ggplot2::stat_summary( + fun = "median", + geom = "point", + size = 3, + position = ggplot2::position_dodge(width = 1) + ) + + ggplot2::stat_summary( + geom = "line", + fun = "median", + position = ggplot2::position_dodge(width = 1), + linewidth = 1, + alpha = 0.3, + aes(group = marker_positivity) ) + # Save the plot 
if requested if (save_plots) { - if (!dir.exists(paths = paste0(getwd(), "/", folder_name, "/"))) dir.create(path = paste0(getwd(), "/", folder_name, "/"), showWarnings = F, recursive = T) + plot_filename <- file.path( + folder_name, + paste0("patients_QC_box_plots_", + current_pid, + "_median", + format(Sys.Date(), "%Y-%m-%d"), + ".pdf" + ) + ) - ggsave(QC_plot, - filename = paste0(folder_name, "/", "patients_QC_box_plots_", i, "_", "median", Sys.Date(), ".pdf"), + ggplot2::ggsave( + filename = plot_filename, + plot = QC_plot, device = "pdf", height = 12, width = 12 ) } } - message(paste0("If save_plots = TRUE, the plots will be saved here:", paste0(folder_name, "/", "patients_QC_box_plots_", "median", Sys.Date(), ".pdf"))) + + # Final message about plot locations + if (save_plots) { + message("Plots have been saved in: ", folder_name) + } + + invisible(NULL) } diff --git a/R/get_QC_plots_and_stats.R b/R/get_QC_plots_and_stats.R index 4daf74c4c6a2ac93dd9dd24347405dd2329f85e5..35ea9d7016eb89e9044f04bf176ad28cc3e150d8 100644 --- a/R/get_QC_plots_and_stats.R +++ b/R/get_QC_plots_and_stats.R @@ -1,37 +1,60 @@ -#' Plot some QC plots for the bound data +#' Plot QC plots and calculate statistics for bound data #' @description -#' This plot can show trends within the dataset and run some basic statistics. 
-#' -#' @param .data The preprocessed data (after running make_count_dataframe() and change_data_format_to_longer()) merged data.frame that should be visualized -#' @param list_of_columns_to_plot The preprocessed data (after running make_count_dataframe() and change_data_format_to_longer()) merged data.frame that should be visualized -#' @param save_plots Boolean, TRUE if plots should be saved (default is FALSE) -#' @param saving_plots_folder String indicating the folder where the plots should be stored (default is "figures") -#' @param PID_column_name String, indicating the name of the sample to subset (default is "PID") -#' @param isolate_specific_drug String, indicating if there should be a Treatment specific data subset (default is NULL) -#' @param isolate_specific_patient String, indicating a spacific sample to plot only (default is NULL) -#' @param PID_column_name String, indicating the name of the sample to subset (default is "Treatment") -#' @param save_list_of_plots Boolean, if TRUE returns a named list of all the plots ran (default is TRUE), this can be usefult to isolate specific plots -#' @param save_plots_in_patient_specific_subfolders Boolean, if TRUE the plots will be saved (if `save_plots` TRUE) in sample specific folders (default is TRUE) -#' @param fill_color_variable Boolean, String, indicating the name of the variable (discrete) to use for the plot's filling -#' @param p_height Integer, indicate the plot's height (default is 10 inches) -#' @param p_width Integer, indicate the plot's width (default is 10 inches) -#' @param drug_column_name String, indicate the column indicating the Drug/Treament (default is "Treatment") -#' @import ggplot2 -#' @import ggpubr -#' @importFrom readr write_excel_csv -#' @importFrom dplyr filter -#' @return A `list`/`NULL`. +#' This function creates quality control plots and calculates basic statistics for microscopy data. +#' The plots provide visual insights into marker expression patterns and data quality. 
+#' @param .data The preprocessed data frame to analyze +#' @param list_of_columns_to_plot Columns to include in plots. If NULL, all numeric columns are used. +#' @param save_plots Logical, whether to save plots to files. Defaults to FALSE. +#' @param saving_plots_folder Directory for saving plots. If NULL and save_plots=TRUE, uses a subdirectory of tempdir(). +#' @param save_plots_in_patient_specific_subfolders Logical, whether to create patient subdirectories. Defaults to TRUE. +#' @param fill_color_variable Variable name for plot color filling +#' @param PID_column_name Column name for patient IDs. Defaults to "PID". +#' @param isolate_specific_drug Drug name to subset data +#' @param isolate_specific_patient Patient ID to subset data +#' @param drug_column_name Column name for drug information. Defaults to "Treatment". +#' @param save_list_of_plots Logical, whether to return list of plot objects. Defaults to TRUE. +#' @param p_height Plot height in inches. Defaults to 10. +#' @param p_width Plot width in inches. Defaults to 10. +#' @param verbose Logical, whether to show progress messages. Defaults to TRUE. +#' @return If save_list_of_plots=TRUE, returns a named list of ggplot objects. Otherwise returns invisible(NULL). 
+#' @importFrom ggplot2 ggplot aes geom_violin geom_boxplot facet_wrap theme element_text labs +#' geom_jitter position_jitter stat_summary aes_string #' @examples #' \dontrun{ -#' qc <- get_QC_plots_parsed_merged_data(bind_data, save_plots = TRUE, -#' save_list_of_plots = FALSE) +#' # First load and process example data +#' example_path <- system.file("extdata/to_merge/", package = "drugsens") +#' raw_data <- data_binding(path_to_the_projects_folder = example_path) +#' count_data <- make_count_dataframe(raw_data) +#' processed_data <- change_data_format_to_longer(count_data) +#' +#' # Basic usage - create plots for all patients +#' plots <- get_QC_plots_parsed_merged_data(processed_data) +#' +#' # Save plots to a temporary directory +#' temp_dir <- file.path(tempdir(), "qc_plots") +#' plots <- get_QC_plots_parsed_merged_data( +#' processed_data, +#' save_plots = TRUE, +#' saving_plots_folder = temp_dir +#' ) +#' +#' # Focus on a specific patient +#' plots <- get_QC_plots_parsed_merged_data( +#' processed_data, +#' isolate_specific_patient = "B39" +#' ) +#' +#' # Color plots by tissue type +#' plots <- get_QC_plots_parsed_merged_data( +#' processed_data, +#' fill_color_variable = "Tissue" +#' ) #' } #' @export - get_QC_plots_parsed_merged_data <- function(.data, list_of_columns_to_plot = NULL, save_plots = FALSE, - saving_plots_folder = "figures", + saving_plots_folder = NULL, save_plots_in_patient_specific_subfolders = TRUE, fill_color_variable = NULL, PID_column_name = "PID", @@ -40,127 +63,153 @@ get_QC_plots_parsed_merged_data <- function(.data, drug_column_name = "Treatment", save_list_of_plots = TRUE, p_height = 10, - p_width = 10) { - # List where plots could be stored - list_plottos <- list() + p_width = 10, + verbose = TRUE) { + + # Define the helper function for creating individual QC plots + create_qc_plot <- function(data, metric, fill_var, pid, drug) { + p <- ggplot2::ggplot(data, ggplot2::aes(x = marker_positivity, y = unlist(data[[metric]]))) + + if 
(!is.null(fill_var)) { + p <- p + ggplot2::geom_violin(trim = FALSE, + ggplot2::aes_string(fill = fill_var), + color = NA) + } else { + p <- p + ggplot2::geom_violin(trim = FALSE, + fill = "#A4A4A4", + color = "darkred") + } - if (!is.data.frame(.data) | nrow(.data) < 1) stop("ERROR: the data provided must be not empty of dataframe type.") + p <- p + + theme_minimal() + + ggplot2::geom_boxplot(width = 0.1, fill = "white") + + ggplot2::facet_wrap(~Treatment) + + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) + + ggplot2::labs( + title = metric, + x = "Cell Markers", + y = paste0("Intensity of ", metric, " (log2)"), + subtitle = paste0(pid, ".", drug) + ) + + ggplot2::geom_jitter(shape = 16, + position = ggplot2::position_jitter(0.01)) + + ggplot2::stat_summary(geom = "crossbar", + fun = mean, + colour = "red", + width = 0.21) + + return(p) + } - # get the number of possible plotting variables - if (is.null(list_of_columns_to_plot)) { - list_of_columns_to_plot <- colnames(.data)[which(sapply(.data, is.numeric))] + # Input validation + if (!is.data.frame(.data) || nrow(.data) < 1) { + stop("Input must be a non-empty data frame") } - # check that the fill_color_variable is in the dataset and not null - if (!is.null(fill_color_variable) & !fill_color_variable %in% colnames(.data)) stop("ERROR: the fill_color_variable must be in the colum names variables.") + # Initialize plot storage + list_plottos <- list() - # if the user decides to isolate a specific sample only - if (!is.null(isolate_specific_patient)) .data <- .data[.data[[PID_column_name]] == isolate_specific_patient, ] + # If user requested to isolate specific patient, filter the data + if (!is.null(isolate_specific_patient)) { + .data <- .data[.data[[PID_column_name]] == isolate_specific_patient, ] + if (nrow(.data) < 1) { + stop("No data found for specified patient: ", isolate_specific_patient) + } + } + + # Set up output directory if saving plots + if (save_plots) { + # Initialize 
the base directory for plots + saving_plots_folder <- if (is.null(saving_plots_folder)) { + file.path(tempdir(), "drugsens_plots") + } else { + saving_plots_folder + } + dir.create(saving_plots_folder, showWarnings = FALSE, recursive = TRUE) + } + + # Determine columns to plot + if (is.null(list_of_columns_to_plot)) { + list_of_columns_to_plot <- colnames(.data)[sapply(.data, is.numeric)] + } + # Process each patient for (pid in unique(.data[[PID_column_name]])) { + if (verbose) { + message("Processing patient: ", pid) + } + + # Subset data for current patient subset_data <- .data[.data[[PID_column_name]] == pid, ] - for (i in list_of_columns_to_plot) { - if (!is.null(isolate_specific_drug)) subset_data <- subset_data[subset_data[[drug_column_name]] %in% isolate_specific_drug, ] + # Apply drug filter if specified + if (!is.null(isolate_specific_drug)) { + subset_data <- subset_data[subset_data[[drug_column_name]] %in% isolate_specific_drug, ] + } - if (nrow(subset_data) < 1) { - message(unique(subset_data[[PID_column_name]])) - message(unique(subset_data[[drug_column_name]])) - stop("ERROR: Your filtering query has returned no observations") + # Skip if no data after filtering + if (nrow(subset_data) < 1) { + if (verbose) { + message("No data found for PID: ", pid) } + next + } - # browser() + # Create patient directory if needed + if (save_plots && save_plots_in_patient_specific_subfolders) { + patient_dir <- file.path(saving_plots_folder, pid) + dir.create(patient_dir, showWarnings = FALSE, recursive = TRUE) + } + + # Process each metric + for (i in list_of_columns_to_plot) { + # Create plot + p <- create_qc_plot(subset_data, i, fill_color_variable, + pid, isolate_specific_drug) - # Function to dynamically add layers to a ggplot object based on conditions - add_violin_layers <- function(p, fill_color_variable) { - if (!is.null(fill_color_variable)) { - p <- p + geom_violin(trim = FALSE, aes_string(fill = fill_color_variable), color = NA) + - geom_boxplot(width = 
0.1, fill = "white") + # Save plot if requested + if (save_plots) { + plot_file <- sprintf("%s_%s_%s_%s.pdf", + format(Sys.Date(), "%Y%m%d"), + pid, + ifelse(is.null(isolate_specific_drug), "all", isolate_specific_drug), + make.names(i)) + + # Determine save directory + save_dir <- if (save_plots_in_patient_specific_subfolders) { + file.path(saving_plots_folder, pid) } else { - p <- p + geom_violin(trim = FALSE, fill = "#A4A4A4", color = "darkred") + - geom_boxplot(width = 0.1, fill = "white") + saving_plots_folder } - return(p) - } - # Initialize ggplot - p <- ggplot(subset_data, aes(x = Name, y = log2(unlist(subset_data[[i]])))) - - # Add violin and boxplot layers - p <- add_violin_layers(p, fill_color_variable) - - # More layers on top - p <- p + facet_wrap(~Treatment) + - theme(axis.text.x = element_text(angle = 45, hjust = 1)) + - labs( - title = colnames(subset_data[i]), - x = "Cell Markers", - y = paste0("Intensity of ", colnames(subset_data[i]), " (log2)"), - subtitle = paste0(pid, ".", isolate_specific_drug) - ) + - geom_jitter(shape = 16, position = position_jitter(0.01)) + - stat_summary(geom = "crossbar", fun = mean, colour = "red", width = 0.21) - - # Conditionally add to list of plots - if (save_list_of_plots) { - list_plottos[[paste0(isolate_specific_drug, ".", pid, ".", i)]] <- p - } + plot_path <- file.path(save_dir, plot_file) + ggplot2::ggsave(plot_path, + plot = p, + width = p_width, + height = p_height, + dpi = 600) - if (save_plots) { - if (save_plots_in_patient_specific_subfolders) { - - if (!dir.exists(paste0(saving_plots_folder, "/", pid))) dir.create(paste0(saving_plots_folder, "/", pid), showWarnings = F, recursive = T) - ggsave( - plot = p, - filename = paste0( - paste0(saving_plots_folder, "/", pid), - "/", - Sys.Date(), - "_", - pid, - ".", - isolate_specific_drug, - ".", - colnames(.data[i]), - ".pdf" - ), - device = "pdf", - dpi = 600 - ) - } else { - # Saving plots in .pdf at 600 dpi - if (!dir.exists(saving_plots_folder)) 
dir.create(saving_plots_folder, showWarnings = F, recursive = T) - ggsave( - plot = p, - width = p_width, - height = p_height, - filename = paste0( - saving_plots_folder, - "/", - Sys.Date(), - "_", - pid, - ".", - isolate_specific_drug, - ".", - colnames(.data[i]), - ".pdf" - ), - device = "pdf", - dpi = 600, - ) + if (verbose) { + message("Saved plot to: ", plot_path) } + } - message(paste0( - "plots for: ", - pid, - ".", - isolate_specific_drug, - ".", - colnames(.data[i]), " saved" - )) + # Store plot if requested + if (save_list_of_plots) { + plot_name <- paste(pid, i, sep = ".") + if (!is.null(isolate_specific_drug)) { + plot_name <- paste(isolate_specific_drug, plot_name, sep = ".") + } + list_plottos[[plot_name]] <- p } } } + + # Return results + if (save_list_of_plots) { + return(list_plottos) + } else { + invisible(NULL) + } } diff --git a/R/make_count_dataframe.R b/R/make_count_dataframe.R index 2bc835856376b17266125e93ca74bfa9a46cbf24..edd3564065b8ba7f72578b59816426a740215f14 100644 --- a/R/make_count_dataframe.R +++ b/R/make_count_dataframe.R @@ -18,7 +18,7 @@ #' plotting_ready_dataframe <- change_data_format_to_longer( #' counts_dataframe #' ) -##' make_count_dataframe( +# make_count_dataframe( #' data, #' name_of_the_markers_column = "Name", #' unique_name_row_identifier = "filter_image" diff --git a/R/make_run_config.R b/R/make_run_config.R index ab009a74c835fe90931b0d7eb36972c3eb8dcd61..3ae38e3ec352238a6f1b08a540094de5905c0497 100644 --- a/R/make_run_config.R +++ b/R/make_run_config.R @@ -14,7 +14,8 @@ #' make_run_config() #' } make_run_config <- function(overwrite_config = FALSE, forcePath = NULL) { - currentPath <- if (is.null(forcePath)) getwd() else forcePath + + currentPath <- if (is.null(forcePath)) tempdir() else forcePath config_file <- file.path(currentPath, "config_drugsens.txt") if (file.exists(config_file)) { diff --git a/R/parsers.R b/R/parsers.R index 034cac0ec7c921aa9c13bb3032273755da56eb10..72c6055d08cc7824d21d5659a875acde753b1d1b 
100644 --- a/R/parsers.R +++ b/R/parsers.R @@ -1,4 +1,4 @@ -#' Main parsing function +#' @title Parse image filenames to extract metadata #' @description #' This function will parse the data from the Image name and will return the metadata there contained #' The metadata will be then associated to the count file as well diff --git a/man/generate_qupath_script.Rd b/man/generate_qupath_script.Rd index 89b18160e079a24ebf48d517e7b513df0efa10ce..2f7f74d87817211de857e2d02c38cb8ee670165c 100644 --- a/man/generate_qupath_script.Rd +++ b/man/generate_qupath_script.Rd @@ -1,13 +1,16 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/generate_qu_path_script.R +% Please edit documentation in R/generate_qupath_script.R \name{generate_qupath_script} \alias{generate_qupath_script} \title{Generate the groovy script used for the analysis} \usage{ -generate_qupath_script() +generate_qupath_script(output_dir = NULL) +} +\arguments{ +\item{output_dir}{Directory where the script should be saved. If NULL, uses tempdir()} } \value{ -\code{script_for_qupath.txt} in local working directory. +Invisibly returns the path to the generated script file. } \description{ Generate a useful script to consistently save the output data from QuPath in .csv format following the naming conventions @@ -15,6 +18,11 @@ followed during the package development. 
} \examples{ \dontrun{ + # Generate script in a temporary directory generate_qupath_script() + + # Generate script in a specific directory + output_dir <- tempdir() + generate_qupath_script(output_dir = output_dir) } } diff --git a/man/get_QC_plots.Rd b/man/get_QC_plots.Rd index 0b4825e4aded1c593f206b2172c7f3b463ef1b1d..14cc7ab84cde079df6b6a4234d5bd0d8a6cf3e5f 100644 --- a/man/get_QC_plots.Rd +++ b/man/get_QC_plots.Rd @@ -9,35 +9,57 @@ get_QC_plots( patient_column_name = "PID", colors = c("darkgreen", "red", "orange", "pink"), save_plots = FALSE, - folder_name = "figures", + folder_name = NULL, x_plot_var = "Treatment_complete", isolate_a_specific_patient = NULL ) } \arguments{ -\item{.data}{The preprocessed data (after running make_count_dataframe() and change_data_format_to_longer()) merged data.frame that should be visualized} +\item{.data}{The preprocessed data (after running make_count_dataframe() and change_data_format_to_longer()) +merged data.frame that should be visualized} \item{patient_column_name}{The PID's column name in the merged data.frame (defaults to "PID")} -\item{colors}{A list of colors to supply to personalize the plot, as default 4 colors c("dark green", "red", "orange", "pink")} +\item{colors}{A list of colors to supply to personalize the plot, defaults to c("darkgreen", "red", "orange", "pink")} -\item{save_plots}{A Boolean value indicating if the plots should be saved or not, TRUE for saving in the current working directory, FALSE to not. 
Default is FALSE} +\item{save_plots}{A Boolean value indicating if the plots should be saved or not (default is FALSE)} -\item{folder_name}{A string indicating the name of the folder where to save the plots in case that save_plots = TRUE} +\item{folder_name}{A string indicating the name of the folder where to save the plots if save_plots is TRUE} \item{x_plot_var}{A string indicating the treatment's full name for the QC plots (default is "Treatment_complete")} \item{isolate_a_specific_patient}{A string indicating the patient name to isolate for single plot case (default is NULL)} } \value{ -A \code{dataframe}/\code{tibble}. +Invisibly returns NULL, but saves plots to disk if save_plots is TRUE } \description{ -Plot data to visualize immediate trends +Plot data to visualize immediate trends. This function expects data that has been processed +through make_count_dataframe() and change_data_format_to_longer() to ensure the correct +data structure for plotting. } \examples{ \dontrun{ - get_QC_plots(longer_format_dataframe, patient_column_name = "PID", - save_plots = TRUE, folder_name = "figures") +# First process example data +example_path <- system.file("extdata/to_merge/", package = "drugsens") +raw_data <- data_binding(path_to_the_projects_folder = example_path) +count_data <- make_count_dataframe(raw_data) +processed_data <- change_data_format_to_longer(count_data) + +# Create and save plots to temporary directory +temp_dir <- file.path(tempdir(), "qc_plots") +get_QC_plots( + processed_data, + save_plots = TRUE, + folder_name = temp_dir +) + +# Create plots for a specific patient +get_QC_plots( + processed_data, + isolate_a_specific_patient = "B39", + save_plots = TRUE, + folder_name = temp_dir +) } } diff --git a/man/get_QC_plots_parsed_merged_data.Rd b/man/get_QC_plots_parsed_merged_data.Rd index de2b2e59456f854836d974661dd327f540b712f0..084f98acc5d53208636c47c22dfffccb29ceb34c 100644 --- a/man/get_QC_plots_parsed_merged_data.Rd +++ 
b/man/get_QC_plots_parsed_merged_data.Rd @@ -2,13 +2,13 @@ % Please edit documentation in R/get_QC_plots_and_stats.R \name{get_QC_plots_parsed_merged_data} \alias{get_QC_plots_parsed_merged_data} -\title{Plot some QC plots for the bound data} +\title{Plot QC plots and calculate statistics for bound data} \usage{ get_QC_plots_parsed_merged_data( .data, list_of_columns_to_plot = NULL, save_plots = FALSE, - saving_plots_folder = "figures", + saving_plots_folder = NULL, save_plots_in_patient_specific_subfolders = TRUE, fill_color_variable = NULL, PID_column_name = "PID", @@ -17,45 +17,75 @@ get_QC_plots_parsed_merged_data( drug_column_name = "Treatment", save_list_of_plots = TRUE, p_height = 10, - p_width = 10 + p_width = 10, + verbose = TRUE ) } \arguments{ -\item{.data}{The preprocessed data (after running make_count_dataframe() and change_data_format_to_longer()) merged data.frame that should be visualized} +\item{.data}{The preprocessed data frame to analyze} -\item{list_of_columns_to_plot}{The preprocessed data (after running make_count_dataframe() and change_data_format_to_longer()) merged data.frame that should be visualized} +\item{list_of_columns_to_plot}{Columns to include in plots. If NULL, all numeric columns are used.} -\item{save_plots}{Boolean, TRUE if plots should be saved (default is FALSE)} +\item{save_plots}{Logical, whether to save plots to files. Defaults to FALSE.} -\item{saving_plots_folder}{String indicating the folder where the plots should be stored (default is "figures")} +\item{saving_plots_folder}{Directory for saving plots. If NULL and save_plots=TRUE, uses a subdirectory of tempdir().} -\item{save_plots_in_patient_specific_subfolders}{Boolean, if TRUE the plots will be saved (if \code{save_plots} TRUE) in sample specific folders (default is TRUE)} +\item{save_plots_in_patient_specific_subfolders}{Logical, whether to create patient subdirectories. 
Defaults to TRUE.} -\item{fill_color_variable}{Boolean, String, indicating the name of the variable (discrete) to use for the plot's filling} +\item{fill_color_variable}{Variable name for plot color filling} -\item{PID_column_name}{String, indicating the name of the sample to subset (default is "Treatment")} +\item{PID_column_name}{Column name for patient IDs. Defaults to "PID".} -\item{isolate_specific_drug}{String, indicating if there should be a Treatment specific data subset (default is NULL)} +\item{isolate_specific_drug}{Drug name to subset data} -\item{isolate_specific_patient}{String, indicating a spacific sample to plot only (default is NULL)} +\item{isolate_specific_patient}{Patient ID to subset data} -\item{drug_column_name}{String, indicate the column indicating the Drug/Treament (default is "Treatment")} +\item{drug_column_name}{Column name for drug information. Defaults to "Treatment".} -\item{save_list_of_plots}{Boolean, if TRUE returns a named list of all the plots ran (default is TRUE), this can be usefult to isolate specific plots} +\item{save_list_of_plots}{Logical, whether to return list of plot objects. Defaults to TRUE.} -\item{p_height}{Integer, indicate the plot's height (default is 10 inches)} +\item{p_height}{Plot height in inches. Defaults to 10.} -\item{p_width}{Integer, indicate the plot's width (default is 10 inches)} +\item{p_width}{Plot width in inches. Defaults to 10.} + +\item{verbose}{Logical, whether to show progress messages. Defaults to TRUE.} } \value{ -A \code{list}/\code{NULL}. +If save_list_of_plots=TRUE, returns a named list of ggplot objects. Otherwise returns invisible(NULL). } \description{ -This plot can show trends within the dataset and run some basic statistics. +This function creates quality control plots and calculates basic statistics for microscopy data. +The plots provide visual insights into marker expression patterns and data quality. 
} \examples{ \dontrun{ - qc <- get_QC_plots_parsed_merged_data(bind_data, save_plots = TRUE, - save_list_of_plots = FALSE) +# First load and process example data +example_path <- system.file("extdata/to_merge/", package = "drugsens") +raw_data <- data_binding(path_to_the_projects_folder = example_path) +count_data <- make_count_dataframe(raw_data) +processed_data <- change_data_format_to_longer(count_data) + +# Basic usage - create plots for all patients +plots <- get_QC_plots_parsed_merged_data(processed_data) + +# Save plots to a temporary directory +temp_dir <- file.path(tempdir(), "qc_plots") +plots <- get_QC_plots_parsed_merged_data( + processed_data, + save_plots = TRUE, + saving_plots_folder = temp_dir +) + +# Focus on a specific patient +plots <- get_QC_plots_parsed_merged_data( + processed_data, + isolate_specific_patient = "B39" +) + +# Color plots by tissue type +plots <- get_QC_plots_parsed_merged_data( + processed_data, + fill_color_variable = "Tissue" +) } } diff --git a/man/make_count_dataframe.Rd b/man/make_count_dataframe.Rd index 256353e95a1e776504baacad5edf2d833426549a..f1aa2bfd7012df9f380397ce7ad2a4393130c4b7 100644 --- a/man/make_count_dataframe.Rd +++ b/man/make_count_dataframe.Rd @@ -34,7 +34,6 @@ counts_dataframe <- make_count_dataframe(bind_data) plotting_ready_dataframe <- change_data_format_to_longer( counts_dataframe ) - make_count_dataframe( data, name_of_the_markers_column = "Name", unique_name_row_identifier = "filter_image" diff --git a/man/string_parsing.Rd b/man/string_parsing.Rd index 1317c7378cc70d3dc95cac42faf674ef9f6793f8..31fee436dc02935cdb6ff0337c41585ec8beed6a 100644 --- a/man/string_parsing.Rd +++ b/man/string_parsing.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/parsers.R \name{string_parsing} \alias{string_parsing} -\title{Main parsing function} +\title{Parse image filenames to extract metadata} \usage{ string_parsing(.data) } diff --git a/tests/testthat/test_cases.R b/tests/testthat/test_cases.R index 
fbd7cf5187f8e75e49b7beddbac83d685dcd01d9..3aae63c0874ac2163b0a74ec141465e595c5e326 100644 --- a/tests/testthat/test_cases.R +++ b/tests/testthat/test_cases.R @@ -1,110 +1,107 @@ test_that("Verify that the internal file in to_merge examples in exdata are available", { list_files_exdata <- system.file("extdata/to_merge/", package = "drugsens") |> list.files() - expect_true(length(list_files_exdata) > 3 ) + expect_true(length(list_files_exdata) > 3) }) test_that("Verify that the internal file examples in exdata merged are available", { list_files_exdata <- system.file("extdata/merged/", package = "drugsens") |> list.files() - expect_true(length(list_files_exdata) >= 1 ) + expect_true(length(list_files_exdata) >= 1) }) test_that("list_all_files returns correct file paths", { # Setup: Create temporary files and directory - temp_dir <- tempdir() + temp_dir <- file.path(tempdir(), "drugsens_test") + dir.create(temp_dir, recursive = TRUE, showWarnings = FALSE) + on.exit(unlink(temp_dir, recursive = TRUE)) + file.create(file.path(temp_dir, "file1.csv")) file.create(file.path(temp_dir, "file2.csv")) file.create(file.path(temp_dir, "file3.txt")) file.create(file.path(temp_dir, "file4.tsv")) - # Test 1 + # Test CSV files files_list <- list_all_files(define_path = temp_dir, extension = "\\.csv$", - recursive_search = T) + recursive_search = TRUE) expect_length(files_list, 2) expect_true(all(grepl("file1.csv|file2.csv", files_list))) - # Test 2 + # Test TXT files files_list <- list_all_files(define_path = temp_dir, extension = "\\.txt$", - recursive_search = T) + recursive_search = TRUE) expect_length(files_list, 1) expect_true(all(grepl("file3.txt", files_list))) - # Test 3 + # Test TSV files files_list <- list_all_files(define_path = temp_dir, extension = "\\.tsv$", - recursive_search = T) + recursive_search = TRUE) expect_length(files_list, 1) expect_true(all(grepl("file4.tsv", files_list))) - - # remove the dir - unlink(temp_dir, recursive = TRUE) - }) test_that("Config 
creation and reading works", { - temp_dir <- tempdir() - on.exit(unlink(file.path(temp_dir, "config_drugsens.txt"))) + temp_dir <- file.path(tempdir(), "drugsens_config_test") + dir.create(temp_dir, recursive = TRUE, showWarnings = FALSE) + on.exit(unlink(temp_dir, recursive = TRUE)) - make_run_config(forcePath = temp_dir) make_run_config(forcePath = temp_dir) expect_true(file.exists(file.path(temp_dir, "config_drugsens.txt"))) + + # Test re-running doesn't error + expect_silent(make_run_config(forcePath = temp_dir)) expect_true(exists("list_of_relabeling")) }) -test_that("Check that the example file can be read correctly", { - datas <- drugsens::data_binding(path_to_the_projects_folder = system.file("extdata/to_merge/", package = "drugsens")) +test_that("Example file can be read correctly", { + datas <- drugsens::data_binding( + path_to_the_projects_folder = system.file("extdata/to_merge/", package = "drugsens") + ) expect_true(exists("datas")) expect_equal(ncol(datas), expected = 28) }) -test_that("Check that the drugs combination have two unit and two concentration and control none", { - datas <- drugsens::data_binding(path_to_the_projects_folder = system.file("extdata/to_merge/", package = "drugsens")) - expect_true(datas[datas$Treatment == "GentamicinePaclitaxel", "Treatment_complete"][1] == "GentamicinePaclitaxel100uM-10uM" || datas[datas$Treatment == "GentamicinePaclitaxel", "Treatment_complete"][1] == "gentamicinePaclitaxel100uM-10uM") - expect_true(datas[datas$Treatment == "Control", "Treatment_complete"][1] == "Control" || datas[datas$Treatment == "Control", "Treatment_complete"][1] == "control") -}) - -test_that("Config file was there and removed correctly", { - expect_silent( file.remove(path.expand(paste0(getwd(), "/config_drugsens.txt"))) ) -}) +test_that("Drug combinations have correct units and concentrations", { + datas <- drugsens::data_binding( + path_to_the_projects_folder = system.file("extdata/to_merge/", package = "drugsens") + ) + # Test 
drug combination formatting + combo_row <- datas[datas$Treatment == "GentamicinePaclitaxel", "Treatment_complete"][1] + expect_true(combo_row == "GentamicinePaclitaxel100uM-10uM" || + combo_row == "gentamicinePaclitaxel100uM-10uM") -test_that("The parsing is working", { - input_data <- data.frame(Image = "PID1_Tissue1_2024-02-13_DOC2024.02.13_TreatmentRana_10_uM_15_nm_Replica_(series.10)") - expected_output <- data.frame( - Image = "PID1_Tissue1_2024-02-13_DOC2024.02.13_TreatmentRana_10_uM_15_nm_Replica_(series.10)", - Image_number = "series.10", - PID = "PID1", - Tissue = "Tissue1", - Date1 = "2024-02-13", - DOC = "2024.02.13", - ReplicaOrNot = "Replica", - Treatment = "TreatmentRana", - Concentration1 = "10", - Concentration2 = "15", - ConcentrationUnits1 = "uM", - ConcentrationUnits2 = "nm", - Treatment_complete = "TreatmentRana10uM-15nm") - expect_equal(drugsens::string_parsing(input_data), expected = expected_output) + # Test control formatting + control_row <- datas[datas$Treatment == "Control", "Treatment_complete"][1] + expect_true(control_row == "Control" || control_row == "control") }) -test_that("Another parsing test", { - input_data <- data.frame(Image = "B516_Ascites_2023-11-25_DOC2020-12-14_dmso_rep_Ecad_cCasp3_(series 01).tif") - expected_output <- data.frame( - Image = "B516_Ascites_2023-11-25_DOC2020-12-14_dmso_rep_Ecad_cCasp3_(series 01).tif", - Image_number = "series 01", - PID = "B516", - Tissue = "Ascites", - Date1 = "2023-11-25", - DOC = "2020-12-14", - ReplicaOrNot = "Replica", - Treatment = "dmso", - Concentration1 = NA_character_, #WIP - Concentration2 = NA_integer_, - ConcentrationUnits1 = NA_character_, - ConcentrationUnits2 = NA_character_, - Treatment_complete = "dmso") - expect_equal(drugsens::string_parsing(input_data), expected = expected_output) - # Image1 <- "B516_Ascites_2023-11-25_DOC2020-12-14_CarboplatinPaclitaxel_100_uM_10_nM_Ecad_cCasp3_(series 01).tif" - # Image2 <- 
"A8759_Spleen_2020.11.10_DOC2001.10.05_compoundX34542_1000_uM_EpCAM_Ecad_cCasp3_(series 01).tif" - # Image3 <- "A8759_Spleen_2020.11.10_DOC2001.10.05_compoundX34542_1000_uM_EpCAM_Ecad_cCasp3_(series 01).tif" - # Image4 <- "B38_Eye_2023.11.10_DOC2023.10.05_GentamicinePaclitaxel_100_uM_10_nM_EpCAM_Ecad_cCasp3_(series 01).tif" +test_that("String parsing works correctly for single drug", { + input_data <- data.frame( + Image = "PID1_Tissue1_2024-02-13_DOC2024.02.13_TreatmentRana_10_uM_15_nm_Replica_(series.10)" + ) + result <- drugsens::string_parsing(input_data) + + expect_equal(result$PID, "PID1") + expect_equal(result$Tissue, "Tissue1") + expect_equal(result$Date1, "2024-02-13") + expect_equal(result$DOC, "2024.02.13") + expect_equal(result$Treatment, "TreatmentRana") + expect_equal(result$Concentration1, "10") + expect_equal(result$ConcentrationUnits1, "uM") + expect_equal(result$Treatment_complete, "TreatmentRana10uM-15nm") }) +test_that("String parsing works correctly for DMSO control", { + input_data <- data.frame( + Image = "B516_Ascites_2023-11-25_DOC2020-12-14_dmso_rep_Ecad_cCasp3_(series 01).tif" + ) + result <- drugsens::string_parsing(input_data) + + expect_equal(result$PID, "B516") + expect_equal(result$Tissue, "Ascites") + expect_equal(result$Date1, "2023-11-25") + expect_equal(result$DOC, "2020-12-14") + expect_equal(result$Treatment, "dmso") + expect_true(is.na(result$Concentration1)) + expect_true(is.na(result$ConcentrationUnits1)) + expect_equal(result$Treatment_complete, "dmso") +})