Skip to content
Snippets Groups Projects
Commit 0f5cd512 authored by Flavio Lombardo's avatar Flavio Lombardo :goal:
Browse files

Improved documentation, fixed folder creation in wd and improved examples

parent e20aba45
No related branches found
No related tags found
No related merge requests found
...@@ -10,7 +10,7 @@ Version: 0.1.0 ...@@ -10,7 +10,7 @@ Version: 0.1.0
BugReports: https://git.scicore.unibas.ch/ovca-research/drugsens/-/issues BugReports: https://git.scicore.unibas.ch/ovca-research/drugsens/-/issues
SystemRequirements: QuPath™ 4.0.0 or higher SystemRequirements: QuPath™ 4.0.0 or higher
Authors@R: c( Authors@R: c(
person("Flavio", "Lombardo", , "flavio.lombardo@unibas.ch", role = c("aut", "cre", "cph")), person("Flavio", "Lombardo", , "flavio.lombardo@unibas.ch", role = c("aut", "cre", "cph"), comment = c(ORCID = "0000-0002-4853-6838")),
person("Ricardo Coelho", role = c("cph")), person("Ricardo Coelho", role = c("cph")),
person("Ovarian Cancer Research", role = c("cph")), person("Ovarian Cancer Research", role = c("cph")),
person("University of Basel and University Hospital Basel", role = c("cph")) person("University of Basel and University Hospital Basel", role = c("cph"))
...@@ -19,16 +19,16 @@ URL: https://git.scicore.unibas.ch/ovca-research/drugsens/ ...@@ -19,16 +19,16 @@ URL: https://git.scicore.unibas.ch/ovca-research/drugsens/
Maintainer: Flavio Lombardo <flavio.lombardo@unibas.ch> Maintainer: Flavio Lombardo <flavio.lombardo@unibas.ch>
License: MIT + file LICENSE License: MIT + file LICENSE
Imports: Imports:
utils,
dplyr, dplyr,
tidyr,
readr,
stringr,
knitr,
ggplot2, ggplot2,
ggpubr, ggpubr,
knitr,
roxygen2, roxygen2,
stats,
stringr,
tidyr,
tidyselect, tidyselect,
utils,
testthat (>= 3.0.0) testthat (>= 3.0.0)
Depends: Depends:
R (>= 4.2) R (>= 4.2)
......
...@@ -15,7 +15,18 @@ import(roxygen2) ...@@ -15,7 +15,18 @@ import(roxygen2)
import(testthat) import(testthat)
importFrom(dplyr,filter) importFrom(dplyr,filter)
importFrom(dplyr,select) importFrom(dplyr,select)
importFrom(readr,write_excel_csv) importFrom(ggplot2,aes)
importFrom(ggplot2,aes_string)
importFrom(ggplot2,element_text)
importFrom(ggplot2,facet_wrap)
importFrom(ggplot2,geom_boxplot)
importFrom(ggplot2,geom_jitter)
importFrom(ggplot2,geom_violin)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,labs)
importFrom(ggplot2,position_jitter)
importFrom(ggplot2,stat_summary)
importFrom(ggplot2,theme)
importFrom(stats,setNames) importFrom(stats,setNames)
importFrom(stringr,str_count) importFrom(stringr,str_count)
importFrom(stringr,str_extract) importFrom(stringr,str_extract)
......
...@@ -2,13 +2,24 @@ ...@@ -2,13 +2,24 @@
#' @description #' @description
#' Generate a useful script to consistently save the output data from QuPath in .csv format following the naming conventions #' Generate a useful script to consistently save the output data from QuPath in .csv format following the naming conventions
#' followed during the package development. #' followed during the package development.
#' @return `script_for_qupath.txt` in local working directory. #' @param output_dir Directory where the script should be saved. If NULL, uses tempdir()
#' @return Invisibly returns the path to the generated script file.
#' @export #' @export
#' @examples #' @examples
#' \dontrun{ #' \dontrun{
#' # Generate script in a temporary directory
#' generate_qupath_script() #' generate_qupath_script()
#'
#' # Generate script in a specific directory
#' output_dir <- tempdir()
#' generate_qupath_script(output_dir = output_dir)
#' } #' }
generate_qupath_script <- function() { generate_qupath_script <- function(output_dir = NULL) {
if(is.null(output_dir)) {
output_dir <- tempdir()
}
output_file <- file.path(output_dir, "script_for_qupath.txt")
write( write(
x = paste0(' x = paste0('
//This code script was tested with QuPath 4 //This code script was tested with QuPath 4
...@@ -53,9 +64,9 @@ def exporter = new MeasurementExporter() ...@@ -53,9 +64,9 @@ def exporter = new MeasurementExporter()
print "Done!" print "Done!"
'), '),
file = paste0(path.expand(getwd()), "/script_for_qupath.txt") file = output_file
) )
message("You can now take the script and personalize it to your needs") message("You can now take the script and personalize it to your needs")
message(paste0(Sys.time(), " The script file was generated here: ", getwd(), "/")) message(paste0(Sys.time(), " The script file was generated here: ", output_file, "/"))
message(paste0(Sys.time(), " Please make sure to follow the name convention here proposed, or it might fail to get all the information")) message(paste0(Sys.time(), " Please make sure to follow the name convention here proposed, or it might fail to get all the information"))
} }
#' Plot some QC plots to define that everything ran correctly #' Plot some QC plots to define that everything ran correctly
#' @description #' @description
#' Plot data to visualize immediate trends #' Plot data to visualize immediate trends. This function expects data that has been processed
#' @param .data The preprocessed data (after running make_count_dataframe() and change_data_format_to_longer()) merged data.frame that should be visualized #' through make_count_dataframe() and change_data_format_to_longer() to ensure the correct
#' data structure for plotting.
#' @param .data The preprocessed data (after running make_count_dataframe() and change_data_format_to_longer())
#' merged data.frame that should be visualized
#' @param patient_column_name The PID's column name in the merged data.frame (defaults to "PID") #' @param patient_column_name The PID's column name in the merged data.frame (defaults to "PID")
#' @param colors A list of colors to supply to personalize the plot, as default 4 colors c("dark green", "red", "orange", "pink") #' @param colors A list of colors to supply to personalize the plot, defaults to c("darkgreen", "red", "orange", "pink")
#' @param save_plots A Boolean value indicating if the plots should be saved or not, TRUE for saving in the current working directory, FALSE to not. Default is FALSE #' @param save_plots A Boolean value indicating if the plots should be saved or not (default is FALSE)
#' @param folder_name A string indicating the name of the folder where to save the plots in case that save_plots = TRUE #' @param folder_name A string indicating the name of the folder where to save the plots if save_plots is TRUE
#' @param isolate_a_specific_patient A string indicating the patient name to isolate for single plot case (default is NULL) #' @param isolate_a_specific_patient A string indicating the patient name to isolate for single plot case (default is NULL)
#' @param x_plot_var A string indicating the treatment's full name for the QC plots (default is "Treatment_complete") #' @param x_plot_var A string indicating the treatment's full name for the QC plots (default is "Treatment_complete")
#' @import ggplot2 #' @import ggplot2
#' @import ggpubr #' @import ggpubr
#' @importFrom dplyr filter #' @importFrom dplyr filter
#' @return A `dataframe`/`tibble`. #' @return Invisibly returns NULL, but saves plots to disk if save_plots is TRUE
#' @examples #' @examples
#' \dontrun{ #' \dontrun{
#' get_QC_plots(longer_format_dataframe, patient_column_name = "PID", #' # First process example data
#' save_plots = TRUE, folder_name = "figures") #' example_path <- system.file("extdata/to_merge/", package = "drugsens")
#' raw_data <- data_binding(path_to_the_projects_folder = example_path)
#' count_data <- make_count_dataframe(raw_data)
#' processed_data <- change_data_format_to_longer(count_data)
#'
#' # Create and save plots to temporary directory
#' temp_dir <- file.path(tempdir(), "qc_plots")
#' get_QC_plots(
#' processed_data,
#' save_plots = TRUE,
#' folder_name = temp_dir
#' )
#'
#' # Create plots for a specific patient
#' get_QC_plots(
#' processed_data,
#' isolate_a_specific_patient = "B39",
#' save_plots = TRUE,
#' folder_name = temp_dir
#' )
#' } #' }
#' @export #' @export
get_QC_plots <- function(.data, get_QC_plots <- function(.data,
patient_column_name = "PID", patient_column_name = "PID",
colors = c("darkgreen", "red", "orange", "pink"), colors = c("darkgreen", "red", "orange", "pink"),
save_plots = FALSE, save_plots = FALSE,
folder_name = "figures", folder_name = NULL,
x_plot_var = "Treatment_complete", x_plot_var = "Treatment_complete",
isolate_a_specific_patient = NULL) { isolate_a_specific_patient = NULL) {
if (!is.null(isolate_a_specific_patient)) .data <- .data[.data[[patient_column_name]] == isolate_a_specific_patient, ] # Input validation
if (nrow(.data) < 1) stop("The data cannot be empty") if (!is.data.frame(.data)) {
stop("Input must be a data frame")
}
# run for every unique PID the QC plot # Check required columns exist
for (i in unique(.data[patient_column_name])) { required_cols <- c(patient_column_name, x_plot_var, "marker_positivity", "marker_positivity_ratio")
message(paste0("Running the QC plot function for PID: ", i)) missing_cols <- setdiff(required_cols, colnames(.data))
if (length(missing_cols) > 0) {
stop("Missing required columns: ", paste(missing_cols, collapse = ", "),
". Please ensure data has been processed with make_count_dataframe() and change_data_format_to_longer()")
}
QC_plot <- .data |> # Filter for specific patient if requested
dplyr::filter(.data[[patient_column_name]] == i) |> if (!is.null(isolate_a_specific_patient)) {
ggplot(aes(x = .data[[x_plot_var]], .data <- .data[.data[[patient_column_name]] == isolate_a_specific_patient, ]
y = .data$marker_positivity_ratio, if (nrow(.data) < 1) {
col = .data$marker_positivity)) + stop("No data found for patient: ", isolate_a_specific_patient)
geom_boxplot( }
position = position_dodge(width = 1.0), }
) +
facet_wrap(~marker_positivity) + # Set up output directory if saving plots
geom_jitter(width = 0.15) + if (save_plots) {
theme_light() + if (is.null(folder_name)) {
labs(title = paste0("Cell marker ratios for PID: ", i), color = "Cell marker") + folder_name <- file.path(tempdir(), "figures")
ylab("Percentage of expression marker (marker-positive-cells/total_cell_count)") + }
xlab("Drugs") + dir.create(folder_name, showWarnings = FALSE, recursive = TRUE)
theme(axis.text.x = element_text(angle = 45, hjust = 1.0)) + }
scale_color_manual(values = colors) +
stat_summary( # Process each patient
fun = "median", geom = "pointrange", for (current_pid in unique(.data[[patient_column_name]])) {
mapping = aes(xend = after_stat(x) - 0.25, yend = after_stat(y)), message("Processing patient: ", current_pid)
size = 1.5, alpha = 1.0,
position = position_dodge(width = 1) # Filter data for current patient
) + patient_data <- dplyr::filter(.data, .data[[patient_column_name]] == current_pid)
stat_summary(
geom = "line", fun = "median", position = position_dodge(width = 1), # Create the plot
size = 1, alpha = 0.3, aes(group = marker_positivity) QC_plot <- ggplot2::ggplot(patient_data,
ggplot2::aes(x = .data[[x_plot_var]],
y = marker_positivity_ratio,
color = marker_positivity)) +
ggplot2::geom_boxplot(position = ggplot2::position_dodge(width = 1.0)) +
ggplot2::facet_wrap(~marker_positivity) +
ggplot2::geom_jitter(width = 0.15) +
ggplot2::theme_light() +
ggplot2::labs(
title = paste0("Cell marker ratios for PID: ", current_pid),
color = "Cell marker",
y = "Percentage of expression marker (marker-positive-cells/total_cell_count)",
x = "Drugs"
) + ) +
theme( ggplot2::theme(
axis.title.x = element_blank(), axis.text.x = ggplot2::element_text(angle = 45, hjust = 1.0),
plot.title = element_text(hjust = 0.5), axis.title.x = ggplot2::element_blank(),
axis.ticks.x = element_blank(), plot.title = ggplot2::element_text(hjust = 0.5),
panel.grid = element_blank(), axis.ticks.x = ggplot2::element_blank(),
strip.background = element_rect( panel.grid = ggplot2::element_blank(),
strip.background = ggplot2::element_rect(
colour = "black", colour = "black",
fill = "grey1" fill = "grey1"
) )
) +
ggplot2::scale_color_manual(values = colors) +
ggplot2::stat_summary(
fun = "median",
geom = "point",
size = 3,
position = ggplot2::position_dodge(width = 1)
) +
ggplot2::stat_summary(
geom = "line",
fun = "median",
position = ggplot2::position_dodge(width = 1),
linewidth = 1,
alpha = 0.3,
aes(group = marker_positivity)
) )
# Save the plot if requested
if (save_plots) { if (save_plots) {
if (!dir.exists(paths = paste0(getwd(), "/", folder_name, "/"))) dir.create(path = paste0(getwd(), "/", folder_name, "/"), showWarnings = F, recursive = T) plot_filename <- file.path(
folder_name,
paste0("patients_QC_box_plots_",
current_pid,
"_median",
format(Sys.Date(), "%Y-%m-%d"),
".pdf"
)
)
ggsave(QC_plot, ggplot2::ggsave(
filename = paste0(folder_name, "/", "patients_QC_box_plots_", i, "_", "median", Sys.Date(), ".pdf"), filename = plot_filename,
plot = QC_plot,
device = "pdf", device = "pdf",
height = 12, height = 12,
width = 12 width = 12
) )
} }
} }
message(paste0("If save_plots = TRUE, the plots will be saved here:", paste0(folder_name, "/", "patients_QC_box_plots_", "median", Sys.Date(), ".pdf")))
# Final message about plot locations
if (save_plots) {
message("Plots have been saved in: ", folder_name)
}
invisible(NULL)
} }
#' Plot some QC plots for the bound data #' Plot QC plots and calculate statistics for bound data
#' @description #' @description
#' This plot can show trends within the dataset and run some basic statistics. #' This function creates quality control plots and calculates basic statistics for microscopy data.
#' #' The plots provide visual insights into marker expression patterns and data quality.
#' @param .data The preprocessed data (after running make_count_dataframe() and change_data_format_to_longer()) merged data.frame that should be visualized #' @param .data The preprocessed data frame to analyze
#' @param list_of_columns_to_plot The preprocessed data (after running make_count_dataframe() and change_data_format_to_longer()) merged data.frame that should be visualized #' @param list_of_columns_to_plot Columns to include in plots. If NULL, all numeric columns are used.
#' @param save_plots Boolean, TRUE if plots should be saved (default is FALSE) #' @param save_plots Logical, whether to save plots to files. Defaults to FALSE.
#' @param saving_plots_folder String indicating the folder where the plots should be stored (default is "figures") #' @param saving_plots_folder Directory for saving plots. If NULL and save_plots=TRUE, uses a subdirectory of tempdir().
#' @param PID_column_name String, indicating the name of the sample to subset (default is "PID") #' @param save_plots_in_patient_specific_subfolders Logical, whether to create patient subdirectories. Defaults to TRUE.
#' @param isolate_specific_drug String, indicating if there should be a Treatment specific data subset (default is NULL) #' @param fill_color_variable Variable name for plot color filling
#' @param isolate_specific_patient String, indicating a spacific sample to plot only (default is NULL) #' @param PID_column_name Column name for patient IDs. Defaults to "PID".
#' @param PID_column_name String, indicating the name of the sample to subset (default is "Treatment") #' @param isolate_specific_drug Drug name to subset data
#' @param save_list_of_plots Boolean, if TRUE returns a named list of all the plots ran (default is TRUE), this can be usefult to isolate specific plots #' @param isolate_specific_patient Patient ID to subset data
#' @param save_plots_in_patient_specific_subfolders Boolean, if TRUE the plots will be saved (if `save_plots` TRUE) in sample specific folders (default is TRUE) #' @param drug_column_name Column name for drug information. Defaults to "Treatment".
#' @param fill_color_variable Boolean, String, indicating the name of the variable (discrete) to use for the plot's filling #' @param save_list_of_plots Logical, whether to return list of plot objects. Defaults to TRUE.
#' @param p_height Integer, indicate the plot's height (default is 10 inches) #' @param p_height Plot height in inches. Defaults to 10.
#' @param p_width Integer, indicate the plot's width (default is 10 inches) #' @param p_width Plot width in inches. Defaults to 10.
#' @param drug_column_name String, indicate the column indicating the Drug/Treament (default is "Treatment") #' @param verbose Logical, whether to show progress messages. Defaults to TRUE.
#' @import ggplot2 #' @return If save_list_of_plots=TRUE, returns a named list of ggplot objects. Otherwise returns invisible(NULL).
#' @import ggpubr #' @importFrom ggplot2 ggplot aes geom_violin geom_boxplot facet_wrap theme element_text labs
#' @importFrom readr write_excel_csv #' geom_jitter position_jitter stat_summary aes_string
#' @importFrom dplyr filter
#' @return A `list`/`NULL`.
#' @examples #' @examples
#' \dontrun{ #' \dontrun{
#' qc <- get_QC_plots_parsed_merged_data(bind_data, save_plots = TRUE, #' # First load and process example data
#' save_list_of_plots = FALSE) #' example_path <- system.file("extdata/to_merge/", package = "drugsens")
#' raw_data <- data_binding(path_to_the_projects_folder = example_path)
#' count_data <- make_count_dataframe(raw_data)
#' processed_data <- change_data_format_to_longer(count_data)
#'
#' # Basic usage - create plots for all patients
#' plots <- get_QC_plots_parsed_merged_data(processed_data)
#'
#' # Save plots to a temporary directory
#' temp_dir <- file.path(tempdir(), "qc_plots")
#' plots <- get_QC_plots_parsed_merged_data(
#' processed_data,
#' save_plots = TRUE,
#' saving_plots_folder = temp_dir
#' )
#'
#' # Focus on a specific patient
#' plots <- get_QC_plots_parsed_merged_data(
#' processed_data,
#' isolate_specific_patient = "B39"
#' )
#'
#' # Color plots by tissue type
#' plots <- get_QC_plots_parsed_merged_data(
#' processed_data,
#' fill_color_variable = "Tissue"
#' )
#' } #' }
#' @export #' @export
get_QC_plots_parsed_merged_data <- function(.data, get_QC_plots_parsed_merged_data <- function(.data,
list_of_columns_to_plot = NULL, list_of_columns_to_plot = NULL,
save_plots = FALSE, save_plots = FALSE,
saving_plots_folder = "figures", saving_plots_folder = NULL,
save_plots_in_patient_specific_subfolders = TRUE, save_plots_in_patient_specific_subfolders = TRUE,
fill_color_variable = NULL, fill_color_variable = NULL,
PID_column_name = "PID", PID_column_name = "PID",
...@@ -40,127 +63,153 @@ get_QC_plots_parsed_merged_data <- function(.data, ...@@ -40,127 +63,153 @@ get_QC_plots_parsed_merged_data <- function(.data,
drug_column_name = "Treatment", drug_column_name = "Treatment",
save_list_of_plots = TRUE, save_list_of_plots = TRUE,
p_height = 10, p_height = 10,
p_width = 10) { p_width = 10,
# List where plots could be stored verbose = TRUE) {
list_plottos <- list()
# Define the helper function for creating individual QC plots
create_qc_plot <- function(data, metric, fill_var, pid, drug) {
p <- ggplot2::ggplot(data, ggplot2::aes(x = marker_positivity, y = unlist(data[[metric]])))
if (!is.null(fill_var)) {
p <- p + ggplot2::geom_violin(trim = FALSE,
ggplot2::aes_string(fill = fill_var),
color = NA)
} else {
p <- p + ggplot2::geom_violin(trim = FALSE,
fill = "#A4A4A4",
color = "darkred")
}
if (!is.data.frame(.data) | nrow(.data) < 1) stop("ERROR: the data provided must be not empty of dataframe type.") p <- p +
theme_minimal() +
ggplot2::geom_boxplot(width = 0.1, fill = "white") +
ggplot2::facet_wrap(~Treatment) +
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)) +
ggplot2::labs(
title = metric,
x = "Cell Markers",
y = paste0("Intensity of ", metric, " (log2)"),
subtitle = paste0(pid, ".", drug)
) +
ggplot2::geom_jitter(shape = 16,
position = ggplot2::position_jitter(0.01)) +
ggplot2::stat_summary(geom = "crossbar",
fun = mean,
colour = "red",
width = 0.21)
return(p)
}
# get the number of possible plotting variables # Input validation
if (is.null(list_of_columns_to_plot)) { if (!is.data.frame(.data) || nrow(.data) < 1) {
list_of_columns_to_plot <- colnames(.data)[which(sapply(.data, is.numeric))] stop("Input must be a non-empty data frame")
} }
# check that the fill_color_variable is in the dataset and not null # Initialize plot storage
if (!is.null(fill_color_variable) & !fill_color_variable %in% colnames(.data)) stop("ERROR: the fill_color_variable must be in the colum names variables.") list_plottos <- list()
# if the user decides to isolate a specific sample only # If user requested to isolate specific patient, filter the data
if (!is.null(isolate_specific_patient)) .data <- .data[.data[[PID_column_name]] == isolate_specific_patient, ] if (!is.null(isolate_specific_patient)) {
.data <- .data[.data[[PID_column_name]] == isolate_specific_patient, ]
if (nrow(.data) < 1) {
stop("No data found for specified patient: ", isolate_specific_patient)
}
}
# Set up output directory if saving plots
if (save_plots) {
# Initialize the base directory for plots
saving_plots_folder <- if (is.null(saving_plots_folder)) {
file.path(tempdir(), "drugsens_plots")
} else {
saving_plots_folder
}
dir.create(saving_plots_folder, showWarnings = FALSE, recursive = TRUE)
}
# Determine columns to plot
if (is.null(list_of_columns_to_plot)) {
list_of_columns_to_plot <- colnames(.data)[sapply(.data, is.numeric)]
}
# Process each patient
for (pid in unique(.data[[PID_column_name]])) { for (pid in unique(.data[[PID_column_name]])) {
if (verbose) {
message("Processing patient: ", pid)
}
# Subset data for current patient
subset_data <- .data[.data[[PID_column_name]] == pid, ] subset_data <- .data[.data[[PID_column_name]] == pid, ]
for (i in list_of_columns_to_plot) { # Apply drug filter if specified
if (!is.null(isolate_specific_drug)) subset_data <- subset_data[subset_data[[drug_column_name]] %in% isolate_specific_drug, ] if (!is.null(isolate_specific_drug)) {
subset_data <- subset_data[subset_data[[drug_column_name]] %in% isolate_specific_drug, ]
}
if (nrow(subset_data) < 1) { # Skip if no data after filtering
message(unique(subset_data[[PID_column_name]])) if (nrow(subset_data) < 1) {
message(unique(subset_data[[drug_column_name]])) if (verbose) {
stop("ERROR: Your filtering query has returned no observations") message("No data found for PID: ", pid)
} }
next
}
# browser() # Create patient directory if needed
if (save_plots && save_plots_in_patient_specific_subfolders) {
patient_dir <- file.path(saving_plots_folder, pid)
dir.create(patient_dir, showWarnings = FALSE, recursive = TRUE)
}
# Process each metric
for (i in list_of_columns_to_plot) {
# Create plot
p <- create_qc_plot(subset_data, i, fill_color_variable,
pid, isolate_specific_drug)
# Function to dynamically add layers to a ggplot object based on conditions # Save plot if requested
add_violin_layers <- function(p, fill_color_variable) { if (save_plots) {
if (!is.null(fill_color_variable)) { plot_file <- sprintf("%s_%s_%s_%s.pdf",
p <- p + geom_violin(trim = FALSE, aes_string(fill = fill_color_variable), color = NA) + format(Sys.Date(), "%Y%m%d"),
geom_boxplot(width = 0.1, fill = "white") pid,
ifelse(is.null(isolate_specific_drug), "all", isolate_specific_drug),
make.names(i))
# Determine save directory
save_dir <- if (save_plots_in_patient_specific_subfolders) {
file.path(saving_plots_folder, pid)
} else { } else {
p <- p + geom_violin(trim = FALSE, fill = "#A4A4A4", color = "darkred") + saving_plots_folder
geom_boxplot(width = 0.1, fill = "white")
} }
return(p)
}
# Initialize ggplot plot_path <- file.path(save_dir, plot_file)
p <- ggplot(subset_data, aes(x = Name, y = log2(unlist(subset_data[[i]]))))
# Add violin and boxplot layers
p <- add_violin_layers(p, fill_color_variable)
# More layers on top
p <- p + facet_wrap(~Treatment) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(
title = colnames(subset_data[i]),
x = "Cell Markers",
y = paste0("Intensity of ", colnames(subset_data[i]), " (log2)"),
subtitle = paste0(pid, ".", isolate_specific_drug)
) +
geom_jitter(shape = 16, position = position_jitter(0.01)) +
stat_summary(geom = "crossbar", fun = mean, colour = "red", width = 0.21)
# Conditionally add to list of plots
if (save_list_of_plots) {
list_plottos[[paste0(isolate_specific_drug, ".", pid, ".", i)]] <- p
}
ggplot2::ggsave(plot_path,
plot = p,
width = p_width,
height = p_height,
dpi = 600)
if (save_plots) { if (verbose) {
if (save_plots_in_patient_specific_subfolders) { message("Saved plot to: ", plot_path)
if (!dir.exists(paste0(saving_plots_folder, "/", pid))) dir.create(paste0(saving_plots_folder, "/", pid), showWarnings = F, recursive = T)
ggsave(
plot = p,
filename = paste0(
paste0(saving_plots_folder, "/", pid),
"/",
Sys.Date(),
"_",
pid,
".",
isolate_specific_drug,
".",
colnames(.data[i]),
".pdf"
),
device = "pdf",
dpi = 600
)
} else {
# Saving plots in .pdf at 600 dpi
if (!dir.exists(saving_plots_folder)) dir.create(saving_plots_folder, showWarnings = F, recursive = T)
ggsave(
plot = p,
width = p_width,
height = p_height,
filename = paste0(
saving_plots_folder,
"/",
Sys.Date(),
"_",
pid,
".",
isolate_specific_drug,
".",
colnames(.data[i]),
".pdf"
),
device = "pdf",
dpi = 600,
)
} }
}
message(paste0( # Store plot if requested
"plots for: ", if (save_list_of_plots) {
pid, plot_name <- paste(pid, i, sep = ".")
".", if (!is.null(isolate_specific_drug)) {
isolate_specific_drug, plot_name <- paste(isolate_specific_drug, plot_name, sep = ".")
".", }
colnames(.data[i]), " saved" list_plottos[[plot_name]] <- p
))
} }
} }
} }
# Return results
if (save_list_of_plots) {
return(list_plottos)
} else {
invisible(NULL)
}
} }
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
#' plotting_ready_dataframe <- change_data_format_to_longer( #' plotting_ready_dataframe <- change_data_format_to_longer(
#' counts_dataframe #' counts_dataframe
#' ) #' )
##' make_count_dataframe( # make_count_dataframe(
#' data, #' data,
#' name_of_the_markers_column = "Name", #' name_of_the_markers_column = "Name",
#' unique_name_row_identifier = "filter_image" #' unique_name_row_identifier = "filter_image"
......
...@@ -14,7 +14,8 @@ ...@@ -14,7 +14,8 @@
#' make_run_config() #' make_run_config()
#' } #' }
make_run_config <- function(overwrite_config = FALSE, forcePath = NULL) { make_run_config <- function(overwrite_config = FALSE, forcePath = NULL) {
currentPath <- if (is.null(forcePath)) getwd() else forcePath
currentPath <- if (is.null(forcePath)) tempdir() else forcePath
config_file <- file.path(currentPath, "config_drugsens.txt") config_file <- file.path(currentPath, "config_drugsens.txt")
if (file.exists(config_file)) { if (file.exists(config_file)) {
......
#' Main parsing function #' @title Parse image filenames to extract metadata
#' @description #' @description
#' This function will parse the data from the Image name and will return the metadata there contained #' This function will parse the data from the Image name and will return the metadata there contained
#' The metadata will be then associated to the count file as well #' The metadata will be then associated to the count file as well
......
% Generated by roxygen2: do not edit by hand % Generated by roxygen2: do not edit by hand
% Please edit documentation in R/generate_qu_path_script.R % Please edit documentation in R/generate_qupath_script.R
\name{generate_qupath_script} \name{generate_qupath_script}
\alias{generate_qupath_script} \alias{generate_qupath_script}
\title{Generate the groovy script used for the analysis} \title{Generate the groovy script used for the analysis}
\usage{ \usage{
generate_qupath_script() generate_qupath_script(output_dir = NULL)
}
\arguments{
\item{output_dir}{Directory where the script should be saved. If NULL, uses tempdir()}
} }
\value{ \value{
\code{script_for_qupath.txt} in local working directory. Invisibly returns the path to the generated script file.
} }
\description{ \description{
Generate a useful script to consistently save the output data from QuPath in .csv format following the naming conventions Generate a useful script to consistently save the output data from QuPath in .csv format following the naming conventions
...@@ -15,6 +18,11 @@ followed during the package development. ...@@ -15,6 +18,11 @@ followed during the package development.
} }
\examples{ \examples{
\dontrun{ \dontrun{
# Generate script in a temporary directory
generate_qupath_script() generate_qupath_script()
# Generate script in a specific directory
output_dir <- tempdir()
generate_qupath_script(output_dir = output_dir)
} }
} }
...@@ -9,35 +9,57 @@ get_QC_plots( ...@@ -9,35 +9,57 @@ get_QC_plots(
patient_column_name = "PID", patient_column_name = "PID",
colors = c("darkgreen", "red", "orange", "pink"), colors = c("darkgreen", "red", "orange", "pink"),
save_plots = FALSE, save_plots = FALSE,
folder_name = "figures", folder_name = NULL,
x_plot_var = "Treatment_complete", x_plot_var = "Treatment_complete",
isolate_a_specific_patient = NULL isolate_a_specific_patient = NULL
) )
} }
\arguments{ \arguments{
\item{.data}{The preprocessed data (after running make_count_dataframe() and change_data_format_to_longer()) merged data.frame that should be visualized} \item{.data}{The preprocessed data (after running make_count_dataframe() and change_data_format_to_longer())
merged data.frame that should be visualized}
\item{patient_column_name}{The PID's column name in the merged data.frame (defaults to "PID")} \item{patient_column_name}{The PID's column name in the merged data.frame (defaults to "PID")}
\item{colors}{A list of colors to supply to personalize the plot, as default 4 colors c("dark green", "red", "orange", "pink")} \item{colors}{A list of colors to supply to personalize the plot, defaults to c("darkgreen", "red", "orange", "pink")}
\item{save_plots}{A Boolean value indicating if the plots should be saved or not, TRUE for saving in the current working directory, FALSE to not. Default is FALSE} \item{save_plots}{A Boolean value indicating if the plots should be saved or not (default is FALSE)}
\item{folder_name}{A string indicating the name of the folder where to save the plots in case that save_plots = TRUE} \item{folder_name}{A string indicating the name of the folder where to save the plots if save_plots is TRUE}
\item{x_plot_var}{A string indicating the treatment's full name for the QC plots (default is "Treatment_complete")} \item{x_plot_var}{A string indicating the treatment's full name for the QC plots (default is "Treatment_complete")}
\item{isolate_a_specific_patient}{A string indicating the patient name to isolate for single plot case (default is NULL)} \item{isolate_a_specific_patient}{A string indicating the patient name to isolate for single plot case (default is NULL)}
} }
\value{ \value{
A \code{dataframe}/\code{tibble}. Invisibly returns NULL, but saves plots to disk if save_plots is TRUE
} }
\description{ \description{
Plot data to visualize immediate trends Plot data to visualize immediate trends. This function expects data that has been processed
through make_count_dataframe() and change_data_format_to_longer() to ensure the correct
data structure for plotting.
} }
\examples{ \examples{
\dontrun{ \dontrun{
get_QC_plots(longer_format_dataframe, patient_column_name = "PID", # First process example data
save_plots = TRUE, folder_name = "figures") example_path <- system.file("extdata/to_merge/", package = "drugsens")
raw_data <- data_binding(path_to_the_projects_folder = example_path)
count_data <- make_count_dataframe(raw_data)
processed_data <- change_data_format_to_longer(count_data)
# Create and save plots to temporary directory
temp_dir <- file.path(tempdir(), "qc_plots")
get_QC_plots(
processed_data,
save_plots = TRUE,
folder_name = temp_dir
)
# Create plots for a specific patient
get_QC_plots(
processed_data,
isolate_a_specific_patient = "B39",
save_plots = TRUE,
folder_name = temp_dir
)
} }
} }
...@@ -2,13 +2,13 @@ ...@@ -2,13 +2,13 @@
% Please edit documentation in R/get_QC_plots_and_stats.R % Please edit documentation in R/get_QC_plots_and_stats.R
\name{get_QC_plots_parsed_merged_data} \name{get_QC_plots_parsed_merged_data}
\alias{get_QC_plots_parsed_merged_data} \alias{get_QC_plots_parsed_merged_data}
\title{Plot some QC plots for the bound data} \title{Plot QC plots and calculate statistics for bound data}
\usage{ \usage{
get_QC_plots_parsed_merged_data( get_QC_plots_parsed_merged_data(
.data, .data,
list_of_columns_to_plot = NULL, list_of_columns_to_plot = NULL,
save_plots = FALSE, save_plots = FALSE,
saving_plots_folder = "figures", saving_plots_folder = NULL,
save_plots_in_patient_specific_subfolders = TRUE, save_plots_in_patient_specific_subfolders = TRUE,
fill_color_variable = NULL, fill_color_variable = NULL,
PID_column_name = "PID", PID_column_name = "PID",
...@@ -17,45 +17,75 @@ get_QC_plots_parsed_merged_data( ...@@ -17,45 +17,75 @@ get_QC_plots_parsed_merged_data(
drug_column_name = "Treatment", drug_column_name = "Treatment",
save_list_of_plots = TRUE, save_list_of_plots = TRUE,
p_height = 10, p_height = 10,
p_width = 10 p_width = 10,
verbose = TRUE
) )
} }
\arguments{ \arguments{
\item{.data}{The preprocessed data (after running make_count_dataframe() and change_data_format_to_longer()) merged data.frame that should be visualized} \item{.data}{The preprocessed data frame to analyze}
\item{list_of_columns_to_plot}{The preprocessed data (after running make_count_dataframe() and change_data_format_to_longer()) merged data.frame that should be visualized} \item{list_of_columns_to_plot}{Columns to include in plots. If NULL, all numeric columns are used.}
\item{save_plots}{Boolean, TRUE if plots should be saved (default is FALSE)} \item{save_plots}{Logical, whether to save plots to files. Defaults to FALSE.}
\item{saving_plots_folder}{String indicating the folder where the plots should be stored (default is "figures")} \item{saving_plots_folder}{Directory for saving plots. If NULL and save_plots=TRUE, uses a subdirectory of tempdir().}
\item{save_plots_in_patient_specific_subfolders}{Boolean, if TRUE the plots will be saved (if \code{save_plots} TRUE) in sample specific folders (default is TRUE)} \item{save_plots_in_patient_specific_subfolders}{Logical, whether to create patient subdirectories. Defaults to TRUE.}
\item{fill_color_variable}{Boolean, String, indicating the name of the variable (discrete) to use for the plot's filling} \item{fill_color_variable}{Variable name for plot color filling}
\item{PID_column_name}{String, indicating the name of the sample to subset (default is "Treatment")} \item{PID_column_name}{Column name for patient IDs. Defaults to "PID".}
\item{isolate_specific_drug}{String, indicating if there should be a Treatment specific data subset (default is NULL)} \item{isolate_specific_drug}{Drug name to subset data}
\item{isolate_specific_patient}{String, indicating a spacific sample to plot only (default is NULL)} \item{isolate_specific_patient}{Patient ID to subset data}
\item{drug_column_name}{String, indicate the column indicating the Drug/Treament (default is "Treatment")} \item{drug_column_name}{Column name for drug information. Defaults to "Treatment".}
\item{save_list_of_plots}{Boolean, if TRUE returns a named list of all the plots ran (default is TRUE), this can be usefult to isolate specific plots} \item{save_list_of_plots}{Logical, whether to return list of plot objects. Defaults to TRUE.}
\item{p_height}{Integer, indicate the plot's height (default is 10 inches)} \item{p_height}{Plot height in inches. Defaults to 10.}
\item{p_width}{Integer, indicate the plot's width (default is 10 inches)} \item{p_width}{Plot width in inches. Defaults to 10.}
\item{verbose}{Logical, whether to show progress messages. Defaults to TRUE.}
} }
\value{ \value{
A \code{list}/\code{NULL}. If save_list_of_plots=TRUE, returns a named list of ggplot objects. Otherwise returns invisible(NULL).
} }
\description{ \description{
This plot can show trends within the dataset and run some basic statistics. This function creates quality control plots and calculates basic statistics for microscopy data.
The plots provide visual insights into marker expression patterns and data quality.
} }
\examples{ \examples{
\dontrun{ \dontrun{
qc <- get_QC_plots_parsed_merged_data(bind_data, save_plots = TRUE, # First load and process example data
save_list_of_plots = FALSE) example_path <- system.file("extdata/to_merge/", package = "drugsens")
raw_data <- data_binding(path_to_the_projects_folder = example_path)
count_data <- make_count_dataframe(raw_data)
processed_data <- change_data_format_to_longer(count_data)
# Basic usage - create plots for all patients
plots <- get_QC_plots_parsed_merged_data(processed_data)
# Save plots to a temporary directory
temp_dir <- file.path(tempdir(), "qc_plots")
plots <- get_QC_plots_parsed_merged_data(
processed_data,
save_plots = TRUE,
saving_plots_folder = temp_dir
)
# Focus on a specific patient
plots <- get_QC_plots_parsed_merged_data(
processed_data,
isolate_specific_patient = "B39"
)
# Color plots by tissue type
plots <- get_QC_plots_parsed_merged_data(
processed_data,
fill_color_variable = "Tissue"
)
} }
} }
...@@ -34,7 +34,6 @@ counts_dataframe <- make_count_dataframe(bind_data) ...@@ -34,7 +34,6 @@ counts_dataframe <- make_count_dataframe(bind_data)
plotting_ready_dataframe <- change_data_format_to_longer( plotting_ready_dataframe <- change_data_format_to_longer(
counts_dataframe counts_dataframe
) )
make_count_dataframe(
data, data,
name_of_the_markers_column = "Name", name_of_the_markers_column = "Name",
unique_name_row_identifier = "filter_image" unique_name_row_identifier = "filter_image"
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
% Please edit documentation in R/parsers.R % Please edit documentation in R/parsers.R
\name{string_parsing} \name{string_parsing}
\alias{string_parsing} \alias{string_parsing}
\title{Main parsing function} \title{Parse image filenames to extract metadata}
\usage{ \usage{
string_parsing(.data) string_parsing(.data)
} }
......
test_that("Verify that the internal file in to_merge examples in exdata are available", { test_that("Verify that the internal file in to_merge examples in exdata are available", {
list_files_exdata <- system.file("extdata/to_merge/", package = "drugsens") |> list.files() list_files_exdata <- system.file("extdata/to_merge/", package = "drugsens") |> list.files()
expect_true(length(list_files_exdata) > 3 ) expect_true(length(list_files_exdata) > 3)
}) })
test_that("Verify that the internal file examples in exdata merged are available", { test_that("Verify that the internal file examples in exdata merged are available", {
list_files_exdata <- system.file("extdata/merged/", package = "drugsens") |> list.files() list_files_exdata <- system.file("extdata/merged/", package = "drugsens") |> list.files()
expect_true(length(list_files_exdata) >= 1 ) expect_true(length(list_files_exdata) >= 1)
}) })
test_that("list_all_files returns correct file paths", { test_that("list_all_files returns correct file paths", {
# Setup: Create temporary files and directory # Setup: Create temporary files and directory
temp_dir <- tempdir() temp_dir <- file.path(tempdir(), "drugsens_test")
dir.create(temp_dir, recursive = TRUE, showWarnings = FALSE)
on.exit(unlink(temp_dir, recursive = TRUE))
file.create(file.path(temp_dir, "file1.csv")) file.create(file.path(temp_dir, "file1.csv"))
file.create(file.path(temp_dir, "file2.csv")) file.create(file.path(temp_dir, "file2.csv"))
file.create(file.path(temp_dir, "file3.txt")) file.create(file.path(temp_dir, "file3.txt"))
file.create(file.path(temp_dir, "file4.tsv")) file.create(file.path(temp_dir, "file4.tsv"))
# Test 1 # Test CSV files
files_list <- list_all_files(define_path = temp_dir, extension = "\\.csv$", files_list <- list_all_files(define_path = temp_dir, extension = "\\.csv$",
recursive_search = T) recursive_search = TRUE)
expect_length(files_list, 2) expect_length(files_list, 2)
expect_true(all(grepl("file1.csv|file2.csv", files_list))) expect_true(all(grepl("file1.csv|file2.csv", files_list)))
# Test 2 # Test TXT files
files_list <- list_all_files(define_path = temp_dir, extension = "\\.txt$", files_list <- list_all_files(define_path = temp_dir, extension = "\\.txt$",
recursive_search = T) recursive_search = TRUE)
expect_length(files_list, 1) expect_length(files_list, 1)
expect_true(all(grepl("file3.txt", files_list))) expect_true(all(grepl("file3.txt", files_list)))
# Test 3 # Test TSV files
files_list <- list_all_files(define_path = temp_dir, extension = "\\.tsv$", files_list <- list_all_files(define_path = temp_dir, extension = "\\.tsv$",
recursive_search = T) recursive_search = TRUE)
expect_length(files_list, 1) expect_length(files_list, 1)
expect_true(all(grepl("file4.tsv", files_list))) expect_true(all(grepl("file4.tsv", files_list)))
# remove the dir
unlink(temp_dir, recursive = TRUE)
}) })
test_that("Config creation and reading works", { test_that("Config creation and reading works", {
temp_dir <- tempdir() temp_dir <- file.path(tempdir(), "drugsens_config_test")
on.exit(unlink(file.path(temp_dir, "config_drugsens.txt"))) dir.create(temp_dir, recursive = TRUE, showWarnings = FALSE)
on.exit(unlink(temp_dir, recursive = TRUE))
make_run_config(forcePath = temp_dir)
make_run_config(forcePath = temp_dir) make_run_config(forcePath = temp_dir)
expect_true(file.exists(file.path(temp_dir, "config_drugsens.txt"))) expect_true(file.exists(file.path(temp_dir, "config_drugsens.txt")))
# Test re-running doesn't error
expect_silent(make_run_config(forcePath = temp_dir))
expect_true(exists("list_of_relabeling")) expect_true(exists("list_of_relabeling"))
}) })
test_that("Check that the example file can be read correctly", { test_that("Example file can be read correctly", {
datas <- drugsens::data_binding(path_to_the_projects_folder = system.file("extdata/to_merge/", package = "drugsens")) datas <- drugsens::data_binding(
path_to_the_projects_folder = system.file("extdata/to_merge/", package = "drugsens")
)
expect_true(exists("datas")) expect_true(exists("datas"))
expect_equal(ncol(datas), expected = 28) expect_equal(ncol(datas), expected = 28)
}) })
test_that("Check that the drugs combination have two unit and two concentration and control none", { test_that("Drug combinations have correct units and concentrations", {
datas <- drugsens::data_binding(path_to_the_projects_folder = system.file("extdata/to_merge/", package = "drugsens")) datas <- drugsens::data_binding(
expect_true(datas[datas$Treatment == "GentamicinePaclitaxel", "Treatment_complete"][1] == "GentamicinePaclitaxel100uM-10uM" || datas[datas$Treatment == "GentamicinePaclitaxel", "Treatment_complete"][1] == "gentamicinePaclitaxel100uM-10uM") path_to_the_projects_folder = system.file("extdata/to_merge/", package = "drugsens")
expect_true(datas[datas$Treatment == "Control", "Treatment_complete"][1] == "Control" || datas[datas$Treatment == "Control", "Treatment_complete"][1] == "control") )
})
test_that("Config file was there and removed correctly", {
expect_silent( file.remove(path.expand(paste0(getwd(), "/config_drugsens.txt"))) )
})
# Test drug combination formatting
combo_row <- datas[datas$Treatment == "GentamicinePaclitaxel", "Treatment_complete"][1]
expect_true(combo_row == "GentamicinePaclitaxel100uM-10uM" ||
combo_row == "gentamicinePaclitaxel100uM-10uM")
test_that("The parsing is working", { # Test control formatting
input_data <- data.frame(Image = "PID1_Tissue1_2024-02-13_DOC2024.02.13_TreatmentRana_10_uM_15_nm_Replica_(series.10)") control_row <- datas[datas$Treatment == "Control", "Treatment_complete"][1]
expected_output <- data.frame( expect_true(control_row == "Control" || control_row == "control")
Image = "PID1_Tissue1_2024-02-13_DOC2024.02.13_TreatmentRana_10_uM_15_nm_Replica_(series.10)",
Image_number = "series.10",
PID = "PID1",
Tissue = "Tissue1",
Date1 = "2024-02-13",
DOC = "2024.02.13",
ReplicaOrNot = "Replica",
Treatment = "TreatmentRana",
Concentration1 = "10",
Concentration2 = "15",
ConcentrationUnits1 = "uM",
ConcentrationUnits2 = "nm",
Treatment_complete = "TreatmentRana10uM-15nm")
expect_equal(drugsens::string_parsing(input_data), expected = expected_output)
}) })
test_that("Another parsing test", { test_that("String parsing works correctly for single drug", {
input_data <- data.frame(Image = "B516_Ascites_2023-11-25_DOC2020-12-14_dmso_rep_Ecad_cCasp3_(series 01).tif") input_data <- data.frame(
expected_output <- data.frame( Image = "PID1_Tissue1_2024-02-13_DOC2024.02.13_TreatmentRana_10_uM_15_nm_Replica_(series.10)"
Image = "B516_Ascites_2023-11-25_DOC2020-12-14_dmso_rep_Ecad_cCasp3_(series 01).tif", )
Image_number = "series 01", result <- drugsens::string_parsing(input_data)
PID = "B516",
Tissue = "Ascites", expect_equal(result$PID, "PID1")
Date1 = "2023-11-25", expect_equal(result$Tissue, "Tissue1")
DOC = "2020-12-14", expect_equal(result$Date1, "2024-02-13")
ReplicaOrNot = "Replica", expect_equal(result$DOC, "2024.02.13")
Treatment = "dmso", expect_equal(result$Treatment, "TreatmentRana")
Concentration1 = NA_character_, #WIP expect_equal(result$Concentration1, "10")
Concentration2 = NA_integer_, expect_equal(result$ConcentrationUnits1, "uM")
ConcentrationUnits1 = NA_character_, expect_equal(result$Treatment_complete, "TreatmentRana10uM-15nm")
ConcentrationUnits2 = NA_character_,
Treatment_complete = "dmso")
expect_equal(drugsens::string_parsing(input_data), expected = expected_output)
# Image1 <- "B516_Ascites_2023-11-25_DOC2020-12-14_CarboplatinPaclitaxel_100_uM_10_nM_Ecad_cCasp3_(series 01).tif"
# Image2 <- "A8759_Spleen_2020.11.10_DOC2001.10.05_compoundX34542_1000_uM_EpCAM_Ecad_cCasp3_(series 01).tif"
# Image3 <- "A8759_Spleen_2020.11.10_DOC2001.10.05_compoundX34542_1000_uM_EpCAM_Ecad_cCasp3_(series 01).tif"
# Image4 <- "B38_Eye_2023.11.10_DOC2023.10.05_GentamicinePaclitaxel_100_uM_10_nM_EpCAM_Ecad_cCasp3_(series 01).tif"
}) })
test_that("String parsing works correctly for DMSO control", {
input_data <- data.frame(
Image = "B516_Ascites_2023-11-25_DOC2020-12-14_dmso_rep_Ecad_cCasp3_(series 01).tif"
)
result <- drugsens::string_parsing(input_data)
expect_equal(result$PID, "B516")
expect_equal(result$Tissue, "Ascites")
expect_equal(result$Date1, "2023-11-25")
expect_equal(result$DOC, "2020-12-14")
expect_equal(result$Treatment, "dmso")
expect_true(is.na(result$Concentration1))
expect_true(is.na(result$ConcentrationUnits1))
expect_equal(result$Treatment_complete, "dmso")
})
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment