Skip to content
Snippets Groups Projects
Commit e8d43d8c authored by Selim Bouaouina
Browse files

updated mutant matrix file and run_GSEA.R

parent b7c3f562
No related branches found
No related tags found
No related merge requests found
......@@ -15,7 +15,7 @@ kegg_to_t2g = function(data_set){
GSEA_kegg_list <- list()
for (org_id in kegg_orgids){
tmp_subset <- data_set[[org_id]]
tmp_subset$label_joined <- paste(tmp_subset$pathway_id, tmp_subset$pathway_name, sep="_")
tmp_subset$label_joined <- paste(gsub("path:","",tmp_subset$pathway_id), gsub(" - Mycobacterium tuberculosis CDC1551| - Mycobacterium tuberculosis H37Rv","",tmp_subset$pathway_name), sep="_")
GSEA_kegg_list[[org_id]] <- tmp_subset[,c("label_joined","uniprot_id")]
}
return(GSEA_kegg_list)
......@@ -44,31 +44,32 @@ kegg_to_t2g = function(data_set){
run_GSEA = function(data_set, kingdom, pvalCOff,MAIN_PATH, kegg_list,mycobrowser_data){
set.seed(1234)
if(!(kingdom %in% c("GO_CC","GO_BP","GO_MF", "KEGG", "MYCOBROWSER_FC", "MYCOBROWSER_FU", "MYCOBROWSER_PR"))){print("WARNING, choose kingdom you want to analyze. Options:\"GO_CC\",\"GO_BP\",\"GO_MF\", \"KEGG\", \"MYCOBROWSER_FC\", \"MYCOBROWSER_FU\", \"MYCOBROWSER_PR\".")}
if(kingdom == "GO_CC"){
print("You are running GSEA on GeneOntology category \"Cellular Compartment\".")
load(paste(MAIN_PATH, "annotation_files", "cc_table_for_gmt.Rdata", sep="/")) # loads the data set: cc_table_for_gmt
return(GSEA(rev(data_set), minGSSize = 1, maxGSSize = (Inf), eps = 0, pvalueCutoff = pvalCOff, pAdjustMethod = "BH", TERM2GENE=cc_table_for_gmt, by="fgsea")) #cutoff for adjusted pvalue
return(GSEA(rev(data_set), minGSSize = 1, maxGSSize = (Inf), eps = 0, pvalueCutoff = pvalCOff, pAdjustMethod = "BH", TERM2GENE=cc_table_for_gmt, by="fgsea", seed=T)) #cutoff for adjusted pvalue
}
if(kingdom == "GO_BP"){
print("You are running GSEA on GeneOntology category \"Biological Process\".")
load(paste(MAIN_PATH, "annotation_files", "bp_table_for_gmt.Rdata", sep="/")) # loads the data set: bp_table_for_gmt
return(GSEA(rev(data_set), minGSSize = 1, maxGSSize = (Inf), eps = 0, pvalueCutoff = pvalCOff, pAdjustMethod = "BH", TERM2GENE=bp_table_for_gmt, by="fgsea"))
return(GSEA(rev(data_set), minGSSize = 1, maxGSSize = (Inf), eps = 0, pvalueCutoff = pvalCOff, pAdjustMethod = "BH", TERM2GENE=bp_table_for_gmt, by="fgsea", seed=T))
}
if(kingdom == "GO_MF"){
print("You are running GSEA on GeneOntology category \"Molecular Function\".")
load(paste(MAIN_PATH, "annotation_files", "mf_table_for_gmt.Rdata", sep="/")) # loads the data set: mf_table_for_gmt
return(GSEA(rev(data_set), minGSSize = 1, maxGSSize = (Inf), eps = 0, pvalueCutoff = pvalCOff, pAdjustMethod = "BH", TERM2GENE=mf_table_for_gmt, by="fgsea"))
return(GSEA(rev(data_set), minGSSize = 1, maxGSSize = (Inf), eps = 0, pvalueCutoff = pvalCOff, pAdjustMethod = "BH", TERM2GENE=mf_table_for_gmt, by="fgsea", seed=T))
}
if(kingdom == "KEGG"){
print("You are running GSEA on kegg pathways. The code runs for every Mtb organism ID on KEGG separately, so the output will be a list, not a table as when running the function for GO terms.")
print("You are running GSEA on kegg pathways. The code runs for every Mtb organism ID on KEGG separately, so the output will be a list, not a table as when running the function for GO terms. Some KEGG organism IDs have contain different protein identifiers, not recognized by the GSEA Function. This could be corrected, but because they are highly redundant, this is not done here. Most important are results from organism IDs: mtu & mtv, which both are Mtb H37Rv.")
GSEA_kegg_list = kegg_to_t2g(kegg_list)
GSEA_kegg_result_list <- list()
kegg_orgids <- names(GSEA_kegg_list)
for (org_id in kegg_orgids){
message(paste("Running GSEA on comparison_",org_id," right now. Computing..."))
runfun <- tryCatch(
res_GSEA <- GSEA(rev(data_set), minGSSize = 1, maxGSSize = (Inf), eps = 0, pvalueCutoff = pvalCOff, pAdjustMethod = "BH", TERM2GENE=GSEA_kegg_list[[org_id]], by="fgsea"),
res_GSEA <- GSEA(rev(data_set), minGSSize = 1, maxGSSize = (Inf), eps = 0, pvalueCutoff = pvalCOff, pAdjustMethod = "BH", TERM2GENE=GSEA_kegg_list[[org_id]], by="fgsea", seed=T),
error = function(e) {
message(paste("Running GSEA on the comparison: ",org_id," ended up in error...And Now for Something Completely Different."))
NA
......@@ -81,17 +82,17 @@ run_GSEA = function(data_set, kingdom, pvalCOff,MAIN_PATH, kegg_list,mycobrowser
if(kingdom == "MYCOBROWSER_FC"){
print("You are running GSEA on mycobrowser category \"Functional Category\".")
mycobrowser_FunCat <- unique(mycobrowser_data[,c("Functional_Category","UniProt_AC")])
return(GSEA(rev(data_set), minGSSize = 1, maxGSSize = (Inf), eps = 0, pvalueCutoff = pvalCOff, pAdjustMethod = "BH", TERM2GENE=mycobrowser_FunCat, by="fgsea"))
return(GSEA(rev(data_set), minGSSize = 1, maxGSSize = (Inf), eps = 0, pvalueCutoff = pvalCOff, pAdjustMethod = "BH", TERM2GENE=mycobrowser_FunCat, by="fgsea", seed=T))
}
if(kingdom == "MYCOBROWSER_FU"){
print("You are running GSEA on mycobrowser category \"Function\".")
mycobrowser_Function <- unique(mycobrowser_data[,c("Function","UniProt_AC")])
return(GSEA(rev(data_set), minGSSize = 1, maxGSSize = (Inf), eps = 0, pvalueCutoff = pvalCOff, pAdjustMethod = "BH", TERM2GENE=mycobrowser_Function, by="fgsea"))
return(GSEA(rev(data_set), minGSSize = 1, maxGSSize = (Inf), eps = 0, pvalueCutoff = pvalCOff, pAdjustMethod = "BH", TERM2GENE=mycobrowser_Function, by="fgsea", seed=T))
}
if(kingdom == "MYCOBROWSER_PR"){
print("You are running GSEA on mycobrowser category \"Product\".")
mycobrowser_Product <- unique(mycobrowser_data[,c("Product","UniProt_AC")])
return(GSEA(rev(data_set), minGSSize = 1, maxGSSize = (Inf), eps = 0, pvalueCutoff = pvalCOff, pAdjustMethod = "BH", TERM2GENE=mycobrowser_Product, by="fgsea"))
return(GSEA(rev(data_set), minGSSize = 1, maxGSSize = (Inf), eps = 0, pvalueCutoff = pvalCOff, pAdjustMethod = "BH", TERM2GENE=mycobrowser_Product, by="fgsea", seed=T))
}
}
......
......@@ -25,6 +25,7 @@ run_QC = function(input_data,qc_level){
input_data$norm_quantity <- 2^(input_data$normalised_intensity_log2)
# 4.3 CVs
print(qc_cvs(data = input_data, grouping = pep_grouping_key, condition = r_condition, intensity = pep_quantity, plot = TRUE, plot_style = "violin"))
print("Plot below shows CVs post normalization by 'pep_quantity'.")
print(qc_cvs(data = input_data, grouping = pep_grouping_key, condition = r_condition, intensity = norm_quantity, plot = TRUE, plot_style = "violin"))
# 4.4 PCA qc_pca( data = input_data, sample = r_file_name, grouping = pep_grouping_key, intensity = normalised_intensity_log2, condition = r_condition, digestion = NULL, plot_style = "scree" )
print(qc_pca(data = input_data, sample = r_file_name, grouping = pep_grouping_key, intensity = normalised_intensity_log2, condition = r_condition, components= c("PC1","PC2"), plot_style = "pca" ) )
......@@ -35,6 +36,7 @@ run_QC = function(input_data,qc_level){
print(qc_ids(data = input_data, sample = r_file_name, grouping = pep_grouping_key, intensity = norm_quantity, condition = r_condition, title = "Peptide identifications per sample", plot = TRUE ) )
# 4.7 intensity distribution
print(qc_intensity_distribution(data = input_data, sample = r_file_name, grouping = pep_grouping_key, intensity_log2 = transformed_quantity_log2 , plot_style = "boxplot") )
print("Plot below shows intensity distribution post normalization by 'pep_quantity'.")
print(qc_intensity_distribution(data = input_data, sample = r_file_name, grouping = pep_grouping_key, intensity_log2 = normalised_intensity_log2, plot_style = "boxplot"))
# 4.8 data completeness
print(qc_data_completeness(data = input_data, sample = r_file_name, grouping = pep_grouping_key, intensity = normalised_intensity_log2, plot = TRUE) )
......@@ -48,6 +50,7 @@ run_QC = function(input_data,qc_level){
input_data$norm_quantity <- 2^(input_data$normalised_intensity_log2)
# 4.3 CVs
print(qc_cvs(data = input_data, grouping = pg_protein_groups, condition = r_condition, intensity = pg_quantity, plot = TRUE, plot_style = "violin"))
print("Plot below shows CVs post normalization by 'pg_quantity'.")
print(qc_cvs(data = input_data, grouping = pg_protein_groups, condition = r_condition, intensity = norm_quantity, plot = TRUE, plot_style = "violin"))
# 4.4 PCA qc_pca( data = input_data, sample = r_file_name, grouping = pg_protein_groups, intensity = normalised_intensity_log2, condition = r_condition, digestion = NULL, plot_style = "scree" )
print(qc_pca(data = input_data, sample = r_file_name, grouping = pg_protein_groups, intensity = normalised_intensity_log2, condition = r_condition, components= c("PC1","PC2"), plot_style = "pca" ) )
......@@ -58,6 +61,7 @@ run_QC = function(input_data,qc_level){
print(qc_ids(data = input_data, sample = r_file_name, grouping = pg_protein_groups, intensity = norm_quantity, condition = r_condition, title = "Peptide identifications per sample", plot = TRUE ) )
# 4.7 intensity distribution
print(qc_intensity_distribution(data = input_data, sample = r_file_name, grouping = pg_protein_groups, intensity_log2 = transformed_quantity_log2 , plot_style = "boxplot") )
print("Plot below shows intensity distribution post normalization by 'pg_quantity'.")
print(qc_intensity_distribution(data = input_data, sample = r_file_name, grouping = pg_protein_groups, intensity_log2 = normalised_intensity_log2, plot_style = "boxplot") )
# 4.8 data completeness
print(qc_data_completeness(data = input_data, sample = r_file_name, grouping = pg_protein_groups, intensity = normalised_intensity_log2, plot = TRUE) )
......
......@@ -9,8 +9,8 @@ N2030,N0157,L1,HF,L1_HF,rpoB,S450L,na,1
N2031,N0157,L1,LF,L1_LF,rpoB,H445R,na,1
N1888,N0145,L2,HF,L2_HF,rpoB,S450L,na,1
N1890,N0145,L2,LF,L2_LF,rpoB,H445R,na,1
N2502,N1283,L4,HF,L4_HF,rpoB,S450L,na,1
N2850,N1283,L4,LF,L4_LF,rpoB,H445R,na,1
N2502,N1283,L4,LF,L4_LF,rpoB,H445R,na,1
N2850,N1283,L4,HF,L4_HF,rpoB,S450L,na,1
N0157,N0157,L1,WT,L1_WT,WT,WT,1,0
N0145,N0145,L2,WT,L2_WT,WT,WT,1,0
N1283,N1283,L4,WT,L4_WT,WT,WT,1,0
\ No newline at end of file
N1283,N1283,L4,WT,L4_WT,WT,WT,1,0
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment