Created
November 19, 2020 20:28
-
-
Save kstawiski/c3dfed76406f50d5cdf78bba07cf049e to your computer and use it in GitHub Desktop.
OmicSelector DeepLearning snippets
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Session setup: load OmicSelector with its deep-learning extension and fix
# the RNG seed so the hyperparameter search is reproducible.
library(OmicSelector)
set.seed(1)
if (!dir.exists("/OmicSelector/temp")) {
  dir.create("/OmicSelector/temp")
}
OmicSelector_load_extension("deeplearning")
library(data.table)
# use_session_with_seed(1)
# (An earlier hard-coded `selected_miRNAs` vector was kept commented out here;
# the signature vectors below supersede it.)

balanced <- FALSE
nazwa_konfiguracji <- "pancreatic_ourseq.csv"  # results/progress file for this run
t <- data.table::fread("mixed_train.csv")

# Candidate miRNA signatures; make.names() converts the dashes into the
# dot-separated column names used in the training data.
studyMirs <- make.names(c('hsa-miR-192-5p', 'hsa-let-7g-5p', 'hsa-let-7a-5p',
                          'hsa-let-7d-5p', 'hsa-miR-194-5p', 'hsa-miR-98-5p',
                          'hsa-let-7f-5p', 'hsa-miR-122-5p', 'hsa-miR-340-5p',
                          'hsa-miR-26b-5p'))
norm3_1 <- make.names(c('hsa-miR-17-5p', 'hsa-miR-92a-3p', 'hsa-miR-199a-3p'))
norm2 <- make.names(c('hsa-miR-28-3p', 'hsa-miR-92a-3p'))
normQ <- make.names('hsa-miR-23a-3p')
all_norm <- make.names(c('hsa-miR-17-5p', 'hsa-miR-92a-3p', 'hsa-miR-199a-3p',
                         'hsa-miR-23a-3p', 'hsa-miR-28-3p'))
# Use the study signature as the model input set.
selected_miRNAs <- studyMirs
# NOTE (translated from Polish): the autoencoder has a softmax on the
# deep-feature layer.

# Part 1: single-hidden-layer networks (layers 2 and 3 disabled).
hyperparameters_part1 <- expand.grid(
  layer1 = seq(2, 10, by = 1), layer2 = c(0), layer3 = c(0),
  activation_function_layer1 = c("relu", "sigmoid", "selu"),
  activation_function_layer2 = c("relu"),
  activation_function_layer3 = c("relu"),
  dropout_layer1 = c(0, 0.1), dropout_layer2 = c(0), dropout_layer3 = c(0),
  layer1_regularizer = c(TRUE, FALSE), layer2_regularizer = c(FALSE),
  layer3_regularizer = c(FALSE),
  optimizer = c("adam", "rmsprop", "sgd"), autoencoder = c(0),
  balanced = balanced,
  formula = as.character(OmicSelector_create_formula(selected_miRNAs))[3],
  scaled = c(TRUE),
  stringsAsFactors = FALSE
)
# Part 2: two- and three-hidden-layer networks (layer3 = 0 still allowed).
hyperparameters_part2 <- expand.grid(
  layer1 = seq(3, 11, by = 2), layer2 = c(seq(3, 11, by = 2)),
  layer3 = c(seq(0, 11, by = 2)),
  activation_function_layer1 = c("relu", "sigmoid", "selu"),
  activation_function_layer2 = c("relu", "sigmoid", "selu"),
  activation_function_layer3 = c("relu", "sigmoid", "selu"),
  dropout_layer1 = c(0, 0.1), dropout_layer2 = c(0), dropout_layer3 = c(0),
  layer1_regularizer = c(TRUE, FALSE), layer2_regularizer = c(FALSE),
  layer3_regularizer = c(FALSE),
  optimizer = c("adam", "rmsprop", "sgd"), autoencoder = c(0),
  balanced = balanced,
  formula = as.character(OmicSelector_create_formula(selected_miRNAs))[3],
  scaled = c(TRUE, FALSE),
  stringsAsFactors = FALSE
)
hyperparameters <- rbind(hyperparameters_part1, hyperparameters_part2)
head(hyperparameters)
ile <- nrow(hyperparameters)  # total number of configurations to evaluate
ile_w_batchu <- 250           # configurations processed per batch
cat(paste0("\nHow many to check: ", ile))
# Resume support: if the results file already exists, continue from the row
# after the last hyperparameter that was processed.
if (!file.exists(nazwa_konfiguracji)) {
  batch_start <- 1
} else {
  tempres <- data.table::fread(nazwa_konfiguracji)
  # model_id looks like "<hyperparameter row>-<...>"; recover the last
  # processed row. Base strsplit() replaces stringr::str_split(), which the
  # original used without stringr being attached.
  last <- as.numeric(strsplit(tempres$model_id[nrow(tempres)], "-")[[1]][1])
  if (last >= ile) { stop(paste0(last, " - Task already finished.")) }
  batch_start <- last + 1
}
# Number of batches still to run. The original expression
# ceiling(nrow(h)/batch - batch_start/batch) could evaluate to 0, after which
# `for (i in 1:0)` would iterate twice with an invalid range; the count below
# is exact and seq_len() guards the zero case.
ile_batchy <- ceiling((ile - batch_start + 1) / ile_w_batchu)
cat(paste0("\nBatch start: ", batch_start))
cat(paste0("\nHow many in batch: ", ile_w_batchu))
for (i in seq_len(ile_batchy)) {
  batch_end <- min(batch_start + (ile_w_batchu - 1), ile)  # clamp final batch
  cat(paste0("\n\nProcessing batch no ", i, " of ", ile_batchy,
             " (", batch_start, "-", batch_end, ")"))
  OmicSelector_deep_learning(selected_miRNAs = selected_miRNAs, wd = getwd(),
                             save_threshold_trainacc = 0.75,
                             save_threshold_testacc = 0.7,
                             hyperparameters = hyperparameters,
                             SMOTE = balanced, start = batch_start,
                             end = batch_end, output_file = nazwa_konfiguracji,
                             keras_threads = 30, keras_epoch = 2000,
                             keras_patience = 100, automatic_weight = FALSE)
  batch_start <- batch_end + 1
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# remotes::install_github("kstawiski/OmicSelector", ref = "dev")
setwd("~/snorlax/DOKTORAT/transfer_learning/pancreatic/transfer_1")
library(OmicSelector)
library(dplyr)
library(ggplot2)
library("scatterplot3d")
OmicSelector_load_extension("deeplearning")

# Load deep-learning results for both tasks and tag them.
tcga_models <- data.table::fread("../tcga/pancreatic_tcga.csv")
ourseq_models <- data.table::fread("../circulating_our/pancreatic_ourseq.csv")
tcga_models$task <- "TCGA"
ourseq_models$task <- "Circulating miRNA-seq"

# Derive the number of hidden layers from the layer-size columns (0 = absent).
tcga_models$no_layers <- 1
tcga_models$no_layers[tcga_models$layer2 != 0] <- 2
tcga_models$no_layers[tcga_models$layer3 != 0] <- 3
ourseq_models$no_layers <- 1
# BUG FIX: the original indexed the two assignments below with
# tcga_models$layer2 / tcga_models$layer3 (copy-paste error), mislabelling
# the layer counts of the ourseq models.
ourseq_models$no_layers[ourseq_models$layer2 != 0] <- 2
ourseq_models$no_layers[ourseq_models$layer3 != 0] <- 3

# Row index into the shared hyperparameter grid; both result files were
# generated from the same grid, so this id pairs models across tasks.
tcga_models$hyperparameter_id <- 1:nrow(tcga_models)
ourseq_models$hyperparameter_id <- 1:nrow(ourseq_models)
# Metaindex = mean accuracy over the training, test and validation splits.
tcga_models$metaindex <- (tcga_models$training_Accuracy + tcga_models$test_Accuracy + tcga_models$valid_Accuracy) / 3
hist(tcga_models$metaindex)
summary(tcga_models$metaindex)
tcga_models <- arrange(tcga_models, desc(metaindex))
top_tcga_models <- tcga_models[1:100, ]
plot(top_tcga_models$training_Accuracy, top_tcga_models$valid_Accuracy)
plot(top_tcga_models$test_Accuracy, top_tcga_models$valid_Accuracy)
plot(top_tcga_models$training_Accuracy, top_tcga_models$test_Accuracy)
# Typo fix: the x-axis label previously read "Trainign".
scatterplot3d(top_tcga_models$training_Accuracy, top_tcga_models$test_Accuracy, top_tcga_models$valid_Accuracy, angle = 45, pch = 16,
              grid = TRUE, box = FALSE, xlab = "Training Accuracy", ylab = "Testing Accuracy", zlab = "Validation Accuracy")

# Same ranking and plots for the circulating miRNA-seq models.
ourseq_models$metaindex <- (ourseq_models$training_Accuracy + ourseq_models$test_Accuracy + ourseq_models$valid_Accuracy) / 3
hist(ourseq_models$metaindex)
summary(ourseq_models$metaindex)
ourseq_models <- arrange(ourseq_models, desc(metaindex))
top_ourseq_models <- ourseq_models[1:100, ]
plot(top_ourseq_models$training_Accuracy, top_ourseq_models$valid_Accuracy)
plot(top_ourseq_models$test_Accuracy, top_ourseq_models$valid_Accuracy)
plot(top_ourseq_models$training_Accuracy, top_ourseq_models$test_Accuracy)
scatterplot3d(top_ourseq_models$training_Accuracy, top_ourseq_models$test_Accuracy, top_ourseq_models$valid_Accuracy, angle = 45, pch = 16,
              grid = TRUE, box = FALSE, xlab = "Training Accuracy", ylab = "Testing Accuracy", zlab = "Validation Accuracy")
# Which task was easier? Compare metaindex distributions over ALL models.
temp <- rbind(tcga_models, ourseq_models)
boxplot(temp$metaindex ~ temp$task)
p <- ggplot(temp, aes(x = task, y = metaindex)) +
  geom_violin(trim = FALSE, fill = "gray") +
  labs(title = "Which task was easier?", x = "Task",
       y = "Metaindex (avarage accuracy across datasets)") +
  geom_boxplot(width = 0.1) +
  theme_classic()
p
t.test(temp$metaindex ~ temp$task)
wilcox.test(temp$metaindex ~ temp$task)

# Same comparison restricted to the top-100 models of each task.
temp <- rbind(top_tcga_models, top_ourseq_models)
boxplot(temp$metaindex ~ temp$task)
p <- ggplot(temp, aes(x = task, y = metaindex)) +
  geom_violin(trim = FALSE, fill = "gray") +
  labs(title = "Which task was easier?", x = "Task",
       y = "Metaindex (avarage accuracy across datasets)") +
  geom_boxplot(width = 0.1) +
  theme_classic()
p
t.test(temp$metaindex ~ temp$task)
wilcox.test(temp$metaindex ~ temp$task)
# Are there significant differences in the structure of the top-100 models
# between tasks?
temp <- rbind(top_tcga_models, top_ourseq_models)

# Number of hidden layers, recomputed from the layer-size columns.
temp$no_layers <- 1
temp$no_layers[temp$layer2 != 0] <- 2
temp$no_layers[temp$layer3 != 0] <- 3
hist(temp$no_layers)
table(temp$no_layers, temp$task)
chisq.test(temp$no_layers, temp$task)
table(tcga_models$no_layers)
# Reference proportion of 3-layer networks in the full grid.
# NOTE(review): 81000 is hard-coded — verify it against the actual grid size.
layer3_ref <- 81000 / nrow(tcga_models)
prop.test(85, 100, p = layer3_ref)
prop.test(80, 100, p = layer3_ref)
OmicSelector_correlation_plot(temp$no_layers, temp$metaindex, "Number of layers", "Metaindex", "TCGA: Structure vs. metaindex", yx = FALSE, metoda = "spearman", gdzie_legenda = "bottomleft")

## Layer sizes vs. performance, per task.
OmicSelector_correlation_plot(tcga_models$layer1, tcga_models$metaindex, "Layer 1", "Metaindex", "TCGA: Structure vs. metaindex", yx = FALSE, metoda = "spearman", gdzie_legenda = "bottomleft")
OmicSelector_correlation_plot(tcga_models$layer2, tcga_models$metaindex, "Layer 2", "Metaindex", "TCGA: Structure vs. metaindex", yx = FALSE, metoda = "spearman", gdzie_legenda = "bottomleft")
OmicSelector_correlation_plot(tcga_models$layer3, tcga_models$metaindex, "Layer 3", "Metaindex", "TCGA: Structure vs. metaindex", yx = FALSE, metoda = "spearman", gdzie_legenda = "bottomleft")
boxplot(tcga_models$metaindex ~ tcga_models$layer1)
boxplot(tcga_models$metaindex ~ tcga_models$layer2)
boxplot(tcga_models$metaindex ~ tcga_models$layer3)
OmicSelector_correlation_plot(ourseq_models$layer1, ourseq_models$metaindex, "Layer 1", "Metaindex", "Circulating miRNA-seq: Structure vs. metaindex", yx = FALSE, metoda = "spearman", gdzie_legenda = "bottomleft")
OmicSelector_correlation_plot(ourseq_models$layer2, ourseq_models$metaindex, "Layer 2", "Metaindex", "Circulating miRNA-seq: Structure vs. metaindex", yx = FALSE, metoda = "spearman", gdzie_legenda = "bottomleft")
OmicSelector_correlation_plot(ourseq_models$layer3, ourseq_models$metaindex, "Layer 3", "Metaindex", "Circulating miRNA-seq: Structure vs. metaindex", yx = FALSE, metoda = "spearman", gdzie_legenda = "bottomleft")
boxplot(ourseq_models$metaindex ~ ourseq_models$layer1)
boxplot(ourseq_models$metaindex ~ ourseq_models$layer2)
boxplot(ourseq_models$metaindex ~ ourseq_models$layer3)

# Do layer sizes of the top-100 models differ between tasks?
boxplot(temp$layer1 ~ temp$task)
t.test(temp$layer1 ~ temp$task)
boxplot(temp$layer2 ~ temp$task)
t.test(temp$layer2 ~ temp$task)
boxplot(temp$layer3 ~ temp$task)
t.test(temp$layer3 ~ temp$task)
## Activation functions: distribution among top models, compared across tasks.
table(temp$activation_function_layer1, temp$task)
chisq.test(temp$activation_function_layer1, temp$task)
table(temp$activation_function_layer2, temp$task)
chisq.test(temp$activation_function_layer2, temp$task)
table(temp$activation_function_layer3, temp$task)
chisq.test(temp$activation_function_layer3, temp$task)

## Dropout: effect on metaindex within each task, distribution across tasks.
boxplot(tcga_models$metaindex ~ tcga_models$dropout_layer1)
t.test(tcga_models$metaindex ~ tcga_models$dropout_layer1)
boxplot(ourseq_models$metaindex ~ ourseq_models$dropout_layer1)
t.test(ourseq_models$metaindex ~ ourseq_models$dropout_layer1)
table(temp$dropout_layer1, temp$task)
chisq.test(temp$dropout_layer1, temp$task)

## Regularization.
boxplot(tcga_models$metaindex ~ tcga_models$layer1_regularizer)
t.test(tcga_models$metaindex ~ tcga_models$layer1_regularizer)
boxplot(ourseq_models$metaindex ~ ourseq_models$layer1_regularizer)
t.test(ourseq_models$metaindex ~ ourseq_models$layer1_regularizer)
table(temp$task, temp$layer1_regularizer)
chisq.test(temp$task, temp$layer1_regularizer)

## Optimizer: one-way ANOVA within each task, chi-square across tasks.
boxplot(tcga_models$metaindex ~ tcga_models$optimizer)
a <- aov(tcga_models$metaindex ~ tcga_models$optimizer)
summary(a)
TukeyHSD(a)
boxplot(ourseq_models$metaindex ~ ourseq_models$optimizer)
psych::describeBy(ourseq_models$metaindex, ourseq_models$optimizer, digits = 4, mat = TRUE)
a <- aov(ourseq_models$metaindex ~ ourseq_models$optimizer)
summary(a)
TukeyHSD(a)
table(temp$task, temp$optimizer)
chisq.test(temp$task, temp$optimizer)

## Scaling.
boxplot(tcga_models$metaindex ~ tcga_models$scaled)
t.test(tcga_models$metaindex ~ tcga_models$scaled)
boxplot(ourseq_models$metaindex ~ ourseq_models$scaled)
t.test(ourseq_models$metaindex ~ ourseq_models$scaled)
table(temp$task, temp$scaled)
chisq.test(temp$task, temp$scaled)
# Paired structure analysis: align each top TCGA model with the ourseq model
# trained from the same hyperparameter row.
models_paired <- top_tcga_models
to_join <- ourseq_models[match(top_tcga_models$hyperparameter_id, ourseq_models$hyperparameter_id), ] %>%
  dplyr::rename_all(function(x) paste0("circ_", x))
models_paired <- cbind(models_paired, to_join)
suppressMessages(library(PairedData))
suppressMessages(library(profileR))
# Profile plot of the first 50 pairs.
pd <- paired(as.numeric(models_paired$metaindex)[1:50], as.numeric(models_paired$circ_metaindex)[1:50])
colnames(pd) <- c("Metaindex TCGA", "Metaindex circulating miRNA-seq")
plot2 <- OmicSelector_profileplot(pd, Method.id = models_paired$name[1:50], standardize = FALSE)
plot2
# Transfer-network performance analysis: combine the four result files,
# one per transfer strategy.
temp1 <- data.table::fread("transfered_networks_structure.csv")
temp1$transfer_type <- "structure"
temp2 <- data.table::fread("transfered_networks_layer1.csv")
temp2$transfer_type <- "layer1"
temp3 <- data.table::fread("transfered_networks_layer2.csv")
temp3$transfer_type <- "layer2"
temp4 <- data.table::fread("transfered_networks_layer3.csv")
temp4$transfer_type <- "layer3"
transfer_performance <- rbind(temp1, temp2, temp3, temp4)

# meta = "<transfer_type>_<type>"; drop the crude (not retrained) rows of the
# layer-freezing variants, keeping only structure_crude as the baseline.
transfer_performance$meta <- paste0(transfer_performance$transfer_type, "_", transfer_performance$type)
transfer_performance <- transfer_performance[!(transfer_performance$meta %in% c("layer1_crude", "layer2_crude", "layer3_crude")), ]
transfer_performance$meta <- factor(transfer_performance$meta,
                                    levels = c("structure_crude", "layer1_recalibrated",
                                               "layer2_recalibrated", "layer3_recalibrated",
                                               "structure_recalibrated"))
transfer_performance$meta

# Metaindex of the transferred networks on the new (circulating) datasets.
transfer_performance$metaindex <- (transfer_performance$new_training_Accuracy + transfer_performance$new_test_Accuracy + transfer_performance$new_valid_Accuracy) / 3
boxplot(transfer_performance$metaindex ~ transfer_performance$meta)
oneway.test(transfer_performance$metaindex ~ transfer_performance$meta)
a <- aov(transfer_performance$metaindex ~ transfer_performance$meta)
TukeyHSD(a)
psych::describeBy(transfer_performance$metaindex, transfer_performance$meta, mat = TRUE, digits = 4)
# Join each transferred network with its non-transferred ourseq counterpart,
# matching on the full hyperparameter signature.
merged <- merge(transfer_performance, ourseq_models,
                by = c('layer1','layer2','layer3','activation_function_layer1','activation_function_layer2','activation_function_layer3','dropout_layer1','dropout_layer2','dropout_layer3','layer1_regularizer','layer2_regularizer','layer3_regularizer','optimizer','autoencoder','balanced', 'scaled'),
                suffixes = c("_TCGA", "_circ"), all.x = TRUE)
colnames(merged)
merged$metaindex_circ
# BUG FIX: the original computed this from transfer_performance's columns,
# but merge() re-sorts (and may duplicate/drop) rows, so those values were
# misaligned with `merged`'s rows. Compute from merged's own columns.
merged$metaindex <- (merged$new_training_Accuracy + merged$new_test_Accuracy + merged$new_valid_Accuracy) / 3
# F1-based index over the test/validation splits only.
merged$metaindex11 <- (merged$new_test_F1 + merged$new_valid_F1) / 2
merged$metaindex11_circ <- (merged$test_F1_circ + merged$valid_F1_circ) / 2
# Transferred vs. non-transferred performance, overall and for the
# full-structure recalibration variant.
boxplot(merged$metaindex_circ, merged$metaindex)
t.test(merged$metaindex_circ, merged$metaindex)
boxplot(merged$metaindex_circ[merged$meta == "structure_recalibrated"], merged$metaindex[merged$meta == "structure_recalibrated"])
t.test(merged$metaindex_circ[merged$meta == "structure_recalibrated"], merged$metaindex[merged$meta == "structure_recalibrated"])
# Benefit of transfer learning: change in metaindex after retraining,
# relative to the non-transferred ourseq model with identical hyperparameters.
merged$delta <- merged$metaindex - merged$metaindex_circ
a <- aov(merged$delta ~ merged$meta)
summary(a)
boxplot(merged$delta ~ merged$meta)
p <- ggplot(merged, aes(x = meta, y = delta)) +
  geom_violin(trim = FALSE, fill = "gray") +
  labs(title = "Benefit from transfering the weights and structure", x = "Type of transfer learning", y = "Difference in metaindex (avarage accuracy across datasets)") +
  geom_boxplot(width = 0.1) +
  geom_hline(aes(yintercept = 0), linetype = "dashed") +
  ylim(-0.3, 0.3) +
  theme_classic()
p
hist(merged$delta)
which(merged$delta > 0)
merged$benefit <- merged$delta > 0

# Mixed model: transfer type and structure as fixed effects; a random
# intercept groups the transfer variants derived from the same base network.
library(lme4)
library(lmerTest)
model <- lmer(delta ~ meta + layer1 + layer2 + layer3 + activation_function_layer1 + activation_function_layer2 + activation_function_layer3 + dropout_layer1 + layer1_regularizer + optimizer + (1 | merged$hyperparameter_id),
              data = merged,
              REML = TRUE)
anova(model)
confint(model)
summary(model)
library(tidyverse)
broom.mixed::tidy(model, conf.int = TRUE)
if (require("brms") && require("dotwhisker") && require("ggplot2")) {
  L <- load(system.file("extdata", "brms_example.rda", package = "broom.mixed"))
  gg0 <- (broom.mixed::tidy(model)
    ## disambiguate random-effect terms before plotting
    %>% mutate(term = ifelse(grepl("sd__(Int", term, fixed = TRUE),
                             paste(group, term, sep = "."),
                             term))
    %>% dwplot
  )
  gg0 + geom_vline(xintercept = 0, lty = 2)
}
OmicSelector_correlation_plot(merged$metaindex, merged$delta, "Transfered network metaindex", "Difference in metaindex after training", "Comparison", yx = FALSE)
OmicSelector_correlation_plot(merged$metaindex_circ, merged$delta, "Untransfered network metaindex", "Difference in metaindex after training", "Comparison", yx = FALSE)
OmicSelector_correlation_plot(merged$metaindex_TCGA, merged$delta, "Initial TCGA network metaindex", "Difference in metaindex after training", "Comparison", yx = FALSE)
| # linearMod <- glm(benefit ~ as.factor(transfer_type) + layer1 + layer2 + layer3 + as.factor(activation_function_layer1) + as.factor(activation_function_layer2) | |
| # + as.factor(activation_function_layer3) + as.numeric(as.factor(dropout_layer1)) + as.numeric(as.factor(layer1_regularizer)) + as.factor(optimizer), | |
| # data=merged) | |
| # summary(linearMod) | |
| # library(caret) | |
| # library(party) | |
| # library(partykit) | |
| # library(rattle) | |
| # # Argumenty: paste0(colnames(transfer_performance), collapse = " + ") | |
| # merged$benefit = as.factor(ifelse(as.factor(as.numeric(as.factor(merged$benefit))) == 1, "No", "Benefit")) | |
| # train_control<- trainControl(method="repeatedcv", number=10, repeats = 10, classProbs=TRUE) | |
| # model = caret::train(benefit ~ transfer_type + layer1 + layer2 + layer3 + activation_function_layer1 + activation_function_layer2 + activation_function_layer3 + dropout_layer1 + dropout_layer2 + dropout_layer3 + layer1_regularizer + layer2_regularizer + layer3_regularizer + optimizer + autoencoder + balanced + scaled, | |
| # method = "rpart2", data = merged, trControl=train_control, tuneLength=20, maxdepth = 2) | |
| # model | |
| # fancyRpartPlot(model$finalModel) | |
| # plot(model$finalModel) | |
# Final approach (translated from Polish): rank the networks AFTER transfer
# learning by metaindex and ask whether the top 100 benefited.
merged2 <- merged[order(merged$metaindex, decreasing = TRUE)]
# BUG FIX: the original tabulated merged$benefit[1:100], i.e. the first 100
# rows of the UNSORTED table; the top-100 ranking lives in merged2.
table(merged2$benefit[1:100])
merged2 <- merged2[1:100, ]
boxplot(merged2$metaindex_circ, merged2$metaindex)
t.test(merged2$metaindex_circ, merged2$metaindex, paired = TRUE)
# Normality checks for the deltas (full set and top-100 subset).
hist(merged$delta)
shapiro.test(merged$delta)
hist(merged2$delta)
shapiro.test(merged2$delta)
# PairedData object (NOTE(review): this assignment shadows base::t() in the
# global environment — consider renaming).
t <- paired(merged2$metaindex_circ, merged2$metaindex)
a <- aov(merged2$delta ~ merged2$meta)
summary(a)
boxplot(merged2$delta ~ merged2$meta, ylab = "Difference in metaindex (mean accuracy on training, testing and validation datasets)", xlab = "Method of transfer")
TukeyHSD(a)
# Violin plot of the transfer benefit for the top-100 networks.
p <- ggplot(merged2, aes(x = meta, y = delta)) +
  geom_violin(trim = FALSE, fill = "gray") +
  labs(title = "Benefit from transfering the weights and structure", x = "Type of transfer learning", y = "Difference in metaindex (avarage accuracy across datasets)") +
  geom_boxplot(width = 0.1) +
  geom_hline(aes(yintercept = 0), linetype = "dashed") +
  ylim(-0.3, 0.3) +
  theme_classic()
p
# Mixed model on the top-100 subset; `-1` removes the intercept so every
# transfer type gets its own coefficient.
model <- lmer(delta ~ -1 + meta + layer1 + layer2 + layer3 + activation_function_layer1 + activation_function_layer2 + activation_function_layer3 + dropout_layer1 + layer1_regularizer + optimizer + (1 | hyperparameter_id),
              data = merged2)
anova(model)
confint(model)
summary(model)
library(tidyverse)
broom.mixed::tidy(model, conf.int = TRUE)
if (require("brms") && require("dotwhisker") && require("ggplot2")) {
  L <- load(system.file("extdata", "brms_example.rda", package = "broom.mixed"))
  gg0 <- (broom.mixed::tidy(model)
    ## disambiguate random-effect terms before plotting
    %>% mutate(term = ifelse(grepl("sd__(Int", term, fixed = TRUE),
                             paste(group, term, sep = "."),
                             term))
    %>% dwplot
  )
  gg0 + geom_vline(xintercept = 0, lty = 2) +
    theme_classic()
}
OmicSelector_correlation_plot(merged2$metaindex, merged2$delta, "Transfered network metaindex", "Difference in metaindex after training", "Comparison", yx = FALSE)
OmicSelector_correlation_plot(merged2$metaindex_circ, merged2$delta, "Untransfered network metaindex", "Difference in metaindex after training", "Comparison", yx = FALSE)
OmicSelector_correlation_plot(merged2$metaindex_TCGA, merged2$delta, "Initial TCGA network metaindex", "Difference in metaindex after training", "Comparison", yx = FALSE)
# Best network analysis: display the best-scoring model(s) from each pool
# (all rows tied at the maximum are returned).
best_recalibrated <- merged[which(merged$metaindex == max(merged$metaindex)), ]
best_circ <- ourseq_models[which(ourseq_models$metaindex == max(ourseq_models$metaindex)), ]
best_recalibrated
best_circ
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# remotes::install_github("kstawiski/OmicSelector", ref = "dev")
# Transfer-learning driver: pick the top-100 TCGA networks, which the loop
# below re-trains on the circulating miRNA-seq data.
setwd("~/snorlax/DOKTORAT/transfer_learning/pancreatic/transfer_1")
library(OmicSelector)
library(dplyr)
library("scatterplot3d")
OmicSelector_load_extension("deeplearning")

# Get TCGA results and rank by metaindex (mean accuracy over the 3 splits).
tcga_models <- data.table::fread("../tcga/pancreatic_tcga.csv")
ourseq_models <- data.table::fread("../circulating_our/pancreatic_ourseq.csv")
tcga_models$metaindex <- (tcga_models$training_Accuracy + tcga_models$test_Accuracy + tcga_models$valid_Accuracy) / 3
hist(tcga_models$metaindex)
summary(tcga_models$metaindex)
tcga_models <- arrange(tcga_models, desc(metaindex))
top_tcga_models <- tcga_models[1:100, ]
plot(top_tcga_models$training_Accuracy, top_tcga_models$valid_Accuracy)
plot(top_tcga_models$test_Accuracy, top_tcga_models$valid_Accuracy)
plot(top_tcga_models$training_Accuracy, top_tcga_models$test_Accuracy)
# Typo fix: the x-axis label previously read "Trainign".
scatterplot3d(top_tcga_models$training_Accuracy, top_tcga_models$test_Accuracy, top_tcga_models$valid_Accuracy, angle = 45, pch = 16,
              grid = TRUE, box = FALSE, xlab = "Training Accuracy", ylab = "Testing Accuracy", zlab = "Validation Accuracy")
data.table::fwrite(top_tcga_models, "top_tcga_models.csv")
# Re-train the top-100 TCGA networks on the circulating miRNA-seq data using
# four strategies: structure only (no frozen layers), and freezing layer 1,
# layers 1-2, and layers 1-3.
transfered_networks_structure <- data.frame()
transfered_networks_layer1 <- data.frame()
transfered_networks_layer2 <- data.frame()
transfered_networks_layer3 <- data.frame()

# The target datasets never change between iterations; read them once instead
# of 12 freads per model as in the original.
circ_train <- data.table::fread("../circulating_our/mixed_train.csv")
circ_test <- data.table::fread("../circulating_our/mixed_test.csv")
circ_valid <- data.table::fread("../circulating_our/mixed_valid.csv")
if (!dir.exists("transfer_models")) { dir.create("transfer_models") }

for (i in 1:100) {
  tcga_model_modelfile <- paste0("../tcga/models/pancreatic_tcga/pancreatic_tcga_", top_tcga_models$model_id[i], ".zip")
  # The original evaluated file.exists() without using the result (no
  # autoprint inside a loop); surface missing files explicitly instead.
  if (!file.exists(tcga_model_modelfile)) {
    warning("Model file not found: ", tcga_model_modelfile, call. = FALSE)
  }
  # freeze_from/freeze_to select which layers keep their TCGA weights.
  transfer_one <- function(freeze_from, freeze_to) {
    OmicSelector_transfer_learning_neural_network(model_path = tcga_model_modelfile,
                                                  freeze_from = freeze_from, freeze_to = freeze_to,
                                                  train = circ_train,
                                                  test = circ_test,
                                                  valid = circ_valid)
  }
  # BUG FIX: rbind.fill() comes from plyr, which is not attached in this
  # script; dplyr::bind_rows() performs the same NA-filling row bind and
  # dplyr is already loaded.
  try({ transfered_networks_structure <- dplyr::bind_rows(transfered_networks_structure, transfer_one(0, 0)) })
  try({ transfered_networks_layer1 <- dplyr::bind_rows(transfered_networks_layer1, transfer_one(1, 1)) })
  try({ transfered_networks_layer2 <- dplyr::bind_rows(transfered_networks_layer2, transfer_one(1, 2)) })
  try({ transfered_networks_layer3 <- dplyr::bind_rows(transfered_networks_layer3, transfer_one(1, 3)) })
  # Archive the model zips dropped into the working directory.
  file.copy(list.files(".", "\\.zip$"), "transfer_models/")
  file.remove(list.files(".", "\\.zip$"))
}
data.table::fwrite(transfered_networks_structure, "transfered_networks_structure.csv")
data.table::fwrite(transfered_networks_layer1, "transfered_networks_layer1.csv")
data.table::fwrite(transfered_networks_layer2, "transfered_networks_layer2.csv")
data.table::fwrite(transfered_networks_layer3, "transfered_networks_layer3.csv")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
# DeepLearning extension preamble: attach dependencies quietly.
# The attach order is preserved from the original because it determines
# which package masks which function.
for (pkg in c("plyr", "dplyr", "caret", "epiDisplay", "pROC", "ggplot2",
              "DMwR", "ROSE", "gridExtra", "gplots", "devtools", "stringr",
              "data.table", "tidyverse")) {
  suppressMessages(library(pkg, character.only = TRUE))
}
library(OmicSelector)
suppressMessages(library(funModeling))
message("OmicSelector: DeepLearning extension loaded.")
# Working directories for saved models and intermediate files.
if (!dir.exists("models")) { dir.create("models") }
if (!dir.exists("temp")) { dir.create("temp") }
# Build and compile a keras sequential binary classifier from row `i` of the
# hyperparameters grid.
#
# Expected hyperparameter columns (by position):
#   1-3  layer sizes (0 = layer absent), 4-6 activation functions,
#   7-9  dropout rates (0 = no dropout), 10-12 L1-regularizer flags,
#   13   optimizer name.
#
# Returns the compiled model. NOTE(review): the default `how_many_features`
# reads the global `x_train_scale`; callers should pass the feature count
# explicitly.
OmicSelector_keras_create_model <- function(i, hyperparameters, how_many_features = ncol(x_train_scale)) {
  library(keras)
  # keras models are modified in place by the layer_*() calls, so no
  # reassignment of `tempmodel` is needed.
  tempmodel <- keras_model_sequential()
  # Layer 1 (input layer), optionally L1-regularized.
  if (isTRUE(hyperparameters[i, 10])) {
    layer_dense(tempmodel, units = hyperparameters[i, 1], kernel_regularizer = regularizer_l1(l = 0.001),
                activation = hyperparameters[i, 4], input_shape = c(how_many_features))
  } else {
    layer_dense(tempmodel, units = hyperparameters[i, 1], activation = hyperparameters[i, 4],
                input_shape = c(how_many_features))
  }
  if (hyperparameters[i, 7] > 0) { layer_dropout(tempmodel, rate = hyperparameters[i, 7]) }
  # Optional layer 2.
  if (hyperparameters[i, 2] > 0) {
    if (isTRUE(hyperparameters[i, 11])) {
      layer_dense(tempmodel, units = hyperparameters[i, 2], activation = hyperparameters[i, 5],
                  kernel_regularizer = regularizer_l1(l = 0.001))
    } else {
      layer_dense(tempmodel, units = hyperparameters[i, 2], activation = hyperparameters[i, 5])
    }
  }
  if (hyperparameters[i, 2] > 0 && hyperparameters[i, 8] > 0) { layer_dropout(tempmodel, rate = hyperparameters[i, 8]) }
  # Optional layer 3.
  if (hyperparameters[i, 3] > 0) {
    if (isTRUE(hyperparameters[i, 12])) {
      layer_dense(tempmodel, units = hyperparameters[i, 3], activation = hyperparameters[i, 6],
                  kernel_regularizer = regularizer_l1(l = 0.001))
    } else {
      layer_dense(tempmodel, units = hyperparameters[i, 3], activation = hyperparameters[i, 6])
    }
  }
  # BUG FIX: the original called layer_dropout(rate = ...) WITHOUT the model
  # object here, so layer-3 dropout was silently never attached to the model.
  if (hyperparameters[i, 3] > 0 && hyperparameters[i, 9] > 0) { layer_dropout(tempmodel, rate = hyperparameters[i, 9]) }
  # Two-unit softmax head trained with binary cross-entropy.
  layer_dense(tempmodel, units = 2, activation = 'softmax')
  print(tempmodel)
  dnn_class_model <- keras::compile(tempmodel, optimizer = hyperparameters[i, 13],
                                    loss = 'binary_crossentropy',
                                    metrics = 'accuracy')
}
| # The autoencoder has a softmax on the deep-feature layer. | |
| # If the `autoencoder` variable is > 0, a 3-layer autoencoder is built; if it is < 0, a 3-layer autoencoder with regularization (a sparse autoencoder) is built. | |
| OmicSelector_deep_learning = function(selected_miRNAs = ".", wd = getwd(), | |
| SMOTE = F, keras_batch_size = 64, clean_temp_files = T, | |
| save_threshold_trainacc = 0.85, save_threshold_testacc = 0.8, keras_epochae = 5000, | |
| keras_epoch = 2000, keras_patience = 50, | |
| hyperparameters = expand.grid(layer1 = seq(3,11, by = 2), layer2 = c(0,seq(3,11, by = 2)), layer3 = c(0,seq(3,11, by = 2)), | |
| activation_function_layer1 = c("relu","sigmoid"), activation_function_layer2 = c("relu","sigmoid"), activation_function_layer3 = c("relu","sigmoid"), | |
| dropout_layer1 = c(0, 0.1), dropout_layer2 = c(0, 0.1), dropout_layer3 = c(0), | |
| layer1_regularizer = c(T,F), layer2_regularizer = c(T,F), layer3_regularizer = c(T,F), | |
| optimizer = c("adam","rmsprop","sgd"), autoencoder = c(0,7,-7), balanced = SMOTE, formula = as.character(OmicSelector_create_formula(selected_miRNAs))[3], scaled = c(T,F), | |
| stringsAsFactors = F), add_features_to_predictions = F, | |
| keras_threads = ceiling(parallel::detectCores()/2), start = 1, end = nrow(hyperparameters), output_file = "deeplearning_results.csv", save_all_vars = F, automatic_weight = F) | |
| { | |
| # library(OmicSelector) | |
| # OmicSelector_load_extension("deeplearning") | |
| codename = sub(pattern = "(.*)\\..*$", replacement = "\\1", basename(output_file)) | |
| #options(warn=-1) | |
| oldwd = getwd() | |
| setwd = setwd(wd) | |
| set.seed(1) | |
| if(dir.exists("/OmicSelector")) { | |
| if(!dir.exists("/OmicSelector/temp")) { dir.create("/OmicSelector/temp") } | |
| temp_dir = "/OmicSelector/temp/" | |
| } else { | |
| temp_dir = tempdir() | |
| } | |
| if(!dir.exists("temp")) { dir.create("temp") } | |
| if(!dir.exists("models")) { dir.create("models") } | |
| options(bitmapType = 'cairo', device = 'png') | |
| library(plyr) | |
| library(dplyr) | |
| library(keras) | |
| library(foreach) | |
| library(doParallel) | |
| #library(doSNOW) | |
| library(data.table) | |
| fwrite(hyperparameters, paste0("hyperparameters_",output_file)) | |
| dane = OmicSelector_load_datamix(wd = wd, replace_smote = F); train = dane[[1]]; test = dane[[2]]; valid = dane[[3]]; train_smoted = dane[[4]]; trainx = dane[[5]]; trainx_smoted = dane[[6]] | |
| if (SMOTE == T) { train = train_smoted } | |
| message("Checkpoint passed: load lib and data") | |
| #cores=detectCores() | |
| cat(paste0("\nTemp dir: ", temp_dir, "\n")) | |
| cat("\nStarting preparing cluster..\n") | |
| #cl <- makePSOCKcluster(keras_threads) #not to overload your computer | |
| cl = makeCluster(keras_threads, outfile=paste0("temp/", ceiling(as.numeric(Sys.time())), "deeplearning_cluster.log")) | |
| registerDoParallel(cl) | |
| on.exit(stopCluster(cl)) | |
| cat("\nCluster prepared..\n") | |
| #message("Checkpoint passed: cluster prepared") | |
| #args= names(mget(ls())) | |
| #export = export[!export %in% args] | |
| # tu musi isc iteracja | |
| cat(paste0("\nStarting parallel loop.. There are: ", end-start+1, " hyperparameter sets to be checked.\n")) | |
| final <- foreach(i=as.numeric(start):as.numeric(end), .combine=rbind, .verbose=F, .inorder=F, .export = ls() | |
| #,.packages = loadedNamespaces() | |
| ) %dopar% { | |
| Sys.setenv(TF_FORCE_GPU_ALLOW_GROWTH = 'true') | |
| library(OmicSelector) | |
| OmicSelector_load_extension("deeplearning") | |
| if(!dir.exists(paste0(temp_dir,"/models"))) { dir.create(paste0(temp_dir,"/models")) } | |
| if(!dir.exists(paste0(temp_dir,"/temp"))) { dir.create(paste0(temp_dir,"/temp")) } | |
| start_time <- Sys.time() | |
| library(keras) | |
| library(ggplot2) | |
| library(dplyr) | |
| library(data.table) | |
| set.seed(1) | |
| # library(tensorflow) | |
| # gpu = tf$test$is_gpu_available() | |
| # if(gpu) { | |
| # gpux <- tf$config$experimental$get_visible_devices('GPU')[[1]] | |
| # tf$config$experimental$set_memory_growth(device = gpux, enable = TRUE) | |
| # py_run_string("gpus = tf.config.experimental.list_physical_devices('GPU')") | |
| # py_run_string("tf.config.experimental.set_virtual_device_configuration(gpus[0],[tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2048)])") | |
| # } | |
| cat("\nStarting hyperparameters..\n") | |
| print(hyperparameters[i,]) | |
| message(paste0("OmicSelector: Starting training network no ", i, ".")) | |
| message(paste0(hyperparameters[i,], collapse = ", ")) | |
| options(bitmapType = 'cairo', device = 'png') | |
| model_id = paste0(format(i, scientific = FALSE), "-", ceiling(as.numeric(Sys.time()))) | |
| if(SMOTE == T) { model_id = paste0(format(i, scientific = FALSE), "-SMOTE-", ceiling(as.numeric(Sys.time()))) } | |
| tempwyniki = data.frame(model_id=model_id) | |
| tempwyniki[1, "model_id"] = model_id | |
| #message("Checkpoint passed: chunk 1") | |
| if(!dir.exists(paste0(temp_dir,"/models"))) { dir.create(paste0(temp_dir,"/models"))} | |
| if(!dir.exists(paste0(temp_dir,"/models/keras",model_id))) { dir.create(paste0(temp_dir,"/models/keras",model_id))} | |
| cat(paste0("\nTraining model: ",temp_dir,"/models/keras",model_id,"\n")) | |
| #message("Checkpoint passed: chunk 2") | |
| #pdf(paste0(temp_dir,"/models/keras",model_id,"/plots.pdf"), paper="a4") | |
| con <- file(paste0(temp_dir,"/models/keras",model_id,"/training.log")) | |
| sink(con, append=TRUE, split =TRUE) | |
| sink(con, append=TRUE, type="message") | |
| early_stop <- callback_early_stopping(monitor = "val_loss", mode="min", patience = keras_patience) | |
| cp_callback <- callback_model_checkpoint( | |
| filepath = paste0(temp_dir,"/models/keras",model_id,"/finalmodel.hdf5"), | |
| save_best_only = TRUE, period = 10, monitor = "val_loss", | |
| verbose = 0 | |
| ) | |
| ae_cp_callback <- callback_model_checkpoint( | |
| filepath = paste0(temp_dir,"/models/keras",model_id,"/autoencoderweights.hdf5"), | |
| save_best_only = TRUE, save_weights_only = T, period = 10, monitor = "val_loss", | |
| verbose = 0 | |
| ) | |
| x_train <- train %>% | |
| { if (selected_miRNAs != ".") { dplyr::select(., selected_miRNAs) } else { dplyr::select(., starts_with("hsa")) } } %>% | |
| as.matrix() | |
| y_train <- train %>% | |
| dplyr::select("Class") %>% | |
| as.matrix() | |
| y_train[,1] = ifelse(y_train[,1] == "Cancer",1,0) | |
| x_test <- test %>% | |
| { if (selected_miRNAs != ".") { dplyr::select(.,selected_miRNAs) } else { dplyr::select(.,starts_with("hsa")) } } %>% | |
| as.matrix() | |
| y_test <- test %>% | |
| dplyr::select("Class") %>% | |
| as.matrix() | |
| y_test[,1] = ifelse(y_test[,1] == "Cancer",1,0) | |
| x_valid <- valid %>% | |
| { if (selected_miRNAs != ".") { dplyr::select(.,selected_miRNAs) } else { dplyr::select(.,starts_with("hsa")) } } %>% | |
| as.matrix() | |
| y_valid <- valid %>% | |
| dplyr::select("Class") %>% | |
| as.matrix() | |
| y_valid[,1] = ifelse(y_valid[,1] == "Cancer",1,0) | |
| #message("Checkpoint passed: chunk 3") | |
# Optional automatic class weighting: each class is weighted inversely to its
# frequency so the minority class contributes more to the training loss.
if(automatic_weight) {
  counter = funModeling::freq(to_categorical(y_train), plot = F) %>% dplyr::select(var, frequency)
  majority = max(counter$frequency)
  # BUGFIX: base R has no ceil(); the original call errored whenever
  # automatic_weight = TRUE. Use ceiling() instead.
  counter$weight = ceiling(majority / counter$frequency)
  l_weights = setNames(as.list(counter$weight), counter$var)
  message(l_weights)
} else {
  # NULL class_weight means "no reweighting" for keras::fit().
  l_weights = NULL
}
| if(hyperparameters[i, 17] == T) { | |
| x_train_scale = x_train %>% scale() | |
| col_mean_train <- attr(x_train_scale, "scaled:center") | |
| saveRDS(col_mean_train, paste0(temp_dir,"/models/keras",model_id,"/col_mean_train.RDS")) | |
| col_sd_train <- attr(x_train_scale, "scaled:scale") | |
| saveRDS(col_sd_train, paste0(temp_dir,"/models/keras",model_id,"/col_sd_train.RDS")) | |
| x_test_scale <- x_test %>% | |
| scale(center = col_mean_train, | |
| scale = col_sd_train) | |
| x_valid_scale <- x_valid %>% | |
| scale(center = col_mean_train, | |
| scale = col_sd_train) | |
| } | |
| else { | |
| x_train_scale = x_train | |
| x_test_scale <- x_test | |
| x_valid_scale <- x_valid | |
| } | |
| input_layer <- layer_input(shape = c(ncol(x_train_scale))) | |
| #message("Checkpoint passed: chunk 4") | |
| #psych::describe(x_test_scale) | |
| # czy autoenkoder? | |
| if(hyperparameters[i,14] != 0) { | |
| n1 <- hyperparameters[i,14] | |
| n3 <- ncol(x_train_scale) | |
| n2 = 7 | |
| if(ceiling(n3/2) > 7) { n2 = ceiling(n3/2) } | |
| if (hyperparameters[i,14]>0) { | |
| encoder <- | |
| input_layer %>% | |
| layer_dense(units = n2, activation = hyperparameters[i,6]) %>% | |
| layer_dense(units = n1, activation = "softmax") # dimensions of final encoding layer | |
| decoder <- encoder %>% | |
| layer_dense(units = n2, activation = hyperparameters[i,6]) %>% | |
| layer_dense(units = n3, hyperparameters[i,6]) # dimension of original variable | |
| } | |
| else { | |
| n1 = -n1 # korekta dla ujemnej wartosci w hiperparametrach | |
| encoder <- | |
| input_layer %>% | |
| layer_dense(units = n2, activation = hyperparameters[i,6], kernel_regularizer = regularizer_l1(l = 0.01)) %>% | |
| layer_dense(units = n1, activation = "softmax", kernel_regularizer = regularizer_l1(l = 0.01)) # dimensions of final encoding layer | |
| decoder <- encoder %>% | |
| layer_dense(units = n2, activation = hyperparameters[i,6], kernel_regularizer = regularizer_l1(l = 0.01)) %>% | |
| layer_dense(units = n3, hyperparameters[i,6]) # dimension of original variable | |
| } | |
| ae_model <- keras_model(inputs = input_layer, outputs = decoder) | |
| ae_model | |
| ae_model %>% | |
| keras::compile(loss = "mean_absolute_error", | |
| optimizer = hyperparameters[i,13], | |
| metrics = c("mean_squared_error")) | |
| summary(ae_model) | |
| #message("Checkpoint passed: chunk 5") | |
| #ae_tempmodelfile = tempfile() | |
| ae_history <- fit(ae_model, x = x_train_scale, | |
| y = x_train_scale, | |
| epochs = keras_epochae, batch_size = keras_batch_size, | |
| shuffle = T, verbose = 0, | |
| view_metrics = FALSE, | |
| validation_data = list(x_test_scale, x_test_scale), | |
| callbacks = list(ae_cp_callback, early_stop)) | |
| #file.copy(ae_tempmodelfile, paste0(temp_dir,"/models/keras",model_id,"/finalmodel.hdf5"), overwrite = T) | |
| #ae_history_df <- as.data.frame(ae_history) | |
| #fwrite(ae_history_df, paste0(temp_dir,"/models/keras",model_id,"/autoencoder_history_df.csv.gz")) | |
| saveRDS(ae_history, paste0(temp_dir,"/models/keras",model_id,"/ae_history.RDS")) | |
| compare_cx <- data.frame( | |
| train_loss = ae_history$metrics$loss, | |
| test_loss = ae_history$metrics$val_loss | |
| ) %>% | |
| tibble::rownames_to_column() %>% | |
| mutate(rowname = as.integer(rowname)) %>% | |
| tidyr::gather(key = "type", value = "value", -rowname) | |
| plot1 = ggplot(compare_cx, aes(x = rowname, y = value, color = type)) + | |
| geom_line() + | |
| xlab("epoch") + | |
| ylab("loss") + | |
| theme_bw() | |
| #message("Checkpoint passed: chunk 6") | |
| ggplot2::ggsave(file = paste0(temp_dir,"/models/keras",model_id,"/training_autoencoder.png"), grid.arrange(plot1, nrow =1, top = "Training of autoencoder")) | |
| cat("\n- Reloading autoencoder to get weights...\n") | |
| encoder_model <- keras_model(inputs = input_layer, outputs = encoder) | |
| encoder_model %>% load_model_weights_hdf5(paste0(temp_dir,"/models/keras",model_id,"/autoencoderweights.hdf5"), | |
| skip_mismatch = T, | |
| by_name = T) | |
| cat("\n- Autoencoder saving...\n") | |
| save_model_hdf5(encoder_model, paste0(temp_dir,"/models/keras",model_id,"/autoencoder.hdf5")) | |
| cat("\n- Creating deep features...\n") | |
| #message("Checkpoint passed: chunk 7") | |
| ae_x_train_scale <- encoder_model %>% | |
| predict(x_train_scale) %>% | |
| as.matrix() | |
| fwrite(ae_x_train_scale, paste0(temp_dir,"/models/keras",model_id,"/deepfeatures_train.csv")) | |
| ae_x_test_scale <- encoder_model %>% | |
| predict(x_test_scale) %>% | |
| as.matrix() | |
| fwrite(ae_x_test_scale, paste0(temp_dir,"/models/keras",model_id,"/deepfeatures_test.csv")) | |
| ae_x_valid_scale <- encoder_model %>% | |
| predict(x_valid_scale) %>% | |
| as.matrix() | |
| fwrite(ae_x_valid_scale, paste0(temp_dir,"/models/keras",model_id,"/deepfeatures_valid.csv")) | |
| # # podmiana eby nie edytowa kodu | |
| x_train_scale = as.matrix(ae_x_train_scale) | |
| x_test_scale = as.matrix(ae_x_test_scale) | |
| x_valid_scale = as.matrix(ae_x_valid_scale) | |
| cat("\n- Training model based on deep features...\n") | |
| dnn_class_model <- OmicSelector_keras_create_model(i, hyperparameters = hyperparameters, how_many_features = ncol(x_train_scale)) | |
| #message("Checkpoint passed: chunk 8") | |
| message("Starting training...") | |
| #tempmodelfile = tempfile() | |
| history <- fit(dnn_class_model, x = x_train_scale, | |
| y = to_categorical(y_train), | |
| epochs = keras_epoch, | |
| validation_data = list(x_test_scale, to_categorical(y_test)), | |
| callbacks = list(cp_callback, early_stop), | |
| verbose = 0, | |
| view_metrics = FALSE, | |
| batch_size = keras_batch_size, shuffle = T, class_weight = l_weights) | |
| message(history) | |
| print(history) | |
| #message("Checkpoint passed: chunk 8") | |
| #plot(history, col="black") | |
| #history_df <- as.data.frame(history) | |
| # fwrite(history_df, paste0(temp_dir,"/models/keras",model_id,"/history_df.csv.gz")) | |
| saveRDS(history, paste0(temp_dir,"/models/keras",model_id,"/history.RDS")) | |
| #message("Checkpoint passed: chunk 9") | |
| cat("\n- Saving history and plots...\n") | |
| compare_cx <- data.frame( | |
| train_loss = history$metrics$loss, | |
| test_loss = history$metrics$val_loss | |
| ) %>% | |
| tibble::rownames_to_column() %>% | |
| mutate(rowname = as.integer(rowname)) %>% | |
| tidyr::gather(key = "type", value = "value", -rowname) | |
| plot1 = ggplot(compare_cx, aes(x = rowname, y = value, color = type)) + | |
| geom_line() + | |
| xlab("Epoch") + | |
| ylab("Loss") + | |
| theme_bw() | |
| #message("Checkpoint passed: chunk 10") | |
| compare_cx <- data.frame( | |
| train_accuracy = history$metrics$accuracy, | |
| test_accuracy = history$metrics$val_accuracy | |
| ) %>% | |
| tibble::rownames_to_column() %>% | |
| mutate(rowname = as.integer(rowname)) %>% | |
| tidyr::gather(key = "type", value = "value", -rowname) | |
| plot2 = ggplot(compare_cx, aes(x = rowname, y = value, color = type)) + | |
| geom_line() + | |
| xlab("Epoch") + | |
| ylab("Accuracy") + | |
| theme_bw() | |
| #message("Checkpoint passed: chunk 12") | |
| ggplot2::ggsave(file = paste0(temp_dir,"/models/keras",model_id,"/training.png"), grid.arrange(plot1, plot2, nrow =2, top = "Training of final neural network")) | |
| # ewaluacja | |
| cat("\n- Saving final model...\n") | |
| dnn_class_model = load_model_hdf5(paste0(temp_dir,"/models/keras",model_id,"/finalmodel.hdf5")) | |
| y_train_pred <- predict(object = dnn_class_model, x = x_train_scale) | |
| y_test_pred <- predict(object = dnn_class_model, x = x_test_scale) | |
| y_valid_pred <- predict(object = dnn_class_model, x = x_valid_scale) | |
| #message("Checkpoint passed: chunk 13") | |
| # wybranie odciecia | |
| pred = data.frame(`Class` = train$Class, `Pred` = y_train_pred) | |
| library(cutpointr) | |
| cutoff = cutpointr(pred, Pred.2, Class, pos_class = "Cancer", metric = youden) | |
| summary(cutoff) | |
| ggplot2::ggsave(file = paste0(temp_dir,"/models/keras",model_id,"/cutoff.png"), plot(cutoff)) | |
| wybrany_cutoff = cutoff$optimal_cutpoint | |
| #wyniki[i, "training_AUC"] = cutoff$AUC | |
| tempwyniki[1, "training_AUC"] = cutoff$AUC | |
| #wyniki[i, "cutoff"] = wybrany_cutoff | |
| tempwyniki[1, "cutoff"] = wybrany_cutoff | |
| cat(paste0("\n\n---- TRAINING AUC: ",cutoff$AUC," ----\n\n")) | |
| cat(paste0("\n\n---- OPTIMAL CUTOFF: ",wybrany_cutoff," ----\n\n")) | |
| cat(paste0("\n\n---- TRAINING PERFORMANCE ----\n\n")) | |
| pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control") | |
| pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer")) | |
| cm_train = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer") | |
| print(cm_train) | |
| #message("Checkpoint passed: chunk 14") | |
| t1_roc = pROC::roc(Class ~ as.numeric(Pred.2), data=pred) | |
| tempwyniki[1, "training_AUC2"] = t1_roc$auc | |
| tempwyniki[1, "training_AUC_lower95CI"] = as.character(ci(t1_roc))[1] | |
| tempwyniki[1, "training_AUC_upper95CI"] = as.character(ci(t1_roc))[3] | |
| saveRDS(t1_roc, paste0(temp_dir,"/models/keras",model_id,"/training_ROC.RDS")) | |
| #message("Checkpoint passed: chunk 15") | |
| tempwyniki[1, "training_Accuracy"] = cm_train$overall[1] | |
| tempwyniki[1, "training_Sensitivity"] = cm_train$byClass[1] | |
| tempwyniki[1, "training_Specificity"] = cm_train$byClass[2] | |
| tempwyniki[1, "training_PPV"] = cm_train$byClass[3] | |
| tempwyniki[1, "training_NPV"] = cm_train$byClass[4] | |
| tempwyniki[1, "training_F1"] = cm_train$byClass[7] | |
| saveRDS(cm_train, paste0(temp_dir,"/models/keras",model_id,"/cm_train.RDS")) | |
| #message("Checkpoint passed: chunk 16") | |
| cat(paste0("\n\n---- TESTING PERFORMANCE ----\n\n")) | |
| pred = data.frame(`Class` = test$Class, `Pred` = y_test_pred) | |
| pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control") | |
| pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer")) | |
| cm_test = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer") | |
| print(cm_test) | |
| tempwyniki[1, "test_Accuracy"] = cm_test$overall[1] | |
| tempwyniki[1, "test_Sensitivity"] = cm_test$byClass[1] | |
| tempwyniki[1, "test_Specificity"] = cm_test$byClass[2] | |
| tempwyniki[1, "test_PPV"] = cm_test$byClass[3] | |
| tempwyniki[1, "test_NPV"] = cm_test$byClass[4] | |
| tempwyniki[1, "test_F1"] = cm_test$byClass[7] | |
| saveRDS(cm_test, paste0(temp_dir,"/models/keras",model_id,"/cm_test.RDS")) | |
| #message("Checkpoint passed: chunk 17") | |
| cat(paste0("\n\n---- VALIDATION PERFORMANCE ----\n\n")) | |
| pred = data.frame(`Class` = valid$Class, `Pred` = y_valid_pred) | |
| pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control") | |
| pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer")) | |
| cm_valid = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer") | |
| print(cm_valid) | |
# BUGFIX: these validation metrics were copied from the testing block and
# mistakenly read cm_test; report the validation confusion matrix (cm_valid)
# instead (the parallel non-autoencoder branch already does this correctly).
tempwyniki[1, "valid_Accuracy"] = cm_valid$overall[1]
tempwyniki[1, "valid_Sensitivity"] = cm_valid$byClass[1]
tempwyniki[1, "valid_Specificity"] = cm_valid$byClass[2]
tempwyniki[1, "valid_PPV"] = cm_valid$byClass[3]
tempwyniki[1, "valid_NPV"] = cm_valid$byClass[4]
tempwyniki[1, "valid_F1"] = cm_valid$byClass[7]
saveRDS(cm_valid, paste0(temp_dir,"/models/keras",model_id,"/cm_valid.RDS"))
| #message("Checkpoint passed: chunk 18") | |
| if(add_features_to_predictions) { | |
| mix = rbind(train,test,valid) | |
| mixx = rbind(x_train_scale, x_test_scale, x_valid_scale) | |
| y_mixx_pred <- predict(object = dnn_class_model, x = mixx) | |
| mix$Podzial = c(rep("Training",nrow(train)),rep("Test",nrow(test)),rep("Validation",nrow(valid))) | |
| mix$Pred = y_mixx_pred[,2] | |
| mix$PredClass = ifelse(mix$Pred >= wybrany_cutoff, "Cancer", "Control") | |
| fwrite(mix, paste0(temp_dir,"/models/keras",model_id,"/data_predictions.csv.gz")) } else { | |
| mix = rbind(train,test,valid) | |
| mixx = rbind(x_train_scale, x_test_scale, x_valid_scale) | |
| y_mixx_pred <- predict(object = dnn_class_model, x = mixx) | |
| mix2 = data.frame( | |
| `Podzial` = c(rep("Training",nrow(train)),rep("Test",nrow(test)),rep("Validation",nrow(valid))), | |
| `Pred` = y_mixx_pred[,2], | |
| `PredClass` = ifelse(y_mixx_pred[,2] >= wybrany_cutoff, "Cancer", "Control") | |
| ) | |
| fwrite(mix2, paste0(temp_dir,"/models/keras",model_id,"/data_predictions.csv.gz")) | |
| } | |
| fwrite(cbind(hyperparameters[i,], tempwyniki), paste0(temp_dir,"/models/keras",model_id,"/wyniki.csv")) | |
| #message("Checkpoint passed: chunk 19") | |
| wagi = get_weights(dnn_class_model) | |
| saveRDS(wagi, paste0(temp_dir,"/models/keras",model_id,"/finalmodel_weights.RDS")) | |
| save_model_weights_hdf5(dnn_class_model, paste0(temp_dir,"/models/keras",model_id,"/finalmodel_weights.hdf5")) | |
| saveRDS(dnn_class_model, paste0(temp_dir,"/models/keras",model_id,"/finalmodel.RDS")) | |
| #message("Checkpoint passed: chunk 20") | |
| # czy jest sens zapisywac? | |
| #sink() | |
| #sink(type="message") | |
| message(paste0("\n ",model_id, ": ", tempwyniki[1, "training_Accuracy"], " / ", tempwyniki[1, "test_Accuracy"], " ==> ", tempwyniki[1, "training_Accuracy"]>save_threshold_trainacc & tempwyniki[1, "test_Accuracy"]>save_threshold_testacc)) | |
| cat(paste0("\n ",model_id, ": ", tempwyniki[1, "training_Accuracy"], " / ", tempwyniki[1, "test_Accuracy"], " ==> ", tempwyniki[1, "training_Accuracy"]>save_threshold_trainacc & tempwyniki[1, "test_Accuracy"]>save_threshold_testacc)) | |
| if(tempwyniki[1, "training_Accuracy"]>save_threshold_trainacc & tempwyniki[1, "test_Accuracy"]>save_threshold_testacc) { | |
| # zapisywanie modelu do waciwego katalogu | |
| if (save_all_vars) { save(list = ls(all=TRUE), file = paste0(temp_dir,"/models/keras",model_id,"/all.Rdata.gz"), compress = "gzip", compression_level = 9) } | |
if(!dir.exists(paste0("models/",codename,"/"))) { dir.create(paste0("models/",codename,"/")) }
if(dir.exists("/OmicSelector")) {
  # Archive the model directory in the background (Docker environment).
  # BUGFIX: insert "/" between temp_dir and "models" — tempdir()-style paths have
  # no trailing slash (a duplicated slash is harmless on POSIX).
  system(paste0("/bin/bash -c 'screen -dmS save_network zip -9 -r ", paste0("models/",codename,"/",codename, "_", model_id,".zip") ," ", paste0(temp_dir,"/models/keras",model_id), "/'"))
} else {
  zip(paste0("models/",codename,"/",codename, "_", model_id,".zip"), list.files(paste0(temp_dir,"/models/keras",model_id), full.names = T, recursive = T, include.dirs = T)) }
if (!dir.exists(paste0("temp/",codename,"/"))) { dir.create(paste0("temp/",codename,"/")) }
# BUGFIX: the per-model results file is named "wyniki.csv", so the old pattern
# "_wyniki.csv$" matched nothing and the results were never copied.
file.copy(list.files(paste0(temp_dir,"/models/keras",model_id), pattern = "wyniki\\.csv$", full.names = T, recursive = T, include.dirs = T), paste0("temp/",codename,"/",model_id,"_deeplearningresults.csv"))
| #message("Checkpoint passed: chunk 21") | |
| #dev.off() | |
| } | |
| } else { | |
| x_train <- train %>% | |
| { if (selected_miRNAs != ".") { dplyr::select(.,selected_miRNAs) } else { dplyr::select(.,starts_with("hsa")) } } %>% | |
| as.matrix() | |
| y_train <- train %>% | |
| dplyr::select("Class") %>% | |
| as.matrix() | |
| y_train[,1] = ifelse(y_train[,1] == "Cancer",1,0) | |
| #message("Checkpoint passed: chunk 22") | |
| x_test <- test %>% | |
| { if (selected_miRNAs != ".") { dplyr::select(.,selected_miRNAs) } else { dplyr::select(.,starts_with("hsa")) } } %>% | |
| as.matrix() | |
| y_test <- test %>% | |
| dplyr::select("Class") %>% | |
| as.matrix() | |
| y_test[,1] = ifelse(y_test[,1] == "Cancer",1,0) | |
| x_valid <- valid %>% | |
| { if (selected_miRNAs != ".") { dplyr::select(.,selected_miRNAs) } else { dplyr::select(.,starts_with("hsa")) } } %>% | |
| as.matrix() | |
| y_valid <- valid %>% | |
| dplyr::select("Class") %>% | |
| as.matrix() | |
| y_valid[,1] = ifelse(y_valid[,1] == "Cancer",1,0) | |
| #message("Checkpoint passed: chunk 23") | |
| if(hyperparameters[i, 17] == T) { | |
| x_train_scale = x_train %>% scale() | |
| col_mean_train <- attr(x_train_scale, "scaled:center") | |
| col_sd_train <- attr(x_train_scale, "scaled:scale") | |
| x_test_scale <- x_test %>% | |
| scale(center = col_mean_train, | |
| scale = col_sd_train) | |
| x_valid_scale <- x_valid %>% | |
| scale(center = col_mean_train, | |
| scale = col_sd_train) | |
| } | |
| else { | |
| x_train_scale = x_train | |
| x_test_scale <- x_test | |
| x_valid_scale <- x_valid | |
| } | |
| #message("Checkpoint passed: chunk 24") | |
| dnn_class_model <- OmicSelector_keras_create_model(i, hyperparameters = hyperparameters, how_many_features = ncol(x_train_scale)) | |
| message("Starting training...") | |
| history <- fit(dnn_class_model, x = x_train_scale, | |
| y = to_categorical(y_train), | |
| epochs = keras_epoch, | |
| validation_data = list(x_test_scale, to_categorical(y_test)), | |
| callbacks = list( | |
| cp_callback, | |
| #callback_reduce_lr_on_plateau(monitor = "val_loss", factor = 0.1), | |
| #callback_model_checkpoint(paste0(temp_dir,"/models/keras",model_id,"/finalmodel.hdf5")), | |
| early_stop), | |
| verbose = 0, | |
| view_metrics = FALSE, | |
| batch_size = keras_batch_size, shuffle = T, class_weight = l_weights) | |
| print(history) | |
| #message("Checkpoint passed: chunk 25") | |
| message(history) | |
| #plot(history, col="black") | |
| saveRDS(history, paste0(temp_dir,"/models/keras",model_id,"/history.RDS")) | |
| #message("Checkpoint passed: chunk 26") | |
| #fwrite(history_df, paste0(temp_dir,"/models/keras",model_id,"/history_df.csv.gz")) | |
| # pdf(paste0(temp_dir,"/models/keras",model_id,"/plots.pdf")) | |
| compare_cx <- data.frame( | |
| train_loss = history$metrics$loss, | |
| test_loss = history$metrics$val_loss | |
| ) %>% | |
| tibble::rownames_to_column() %>% | |
| mutate(rowname = as.integer(rowname)) %>% | |
| tidyr::gather(key = "type", value = "value", -rowname) | |
| plot1 = ggplot(compare_cx, aes(x = rowname, y = value, color = type)) + | |
| geom_line() + | |
| xlab("epoch") + | |
| ylab("loss") + | |
| theme_bw() | |
| #message("Checkpoint passed: chunk 27") | |
| compare_cx <- data.frame( | |
| train_accuracy = history$metrics$accuracy, | |
| test_accuracy = history$metrics$val_accuracy | |
| ) %>% | |
| tibble::rownames_to_column() %>% | |
| mutate(rowname = as.integer(rowname)) %>% | |
| tidyr::gather(key = "type", value = "value", -rowname) | |
| plot2 = ggplot(compare_cx, aes(x = rowname, y = value, color = type)) + | |
| geom_line() + | |
| xlab("epoch") + | |
| ylab("loss") + | |
| theme_bw() | |
| #message("Checkpoint passed: chunk 28") | |
| ggplot2::ggsave(file = paste0(temp_dir,"/models/keras",model_id,"/training.png"), grid.arrange(plot1, plot2, nrow =2, top = "Training of final neural network")) | |
| # ewaluacja | |
| dnn_class_model = load_model_hdf5(paste0(temp_dir,"/models/keras",model_id,"/finalmodel.hdf5")) | |
| y_train_pred <- predict(object = dnn_class_model, x = x_train_scale) | |
| y_test_pred <- predict(object = dnn_class_model, x = x_test_scale) | |
| y_valid_pred <- predict(object = dnn_class_model, x = x_valid_scale) | |
| #message("Checkpoint passed: chunk 29") | |
| # wybranie odciecia | |
| pred = data.frame(`Class` = train$Class, `Pred` = y_train_pred) | |
| library(cutpointr) | |
| cutoff = cutpointr(pred, Pred.2, Class, pos_class = "Cancer", metric = youden) | |
| print(summary(cutoff)) | |
| ggplot2::ggsave(file = paste0(temp_dir,"/models/keras",model_id,"/cutoff.png"), plot(cutoff)) | |
| wybrany_cutoff = cutoff$optimal_cutpoint | |
| #wyniki[i, "training_AUC"] = cutoff$AUC | |
| tempwyniki[1, "training_AUC"] = cutoff$AUC | |
| #wyniki[i, "cutoff"] = wybrany_cutoff | |
| tempwyniki[1, "cutoff"] = wybrany_cutoff | |
| #message("Checkpoint passed: chunk 30") | |
| cat(paste0("\n\n---- TRAINING PERFORMANCE ----\n\n")) | |
| pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control") | |
| pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer")) | |
| cm_train = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer") | |
| print(cm_train) | |
| #message("Checkpoint passed: chunk 31") | |
| t1_roc = pROC::roc(Class ~ as.numeric(Pred.2), data=pred) | |
| tempwyniki[1, "training_AUC2"] = t1_roc$auc | |
| tempwyniki[1, "training_AUC_lower95CI"] = as.character(ci(t1_roc))[1] | |
| tempwyniki[1, "training_AUC_upper95CI"] = as.character(ci(t1_roc))[3] | |
| saveRDS(t1_roc, paste0(temp_dir,"/models/keras",model_id,"/training_ROC.RDS")) | |
| #ggplot2::ggsave(file = paste0(temp_dir,"/models/keras",model_id,"/training_ROC.png"), grid.arrange(plot(t1_roc), nrow =1, top = "Training ROC curve")) | |
| #message("Checkpoint passed: chunk 32") | |
| tempwyniki[1, "training_Accuracy"] = cm_train$overall[1] | |
| tempwyniki[1, "training_Sensitivity"] = cm_train$byClass[1] | |
| tempwyniki[1, "training_Specificity"] = cm_train$byClass[2] | |
| tempwyniki[1, "training_PPV"] = cm_train$byClass[3] | |
| tempwyniki[1, "training_NPV"] = cm_train$byClass[4] | |
| tempwyniki[1, "training_F1"] = cm_train$byClass[7] | |
| saveRDS(cm_train, paste0(temp_dir,"/models/keras",model_id,"/cm_train.RDS")) | |
| #message("Checkpoint passed: chunk 33") | |
| cat(paste0("\n\n---- TESTING PERFORMANCE ----\n\n")) | |
| pred = data.frame(`Class` = test$Class, `Pred` = y_test_pred) | |
| pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control") | |
| pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer")) | |
| cm_test = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer") | |
| print(cm_test) | |
| tempwyniki[1, "test_Accuracy"] = cm_test$overall[1] | |
| tempwyniki[1, "test_Sensitivity"] = cm_test$byClass[1] | |
| tempwyniki[1, "test_Specificity"] = cm_test$byClass[2] | |
| tempwyniki[1, "test_PPV"] = cm_test$byClass[3] | |
| tempwyniki[1, "test_NPV"] = cm_test$byClass[4] | |
| tempwyniki[1, "test_F1"] = cm_test$byClass[7] | |
| saveRDS(cm_test, paste0(temp_dir,"/models/keras",model_id,"/cm_test.RDS")) | |
| #message("Checkpoint passed: chunk 34") | |
| cat(paste0("\n\n---- VALIDATION PERFORMANCE ----\n\n")) | |
| pred = data.frame(`Class` = valid$Class, `Pred` = y_valid_pred) | |
| pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control") | |
| pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer")) | |
| cm_valid = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer") | |
| print(cm_valid) | |
| tempwyniki[1, "valid_Accuracy"] = cm_valid$overall[1] | |
| tempwyniki[1, "valid_Sensitivity"] = cm_valid$byClass[1] | |
| tempwyniki[1, "valid_Specificity"] = cm_valid$byClass[2] | |
| tempwyniki[1, "valid_PPV"] = cm_valid$byClass[3] | |
| tempwyniki[1, "valid_NPV"] = cm_valid$byClass[4] | |
| tempwyniki[1, "valid_F1"] = cm_valid$byClass[7] | |
| saveRDS(cm_valid, paste0(temp_dir,"/models/keras",model_id,"/cm_valid.RDS")) | |
| #message("Checkpoint passed: chunk 35") | |
| if(add_features_to_predictions) { | |
| mix = rbind(train,test,valid) | |
| mixx = rbind(x_train_scale, x_test_scale, x_valid_scale) | |
| y_mixx_pred <- predict(object = dnn_class_model, x = mixx) | |
| mix$Podzial = c(rep("Training",nrow(train)),rep("Test",nrow(test)),rep("Validation",nrow(valid))) | |
| mix$Pred = y_mixx_pred[,2] | |
| mix$PredClass = ifelse(mix$Pred >= wybrany_cutoff, "Cancer", "Control") | |
| fwrite(mix, paste0(temp_dir,"/models/keras",model_id,"/data_predictions.csv.gz")) } else { | |
| mix = rbind(train,test,valid) | |
| mixx = rbind(x_train_scale, x_test_scale, x_valid_scale) | |
| y_mixx_pred <- predict(object = dnn_class_model, x = mixx) | |
| mix2 = data.frame( | |
| `Podzial` = c(rep("Training",nrow(train)),rep("Test",nrow(test)),rep("Validation",nrow(valid))), | |
| `Pred` = y_mixx_pred[,2], | |
| `PredClass` = ifelse(y_mixx_pred[,2] >= wybrany_cutoff, "Cancer", "Control") | |
| ) | |
| fwrite(mix2, paste0(temp_dir,"/models/keras",model_id,"/data_predictions.csv.gz")) | |
| } | |
| fwrite(cbind(hyperparameters[i,], tempwyniki), paste0(temp_dir,"/models/keras",model_id,"/wyniki.csv")) | |
| wagi = get_weights(dnn_class_model) | |
| saveRDS(wagi, paste0(temp_dir,"/models/keras",model_id,"/finalmodel_weights.RDS")) | |
| save_model_weights_hdf5(dnn_class_model, paste0(temp_dir,"/models/keras",model_id,"/finalmodel_weights.hdf5")) | |
| saveRDS(dnn_class_model, paste0(temp_dir,"/models/keras",model_id,"/finalmodel.RDS")) | |
| #message("Checkpoint passed: chunk 36") | |
| # czy jest sens zapisywac? | |
| cat(paste0("\n ",model_id, ": ", tempwyniki[1, "training_Accuracy"], " / ", tempwyniki[1, "test_Accuracy"], " ==> ", tempwyniki[1, "training_Accuracy"]>save_threshold_trainacc & tempwyniki[1, "test_Accuracy"]>save_threshold_testacc)) | |
| if(tempwyniki[1, "training_Accuracy"]>save_threshold_trainacc & tempwyniki[1, "test_Accuracy"]>save_threshold_testacc) { | |
| # zapisywanie modelu do waciwego katalogu | |
| #message("Checkpoint passed: chunk 37e") | |
| if (save_all_vars) { save(list = ls(all=TRUE), file = paste0(temp_dir,"/models/keras",model_id,"/all.Rdata.gz"), compress = "gzip", compression_level = 9) } | |
| #message("Checkpoint passed: chunk 37d") | |
| if(!dir.exists(paste0("models/",codename,"/"))) { dir.create(paste0("models/",codename,"/")) } | |
| #message("Checkpoint passed: chunk 37c") | |
| if(dir.exists("/OmicSelector")) { | |
| system(paste0("/bin/bash -c 'screen -dmS save_network zip -9 -r ", paste0("models/",codename,"/",codename, "_", model_id,".zip") ," ", paste0(temp_dir,"models/keras",model_id), "/'")) | |
| } else { | |
| zip(paste0("models/",codename,"/",codename, "_", model_id,".zip"),list.files(paste0(temp_dir,"models/keras",model_id), full.names = T, recursive = T, include.dirs = T)) } | |
| #message("Checkpoint passed: chunk 37b") | |
| if (!dir.exists(paste0("temp/",codename,"/"))) { dir.create(paste0("temp/",codename,"/")) } | |
| #message("Checkpoint passed: chunk 37a") | |
| file.copy(list.files(paste0(temp_dir,"/models/keras",model_id), pattern = "_wyniki.csv$", full.names = T, recursive = T, include.dirs = T),paste0("models/",codename,"/",model_id,"_deeplearningresults.csv")) | |
| #message("Checkpoint passed: chunk 37") | |
| } } | |
| sink() | |
| sink(type="message") | |
| message(paste0("OmicSelector: Finished training network id: ",model_id, " : training_acc: ", tempwyniki[1, "training_Accuracy"], ", testing_acc: ", tempwyniki[1, "test_Accuracy"], " ==> worth_saving: ", tempwyniki[1, "training_Accuracy"]>save_threshold_trainacc & tempwyniki[1, "test_Accuracy"]>save_threshold_testacc)) | |
| #dev.off() | |
| tempwyniki2 = cbind(hyperparameters[i,],tempwyniki) | |
| tempwyniki2[1,"name"] = paste0(codename,"_", model_id) | |
| tempwyniki2[1,"worth_saving"] = as.character(tempwyniki[1, "training_Accuracy"]>save_threshold_trainacc & tempwyniki[1, "test_Accuracy"]>save_threshold_testacc) | |
| end_time <- Sys.time() | |
| tempwyniki2[1,"training_time"] = as.character(end_time - start_time) | |
| tempwyniki2 | |
| } | |
| saveRDS(final, paste0(output_file,".RDS")) | |
| cat("\nAll done!! Ending..\n") | |
| if (file.exists(output_file)) { | |
| tempfi = fread(output_file) | |
| final = rbind(tempfi, final) } | |
| fwrite(final, output_file) | |
| setwd(oldwd) | |
| #options(warn=0) | |
| # sprztanie | |
| if(clean_temp_files) { | |
| keras::k_clear_session() | |
| unlink(paste0(normalizePath(temp_dir), "/", dir(temp_dir)), recursive = TRUE) | |
| } | |
| } | |
| #' OmicSelector_transfer_learning_neural_network() | |
| #' | |
| #' This function allows to perform simple transfer learning of the network created in the process above by freezing the weights on particular layers. | |
| #' | |
| #' @param selected_miRNAs Which features should be selected? | |
| #' @param new_scaling Whether to generate new scaling for scaled models (recommended for recalibration procedures). If set to FALSE you should set old_train_csv_to_restore_scaling, which is used to recreate the initial scaling. | |
| #' @param model_path Path to the zip file to be used as the initial model for transfer learning. | |
| #' @param save_scaling Would you like to save the scaling parameters for further reference? Default: TRUE. | |
| #' @param freeze_from Which layers to freeze? Set from in https://keras.rstudio.com/reference/freeze_weights.html. If set to 0 or below - the algorithm will omit freezing. | |
| #' @param freeze_to Which layers to freeze? Set to in https://keras.rstudio.com/reference/freeze_weights.html | |
| #' @param train Provide train set. | |
| #' @param test Provide test set. | |
| #' @param valid Provide validation set. | |
| #' | |
| #' @return Results of uncalibrated and recalibrated models. | |
| #' | |
| #' @export | |
OmicSelector_transfer_learning_neural_network = function(selected_miRNAs = ".", new_scaling = TRUE, model_path = "tcga_models/pancreatic_tcga_165592-1601307872.zip", save_scaling = TRUE,
                                                         old_train_csv_to_restore_scaling = "../tcga/mixed_train.csv" # used if new_scaling is FALSE: regenerates the original scaling from the previous training set
                                                         , freeze_from = 1
                                                         , freeze_to = 2,
                                                         train = fread("circ_data/mixed_train.csv"),
                                                         test = fread("circ_data/mixed_test.csv"),
                                                         valid = fread("circ_data/mixed_valid.csv")) {
  # Results accumulator; row 1 = crude model as loaded, row 2 = recalibrated model.
  tempwyniki = data.frame()
  library(keras)
  # library(OmicSelector)
  # OmicSelector_load_extension("deeplearning")
  library(data.table)

  # Select the feature columns of a data set. The sentinel "." means "all
  # columns starting with 'hsa'", otherwise only the explicitly listed features.
  # identical() is used instead of `selected_miRNAs != "."` because comparing a
  # length > 1 vector inside if() is an error on R >= 4.2; all_of() makes the
  # external-vector selection explicit and errors on missing columns.
  select_features = function(df) {
    if (identical(selected_miRNAs, ".")) {
      dplyr::select(df, starts_with("hsa"))
    } else {
      dplyr::select(df, dplyr::all_of(selected_miRNAs))
    }
  }

  # Feature matrices and 0/1 labels (Cancer = 1) for the three splits.
  x_train <- train %>% select_features() %>% as.matrix()
  y_train <- train %>%
    dplyr::select("Class") %>%
    as.matrix()
  y_train[,1] = ifelse(y_train[,1] == "Cancer",1,0)
  x_test <- test %>% select_features() %>% as.matrix()
  y_test <- test %>%
    dplyr::select("Class") %>%
    as.matrix()
  y_test[,1] = ifelse(y_test[,1] == "Cancer",1,0)
  x_valid <- valid %>% select_features() %>% as.matrix()
  y_valid <- valid %>%
    dplyr::select("Class") %>%
    as.matrix()
  y_valid[,1] = ifelse(y_valid[,1] == "Cancer",1,0)

  #####
  # INIT MODEL: extract the trained keras model and its hyperparameter sheet
  # from the provided zip archive. fixed = TRUE: match the literal file names,
  # not a regex where "." matches any character.
  model_path_in_zip = dplyr::filter(unzip(model_path, list = TRUE), grepl("finalmodel.hdf5", Name, fixed = TRUE))[1,"Name"]
  unzip(model_path, model_path_in_zip, exdir = tempdir())
  model_path_unzipped = paste0(tempdir(), "/", model_path_in_zip)
  init_model = keras::load_model_hdf5(model_path_unzipped)
  # Keep a copy of the initial weights for reference/debugging.
  wagi_wyjsciowe = get_weights(init_model)
  wyniki_path_in_zip = dplyr::filter(unzip(model_path, list = TRUE), grepl("wyniki.csv", Name, fixed = TRUE))[1,"Name"]
  unzip(model_path, wyniki_path_in_zip, exdir = tempdir())
  wyniki_path_unzipped = paste0(tempdir(), "/", wyniki_path_in_zip)
  pre_conf = data.table::fread(wyniki_path_unzipped)

  # Scaling: reproduce (or regenerate) the standardization the model was
  # trained with. Test/validation are always scaled with the TRAINING
  # center/scale to avoid information leakage.
  # NOTE(review): the scaling parameters are always written to disk here; the
  # save_scaling argument is currently not consulted -- confirm intended.
  if(as.logical(pre_conf[1,"scaled"])) {
    if(new_scaling) {
      # Fit fresh scaling on the new training set.
      x_train_scale = x_train %>% scale()
      col_mean_train <- attr(x_train_scale, "scaled:center")
      col_sd_train <- attr(x_train_scale, "scaled:scale")
      saveRDS(col_mean_train, "transfer_col_mean_train.RDS")
      saveRDS(col_sd_train, "transfer_col_sd_train.RDS")
      x_test_scale <- x_test %>%
        scale(center = col_mean_train,
              scale = col_sd_train)
      x_valid_scale <- x_valid %>%
        scale(center = col_mean_train,
              scale = col_sd_train)
    } else {
      # Recreate the original scaling from the old (e.g. TCGA) training CSV.
      tcga_train = fread(old_train_csv_to_restore_scaling)
      tcga_train <- tcga_train %>% select_features() %>% as.matrix() %>% scale()
      col_mean_train <- attr(tcga_train, "scaled:center")
      col_sd_train <- attr(tcga_train, "scaled:scale")
      saveRDS(col_mean_train, "transfer_col_mean_train.RDS")
      saveRDS(col_sd_train, "transfer_col_sd_train.RDS")
      x_train_scale <- x_train %>%
        scale(center = col_mean_train,
              scale = col_sd_train)
      x_test_scale <- x_test %>%
        scale(center = col_mean_train,
              scale = col_sd_train)
      x_valid_scale <- x_valid %>%
        scale(center = col_mean_train,
              scale = col_sd_train)
    }
  } else {
    # Model was trained on unscaled data -- pass features through unchanged.
    x_train_scale <- x_train
    x_test_scale <- x_test
    x_valid_scale <- x_valid
  }

  library(pROC)
  # Choose the probability cutoff maximizing Youden's J on the new training
  # set. data.frame() renames the 2-column prediction matrix to Pred.1/Pred.2;
  # Pred.2 is the "Cancer" class probability.
  pred = data.frame(`Class` = as.factor(ifelse(y_train==1,"Cancer","Control")), `Pred` = predict(init_model, x_train_scale))
  library(cutpointr)
  cutoff = cutpointr(pred, Pred.2, Class, pos_class = "Cancer", metric = youden)
  print(summary(cutoff))
  ggplot2::ggsave(file = paste0("cutoff.png"), plot(cutoff))
  wybrany_cutoff = cutoff$optimal_cutpoint
  y_train_pred <- predict(object = init_model, x = x_train_scale)
  y_test_pred <- predict(object = init_model, x = x_test_scale)
  y_valid_pred <- predict(object = init_model, x = x_valid_scale)
  tempwyniki[1, "new_training_AUC"] = cutoff$AUC
  tempwyniki[1, "new_cutoff"] = wybrany_cutoff

  # ---- Crude (uncalibrated) model performance on the new cohort ----
  cat(paste0("\n\n---- TRAINING PERFORMANCE ----\n\n"))
  pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control")
  pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer"))
  cm_train = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer")
  print(cm_train)
  t1_roc = pROC::roc(Class ~ as.numeric(Pred.2), data=pred)
  tempwyniki[1, "new_training_AUC2"] = t1_roc$auc
  tempwyniki[1, "new_training_AUC_lower95CI"] = as.character(ci(t1_roc))[1]
  tempwyniki[1, "new_training_AUC_upper95CI"] = as.character(ci(t1_roc))[3]
  saveRDS(t1_roc, paste0("init_training_ROC.RDS"))
  tempwyniki[1, "new_training_Accuracy"] = cm_train$overall[1]
  tempwyniki[1, "new_training_Sensitivity"] = cm_train$byClass[1]
  tempwyniki[1, "new_training_Specificity"] = cm_train$byClass[2]
  tempwyniki[1, "new_training_PPV"] = cm_train$byClass[3]
  tempwyniki[1, "new_training_NPV"] = cm_train$byClass[4]
  tempwyniki[1, "new_training_F1"] = cm_train$byClass[7]
  saveRDS(cm_train, paste0("init_cm_train.RDS"))
  cat(paste0("\n\n---- TESTING PERFORMANCE ----\n\n"))
  pred = data.frame(`Class` = as.factor(test$Class), `Pred` = y_test_pred)
  pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control")
  pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer"))
  cm_test = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer")
  print(cm_test)
  tempwyniki[1, "new_test_Accuracy"] = cm_test$overall[1]
  tempwyniki[1, "new_test_Sensitivity"] = cm_test$byClass[1]
  tempwyniki[1, "new_test_Specificity"] = cm_test$byClass[2]
  tempwyniki[1, "new_test_PPV"] = cm_test$byClass[3]
  tempwyniki[1, "new_test_NPV"] = cm_test$byClass[4]
  tempwyniki[1, "new_test_F1"] = cm_test$byClass[7]
  saveRDS(cm_test, paste0("init_cm_test.RDS"))
  cat(paste0("\n\n---- VALIDATION PERFORMANCE ----\n\n"))
  pred = data.frame(`Class` = as.factor(valid$Class), `Pred` = y_valid_pred)
  pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control")
  pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer"))
  cm_valid = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer")
  print(cm_valid)
  tempwyniki[1, "new_valid_Accuracy"] = cm_valid$overall[1]
  tempwyniki[1, "new_valid_Sensitivity"] = cm_valid$byClass[1]
  tempwyniki[1, "new_valid_Specificity"] = cm_valid$byClass[2]
  tempwyniki[1, "new_valid_PPV"] = cm_valid$byClass[3]
  tempwyniki[1, "new_valid_NPV"] = cm_valid$byClass[4]
  tempwyniki[1, "new_valid_F1"] = cm_valid$byClass[7]
  saveRDS(cm_valid, paste0("init_cm_valid.RDS"))

  #####
  # RECALIBRATION (transfer learning): clone the architecture, copy the
  # weights, optionally freeze layers freeze_from..freeze_to, and retrain on
  # the new training set with early stopping monitored on the test set.
  scenario_i = 2
  scenario = "trans1"
  trans_model = clone_model(init_model)
  early_stop <- callback_early_stopping(monitor = "val_loss", mode="min", patience = 200)
  cp_callback <- callback_model_checkpoint(
    filepath = paste0(scenario,"_model.hdf5"),
    save_best_only = TRUE, period = 10, monitor = "val_loss",
    verbose = 0
  )
  keras_batch_size = 64
  # Same loss and optimizer as the original model (taken from its results sheet).
  compile(trans_model, loss = 'binary_crossentropy',
          metrics = 'accuracy', optimizer = pre_conf$optimizer)
  set_weights(trans_model, get_weights(init_model))
  if(freeze_from>0) {
    # freeze_weights() requires a recompile to take effect.
    freeze_weights(trans_model, from = freeze_from, to = freeze_to)
    compile(trans_model, loss = 'binary_crossentropy',
            metrics = 'accuracy', optimizer = pre_conf$optimizer) }
  history <- fit(trans_model, x = x_train_scale,
                 y = to_categorical(y_train),
                 epochs = 5000,
                 validation_data = list(x_test_scale, to_categorical(y_test)),
                 callbacks = list(
                   cp_callback,
                   early_stop),
                 verbose = 0,
                 view_metrics = FALSE,
                 batch_size = keras_batch_size, shuffle = TRUE)

  # ---- Recalibrated model performance: re-pick the cutoff and re-evaluate ----
  pred = data.frame(`Class` = as.factor(ifelse(y_train==1,"Cancer","Control")), `Pred` = predict(trans_model, x_train_scale))
  cutoff = cutpointr(pred, Pred.2, Class, pos_class = "Cancer", metric = youden)
  print(summary(cutoff))
  ggplot2::ggsave(file = paste0("trans_cutoff.png"), plot(cutoff))
  wybrany_cutoff = cutoff$optimal_cutpoint
  y_train_pred <- predict(object = trans_model, x = x_train_scale)
  y_test_pred <- predict(object = trans_model, x = x_test_scale)
  y_valid_pred <- predict(object = trans_model, x = x_valid_scale)
  tempwyniki[scenario_i, "new_training_AUC"] = cutoff$AUC
  tempwyniki[scenario_i, "new_cutoff"] = wybrany_cutoff
  cat(paste0("\n\n---- TRAINING PERFORMANCE ----\n\n"))
  pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control")
  pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer"))
  cm_train = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer")
  print(cm_train)
  t1_roc = pROC::roc(Class ~ as.numeric(Pred.2), data=pred)
  tempwyniki[scenario_i, "new_training_AUC2"] = t1_roc$auc
  tempwyniki[scenario_i, "new_training_AUC_lower95CI"] = as.character(ci(t1_roc))[1]
  tempwyniki[scenario_i, "new_training_AUC_upper95CI"] = as.character(ci(t1_roc))[3]
  saveRDS(t1_roc, paste0("new_trans_training_ROC.RDS"))
  tempwyniki[scenario_i, "new_training_Accuracy"] = cm_train$overall[1]
  tempwyniki[scenario_i, "new_training_Sensitivity"] = cm_train$byClass[1]
  tempwyniki[scenario_i, "new_training_Specificity"] = cm_train$byClass[2]
  tempwyniki[scenario_i, "new_training_PPV"] = cm_train$byClass[3]
  tempwyniki[scenario_i, "new_training_NPV"] = cm_train$byClass[4]
  tempwyniki[scenario_i, "new_training_F1"] = cm_train$byClass[7]
  saveRDS(cm_train, paste0("trans_cm_train.RDS"))
  cat(paste0("\n\n---- TESTING PERFORMANCE ----\n\n"))
  pred = data.frame(`Class` = as.factor(test$Class), `Pred` = y_test_pred)
  pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control")
  pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer"))
  cm_test = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer")
  print(cm_test)
  tempwyniki[scenario_i, "new_test_Accuracy"] = cm_test$overall[1]
  tempwyniki[scenario_i, "new_test_Sensitivity"] = cm_test$byClass[1]
  tempwyniki[scenario_i, "new_test_Specificity"] = cm_test$byClass[2]
  tempwyniki[scenario_i, "new_test_PPV"] = cm_test$byClass[3]
  tempwyniki[scenario_i, "new_test_NPV"] = cm_test$byClass[4]
  tempwyniki[scenario_i, "new_test_F1"] = cm_test$byClass[7]
  saveRDS(cm_test, paste0("trans_cm_test.RDS"))
  cat(paste0("\n\n---- VALIDATION PERFORMANCE ----\n\n"))
  pred = data.frame(`Class` = as.factor(valid$Class), `Pred` = y_valid_pred)
  pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control")
  pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer"))
  cm_valid = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer")
  print(cm_valid)
  tempwyniki[scenario_i, "new_valid_Accuracy"] = cm_valid$overall[1]
  tempwyniki[scenario_i, "new_valid_Sensitivity"] = cm_valid$byClass[1]
  tempwyniki[scenario_i, "new_valid_Specificity"] = cm_valid$byClass[2]
  tempwyniki[scenario_i, "new_valid_PPV"] = cm_valid$byClass[3]
  tempwyniki[scenario_i, "new_valid_NPV"] = cm_valid$byClass[4]
  tempwyniki[scenario_i, "new_valid_F1"] = cm_valid$byClass[7]
  saveRDS(cm_valid, paste0("trans_cm_valid.RDS"))

  # Assemble results (pre_conf row is recycled across both scenarios), archive
  # all working artefacts produced in the current directory, then clean them up.
  resu = cbind(pre_conf, tempwyniki)
  resu$type = c("crude","recalibrated")
  resu$based_on = basename(model_path)
  resu$based_on_path = model_path
  new_name = make.names(paste0("transfer-",as.numeric(Sys.time())))
  resu$new_model_name = c("",new_name)
  fwrite(resu, "resu.csv")
  # Anchored patterns ("\\.csv$", not ".csv") so unlink() cannot remove
  # unrelated files whose names merely contain those characters; the list is
  # computed once so exactly the archived files are deleted.
  artefacts = c(list.files(".", pattern = "\\.hdf5$"), list.files(".", pattern = "\\.csv$"),
                list.files(".", pattern = "\\.RDS$"), list.files(".", pattern = "\\.png$"))
  zip(zipfile = paste0(new_name,".zip"), files = artefacts)
  unlink(artefacts)
  resu
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Retry wrapper: run the R deep-learning script and, whenever it exits with a
# non-zero status (crash), wait 10 seconds and start it again. Loops until the
# script finally succeeds. (The echo message is Polish: "It crashed... retrying..")
until Rscript 02_deeplearning.R; do
echo "Wywalil sie... powtarzam.."
sleep 10
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment