Skip to content

Instantly share code, notes, and snippets.

@asalt
Created March 12, 2025 04:40
Show Gist options
  • Select an option

  • Save asalt/9e5518f9e98bda5e3c9fad675f10b1eb to your computer and use it in GitHub Desktop.

Select an option

Save asalt/9e5518f9e98bda5e3c9fad675f10b1eb to your computer and use it in GitHub Desktop.
# Load required libraries
library(cmapR) # For handling GCT files
library(ComplexHeatmap) # For visualization, see https://jokergoo.github.io/ComplexHeatmap-reference/book/ for a complete reference
library(RColorBrewer) # For color palettes
library(dplyr)
#' Simulate an expression dataset and wrap it in a cmapR GCT object
#'
#' Each column (sample) is randomly assigned to one of three groups
#' ("A", "B", "C") and one of three batches (1, 2, 3). Every group follows
#' its own sinusoidal profile along the rows (genes). Gaussian noise
#' (mean 0, sd 0.5) and a per-batch shift of -0.5, 0 or +0.5 are added on top.
#'
#' @param nrow Number of rows (genes). Must be a single finite number;
#'   fractional values are truncated and values below 1 are raised to 1.
#' @param ncol Number of columns (samples). Same handling as `nrow`.
#' @param seed Value passed to `set.seed()` before any random draw. The
#'   default (369) reproduces the data generated by earlier versions of this
#'   function. Use `NULL` to leave the RNG untouched and get a different
#'   dataset on every call.
#' @return A `cmapR::GCT` object built from the simulated matrix, with row ids
#'   `Gene1..GeneN`, column ids `Sample1..SampleM`, column metadata `Group`
#'   and `Batch`, and row metadata `Gene_Type`.
make_random_gct <- function(nrow = 100, ncol = 10, seed = 369) {
  stopifnot(
    is.numeric(nrow), length(nrow) == 1L, is.finite(nrow),
    is.numeric(ncol), length(ncol) == 1L, is.finite(ncol)
  )

  # NOTE: seeding resets the caller's global RNG stream; pass `seed = NULL`
  # to avoid that side effect.
  if (!is.null(seed)) {
    set.seed(seed)
  }

  n_genes <- max(trunc(nrow), 1)
  n_samples <- max(trunc(ncol), 1)

  # Random group and batch membership per sample. The order of the random
  # draws in this function is kept fixed so that a given seed always yields
  # the same dataset.
  sample_groups <- sample(c("A", "B", "C"), n_samples, replace = TRUE)
  batch_numbers <- sample(1:3, n_samples, replace = TRUE)

  # Phase along the genes, running up to one full period (2 * pi)
  phase <- seq_len(n_genes) / n_genes * 2 * pi

  # One reference profile per group; each sample picks up the profile of its
  # group. Dimnames are dropped so the matrix starts out unnamed.
  profiles <- cbind(
    A = sin(phase),
    B = -cos(phase),
    C = sin(phase) - cos(phase)
  )
  expr_matrix <- unname(profiles[, sample_groups, drop = FALSE])

  # Add random noise
  noise <- matrix(
    rnorm(n_genes * n_samples, mean = 0, sd = 0.5),
    nrow = n_genes,
    ncol = n_samples
  )
  expr_matrix <- expr_matrix + noise

  # Apply batch effect: `byrow = TRUE` repeats the per-sample batch vector on
  # every row, so each column is shifted by (batch - 2) * 0.5.
  batch_shift <- matrix(
    batch_numbers - 2,
    nrow = n_genes,
    ncol = n_samples,
    byrow = TRUE
  ) * 0.5
  expr_matrix <- expr_matrix + batch_shift

  # Gene and sample identifiers
  gene_ids <- paste0("Gene", seq_len(n_genes))
  sample_ids <- paste0("Sample", seq_len(n_samples))

  # Sample metadata; row names mirror the column ids (noted by the original
  # author as required for cmapR).
  sample_meta <- data.frame(
    Group = sample_groups,
    Batch = as.factor(batch_numbers),
    row.names = sample_ids
  )

  # Gene metadata; row names mirror the row ids.
  gene_meta <- data.frame(
    Gene_Type = sample(
      c("Protein Coding", "Pseudogene", "lncRNA"),
      n_genes,
      replace = TRUE
    ),
    row.names = gene_ids
  )

  cmapR::GCT(
    mat = expr_matrix,
    rid = gene_ids,
    cid = sample_ids,
    cdesc = sample_meta,
    rdesc = gene_meta
  )
}
## ==
# Generate a random GCT object ----
gct <- make_random_gct(nrow = 3400, ncol = 22)

# Extract the expression matrix and sample metadata ----
expr_matrix <- gct@mat # Gene expression values (genes x samples)
sample_annotations <- gct@cdesc # Column (sample) metadata

# Define heatmap annotations (sample group and batch) ----
# Names in each color vector must match the values found in the
# corresponding column of `sample_annotations`.
annotation_colors <- list(
  Group = c("A" = "blue", "B" = "red", "C" = "green"),
  Batch = c("1" = "purple", "2" = "orange", "3" = "yellow")
)
ha <- HeatmapAnnotation(df = sample_annotations, col = annotation_colors)

# Define the heatmap color palette ----
# 100 colors interpolated from blue through light grey to red.
# NOTE(review): a plain color vector is presumably stretched over the data
# range by ComplexHeatmap, so grey need not sit exactly at 0; confirm against
# the ComplexHeatmap reference if a zero-centered scale is required.
heatmap_palette <- colorRampPalette(c("blue", "grey80", "red"))(100)

# Create the heatmap ----
ht <- Heatmap(
  expr_matrix,
  name = "Expression",
  top_annotation = ha,
  col = heatmap_palette,
  cluster_rows = TRUE, # Cluster genes
  cluster_columns = TRUE, # Cluster samples
  show_row_names = FALSE, # Hide gene names for clarity
  show_column_names = TRUE, # Show sample names
  column_names_side = "top"
  # column_split = gct@cdesc$Group
)

# Draw explicitly: auto-printing only happens at the interactive top level,
# so without this the plot is not produced when the file is run via source().
draw(ht)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment