Last active
March 12, 2026 11:00
-
-
Save krlmlr/32dabe5480010207c854943ccdf7d1ce to your computer and use it in GitHub Desktop.
Demo for DuckDB's ALTREP row names
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Self-contained demo: ALTREP row names for a data frame | |
| # Adapted from DuckDB's src/reltoaltrep.cpp and src/transform.cpp | |
| # | |
| # The row names are an ALTREP ascending integer sequence 1..n. | |
| # Length, Elt, and Dataptr are all implemented; no backing array is allocated | |
| # until Dataptr is actually called (lazy materialisation). | |
| # | |
| # Each ALTREP callback prints its name so you can see which access path fires. | |
| library(Rcpp) | |
| cpp_code <- r"( | |
| #include <Rcpp.h> | |
| #include <R_ext/Altrep.h> | |
| // ── ALTREP class handle ──────────────────────────────────────────────────── | |
| static R_altrep_class_t compact_rownames_class; | |
| // ── Backing data: ascending sequence 1..n ──────────────────────────────── | |
| struct SeqRownamesData { | |
| int n; // length of the sequence | |
| int *materialized = nullptr; // lazily allocated full array | |
| explicit SeqRownamesData(int n_) : n(n_) {} | |
| ~SeqRownamesData() { delete[] materialized; } | |
| // Materialize 1..n into a heap array on first call. | |
| int *ensure_array() { | |
| if (!materialized) { | |
| materialized = new int[n]; | |
| for (int i = 0; i < n; ++i) materialized[i] = i + 1; | |
| Rprintf("[row.names ALTREP] Materialized full array 1..%d\n", n); | |
| } | |
| return materialized; | |
| } | |
| }; | |
| static void finalizer(SEXP ptr) { | |
| delete static_cast<SeqRownamesData *>(R_ExternalPtrAddr(ptr)); | |
| R_ClearExternalPtr(ptr); | |
| } | |
| static SEXP make_seq_rownames(int n) { | |
| Rprintf("[make_seq_rownames] n=%d, class ptr=%p\n", n, (void *)compact_rownames_class.ptr); | |
| auto *d = new SeqRownamesData(n); | |
| Rprintf("[make_seq_rownames] SeqRownamesData allocated at %p\n", (void *)d); | |
| SEXP xp = PROTECT(R_MakeExternalPtr(d, R_NilValue, R_NilValue)); | |
| R_RegisterCFinalizer(xp, finalizer); | |
| Rprintf("[make_seq_rownames] calling R_new_altrep\n"); | |
| SEXP result = R_new_altrep(compact_rownames_class, xp, R_NilValue); | |
| Rprintf("[make_seq_rownames] R_new_altrep returned %p\n", (void *)result); | |
| UNPROTECT(1); | |
| return result; | |
| } | |
| // ── ALTREP callbacks (each prints its own name) ─────────────────────────── | |
| static Rboolean rownames_inspect(SEXP x, int pre, int deep, int pvec, | |
| void (*inspect_subtree)(SEXP, int, int, int)) { | |
| auto *d = static_cast<SeqRownamesData *>(R_ExternalPtrAddr(R_altrep_data1(x))); | |
| Rprintf("[row.names ALTREP] Inspect — ascending sequence 1..%d\n", d->n); | |
| return TRUE; | |
| } | |
| static R_xlen_t rownames_length(SEXP x) { | |
| auto *d = static_cast<SeqRownamesData *>(R_ExternalPtrAddr(R_altrep_data1(x))); | |
| Rprintf("[row.names ALTREP] Length = %d\n", d->n); | |
| return (R_xlen_t)d->n; | |
| } | |
| // Returns NULL until materialised; avoids eager allocation. | |
| static const void *rownames_dataptr_or_null(SEXP x) { | |
| auto *d = static_cast<SeqRownamesData *>(R_ExternalPtrAddr(R_altrep_data1(x))); | |
| Rprintf("[row.names ALTREP] Dataptr_or_null (materialized=%s)\n", | |
| d->materialized ? "yes" : "no"); | |
| return d->materialized; // nullptr = not yet materialized | |
| } | |
| static void *rownames_dataptr(SEXP x, Rboolean writeable) { | |
| Rprintf("[row.names ALTREP] Dataptr\n"); | |
| auto *d = static_cast<SeqRownamesData *>(R_ExternalPtrAddr(R_altrep_data1(x))); | |
| return d->ensure_array(); | |
| } | |
| static int rownames_elt(SEXP x, R_xlen_t i) { | |
| // No materialisation needed: 1-based index is just i+1 | |
| int val = (int)i + 1; | |
| Rprintf("[row.names ALTREP] Elt(%d) = %d\n", (int)i, val); | |
| return val; | |
| } | |
| // ── Register ALTREP class ───────────────────────────────────────────────── | |
| // [[Rcpp::init]] is not processed by sourceCpp, so we register lazily on | |
| // first use via ensure_class_registered() instead. | |
| static bool class_registered = false; | |
| static void ensure_class_registered() { | |
| if (class_registered) { | |
| Rprintf("[init] class already registered, class ptr=%p\n", (void *)compact_rownames_class.ptr); | |
| return; | |
| } | |
| Rprintf("[init] entering init_compact_rownames\n"); | |
| // R_GetCurrentEnv() gives us the DllInfo-equivalent for the loaded SO. | |
| // For sourceCpp we obtain the DllInfo via R_getDllInfo on the loaded lib. | |
| // The simplest portable path is R_MakeExternalPtr trickery, but Rcpp | |
| // exposes the DllInfo at load time via the generated R_init_<name> | |
| // already called by R; we just need to retrieve it. | |
| // Since all we need is *any* DllInfo to register an altinteger class, | |
| // we can obtain the one for the base R package (which is always loaded). | |
| DllInfo *dll = R_getEmbeddingDllInfo(); | |
| compact_rownames_class = R_make_altinteger_class("compact_rownames", "demo", dll); | |
| R_set_altrep_Inspect_method(compact_rownames_class, rownames_inspect); | |
| R_set_altrep_Length_method(compact_rownames_class, rownames_length); | |
| R_set_altvec_Dataptr_or_null_method(compact_rownames_class, rownames_dataptr_or_null); | |
| R_set_altvec_Dataptr_method(compact_rownames_class, rownames_dataptr); | |
| R_set_altinteger_Elt_method(compact_rownames_class, rownames_elt); | |
| class_registered = true; | |
| } | |
| // ── Borrowed from DuckDB transform.cpp: install_new_attrib ──────────────── | |
| // On R < 4.6.0, Rf_setAttrib for R_RowNamesSymbol calls INTEGER() on the | |
| // value, which materialises an ALTREP vector. The workaround is to first | |
| // clear the attribute, then splice the new value directly into the pairlist. | |
| // On R >= 4.6.0 Rf_setAttrib is safe to call directly. | |
| static void install_new_attrib(SEXP vec, SEXP name, SEXP val) { | |
| Rf_setAttrib(vec, name, R_NilValue); // remove old value cleanly | |
| Rf_setAttrib(vec, name, val); | |
| } | |
| // ── Build the demo data frame ───────────────────────────────────────────── | |
| // [[Rcpp::export]] | |
| SEXP make_df_altrep_rownames() { | |
| ensure_class_registered(); | |
| Rprintf("[make_df] step 1: class ptr=%p\n", (void *)compact_rownames_class.ptr); | |
| // One column "value" = c(10L, 20L, 30L) | |
| SEXP col = PROTECT(Rf_allocVector(INTSXP, 3)); | |
| INTEGER(col)[0] = 10; | |
| INTEGER(col)[1] = 20; | |
| INTEGER(col)[2] = 30; | |
| Rprintf("[make_df] step 2: column allocated\n"); | |
| SEXP df = PROTECT(Rf_allocVector(VECSXP, 1)); | |
| SET_VECTOR_ELT(df, 0, col); | |
| Rprintf("[make_df] step 3: list allocated\n"); | |
| SEXP nms = PROTECT(Rf_mkString("value")); | |
| Rf_setAttrib(df, R_NamesSymbol, nms); | |
| Rprintf("[make_df] step 4: names set\n"); | |
| SEXP cls = PROTECT(Rf_mkString("data.frame")); | |
| Rf_setAttrib(df, R_ClassSymbol, cls); | |
| Rprintf("[make_df] step 5: class set\n"); | |
| // Attach the ALTREP ascending row names | |
| Rprintf("[make_df] step 6: calling make_seq_rownames(3)\n"); | |
| SEXP rn = PROTECT(make_seq_rownames(3)); | |
| Rprintf("[make_df] step 7: calling install_new_attrib\n"); | |
| install_new_attrib(df, R_RowNamesSymbol, rn); | |
| Rprintf("[make_df] step 8: done, returning\n"); | |
| UNPROTECT(5); | |
| return df; | |
| } | |
| )" | |
# ── Compile and load ──────────────────────────────────────────────────────
# Write the embedded C++ source to a temporary .cpp file and build it with
# Rcpp; this makes make_df_altrep_rownames() callable from R.
tmp <- tempfile(fileext = ".cpp")
writeLines(text = cpp_code, con = tmp)
sourceCpp(file = tmp)

# ── Demo ──────────────────────────────────────────────────────────────────
# Watch the "[row.names ALTREP] ..." lines printed by the callbacks to see
# which access path each R-level operation takes.
cat("=== make_df_altrep_rownames() ===\n")
df <- make_df_altrep_rownames()

cat("\n=== Expecting no materialization here ===\n")
class(df)

cat("\n=== Accessing row names (should trigger materialization) ===\n")
rn <- rownames(df)
invisible(deparse(rn))
rn
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment