Skip to content

Instantly share code, notes, and snippets.

@krlmlr
Last active March 12, 2026 11:00
Show Gist options
  • Select an option

  • Save krlmlr/32dabe5480010207c854943ccdf7d1ce to your computer and use it in GitHub Desktop.

Select an option

Save krlmlr/32dabe5480010207c854943ccdf7d1ce to your computer and use it in GitHub Desktop.
Demo for DuckDB's ALTREP row names
# Self-contained demo: ALTREP row names for a data frame
# Adapted from DuckDB's src/reltoaltrep.cpp and src/transform.cpp
#
# The row names are an ALTREP ascending integer sequence 1..n.
# Length, Elt, Dataptr, Dataptr_or_null, and Inspect are all implemented; no
# backing array is allocated until Dataptr is actually called (lazy
# materialisation).
#
# Each ALTREP callback prints its name so you can see which access path fires.
library(Rcpp)
cpp_code <- r"(
#include <Rcpp.h>
#include <R_ext/Altrep.h>
// ── ALTREP class handle ────────────────────────────────────────────────────
static R_altrep_class_t compact_rownames_class;
// ── Backing data: ascending sequence 1..n ────────────────────────────────
// Backing state for the ALTREP row names: the ascending sequence 1..n.
// Only the length is stored up front; the contiguous buffer is built once by
// ensure_array and released by the destructor.
struct SeqRownamesData {
  int n;                        // length of the sequence
  int *materialized = nullptr;  // lazily allocated full array, owned by this object

  explicit SeqRownamesData(int n_) : n(n_) {}
  ~SeqRownamesData() { delete[] materialized; }

  // This object owns `materialized`. A member-wise copy would leave two
  // owners that both delete the same buffer, so copying is disabled.
  SeqRownamesData(const SeqRownamesData &) = delete;
  SeqRownamesData &operator=(const SeqRownamesData &) = delete;

  // Returns the buffer holding 1..n, building it on the first call only.
  int *ensure_array() {
    if (!materialized) {
      materialized = new int[n];
      for (int i = 0; i < n; ++i) materialized[i] = i + 1;
      Rprintf("[row.names ALTREP] Materialized full array 1..%d\n", n);
    }
    return materialized;
  }
};
// External-pointer finalizer: frees the SeqRownamesData stored in `ptr`, then
// clears the address held by the external pointer.
static void finalizer(SEXP ptr) {
  auto *payload = static_cast<SeqRownamesData *>(R_ExternalPtrAddr(ptr));
  delete payload;
  R_ClearExternalPtr(ptr);
}
// Creates an ALTREP integer vector of class compact_rownames_class standing
// for 1..n. Its state is a heap-allocated SeqRownamesData wrapped in an
// external pointer (passed as data1) whose finalizer frees it. Each step is
// traced with Rprintf.
static SEXP make_seq_rownames(int n) {
  Rprintf("[make_seq_rownames] n=%d, class ptr=%p\n", n, (void *)compact_rownames_class.ptr);

  SeqRownamesData *payload = new SeqRownamesData(n);
  Rprintf("[make_seq_rownames] SeqRownamesData allocated at %p\n", (void *)payload);

  // Keep the external pointer protected while the ALTREP object is created.
  SEXP handle = R_MakeExternalPtr(payload, R_NilValue, R_NilValue);
  PROTECT(handle);
  R_RegisterCFinalizer(handle, finalizer);

  Rprintf("[make_seq_rownames] calling R_new_altrep\n");
  SEXP altrep_vec = R_new_altrep(compact_rownames_class, handle, R_NilValue);
  Rprintf("[make_seq_rownames] R_new_altrep returned %p\n", (void *)altrep_vec);

  UNPROTECT(1);
  return altrep_vec;
}
// ── ALTREP callbacks (each prints its own name) ───────────────────────────
// ALTREP Inspect method: prints a one-line description of the sequence and
// returns TRUE. The pre, deep, pvec and inspect_subtree arguments are unused.
static Rboolean rownames_inspect(SEXP x, int pre, int deep, int pvec,
                                 void (*inspect_subtree)(SEXP, int, int, int)) {
  SEXP handle = R_altrep_data1(x);
  const auto *state = static_cast<SeqRownamesData *>(R_ExternalPtrAddr(handle));
  Rprintf("[row.names ALTREP] Inspect — ascending sequence 1..%d\n", state->n);
  return TRUE;
}
// ALTREP Length method: reports n from the SeqRownamesData behind data1 and
// logs the value.
static R_xlen_t rownames_length(SEXP x) {
  SEXP handle = R_altrep_data1(x);
  const auto *state = static_cast<SeqRownamesData *>(R_ExternalPtrAddr(handle));
  Rprintf("[row.names ALTREP] Length = %d\n", state->n);
  return static_cast<R_xlen_t>(state->n);
}
// ALTVEC Dataptr_or_null method: hands out the buffer only if it already
// exists and never allocates; a null return means the sequence has not been
// materialised yet.
static const void *rownames_dataptr_or_null(SEXP x) {
  SEXP handle = R_altrep_data1(x);
  const auto *state = static_cast<SeqRownamesData *>(R_ExternalPtrAddr(handle));
  const char *status = "no";
  if (state->materialized) {
    status = "yes";
  }
  Rprintf("[row.names ALTREP] Dataptr_or_null (materialized=%s)\n", status);
  return state->materialized;
}
// ALTVEC Dataptr method: returns the full 1..n buffer, building it on first
// use through ensure_array. The writeable flag is not consulted.
static void *rownames_dataptr(SEXP x, Rboolean writeable) {
  Rprintf("[row.names ALTREP] Dataptr\n");
  SEXP handle = R_altrep_data1(x);
  auto *state = static_cast<SeqRownamesData *>(R_ExternalPtrAddr(handle));
  int *buffer = state->ensure_array();
  return buffer;
}
// ALTINTEGER Elt method: element i (0-based) of the sequence 1..n is i + 1,
// so the value is computed directly and the backing array is never touched.
static int rownames_elt(SEXP x, R_xlen_t i) {
  const int index = static_cast<int>(i);
  const int val = index + 1;
  Rprintf("[row.names ALTREP] Elt(%d) = %d\n", index, val);
  return val;
}
// ── Register ALTREP class ─────────────────────────────────────────────────
// [[Rcpp::init]] is not processed by sourceCpp, so we register lazily on
// first use via ensure_class_registered() instead.
// Set once the ALTREP class has been created and its methods installed.
static bool class_registered = false;

// Creates the "compact_rownames" ALTREP integer class and installs its methods
// on the first call; later calls only log the existing class handle.
static void ensure_class_registered() {
  if (!class_registered) {
    Rprintf("[init] entering init_compact_rownames\n");

    // R_make_altinteger_class takes a DllInfo. The DllInfo of this shared
    // object is not at hand here, so the one returned by
    // R_getEmbeddingDllInfo is passed as a stand-in.
    // NOTE(review): presumably any valid DllInfo is enough for registration;
    // confirm against R's ALTREP documentation.
    DllInfo *dll = R_getEmbeddingDllInfo();
    compact_rownames_class = R_make_altinteger_class("compact_rownames", "demo", dll);

    // Generic ALTREP methods.
    R_set_altrep_Inspect_method(compact_rownames_class, rownames_inspect);
    R_set_altrep_Length_method(compact_rownames_class, rownames_length);

    // Vector data access methods.
    R_set_altvec_Dataptr_or_null_method(compact_rownames_class, rownames_dataptr_or_null);
    R_set_altvec_Dataptr_method(compact_rownames_class, rownames_dataptr);

    // Per-element access for integer vectors.
    R_set_altinteger_Elt_method(compact_rownames_class, rownames_elt);

    class_registered = true;
    return;
  }

  Rprintf("[init] class already registered, class ptr=%p\n", (void *)compact_rownames_class.ptr);
}
// ── Borrowed from DuckDB transform.cpp: install_new_attrib ────────────────
// On R < 4.6.0, Rf_setAttrib for R_RowNamesSymbol calls INTEGER() on the
// value, which materialises an ALTREP vector. The workaround is to first
// clear the attribute, then splice the new value directly into the pairlist.
// On R >= 4.6.0 Rf_setAttrib is safe to call directly.
static void install_new_attrib(SEXP vec, SEXP name, SEXP val) {
Rf_setAttrib(vec, name, R_NilValue); // remove old value cleanly
Rf_setAttrib(vec, name, val);
}
// ── Build the demo data frame ─────────────────────────────────────────────
// Builds a one-column data frame (column "value" holding 10, 20, 30) whose
// row.names attribute is the ALTREP sequence produced by make_seq_rownames.
// Registers the ALTREP class first if needed and traces every step with
// Rprintf.
// [[Rcpp::export]]
SEXP make_df_altrep_rownames() {
  ensure_class_registered();
  Rprintf("[make_df] step 1: class ptr=%p\n", (void *)compact_rownames_class.ptr);

  // Column payload: 10, 20, 30.
  const int n_rows = 3;
  SEXP values = PROTECT(Rf_allocVector(INTSXP, n_rows));
  int *cells = INTEGER(values);
  for (int i = 0; i < n_rows; ++i) {
    cells[i] = 10 * (i + 1);
  }
  Rprintf("[make_df] step 2: column allocated\n");

  // The data frame itself is a list with a single element.
  SEXP frame = PROTECT(Rf_allocVector(VECSXP, 1));
  SET_VECTOR_ELT(frame, 0, values);
  Rprintf("[make_df] step 3: list allocated\n");

  SEXP col_names = PROTECT(Rf_mkString("value"));
  Rf_setAttrib(frame, R_NamesSymbol, col_names);
  Rprintf("[make_df] step 4: names set\n");

  SEXP klass = PROTECT(Rf_mkString("data.frame"));
  Rf_setAttrib(frame, R_ClassSymbol, klass);
  Rprintf("[make_df] step 5: class set\n");

  // Row names go in last, through the helper rather than Rf_setAttrib.
  Rprintf("[make_df] step 6: calling make_seq_rownames(3)\n");
  SEXP row_ids = PROTECT(make_seq_rownames(n_rows));
  Rprintf("[make_df] step 7: calling install_new_attrib\n");
  install_new_attrib(frame, R_RowNamesSymbol, row_ids);

  Rprintf("[make_df] step 8: done, returning\n");
  UNPROTECT(5);
  return frame;
}
)"
# ── Compile and load ──────────────────────────────────────────────────────
# Write the C++ source held in `cpp_code` to a temporary .cpp file and build
# it with sourceCpp(); the exported make_df_altrep_rownames() then becomes
# callable from R.
tmp <- tempfile(fileext = ".cpp")
writeLines(cpp_code, tmp)
sourceCpp(tmp)
# ── Demo ──────────────────────────────────────────────────────────────────
# Each step prints a banner first, so the "[...]" trace lines emitted by the
# C++ code can be attributed to the R call that caused them.
cat("=== make_df_altrep_rownames() ===\n")
# Build the data frame; the C++ side logs its construction steps.
df <- make_df_altrep_rownames()
cat("\n=== Expecting no materialization here ===\n")
# Top-level expression: its value is auto-printed when the script is run.
class(df)
cat("\n=== Accessing row names (should trigger materialization) ===\n")
# Fetch the row names, then deparse them and discard the result.
# NOTE(review): deparse() presumably forces a full read of `rn`; check the
# trace to see which ALTREP callbacks actually fire.
rn <- rownames(df)
invisible(deparse(rn))
# Auto-print the row names.
rn
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment