#!/usr/bin/env lua
|
-- Module configuration. `the` starts empty and is rebound to the parsed
-- settings further down the file; `help` is the usage text printed by `-h`.
-- Every `key=value` token inside `help` becomes a default setting, so the
-- defaults are edited here.
local the, help = {}, [[

seven : stochastic incremental XAI (v2)
(c)2025, Tim Menzies. MIT License. opensource.org/licenses/MIT

Options
  -b bins=7            number of bins
  -f file=auto93.csv   csv data file
  -s seed=1234567891   random number seeds
  -h                   show help ]]
|
|
|
--## Batteries |
|
-- Forward declarations: keep the helper functions file-local and let them
-- refer to each other regardless of definition order. `iter` is listed so
-- that its later `function iter(v)` definition does not become a global.
local new, push, map, sort, lt, iter
local coerce, cells, csv
local fmt, cat, _a2as, _d2as
|
|
|
--### Lists |
|
|
|
-- push(a,v) -> v;; Append `v` to the end of array `a` and hand `v` back.
function push(a, v)
  local slot = #a + 1
  a[slot] = v
  return v
end
|
|
|
-- map(d,f) --> a;; Collect `f(key, value)` for every entry of `d`.
-- Entries are visited with `pairs`, so results arrive in whatever order
-- `pairs` yields them. `u` is a scratch local declared as a parameter.
function map(d, f,    u)
  u = {}
  for key, value in pairs(d) do
    local out = f(key, value)
    push(u, out)
  end
  return u
end
|
|
|
-- sort(a, f=nil) -> a;; Order array `a` in place and return that same array.
-- `f` is an optional comparator handed straight to `table.sort`.
function sort(a, f)
  table.sort(a, f)
  do return a end
end
|
|
|
-- lt(n) --> f;; Build a comparator that orders two tables by their field `n`.
function lt(n)
  local function by_field(a1, a2)
    return a1[n] < a2[n]
  end
  return by_field
end
|
|
|
-- ### Thing to string |
|
|
|
-- fmt(s, ...) -> s;; Short name for `string.format`.
fmt = string.format
|
|
|
-- cat(v) -> s;; Render any value as a compact, readable string.
--   numbers   : whole values print without decimals, others via "%.3g"
--   functions : "()"
--   tables    : "{...}"; when `#v` is 0 the entries are shown as sorted
--               ":key value" strings, otherwise as the array items in order
--   the rest  : `tostring(v)`
function cat(v)
  local kind = type(v)
  if kind == "number" then
    if math.floor(v) == v then return fmt("%.0f", v) end
    return fmt("%.3g", v)
  end
  if kind == type(cat) then return "()" end
  if kind ~= "table" then return tostring(v) end
  local parts
  if #v == 0 then
    parts = sort(_d2as(v))
  else
    parts = _a2as(v)
  end
  return "{" .. table.concat(parts, " ") .. "}"
end
|
|
|
-- _a2as(a) -> as;; Internal helper for `cat`: the array items of `a`, each
-- turned into a string, in order.
function _a2as(a)
  local as = {}
  for i, item in ipairs(a) do
    as[i] = cat(item)
  end
  return as
end
|
|
|
-- _d2as(d) -> as;; Internal helper for `cat`: one ":key value" string per
-- entry of `d`, in `pairs` order (the caller sorts them).
function _d2as(d)
  local as, count = {}, 0
  for key, value in pairs(d) do
    count = count + 1
    as[count] = fmt(":%s %s", key, cat(value))
  end
  return as
end
|
|
|
--### String to thing |
|
|
|
-- coerce(s) -> n|s;; Turn text into a number when it parses as one.
-- nil becomes the empty string; any other non-numeric text is returned with
-- leading and trailing whitespace removed.
function coerce(s)
  if s == nil then return "" end
  local number = tonumber(s)
  if number then return number end
  return s:match('^%s*(.-)%s*$')
end
|
|
|
-- cells(s) -> row;; Split one CSV line on commas and coerce every cell.
-- Empty cells are kept (they coerce to "") so the cells after them stay in
-- their own columns; a completely empty line yields an empty row.
function cells(s)
  local row = {}
  if s == "" then return row end
  -- Appending a comma lets a single pattern capture every field, empty or not.
  for cell in (s .. ","):gmatch("([^,]*),") do
    row[#row + 1] = coerce(cell)
  end
  return row
end
|
|
|
-- csv(file: s) -> iterator<i,row>;; Iterate over the rows of a CSV file.
-- Each call of the returned function reads one line and returns its 1-based
-- line number plus the parsed cells. At end of file the handle is closed and
-- nothing is returned; further calls also return nothing.
-- Raises (through `assert`) when the file cannot be opened.
-- `i` and `stream` are scratch locals declared as extra parameters, so each
-- iterator owns its own file handle.
function csv(file,    i, stream)
  i, stream = 0, assert(io.open(file))
  return function(    s)
    if not stream then return end -- already exhausted and closed
    s = stream:read()
    if s then
      i = i + 1
      return i, cells(s)
    end
    stream:close()
    stream = nil
  end
end
|
|
|
--### Meta |
|
|
|
-- iter(v: nil|t|f|s) -->iterator<i,Row>;; Turn `v` into something a generic
-- `for` can loop over.
--   string   : treated as a CSV file name and read with `csv`
--   function : taken to be an iterator already and returned unchanged
--   table    : `ipairs` when `#v > 0`, otherwise `pairs`
--   other    : (nil included) an iterator that yields nothing
function iter(v)
  local kind = type(v)
  if kind == "string" then return csv(v) end
  if kind == "function" then return v end
  if kind == "table" then
    if #v > 0 then return ipairs(v) end
    return pairs(v)
  end
  return pairs({})
end
|
|
|
-- new(meta:o, d) -> d;; Make `d` an instance of `meta`.
-- `meta.__index` is pointed at `meta` itself, so fields missing from `d` are
-- looked up there; `d` is returned with `meta` installed as its metatable.
function new(meta, d)
  meta.__index = meta
  setmetatable(d, meta)
  return d
end
|
|
|
--## Constructors |
|
-- Method tables, one per type; `new` installs them as metatables.
local SYM, NUM, DATA, COLS = {}, {}, {}, {}
-- Forward declarations: the constructors for those types and the bulk
-- loader `adds`, all defined further down.
local Sym, Num, Data, Cols, adds
|
|
|
-- Sym(pos=0:n, txt="":s) -> SYM;; Create an empty summary of a symbolic column.
-- `pos` is stored as the column position and `txt` as its name (`is`);
-- the counts (`n`, `has`, `most`) start empty and `mode` is unset.
function Sym(pos, txt)
  local sym = {
    pos  = pos or 0,
    is   = txt or "",
    n    = 0,
    has  = {},
    mode = nil,
    most = 0,
  }
  return new(SYM, sym)
end
|
|
|
-- Num(pos=0:n, txt="":s) -> NUM;; Create an empty summary of a numeric column.
-- `best` is 0 when the column name ends in "-" and 1 otherwise; the running
-- statistics (`n`, `mu`, `sd`, `m2`) start at zero and `has` is a fresh Sym.
function Num(pos, txt)
  local name = txt or ""
  local best = 1
  if name:find("-$") then best = 0 end
  return new(NUM, {
    pos  = pos or 0,
    is   = name,
    n    = 0,
    mu   = 0,
    sd   = 0,
    m2   = 0,
    has  = Sym(),
    best = best,
  })
end
|
|
|
-- Data(v:t|f|s) -> DATA;; Build a DATA and load it with the rows found in `v`.
-- The new DATA starts with no rows, no columns and an empty Num in `ys`;
-- `adds` then feeds it every item of `v`.
function Data(v)
  local data = new(DATA, {rows = {}, cols = nil, ys = Num()})
  return adds(v, data)
end
|
|
|
-- Cols(names:as) -> COLS;; Turn a row of column names into column summaries.
--   name starts with an upper-case letter -> Num, otherwise Sym
--   name ends in "X" (optionally followed by + or -) -> kept in `all` only
--   name ends in + or -                     -> also listed in `y`
--   anything else                           -> also listed in `x`
-- `col` is a scratch local declared as an extra parameter.
function Cols(names,    col)
  local x, y, all = {}, {}, {}
  for i, s in ipairs(names) do
    local make = s:match("^[A-Z]") and Num or Sym
    col = make(i, s)
    push(all, col)
    local skipped = s:match("X[+-]?$")
    if not skipped then
      if s:find("[+-]$") then
        push(y, col)
      else
        push(x, col)
      end
    end
  end
  return new(COLS, {x = x, y = y, seen = Num(), all = all, names = names})
end
|
|
|
--## Bins |
|
-- Method table for the objects built by `Bins`.
local BINS = {}

-- Bins(names:as) -> BINS;; Three empty DATAs -- `all`, `best` and `rest` --
-- that share the column names in `names`.
function Bins(names)
  return new(BINS, {
    all  = Data({names}),
    best = Data({names}),
    rest = Data({names}),
  })
end
|
|
|
-- function BINS:add(row) |
|
-- x = function(r1,r2) return all:distx(r1,r2) end |
|
-- y = function(r) return all:disty(r) end |
|
-- now = #self.all.rows |
|
-- if now <= the.any then all:add(row) end |
|
-- if now == the.any then |
|
-- for i,row in ipairs(sort(self.all.rows, y)) do |
|
-- (i <= the.any/2 and self.best or self.rest):add(row) end |
|
-- if now > the.any then |
|
-- if x(row, best:mid()) < x(row, rest:mid()) then |
|
-- self.best:add(row) |
|
|
|
-- adds(v:t|f|s, it=Num():d) -> d;; Feed every item of `v` to `it:add`, then
-- return `it`. `v` is walked with `iter`; when `it` is omitted a fresh `Num`
-- collects the items.
function adds(v, it)
  if not it then it = Num() end
  for _, item in iter(v) do
    it:add(item)
  end
  return it
end
|
|
|
-- DATA:clone(rows={}:a) -> DATA;; A new DATA with this one's column names,
-- loaded with `rows` (or left empty when `rows` is omitted).
function DATA:clone(rows)
  local source = rows or {}
  local copy = Data({self.cols.names})
  return adds(source, copy)
end
|
|
|
--## Data Layer Methods |
|
|
|
-- NUM:norm(n) -> n;; Squash `n` into 0..1 with a logistic curve centred on
-- `mu` and scaled by `sd`. The unknown marker "?" is returned untouched.
-- The 1e-32 keeps the division defined while `sd` is still 0.
function NUM:norm(n)
  if n == "?" then return n end
  local exponent = -1.7 * (n - self.mu) / (self.sd + 1e-32)
  return 1 / (1 + math.exp(exponent))
end
|
|
|
-- DATA:mid() -> row;; The central value of every column, laid out as a row.
-- Slot `col.pos` holds that column's `mid()`. A column with no mid yet (a SYM
-- that has seen no values has a nil mode) contributes "?", the marker this
-- file uses for unknown cells, so the result never contains holes.
function DATA:mid()
  local row = {}
  for _, col in ipairs(self.cols.all) do
    local mid = col:mid()
    if mid == nil then mid = "?" end
    row[col.pos] = mid
  end
  return row
end
|
-- NUM:mid() -> n;; The running mean `mu` is the middle of a NUM.
function NUM:mid()
  local mean = self.mu
  return mean
end
|
-- SYM:mid() -> v;; The most frequent value seen so far (`mode`) is the
-- middle of a SYM; nil until something has been added.
function SYM:mid()
  local mode = self.mode
  return mode
end
|
|
|
-- DATA:bin(row) -> bool;; Is `row` at the low end of the y-distances seen so far?
-- The row's `disty` is first added to the running summary `self.ys`. The
-- result is true when that distance, normalized by `self.ys`, falls below
-- 1/sqrt(n), where n counts the distances recorded (this one included).
function DATA:bin(row)
  local y = self.ys:add(self:disty(row))
  return self.ys:norm(y) < 1 / self.ys.n ^ 0.5
end
|
|
|
-- NUM:bin(n) -> 0 .. the.bins-1;; Index of the bin that holds `n`.
-- The unknown marker "?" is returned untouched.
function NUM:bin(n)
  if n == "?" then return n end
  local b = math.floor(the.bins * self:norm(n))
  -- norm() rounds to exactly 1 for far-out values (and whenever sd is still 0
  -- and n > mu); keep those in the top bin instead of one past it.
  return math.min(the.bins - 1, b)
end
|
|
|
-- SYM:bin(v) -> v;; Symbols are their own bins, so `v` comes back as is.
function SYM:bin(v)
  do return v end
end
|
|
|
--## Add Methods |
|
|
|
-- DATA:add(row) -> row;; Take in one row.
-- The first row ever seen is read as the column names and builds `self.cols`.
-- Every later row is stored in `self.rows` and each column summary is
-- updated with the cell at its own position.
function DATA:add(row)
  if not self.cols then
    self.cols = Cols(row)
    return row
  end
  push(self.rows, row)
  for _, col in pairs(self.cols.all) do
    col:add(row[col.pos])
  end
  return row
end
|
|
|
-- NUM:add(n) -> n;; Fold `n` into the running count, mean and deviation.
-- Unknown values ("?") are ignored. `m2` accumulates the squared differences
-- from the mean, and `sd` is derived from it once two values have been seen.
-- `d` is a scratch local declared as an extra parameter.
function NUM:add(n,    d)
  if n == "?" then return n end
  self.n = self.n + 1
  d = n - self.mu
  self.mu = self.mu + d / self.n
  self.m2 = self.m2 + d * (n - self.mu)
  if self.n < 2 then
    self.sd = 0
  else
    self.sd = math.sqrt(self.m2 / (self.n - 1))
  end
  return n
end
|
|
|
-- SYM:add(v) -> v;; Count one more occurrence of `v`.
-- Unknown values ("?") are ignored. When `v` becomes strictly more frequent
-- than the current leader it takes over as `mode` (with its count in `most`).
function SYM:add(v)
  if v == "?" then return v end
  local count = (self.has[v] or 0) + 1
  self.n = self.n + 1
  self.has[v] = count
  if count > self.most then
    self.most, self.mode = count, v
  end
  return v
end
|
|
|
--## Distance/Report Methods |
|
|
|
-- DATA:disty(row) -> n;; How far `row` is from the best value of every y column.
-- For each y column the cell is normalized and compared with that column's
-- `best`; the result is the root of the mean squared gap.
-- `d` and `n` are scratch locals declared as extra parameters.
function DATA:disty(row,    d, n)
  d, n = 0, 0
  for _, num in pairs(self.cols.y) do
    local gap = num:norm(row[num.pos]) - num.best
    n = n + 1
    d = d + gap ^ 2
  end
  return (d / n) ^ 0.5
end
|
|
|
-- DATA:distx(row1,row2) -> n;; Distance between two rows over the x columns.
-- Each x column reports its own `distx` for the pair of cells; the result is
-- the root of the mean of those distances squared.
-- `d` and `n` are scratch locals declared as extra parameters.
function DATA:distx(row1, row2,    d, n)
  d, n = 0, 0
  for _, col in pairs(self.cols.x) do
    local gap = col:distx(row1[col.pos], row2[col.pos])
    n = n + 1
    d = d + gap ^ 2
  end
  return (d / n) ^ 0.5
end
|
|
|
-- SYM:distx(v1,v2) -> n;; 0 when the two symbols are equal, 1 otherwise.
-- Two unknowns ("?") count as different, i.e. distance 1.
function SYM:distx(v1, v2)
  if v1 == "?" and v2 == "?" then return 1 end
  if v1 == v2 then return 0 end
  return 1
end
|
|
|
-- NUM:distx(n1,n2) -> n;; Absolute gap between two normalized numbers.
-- Two unknowns are distance 1. A single unknown is replaced by whichever of
-- 0 or 1 lies on the far side of 0.5 from the known value.
function NUM:distx(n1, n2)
  if n1 == "?" and n2 == "?" then return 1 end
  local a, b = self:norm(n1), self:norm(n2)
  if a == "?" then a = (b > 0.5) and 0 or 1 end
  if b == "?" then b = (a > 0.5) and 0 or 1 end
  return math.abs(a - b)
end
|
|
|
--## Discretization |
|
|
|
--## Reporting |
|
local sparkline -- forward declaration; keeps the function below file-local
|
|
|
-- sparkline(scores:a) --> s;; Draw scores in 0..1 as a row of colored cells.
-- Each score selects one of 7 levels: floor(score*7)+1, clamped to 1..7 so
-- out-of-range scores land in the first or last level. A level is an ANSI
-- color plus a glyph, followed by a color reset and one space.
-- The trailing parameters are scratch locals; `reset` is unused and kept
-- only so the signature stays as it was.
function sparkline(scores,    s, labels, colors, reset)
  labels = {"▁","▂","▃"," ","▄","▅","▆"}
  colors = { "\27[97;101m",  -- 1: bright white on bright red
             "\27[30;103m",  -- 2: black on bright yellow
             "\27[30;43m",   -- 3: black on yellow/orange
             "\27[30;107m",  -- 4: black on bright white (middle)
             "\27[30;102m",  -- 5: black on bright green
             "\27[30;42m",   -- 6: black on green
             "\27[97;42m"}   -- 7: bright white on green (darkest green)
  s = {}
  for k, x in ipairs(scores) do
    local i = math.max(1, math.min(7, math.floor(x * 7) + 1))
    s[k] = colors[i] .. labels[i] .. "\27[0m "
  end
  -- Join once at the end rather than growing a string inside the loop.
  return table.concat(s)
end
|
|
|
-- DATA:report() --> nil;; Print one sparkline line per x column.
-- For each x column the entries of `self:score()[col.is]` are turned into
-- {key, value} pairs and sorted by key; the values are drawn as a sparkline
-- and the keys printed after it (presumably keys are bins and values are
-- scores in 0..1 -- confirm against `score`).
-- NOTE(review): `DATA:score` is not defined in the visible part of this
-- file; confirm it exists before relying on this method.
-- The trailing parameters are scratch locals.
function DATA:report(    kv, cols, vals, bins)
  kv = function(t)
    return map(t, function(k, v) return {k, v} end)
  end
  cols = self:score()
  for _, col in ipairs(self.cols.x) do
    vals, bins = {}, {}
    local ordered = sort(kv(cols[col.is] or {}), lt(1))
    for _, p in ipairs(ordered) do
      push(vals, p[2])
      push(bins, p[1])
    end
    print(fmt("%15s: %s %s", col.is, sparkline(vals), cat(bins)))
  end
end
|
|
|
--## CLI/Config |
|
-- Forward declarations for the option-handling helpers defined below.
local settings, cli
-- go: command-line flag -> handler function; filled in by the start-up code.
local go = {}
|
|
|
-- settings(s) -> d;; Read defaults out of a help string.
-- Every `key=value` token (no whitespace on either side of the "=") becomes
-- `d[key] = coerce(value)`. `d` is a scratch local declared as a parameter.
function settings(s,    d)
  d = {}
  for key, value in s:gmatch("(%S+)=(%S+)") do
    d[key] = coerce(value)
  end
  return d
end
|
|
|
-- cli(settings:d) -> settings;; Apply the command-line arguments, left to right.
-- An argument that names a `go` handler runs that handler on the (coerced)
-- next argument. Otherwise a flag `-x` updates every setting whose name
-- starts with `x`, taking the next argument as the new value; a flag with
-- nothing after it leaves the settings untouched. Only the real arguments
-- (arg[1], arg[2], ...) are considered, never the script or interpreter name.
function cli(settings)
  for i, s in ipairs(arg or {}) do
    local value = arg[i + 1]
    if go[s] then
      go[s](coerce(value))
    elseif s:sub(1, 1) == "-" and value ~= nil then
      local flag = s:sub(2)
      for k, _ in pairs(settings) do
        if k:sub(1, 1) == flag then settings[k] = coerce(value) end
      end
    end
  end
  return settings
end
|
|
|
-- ## Demos/ tests/ start-up functions |
|
|
|
-- Flags handled by a `go` function run code instead of just storing a value.

-- -h : print the help text.
go["-h"] = function(_)
  print(help)
end

-- -s : remember the seed and reseed the random number generator with it.
go["-s"] = function(n)
  the.seed = n
  math.randomseed(n)
end

-- Defaults come from the `key=value` tokens in the help text; the random
-- number generator is then seeded from the default seed.
the = settings(help)
math.randomseed(the.seed)
|
|
|
-- Demo entry point: runs only when the script being executed is this file
-- (its name appears, literally, in arg[0]); skipped when the file is loaded
-- some other way or when the host provides no `arg` table.
-- Loads the configured CSV file, copies its rows into a clone, then prints
-- the second x column of the original and of the clone for comparison.
if arg and arg[0] and arg[0]:find("lua7a.lua", 1, true) then
  the = cli(the)
  local data1 = Data(the.file)
  local data2 = data1:clone()
  for _, row in pairs(data1.rows) do data2:add(row) end
  print(cat(data1.cols.x[2]))
  print(cat(data2.cols.x[2])) -- cat, so the column is shown rather than a table address
end
|
|
|
-- Public interface of the module.
-- NOTE(review): no variable named `clone` is visible in this file (only the
-- method `DATA:clone`), so the `clone` entry looks like it is always nil --
-- confirm what was meant to be exported.
return {
  -- types and loaders
  Data = Data, Cols = Cols, Num = Num, Sym = Sym, adds = adds, clone = clone,
  -- reporting
  sparkline = sparkline,
  -- helpers
  new = new, push = push, map = map, sort = sort, lt = lt,
  coerce = coerce, cells = cells, csv = csv, fmt = fmt, cat = cat,
  -- configuration
  the = the, help = help,
}