Skip to content

Instantly share code, notes, and snippets.

@kenwebb
Last active November 6, 2025 15:46
Show Gist options
  • Select an option

  • Save kenwebb/f563b28b1cdcb3aa7cbeceaa1d07aca7 to your computer and use it in GitHub Desktop.

Select an option

Save kenwebb/f563b28b1cdcb3aa7cbeceaa1d07aca7 to your computer and use it in GitHub Desktop.
Wikipedia Math Format - Parser
<?xml version="1.0" encoding="UTF-8"?>
<!--Xholon Workbook http://www.primordion.com/Xholon/gwt/ MIT License, Copyright (C) Ken Webb, Thu Nov 06 2025 10:45:44 GMT-0500 (Eastern Standard Time)-->
<XholonWorkbook>
<Notes><![CDATA[
Xholon
------
Title: Wikipedia Math Format - Parser
Description:
Url: http://www.primordion.com/Xholon/gwt/
InternalName: f563b28b1cdcb3aa7cbeceaa1d07aca7
Keywords:
My Notes
--------
2025 Nov 4
Some Wikipedia pages (see ref[1]) use a special format for Math expressions.
For example:
kg⋅m−1⋅s−2
L−2⋅J
T−2L−2M
As part of a TypeScript Node.js project, I have started to write a parser for this format.
See:
r-p:~/A2025_Oct/TsConvTool$ src/conv.ts
and see the later JS node version:
r-p:~/A2025_Nov$ WmfParser.js
In this workbook I implement the parser using Xholon.
Saved at:
https://gist.github.com/kenwebb/f563b28b1cdcb3aa7cbeceaa1d07aca7
### The existing TypeScript code is as follows:
```typescript
// mini parser for wikipedia-styled formulae, as in https://en.wikipedia.org/wiki/International_System_of_Units
// ex: kg⋅m⋅s−2 kg−1⋅m−2⋅s3⋅A2
// see https://pint.readthedocs.io/en/stable/
// first step is to tokenize from string to string[]
// TODO
/**
 * parseWsf - convert a Wikipedia-styled unit formula into an expression
 * that uses the JavaScript "*" and "**" operators.
 *
 * Rules applied while scanning the input one character at a time:
 *  - "⋅" and "*" separate factors and become "*";
 *  - "−" (U+2212) and "-" start a negative exponent, written as "**-";
 *  - digits that directly follow a name are that name's exponent ("**");
 *  - digits with no preceding name (start of input or after a separator)
 *    are kept as a plain number, so no dangling "**" is produced;
 *  - a name that directly follows an exponent starts a new factor, i.e.
 *    "T−2L−2M" is read as "T**-2*L**-2*M" (implicit multiplication);
 *  - every other character is treated as part of a name.
 *
 * @param wsf - the formula text, e.g. "kg⋅m−1⋅s−2"
 * @returns the rewritten expression, e.g. "kg*m**-1*s**-2"
 */
const parseWsf = (wsf: string): string => {
  const tokens: string[] = [];
  let current = "";
  let readingDigits = false;

  // Move the pending operand (if any) to the token list and reset the scanner state.
  const flush = (): void => {
    if (current !== "") {
      tokens.push(current);
    }
    current = "";
    readingDigits = false;
  };

  // Emit an operator only directly after an operand, so that repeated
  // separators or a leading number never yield "**", "***" or a leading "*".
  const pushOperator = (op: string): void => {
    const last = tokens[tokens.length - 1];
    if (last !== undefined && last !== "*" && last !== "**") {
      tokens.push(op);
    }
  };

  for (const char of wsf) {
    if (char === "⋅" || char === "*") {
      flush();
      pushOperator("*");
    } else if (char === "−" || char === "-") {
      flush();
      pushOperator("**");
      current = "-"; // always emit the ASCII minus
      readingDigits = true;
    } else if (char >= "0" && char <= "9") {
      if (!readingDigits) {
        // first digit after a name: the name is complete, the exponent begins
        flush();
        pushOperator("**");
        readingDigits = true;
      }
      current += char;
    } else {
      // a lower or uppercase letter (or any other character)
      if (readingDigits && current !== "-" && char !== ".") {
        // a name right after a number: implicit multiplication
        flush();
        pushOperator("*");
      }
      current += char;
    }
  }
  flush();

  // A trailing separator must not leave a dangling operator behind.
  while (tokens.length > 0) {
    const last = tokens[tokens.length - 1];
    if (last !== "*" && last !== "**") {
      break;
    }
    tokens.pop();
  }
  return tokens.join("");
}
// Sample input: single- and multi-letter names combined with positive and negative exponents.
let example = "a12*bc3*def*ghij-1*k1*l-1*m-2*n-34"; // "kg⋅m⋅s−2"; "kg−1⋅m−2⋅s3⋅A2"; "a12*bc3*def*ghij-1*k1*l-1*m-2*n-34";
// NOTE(review): the sample outputs below are "|"-delimited, whereas parseWsf() joins its
// tokens with ""; they presumably come from an earlier debugging version - confirm.
//console.log(example, parseWsf(example)); // kg⋅m⋅s−2 kg|⋅|m|⋅|s|−2 kg−1⋅m−2⋅s3⋅A2 kg|−1|⋅|m|−2|⋅|s|3|⋅|A|2
// a12*bc3*def*ghij-1*k1*l-1*m-2*n-34
// a|12|*|bc|3|*|def|*|ghij|-1|*|k|1|*|l|-1|*|m|-2|*|n|-34
/* Dev Tools
var a=1,bc=2,def=3,ghij=1,k=1,l=4,m=1,n=1;
var x = a**12*bc**3*def*ghij**-1*k**1*l**-1*m**-2*n**-34;
console.log(x); // 6
*/
// from wikipedia
// pascal Pa pressure, stress kg⋅m−1⋅s−2
/**
 * pascal - evaluate the expression kg*m**-1*s**-2 (the pascal formula quoted above).
 *
 * @param kg - value substituted for kg
 * @param m - value substituted for m
 * @param s - value substituted for s
 * @returns kg / (m * s * s), computed with the exponentiation operator
 */
const pascal = (kg: number, m: number, s: number): number => kg * m ** -1 * s ** -2; // Pa pressure, stress
//console.log(`pascal: ${pascal(3, 4, 2)}`); // 0.1875
```
### References
(1) https://en.wikipedia.org/wiki/International_System_of_Units
(2) https://en.wikipedia.org/wiki/Dimensional_analysis
]]></Notes>
<_-.XholonClass>
<PhysicalSystem/>
<!-- quantities This might be useful here? as a comparison? -->
<Quant superClass="Quantity"/>
<WmfParser superClass="Script"/>
<Testin superClass="Script"/>
</_-.XholonClass>
<xholonClassDetails>
<!-- this works!
execute the generated code by putting it inside a Script tag, and appending that to parent node
-->
<WmfParser><DefaultContent><![CDATA[
var me, beh = {
postConfigure: function() {
me = this.cnode;
this.parse();
},
act: function() {
me.println(me.name());
},
parse: function() {
/**
 * parseWmf - parse a Wikipedia Math Formatted string.
 *
 * Rules applied while scanning the input one character at a time:
 *  - "⋅", "*", "×", "·" and " " separate factors and become "*";
 *    runs of separators collapse into a single "*";
 *  - "−" (U+2212) and "-" start a negative exponent, written as "**-";
 *  - digits that directly follow a name are that name's exponent ("**");
 *  - digits with no preceding name are kept as a plain number;
 *  - a name that directly follows an exponent starts a new factor, i.e.
 *    "T−2L−2M" is read as "T**-2*L**-2*M" (implicit multiplication);
 *  - "(" and ")" are skipped (TODO: grouping is not supported yet);
 *  - every other character is treated as part of a name.
 *
 * Each distinct name is reported once, in order of first appearance, and
 * that includes a name that ends the input (e.g. the "M" in "T−2⋅L−2⋅M").
 *
 * @param wmf - a Wikipedia Math Formatted string
 * @return an array [wmf, expression, names]: the unchanged input, the
 *   expression using "*" and "**", and the variable names joined with ","
 */
const parseWmf = (wmf) => {
  const SEP = ","; // = or ,
  const MULTIPLY_CHARS = "⋅*×· ";
  const MINUS_CHARS = "−-";
  const DIGIT_CHARS = "0123456789";
  const tokens = [];
  const varNames = [];
  let current = "";
  let readingDigits = false;

  // Move the pending operand (if any) to the token list; when it is a name
  // (not a number/exponent) remember it once as a variable name.
  const flush = () => {
    if (current !== "") {
      if (!readingDigits && !varNames.includes(current)) {
        varNames.push(current);
      }
      tokens.push(current);
    }
    current = "";
    readingDigits = false;
  };

  // Emit an operator only directly after an operand, so that repeated
  // separators or a leading number never yield "**", "***" or a leading "*".
  const pushOperator = (op) => {
    const last = tokens[tokens.length - 1];
    if (last !== undefined && last !== "*" && last !== "**") {
      tokens.push(op);
    }
  };

  for (const char of wmf) {
    if (MULTIPLY_CHARS.includes(char)) {
      flush();
      pushOperator("*");
    } else if (MINUS_CHARS.includes(char)) {
      flush();
      pushOperator("**");
      current = "-"; // always emit the ASCII minus
      readingDigits = true;
    } else if (char === "(" || char === ")") {
      // TODO: parentheses are ignored for now
    } else if (DIGIT_CHARS.includes(char)) {
      if (!readingDigits) {
        // first digit after a name: the name is complete, the exponent begins
        flush();
        pushOperator("**");
        readingDigits = true;
      }
      current += char;
    } else {
      // this is a lower or uppercase letter (or any other character)
      if (readingDigits && current !== "-" && char !== ".") {
        // a name right after a number: implicit multiplication
        flush();
        pushOperator("*");
      }
      current += char;
    }
  }
  flush();

  // A trailing separator must not leave a dangling operator behind.
  while (tokens.length > 0) {
    const last = tokens[tokens.length - 1];
    if (last !== "*" && last !== "**") {
      break;
    }
    tokens.pop();
  }
  return [wmf, tokens.join(""), varNames.join(SEP)];
};
/**
 * writeScript - generate a Xholon Script node, from the results of the parseWmf() function,
 * and append it to the parent of the node cached in "me".
 *
 * The generated postConfigure() declares every variable with the value 1,
 * evaluates the expression and logs the result.
 *
 * @param wmf - the original wmf-formatted string (copied into a comment of the generated script)
 * @param mexpr - an executable math expression (string) produced by parseWmf()
 * @param varNames - a string containing the variable names found by parseWmf(),
 *   separated by "," (or "="); empty entries are ignored
 * @param roleName - Xholon roleName
 * @return a Xholon Script node, as a string
 */
const writeScript = (wmf, mexpr, varNames, roleName) => {
  // Give each name its own initialiser ("a=1,bc=1"). A single "=1" appended
  // to a comma-separated list would initialise only the last name and leave
  // the others undefined, which makes the evaluated expression NaN.
  const initialisers = String(varNames)
    .split(/[,=]/)
    .map((name) => name.trim())
    .filter((name) => name !== "")
    .map((name) => `${name}=1`)
    .join(",");
  // No names (a purely numeric expression): emit no declaration at all.
  const declaration = initialisers === "" ? "" : `var ${initialisers};`;
  const script = `<Script roleName="${roleName}">var beh = {
// ${wmf}
postConfigure: function() {
${declaration}
var x = ${mexpr};
console.log("script${roleName}:", x);
}
}</Script>`;
  me.parent().append(script);
  return script;
}
/**
 * writeScript2 - generate a Xholon Script node, from the results of the parseWmf() function,
 * and append it to the parent of the node cached in "me".
 *
 * The generated postConfigure() wraps the expression in an immediately
 * invoked arrow function whose parameters are the variable names, and calls
 * it with 1 for every parameter, e.g. ((kg,m,q) => kg*m*q**-2)(1,1,1)
 *
 * @param wmf - the original wmf-formatted string (copied into a comment of the generated script)
 * @param mexpr - an executable math expression (string) produced by parseWmf()
 * @param varNames - a string containing the variable names found by parseWmf(),
 *   separated by "," (or "="); empty and repeated entries are ignored
 * @param roleName - Xholon roleName
 * @return a Xholon Script node, as a string
 */
const writeScript2 = (wmf, mexpr, varNames, roleName) => {
  // Empty or repeated parameter names would make the generated arrow
  // function a syntax error, so keep each non-empty name exactly once.
  const names = Array.from(
    new Set(
      String(varNames)
        .split(/[,=]/)
        .map((name) => name.trim())
        .filter((name) => name !== "")
    )
  );
  const params = names.join(",");
  const args = names.map(() => 1).join(",");
  const script = `<Script roleName="${roleName}">var beh = {
// ${wmf}
postConfigure: function() {
var x = ((${params}) => ${mexpr})(${args});
console.log("script${roleName}:", x);
}
}</Script>`;
  me.parent().append(script);
  return script;
}
// Testing
// Each sample is parsed, turned into a Script node by writeScript2() and
// logged; the roleName is the sample's 1-based position in this list.
const samples = [
  "a12*bc3*def*ghij-1*k1*l-1*m-2*n-34",
  "kg⋅m⋅q−2",
  "T−2⋅L−2⋅M1", // the final M does not get picked up unless I put a 1 after it
  // https://en.wikipedia.org/wiki/List_of_conversion_factors#Energy
  // 6.12×109
  "a×b9",
  // https://www.ibiblio.org/units/siderive.html
  "kg-1·m-2·s4·A2",
];
samples.forEach((sample, index) => {
  console.log(writeScript2(...parseWmf(sample), index + 1), "\n");
});
} // end parse() function
} // end beh
//# sourceURL=WmfParser.js
]]></DefaultContent></WmfParser>
<!-- test -->
<Testin><DefaultContent>
// Manual check of an expression in the form produced by the parser,
// evaluated with every variable set to 1.
console.log("testin Script");
// Declare each name explicitly. The chained form "var a = bc = ... = 1"
// declares only "a"; the other names become implicit globals and the
// statement throws a ReferenceError in strict mode.
var a = 1, bc = 1, def = 1, ghij = 1, k = 1, l = 1, m = 1, n = 1;
console.log(a, bc); // 1 1
var x = a**12*bc**3*def*ghij**-1*k**1*l**-1*m**-2*n**-34;
console.log(x); // 1
$wnd.xh.root().println(x); // 1
</DefaultContent></Testin>
</xholonClassDetails>
<PhysicalSystem>
<WmfParser/>
<Testin/>
</PhysicalSystem>
<SvgClient><Attribute_String roleName="svgUri"><![CDATA[data:image/svg+xml,
<svg width="100" height="50" xmlns="http://www.w3.org/2000/svg">
<g>
<title>WmfParser</title>
<rect id="PhysicalSystem/WmfParser" fill="#98FB98" height="50" width="50" x="25" y="0"/>
<g>
<title>Testin</title>
<rect id="PhysicalSystem/Testin" fill="#6AB06A" height="50" width="10" x="80" y="0"/>
</g>
</g>
</svg>
]]></Attribute_String><Attribute_String roleName="setup">${MODELNAME_DEFAULT},${SVGURI_DEFAULT}</Attribute_String></SvgClient>
</XholonWorkbook>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment