Last active
November 6, 2025 15:46
-
-
Save kenwebb/f563b28b1cdcb3aa7cbeceaa1d07aca7 to your computer and use it in GitHub Desktop.
Wikipedia Math Format - Parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?xml version="1.0" encoding="UTF-8"?> | |
| <!--Xholon Workbook http://www.primordion.com/Xholon/gwt/ MIT License, Copyright (C) Ken Webb, Thu Nov 06 2025 10:45:44 GMT-0500 (Eastern Standard Time)--> | |
| <XholonWorkbook> | |
| <Notes><![CDATA[ | |
| Xholon | |
| ------ | |
| Title: Wikipedia Math Format - Parser | |
| Description: | |
| Url: http://www.primordion.com/Xholon/gwt/ | |
| InternalName: f563b28b1cdcb3aa7cbeceaa1d07aca7 | |
| Keywords: | |
| My Notes | |
| -------- | |
| 2025 Nov 4 | |
| Some Wikipedia pages (see ref[1]) use a special format for Math expressions. | |
| For example: | |
| kg⋅m−1⋅s−2 | |
| L−2⋅J | |
| T−2L−2M | |
| As part of a TypeScript Node.js project, I have started to write a parser for this format. | |
| See: | |
| r-p:~/A2025_Oct/TsConvTool$ src/conv.ts | |
| and see the later JS node version: | |
| r-p:~/A2025_Nov$ WmfParser.js | |
| In this workbook I implement the parser using Xholon. | |
| Saved at: | |
| https://gist.github.com/kenwebb/f563b28b1cdcb3aa7cbeceaa1d07aca7 | |
| ### The existing TypeScript code is as follows: | |
| ```typescript | |
| // mini parser for wikipedia-styled formulae, as in https://en.wikipedia.org/wiki/International_System_of_Units | |
| // ex: kg⋅m⋅s−2 kg−1⋅m−2⋅s3⋅A2 | |
| // see https://pint.readthedocs.io/en/stable/ | |
| // first step is to tokenize from string to string[] | |
| // TODO | |
/**
 * Converts a Wikipedia-styled unit formula into a JavaScript-style arithmetic
 * expression.
 *
 * - Any character that is not a separator, minus sign or digit is accumulated
 *   into the current symbol (e.g. "kg").
 * - "⋅" and "*" become the multiplication operator "*".
 * - A run of digits, optionally introduced by "−" or "-", becomes a "**"
 *   exponent on whatever precedes it.
 * - A symbol that directly follows an exponent (e.g. "T−2L−2M") is treated as
 *   an implicit multiplication, so the exponent and the next symbol are not
 *   glued together.
 *
 * @param wsf - the formula, e.g. "kg⋅m−1⋅s−2"
 * @returns the expression as a string, e.g. "kg*m**-1*s**-2"
 */
const parseWsf = (wsf: string): string => {
  const tokens: string[] = [];
  let current = ""; // symbol or exponent currently being accumulated
  let readingDigits = false; // true while `current` holds an exponent
  for (const char of wsf) {
    switch (char) {
      case "⋅":
      case "*":
        tokens.push(current, "*");
        current = "";
        readingDigits = false;
        break;
      case "−":
      case "-":
        // A minus sign always starts a (negative) exponent.
        tokens.push(current, "**");
        current = "-";
        readingDigits = true;
        break;
      case "0":
      case "1":
      case "2":
      case "3":
      case "4":
      case "5":
      case "6":
      case "7":
      case "8":
      case "9":
        if (readingDigits) {
          current += char;
        } else {
          // First digit after a symbol: close the symbol and start its exponent.
          tokens.push(current, "**");
          current = char;
          readingDigits = true;
        }
        break;
      default:
        // Treated as part of a symbol name (a lower or uppercase letter).
        if (readingDigits) {
          // The exponent has ended without an explicit separator:
          // emit it and multiply implicitly by the symbol that starts here.
          tokens.push(current, "*");
          current = "";
          readingDigits = false;
        }
        current += char;
        break;
    }
  }
  tokens.push(current);
  return tokens.join("");
};
// Sample input for parseWsf. Other inputs that were tried:
//   "kg⋅m⋅s−2"   "kg−1⋅m−2⋅s3⋅A2"   "a12*bc3*def*ghij-1*k1*l-1*m-2*n-34"
const example = "a12*bc3*def*ghij-1*k1*l-1*m-2*n-34";
//console.log(example, parseWsf(example));
// Token boundaries recorded while developing the tokenizer (tokens joined with "|"):
//   kg⋅m⋅s−2         ->  kg|⋅|m|⋅|s|−2
//   kg−1⋅m−2⋅s3⋅A2   ->  kg|−1|⋅|m|−2|⋅|s|3|⋅|A|2
//   a12*bc3*def*ghij-1*k1*l-1*m-2*n-34
//                    ->  a|12|*|bc|3|*|def|*|ghij|-1|*|k|1|*|l|-1|*|m|-2|*|n|-34
/* Dev Tools
var a=1,bc=2,def=3,ghij=1,k=1,l=4,m=1,n=1;
var x = a**12*bc**3*def*ghij**-1*k**1*l**-1*m**-2*n**-34;
console.log(x); // 6
*/

// from wikipedia
// pascal  Pa  pressure, stress  kg⋅m−1⋅s−2
/**
 * Evaluates the pascal (Pa; pressure, stress) expression kg⋅m−1⋅s−2.
 *
 * @param kg - value substituted for the kg term
 * @param m - value substituted for the m term
 * @param s - value substituted for the s term
 * @returns kg * m^-1 * s^-2
 */
const pascal = (kg: number, m: number, s: number): number => kg * m ** -1 * s ** -2; // Pa pressure, stress
//console.log(`pascal: ${pascal(3, 4, 2)}`); // 0.1875
| ``` | |
| ### References | |
| (1) https://en.wikipedia.org/wiki/International_System_of_Units | |
| (2) https://en.wikipedia.org/wiki/Dimensional_analysis | |
| ]]></Notes> | |
| <_-.XholonClass> | |
| <PhysicalSystem/> | |
| <!-- quantities This might be useful here? as a comparison? --> | |
| <Quant superClass="Quantity"/> | |
| <WmfParser superClass="Script"/> | |
| <Testin superClass="Script"/> | |
| </_-.XholonClass> | |
| <xholonClassDetails> | |
| <!-- this works! | |
| execute the generated code by putting it inside a Script tag, and appending that to parent node | |
| --> | |
| <WmfParser><DefaultContent><![CDATA[ | |
| var me, beh = { | |
| postConfigure: function() { | |
| me = this.cnode; | |
| this.parse(); | |
| }, | |
| act: function() { | |
| me.println(me.name()); | |
| }, | |
| parse: function() { | |
/**
 * parseWmf - parse a Wikipedia Math Formatted (WMF) string.
 *
 * - "⋅", "*", "×", "·" and a space are multiplication separators.
 * - "−" or "-" starts a negative exponent; a digit after a name starts a
 *   positive exponent. Both are emitted as "**".
 * - A name that directly follows an exponent (e.g. "T−2L−2M") is an implicit
 *   multiplication.
 * - "(" and ")" are currently skipped (TODO).
 *
 * Every distinct variable name is reported once, in order of first
 * appearance, including a name that ends the input.
 *
 * @param wmf - a Wikipedia Math Formatted string, e.g. "kg⋅m⋅q−2"
 * @return a three-element array: the original wmf string, the executable
 *   expression (e.g. "kg*m*q**-2"), and the variable names joined by ","
 *   (e.g. "kg,m,q")
 */
const parseWmf = (wmf) => {
  const SEP = ","; // = or ,
  const tokens = [];
  const varNames = [];
  let current = ""; // name or exponent currently being accumulated
  let readingDigits = false; // true while `current` holds an exponent

  // Emits `current` as a token. When it is a non-empty name (not an exponent)
  // that has not been seen before, it is also recorded as a variable name.
  const flush = () => {
    if (!readingDigits && current !== "" && !varNames.includes(current)) {
      varNames.push(current);
    }
    tokens.push(current);
  };

  for (const char of wmf) {
    switch (char) {
      case "⋅":
      case "*":
      case "×":
      case "·":
      case " ":
        flush();
        tokens.push("*");
        current = "";
        readingDigits = false;
        break;
      case "−":
      case "-":
        flush();
        tokens.push("**");
        current = "-";
        readingDigits = true;
        break;
      case "(": break; // TODO
      case ")": break; // TODO
      case "0":
      case "1":
      case "2":
      case "3":
      case "4":
      case "5":
      case "6":
      case "7":
      case "8":
      case "9":
        if (readingDigits) {
          current += char;
        } else {
          // First digit after a name: close the name and start its exponent.
          flush();
          tokens.push("**");
          current = char;
          readingDigits = true;
        }
        break;
      default:
        // Treated as part of a variable name (a lower or uppercase letter).
        if (readingDigits) {
          // The exponent ended without an explicit separator:
          // multiply implicitly by the name that starts here.
          flush();
          tokens.push("*");
          current = "";
          readingDigits = false;
        }
        current += char;
        break;
    }
  }
  // Emit whatever is left; this is what records a trailing name such as the
  // final "M" in "T−2⋅L−2⋅M".
  flush();
  return [wmf, tokens.join(""), varNames.join(SEP)];
};
/**
 * writeScript - generate a Xholon Script node from the results of the parseWmf() function,
 * and append it to the parent of the `me` node.
 *
 * The generated postConfigure() declares every variable with the value 1,
 * evaluates the expression and logs the result to the console.
 *
 * @param wmf - the original wmf-formatted string (written into the script as a comment)
 * @param mexpr - an executable math expression (string) produced by parseWmf()
 * @param varNames - a string containing the comma-separated variable names found by parseWmf()
 * @param roleName - Xholon roleName
 * @return a Xholon Script node, as a string
 */
const writeScript = (wmf, mexpr, varNames, roleName) => {
  // Give every variable its own "=1" initializer: in "var a,b,c=1" only c is
  // initialized, which would leave a and b undefined and make the result NaN.
  const declarations = String(varNames)
    .split(",")
    .filter((name) => name !== "")
    .map((name) => `${name}=1`)
    .join(",");
  // No variables means no var statement at all ("var =1;" is a syntax error).
  const varStatement = declarations === "" ? "" : `var ${declarations};`;
  let script = `<Script roleName="${roleName}">var beh = {
// ${wmf}
postConfigure: function() {
${varStatement}
var x = ${mexpr};
console.log("script${roleName}:", x);
}
}</Script>`;
  me.parent().append(script);
  return script;
};
/**
 * writeScript2 - build a Xholon Script node (as text) from parseWmf() output,
 * append it to the parent of the `me` node, and hand the text back.
 *
 * The expression is wrapped in an arrow function that is called immediately
 * with the value 1 for every parameter, for example:
 *   ((kg,m,q) => kg*m*q**-2)(1,1,1)
 *
 * @param wmf - the original wmf-formatted string (written into the script as a comment)
 * @param mexpr - an executable math expression (string) produced by parseWmf()
 * @param varNames - a string containing the comma-separated variable names found by parseWmf()
 * @param roleName - Xholon roleName
 * @return a Xholon Script node, as a string
 */
const writeScript2 = (wmf, mexpr, varNames, roleName) => {
  // One literal 1 per comma-separated name, e.g. "kg,m,q" gives "1,1,1".
  const ones = varNames.split(",").map(() => 1).join(",");
  const scriptXml = `<Script roleName="${roleName}">var beh = {
// ${wmf}
postConfigure: function() {
var x = ((${varNames}) => ${mexpr})(${ones});
console.log("script${roleName}:", x);
}
}</Script>`;
  me.parent().append(scriptXml);
  return scriptXml;
};
// Testing
// Each sample is parsed, turned into a Script node by writeScript2() using
// roleNames 1, 2, 3, ... in order, and the generated text is logged.
const samples = [
  "a12*bc3*def*ghij-1*k1*l-1*m-2*n-34",
  "kg⋅m⋅q−2",
  "T−2⋅L−2⋅M1", // the final M does not get picked up unless I put a 1 after it
  // https://en.wikipedia.org/wiki/List_of_conversion_factors#Energy
  // 6.12×109
  "a×b9",
  // https://www.ibiblio.org/units/siderive.html
  "kg-1·m-2·s4·A2",
];
samples.forEach((sample, index) => {
  console.log(writeScript2(...parseWmf(sample), index + 1), "\n");
});
| } // end parse() function | |
| } // end beh | |
| //# sourceURL=WmfParser.js | |
| ]]></DefaultContent></WmfParser> | |
| <!-- test --> | |
| <Testin><DefaultContent> | |
// Hand-written check of the expression generated for
// "a12*bc3*def*ghij-1*k1*l-1*m-2*n-34", with every variable set to 1.
console.log("testin Script");
// Declare each variable explicitly. The chained form "var a = bc = ... = 1"
// declares only a; the other names become implicit globals.
var a = 1, bc = 1, def = 1, ghij = 1, k = 1, l = 1, m = 1, n = 1;
console.log(a, bc); // 1 1
var x = a**12*bc**3*def*ghij**-1*k**1*l**-1*m**-2*n**-34;
console.log(x); // 1
$wnd.xh.root().println(x); // 1
| </DefaultContent></Testin> | |
| </xholonClassDetails> | |
| <PhysicalSystem> | |
| <WmfParser/> | |
| <Testin/> | |
| </PhysicalSystem> | |
| <SvgClient><Attribute_String roleName="svgUri"><![CDATA[data:image/svg+xml, | |
| <svg width="100" height="50" xmlns="http://www.w3.org/2000/svg"> | |
| <g> | |
| <title>WmfParser</title> | |
| <rect id="PhysicalSystem/WmfParser" fill="#98FB98" height="50" width="50" x="25" y="0"/> | |
| <g> | |
| <title>Testin</title> | |
| <rect id="PhysicalSystem/Testin" fill="#6AB06A" height="50" width="10" x="80" y="0"/> | |
| </g> | |
| </g> | |
| </svg> | |
| ]]></Attribute_String><Attribute_String roleName="setup">${MODELNAME_DEFAULT},${SVGURI_DEFAULT}</Attribute_String></SvgClient> | |
| </XholonWorkbook> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment