Skip to content

Instantly share code, notes, and snippets.

@ndabas
Last active December 16, 2021 20:01
Show Gist options
  • Select an option

  • Save ndabas/be9b8d42ef38bbe450f2eb448492c1f8 to your computer and use it in GitHub Desktop.

Select an option

Save ndabas/be9b8d42ef38bbe450f2eb448492c1f8 to your computer and use it in GitHub Desktop.

Parsing Expression Grammars in PowerShell

Have you ever wanted to write a formal parser in PowerShell? No? Then this piece of code is for you.

I found the code walkthrough on the Parsing Expression Grammars - jq as a PEG engine page to be rather elegant. So I decided to re-implement it using another language in which no one would ever want to write a formal parser, PowerShell. This is the result.

The code is a proof-of-concept only. It works, but there is nowhere close to the level of error handling or performance optimization that might be expected of a production code base. However, the code is serviceable in my opinion, should someone want to continue this quixotic endeavor.

# https://github.com/stedolan/jq/wiki/Parsing-Expression-Grammars
<#
Sequence: e1 e2 e1 | e2
Ordered choice: e1 / e2 e1 // e2
Zero-or-more: e* star(E)
One-or-more: e+ plus(E)
Optional: e? optional(E)
And-predicate: &e amp(E)
Not-predicate: !e neg(E)
#>
# def star(E): (E | star(E)) // .;
function star {
param ($E)
process {
$x = $_ | & $E | star $E
if ($x) { $x } else { $_ }
}
}
# def plus(E): E | (plus(E) // . );
function plus {
param ($E)
process {
$x = $_ | & $E
if ($x) {
$y = $x | plus $E
if ($y) { $y } else { $x }
}
}
}
# def optional(E): E // .;
function optional {
param ($E)
process {
$x = $_ | & $E
if ($x) { $x } else { $_ }
}
}
# def amp(E): . as $in | E | $in;
function amp {
param ($E)
process {
$x = $_ | & $E
if ($x) { $_ }
}
}
# def neg(E): select( [E] == [] );
function neg {
param ($E)
process {
$x = $_ | & $E
if (-not $x) { $_ }
}
}
# def literal($s):
# select(.remainder | startswith($s))
# | .result += [$s]
# | .remainder |= .[$s | length :] ;
function literal {
param ([string]$s, [switch]$noresult)
process {
if ((($_.index + $s.Length) -le $_.input.Length) -and ($_.input.IndexOf($s, $_.index, $s.Length) -eq $_.index)) {
$r = $_.result
if (-not $noresult) { $r += $s }
@{ input = $_.input; index = $_.index + $s.Length; result = $r }
}
}
}
function match {
param([string]$re)
process {
if (($_.index -lt $_.input.Length) -and ($_.input.Substring($_.index) -match "^$re")) {
@{ input = $_.input; index = $_.index + $Matches[0].Length; result = $_.result + $Matches[0] }
}
}
}
# def nonempty: select( (.remainder | length) > 0 );
function nonempty {
process {
if ($_.input.Length -gt $_.index) {
$_
}
}
}
# def parse: {remainder: .} | S | .result;
function parse {
process {
@{ input = $_; index = 0; result = @() }
}
}
# def box(E):
# ((.result = null) | E) as $e
# | .remainder = $e.remainder
# | .result += [$e.result]; # the magic sauce
function box {
param ($E)
process {
$r = $_.result
$x = $_
$x.result = @()
$y = $x | & $E
if ($y) {
$y.result = $r + , $y.result
$y
}
}
}
<#
S ← &(A 'c') 'a'+ B !.
A ← 'a' A? 'b'
B ← 'b' B? 'c'
#>
# def A: literal("a") | optional(A) | literal("b");
function A {
process {
$_ | literal "a" | optional A | literal "b"
}
}
# def B: literal("b") | optional(B) | literal("c");
function B {
process {
$_ | literal "b" | optional B | literal "c"
}
}
# def S: amp(A | literal("c")) | plus(literal("a")) | B | neg(nonempty);
function S {
process {
$_ | amp { $_ | A | literal "c" } | plus { $_ | literal "a" } | B | neg { $_ | nonempty }
}
}
"aabbcc" | parse | S
"abc" | parse | S
"abbcc" | parse | S
# Expr ← Sum
# def Expr:
# Value ← [0-9]+ / '(' Expr ')'
# def Value: parseNumber( "[0-9]+" ) // (literal("(") | Expr | literal(")"));
# def Value: parseNumber( "[0-9]+" ) // (consume("[(]") | box(Expr) | consume("[)["));
function parseNumber {
param ([string]$re)
process {
$x = $_ | match $re
if ($x) {
$x.result[-1] = [int]$x.result[-1]
$x
}
}
}
function Value {
process {
$x = $_ | parseNumber "[0-9]+"
if ($x) { $x }
else {
$_ | literal "(" -noresult | box { $_ | Sum } | literal ")" -noresult
}
}
}
# Product ← Value (('*' / '/') Value)*
# def Product: (Value | star( (literal("*") // literal("/")) | Value));
function Product {
process {
$_ | Value | star { $_ | match "[*/]" | Value }
}
}
# Sum ← Product (('+' / '-') Product)*
# def Sum: (Product | star( (literal("+") // literal("-")) | Product));
function Sum {
process {
$_ | Product | star { $_ | match "[+-]" | Product }
}
}
# Sum ;
<#
def eval:
if type == "array" then
if length == 0 then null
else .[-1] |= eval
| if length == 1 then .[0]
else (.[:-2] | eval) as $v
| if .[-2] == "*" then $v * .[-1]
elif .[-2] == "/" then $v / .[-1]
elif .[-2] == "+" then $v + .[-1]
elif .[-2] == "-" then $v - .[-1]
else tostring|error
end
end
end
else .
end ;
#>
function eval {
param($r)
if ($r -is [array]) {
if ($r.Length -gt 0) {
$r[-1] = eval $r[-1]
}
if ($r.Length -eq 1) {
$r[0]
} else {
$v = eval $r[0..($r.Length - 3)]
switch ($r[-2]) {
"*" { $v * $r[-1] }
"/" { $v / $r[-1] }
"+" { $v + $r[-1] }
"-" { $v - $r[-1] }
}
}
} else {
$r
}
}
$p = "(1+2)*(3+4)+(1*1)" | parse | Sum
$p
eval $p.result
# def atoz: parse("[a-z]");
function atoz {
process {
$_ | match "[a-z]"
}
}
# def ing: literal("ing");
function ing {
process {
$_ | literal "ing"
}
}
# def Aplus_B(A; B): plus(neg(B) | A) | B;
function Aplus_B {
param ($A, $B)
process {
$_ | plus { $_ | neg $B | & $A } | & $B
}
}
# Aplus_B( atoz; ing )
"zxing" | parse | Aplus_B atoz ing
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment