Skip to content

Instantly share code, notes, and snippets.

@nicolov
Created September 7, 2025 18:55
Show Gist options
  • Select an option

  • Save nicolov/a24256322fe78780707668d12b86c884 to your computer and use it in GitHub Desktop.

Select an option

Save nicolov/a24256322fe78780707668d12b86c884 to your computer and use it in GitHub Desktop.
Precedence climbing parser
[package]
name = "prec-climbing"
version = "0.1.0"
edition = "2024"
[dependencies]
// A precedence climbing parser for a simple programming language supporting:
// - arithmetic expressions
// - function calls
// - unary operators
// - variable assignment
use std::fmt;
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
Ident(String),
NumberLit(String),
If,
Then,
Else,
End,
Less,
Plus,
Minus,
Star,
Slash,
Assign,
Comma,
LParen,
RParen,
}
#[derive(Debug)]
struct OpInfo {
precedence: u8,
associativity: Associativity,
name: &'static str,
}
#[derive(Debug, PartialEq)]
enum Associativity {
Left,
Right,
}
static PREC_MIN: u8 = 1;
static PREC_MAX: u8 = 100;
fn get_op_info(op: &Token) -> Option<OpInfo> {
match op {
Token::Assign => Some(OpInfo {
precedence: 2,
associativity: Associativity::Right,
name: "__assign",
}),
Token::Less => Some(OpInfo {
precedence: 4,
associativity: Associativity::Left,
name: "__less",
}),
Token::Plus => Some(OpInfo {
precedence: 5,
associativity: Associativity::Left,
name: "__add",
}),
Token::Minus => Some(OpInfo {
precedence: 5,
associativity: Associativity::Left,
name: "__sub",
}),
Token::Star => Some(OpInfo {
precedence: 6,
associativity: Associativity::Left,
name: "__mul",
}),
Token::Slash => Some(OpInfo {
precedence: 6,
associativity: Associativity::Left,
name: "__div",
}),
_ => None,
}
}
#[derive(Debug, PartialEq)]
enum Expression {
IntConst {
value: i64,
},
FunctionCall {
name: String,
args: Vec<Expression>,
},
Variable {
name: String,
},
If {
condition: Box<Expression>,
then: Box<Expression>,
else_: Option<Box<Expression>>,
},
Assignment {
name: String,
value: Box<Expression>,
},
}
#[derive(Debug)]
struct ParseError {
message: String,
}
impl ParseError {
fn new(msg: impl Into<String>) -> Self {
ParseError {
message: msg.into(),
}
}
}
impl fmt::Display for ParseError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "Parse error: {}", self.message)
}
}
impl std::error::Error for ParseError {}
struct Parser {
tokens: std::iter::Peekable<std::vec::IntoIter<Token>>,
}
impl Parser {
fn new(input: Vec<Token>) -> Self {
Self {
tokens: input.into_iter().peekable(),
}
}
fn expect(&mut self, expected: Token) -> Result<(), ParseError> {
let t = self.tokens.next();
match t {
Some(t) if t == expected => Ok(()),
Some(t) => Err(ParseError::new(format!(
"Expected {:?}, got {:?}",
expected, t
))),
None => Err(ParseError::new("Unexpected end of input")),
}
}
fn parse_primary(&mut self) -> Result<Expression, ParseError> {
let mut expr = match self.tokens.peek() {
Some(Token::Minus) => {
self.tokens.next();
// Unary operator parsed with very high precedence.
let operand = self.parse_expression(PREC_MAX)?;
let args = vec![operand];
Ok(Expression::FunctionCall {
name: "__neg".to_string(),
args,
})
}
Some(Token::LParen) => {
self.expect(Token::LParen)?;
// Start parsing a subexpression inside parentheses, startin over from the
// minimum precedence.
let subexpr = self.parse_expression(PREC_MIN)?;
self.expect(Token::RParen)?;
Ok(subexpr)
}
Some(Token::Ident(s)) => {
let name = s.clone();
self.tokens.next();
Ok(Expression::Variable { name })
}
Some(Token::NumberLit(s)) => {
match s.parse::<i64>() {
Ok(x) => {
self.tokens.next(); // consume number
Ok(Expression::IntConst { value: x })
}
Err(_) => Err(ParseError::new(format!("Invalid number literal: {}", s))),
}
}
Some(Token::If) => {
self.tokens.next();
let condition_expr = self.parse_expression(PREC_MIN)?;
self.expect(Token::Then)?;
let then_expr = self.parse_expression(PREC_MIN)?;
let else_expr = if matches!(self.tokens.peek(), Some(&Token::Else)) {
self.tokens.next();
Some(Box::new(self.parse_expression(PREC_MIN)?))
} else {
None
};
self.expect(Token::End)?;
Ok(Expression::If {
condition: Box::new(condition_expr),
then: Box::new(then_expr),
else_: else_expr,
})
}
None => Err(ParseError::new(
"Unexpected end of input while parsing expression",
)),
_ => Err(ParseError::new(format!(
"Unexpected token in expression: {:?}",
self.tokens.peek()
))),
}?;
loop {
match self.tokens.peek() {
Some(Token::LParen) => {
if let Expression::Variable { name } = expr {
self.tokens.next(); // consume '('
let mut args = Vec::new();
if self.tokens.peek() != Some(&Token::RParen) {
loop {
args.push(self.parse_expression(PREC_MIN)?);
if self.tokens.peek() == Some(&Token::Comma) {
self.tokens.next(); // consume ','
} else {
break;
}
}
}
self.expect(Token::RParen)?;
expr = Expression::FunctionCall { name, args };
} else {
return Err(ParseError::new(
"Only identifiers can be called as functions",
));
}
}
_ => break,
}
}
Ok(expr)
}
fn parse_expression(&mut self, min_precedence: u8) -> Result<Expression, ParseError> {
let mut primary_lhs = self.parse_primary()?;
loop {
if let Some(peek) = self.tokens.peek()
&& let Some(op_info) = get_op_info(peek)
{
if op_info.precedence < min_precedence {
break;
}
// Consume the current operator token.
self.tokens.next();
let next_min_precedence = match op_info.associativity {
Associativity::Left => op_info.precedence + 1,
Associativity::Right => op_info.precedence,
};
let primary_rhs = self.parse_expression(next_min_precedence)?;
// Rewrite assignments into the proper AST node.
if op_info.name == "__assign" {
// Check that the LHS is a variable (ie lvalue).
if let Expression::Variable { name } = primary_lhs {
return Ok(Expression::Assignment {
name: name.to_string(),
value: Box::new(primary_rhs),
});
} else {
return Err(ParseError::new(format!(
"Invalid assignment: lhs is not a variable: {:?}",
primary_lhs
)));
}
} else {
primary_lhs = Expression::FunctionCall {
name: op_info.name.to_string(),
args: vec![primary_lhs, primary_rhs],
};
}
} else {
break;
}
}
return Ok(primary_lhs);
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_expr_arithmetic() {
// 1 + 2 * 3 / 4
let input = vec![
Token::NumberLit("1".to_string()),
Token::Plus,
Token::NumberLit("2".to_string()),
Token::Star,
Token::NumberLit("3".to_string()),
Token::Slash,
Token::NumberLit("4".to_string()),
];
let mut parser = Parser::new(input);
let ast = parser.parse_expression(1).unwrap();
let ast_ref = Expression::FunctionCall {
name: "__add".to_string(),
args: vec![
Expression::IntConst { value: 1 },
Expression::FunctionCall {
name: "__div".to_string(),
args: vec![
Expression::FunctionCall {
name: "__mul".to_string(),
args: vec![
Expression::IntConst { value: 2 },
Expression::IntConst { value: 3 },
],
},
Expression::IntConst { value: 4 },
],
},
],
};
assert!(ast == ast_ref);
}
#[test]
fn test_parse_expr_variable() {
// x + 100
let input = vec![
Token::Ident("x".to_string()),
Token::Plus,
Token::NumberLit("100".to_string()),
];
let mut parser = Parser::new(input);
let ast = parser.parse_expression(1).unwrap();
let ast_ref = Expression::FunctionCall {
name: "__add".to_string(),
args: vec![
Expression::Variable {
name: "x".to_string(),
},
Expression::IntConst { value: 100 },
],
};
assert!(ast == ast_ref);
}
#[test]
fn test_parse_expr_function_call() {
// fn(100, x)
let input = vec![
Token::Ident("fn".to_string()),
Token::LParen,
Token::NumberLit("100".to_string()),
Token::Comma,
Token::Ident("x".to_string()),
Token::RParen,
];
let mut parser = Parser::new(input);
let ast = parser.parse_expression(1).unwrap();
dbg!(&ast);
let ast_ref = Expression::FunctionCall {
name: "fn".to_string(),
args: vec![
Expression::IntConst { value: 100 },
Expression::Variable {
name: "x".to_string(),
},
],
};
assert!(ast == ast_ref);
}
#[test]
fn test_parse_expr_if() {
// if x < 100 then 100 else 200 end
let input = vec![
Token::If,
Token::Ident("x".to_string()),
Token::Less,
Token::NumberLit("100".to_string()),
Token::Then,
Token::NumberLit("100".to_string()),
Token::Else,
Token::NumberLit("200".to_string()),
Token::End,
];
let mut parser = Parser::new(input);
let ast = parser.parse_expression(1).unwrap();
let ast_ref = Expression::If {
condition: Box::new(Expression::FunctionCall {
name: "__less".to_string(),
args: vec![
Expression::Variable {
name: "x".to_string(),
},
Expression::IntConst { value: 100 },
],
}),
then: Box::new(Expression::IntConst { value: 100 }),
else_: Some(Box::new(Expression::IntConst { value: 200 })),
};
assert!(ast == ast_ref);
}
#[test]
fn test_parse_expr_unary() {
// -x
let input = vec![Token::Minus, Token::Ident("x".to_string())];
let mut parser = Parser::new(input);
let ast = parser.parse_expression(1).unwrap();
let ast_ref = Expression::FunctionCall {
name: "__neg".to_string(),
args: vec![Expression::Variable {
name: "x".to_string(),
}],
};
assert!(ast == ast_ref);
}
#[test]
fn test_parse_expr_assignment() {
// x = 100
let input = vec![
Token::Ident("x".to_string()),
Token::Assign,
Token::NumberLit("100".to_string()),
];
let mut parser = Parser::new(input);
let ast = parser.parse_expression(1).unwrap();
dbg!(&ast);
let ast_ref = Expression::Assignment {
name: "x".to_string(),
value: Box::new(Expression::IntConst { value: 100 }),
};
assert!(ast == ast_ref);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment