Created
September 7, 2025 18:55
-
-
Save nicolov/a24256322fe78780707668d12b86c884 to your computer and use it in GitHub Desktop.
Precedence climbing parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| [package] | |
| name = "prec-climbing" | |
| version = "0.1.0" | |
| edition = "2024" | |
| [dependencies] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // A precedence climbing parser for a simple programming language supporting: | |
| // - arithmetic expressions | |
| // - function calls | |
| // - unary operators | |
| // - variable assignment | |
| use std::fmt; | |
| #[derive(Debug, Clone, PartialEq)] | |
| pub enum Token { | |
| Ident(String), | |
| NumberLit(String), | |
| If, | |
| Then, | |
| Else, | |
| End, | |
| Less, | |
| Plus, | |
| Minus, | |
| Star, | |
| Slash, | |
| Assign, | |
| Comma, | |
| LParen, | |
| RParen, | |
| } | |
| #[derive(Debug)] | |
| struct OpInfo { | |
| precedence: u8, | |
| associativity: Associativity, | |
| name: &'static str, | |
| } | |
| #[derive(Debug, PartialEq)] | |
| enum Associativity { | |
| Left, | |
| Right, | |
| } | |
| static PREC_MIN: u8 = 1; | |
| static PREC_MAX: u8 = 100; | |
| fn get_op_info(op: &Token) -> Option<OpInfo> { | |
| match op { | |
| Token::Assign => Some(OpInfo { | |
| precedence: 2, | |
| associativity: Associativity::Right, | |
| name: "__assign", | |
| }), | |
| Token::Less => Some(OpInfo { | |
| precedence: 4, | |
| associativity: Associativity::Left, | |
| name: "__less", | |
| }), | |
| Token::Plus => Some(OpInfo { | |
| precedence: 5, | |
| associativity: Associativity::Left, | |
| name: "__add", | |
| }), | |
| Token::Minus => Some(OpInfo { | |
| precedence: 5, | |
| associativity: Associativity::Left, | |
| name: "__sub", | |
| }), | |
| Token::Star => Some(OpInfo { | |
| precedence: 6, | |
| associativity: Associativity::Left, | |
| name: "__mul", | |
| }), | |
| Token::Slash => Some(OpInfo { | |
| precedence: 6, | |
| associativity: Associativity::Left, | |
| name: "__div", | |
| }), | |
| _ => None, | |
| } | |
| } | |
| #[derive(Debug, PartialEq)] | |
| enum Expression { | |
| IntConst { | |
| value: i64, | |
| }, | |
| FunctionCall { | |
| name: String, | |
| args: Vec<Expression>, | |
| }, | |
| Variable { | |
| name: String, | |
| }, | |
| If { | |
| condition: Box<Expression>, | |
| then: Box<Expression>, | |
| else_: Option<Box<Expression>>, | |
| }, | |
| Assignment { | |
| name: String, | |
| value: Box<Expression>, | |
| }, | |
| } | |
| #[derive(Debug)] | |
| struct ParseError { | |
| message: String, | |
| } | |
| impl ParseError { | |
| fn new(msg: impl Into<String>) -> Self { | |
| ParseError { | |
| message: msg.into(), | |
| } | |
| } | |
| } | |
| impl fmt::Display for ParseError { | |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | |
| write!(f, "Parse error: {}", self.message) | |
| } | |
| } | |
| impl std::error::Error for ParseError {} | |
| struct Parser { | |
| tokens: std::iter::Peekable<std::vec::IntoIter<Token>>, | |
| } | |
| impl Parser { | |
| fn new(input: Vec<Token>) -> Self { | |
| Self { | |
| tokens: input.into_iter().peekable(), | |
| } | |
| } | |
| fn expect(&mut self, expected: Token) -> Result<(), ParseError> { | |
| let t = self.tokens.next(); | |
| match t { | |
| Some(t) if t == expected => Ok(()), | |
| Some(t) => Err(ParseError::new(format!( | |
| "Expected {:?}, got {:?}", | |
| expected, t | |
| ))), | |
| None => Err(ParseError::new("Unexpected end of input")), | |
| } | |
| } | |
| fn parse_primary(&mut self) -> Result<Expression, ParseError> { | |
| let mut expr = match self.tokens.peek() { | |
| Some(Token::Minus) => { | |
| self.tokens.next(); | |
| // Unary operator parsed with very high precedence. | |
| let operand = self.parse_expression(PREC_MAX)?; | |
| let args = vec![operand]; | |
| Ok(Expression::FunctionCall { | |
| name: "__neg".to_string(), | |
| args, | |
| }) | |
| } | |
| Some(Token::LParen) => { | |
| self.expect(Token::LParen)?; | |
| // Start parsing a subexpression inside parentheses, startin over from the | |
| // minimum precedence. | |
| let subexpr = self.parse_expression(PREC_MIN)?; | |
| self.expect(Token::RParen)?; | |
| Ok(subexpr) | |
| } | |
| Some(Token::Ident(s)) => { | |
| let name = s.clone(); | |
| self.tokens.next(); | |
| Ok(Expression::Variable { name }) | |
| } | |
| Some(Token::NumberLit(s)) => { | |
| match s.parse::<i64>() { | |
| Ok(x) => { | |
| self.tokens.next(); // consume number | |
| Ok(Expression::IntConst { value: x }) | |
| } | |
| Err(_) => Err(ParseError::new(format!("Invalid number literal: {}", s))), | |
| } | |
| } | |
| Some(Token::If) => { | |
| self.tokens.next(); | |
| let condition_expr = self.parse_expression(PREC_MIN)?; | |
| self.expect(Token::Then)?; | |
| let then_expr = self.parse_expression(PREC_MIN)?; | |
| let else_expr = if matches!(self.tokens.peek(), Some(&Token::Else)) { | |
| self.tokens.next(); | |
| Some(Box::new(self.parse_expression(PREC_MIN)?)) | |
| } else { | |
| None | |
| }; | |
| self.expect(Token::End)?; | |
| Ok(Expression::If { | |
| condition: Box::new(condition_expr), | |
| then: Box::new(then_expr), | |
| else_: else_expr, | |
| }) | |
| } | |
| None => Err(ParseError::new( | |
| "Unexpected end of input while parsing expression", | |
| )), | |
| _ => Err(ParseError::new(format!( | |
| "Unexpected token in expression: {:?}", | |
| self.tokens.peek() | |
| ))), | |
| }?; | |
| loop { | |
| match self.tokens.peek() { | |
| Some(Token::LParen) => { | |
| if let Expression::Variable { name } = expr { | |
| self.tokens.next(); // consume '(' | |
| let mut args = Vec::new(); | |
| if self.tokens.peek() != Some(&Token::RParen) { | |
| loop { | |
| args.push(self.parse_expression(PREC_MIN)?); | |
| if self.tokens.peek() == Some(&Token::Comma) { | |
| self.tokens.next(); // consume ',' | |
| } else { | |
| break; | |
| } | |
| } | |
| } | |
| self.expect(Token::RParen)?; | |
| expr = Expression::FunctionCall { name, args }; | |
| } else { | |
| return Err(ParseError::new( | |
| "Only identifiers can be called as functions", | |
| )); | |
| } | |
| } | |
| _ => break, | |
| } | |
| } | |
| Ok(expr) | |
| } | |
| fn parse_expression(&mut self, min_precedence: u8) -> Result<Expression, ParseError> { | |
| let mut primary_lhs = self.parse_primary()?; | |
| loop { | |
| if let Some(peek) = self.tokens.peek() | |
| && let Some(op_info) = get_op_info(peek) | |
| { | |
| if op_info.precedence < min_precedence { | |
| break; | |
| } | |
| // Consume the current operator token. | |
| self.tokens.next(); | |
| let next_min_precedence = match op_info.associativity { | |
| Associativity::Left => op_info.precedence + 1, | |
| Associativity::Right => op_info.precedence, | |
| }; | |
| let primary_rhs = self.parse_expression(next_min_precedence)?; | |
| // Rewrite assignments into the proper AST node. | |
| if op_info.name == "__assign" { | |
| // Check that the LHS is a variable (ie lvalue). | |
| if let Expression::Variable { name } = primary_lhs { | |
| return Ok(Expression::Assignment { | |
| name: name.to_string(), | |
| value: Box::new(primary_rhs), | |
| }); | |
| } else { | |
| return Err(ParseError::new(format!( | |
| "Invalid assignment: lhs is not a variable: {:?}", | |
| primary_lhs | |
| ))); | |
| } | |
| } else { | |
| primary_lhs = Expression::FunctionCall { | |
| name: op_info.name.to_string(), | |
| args: vec![primary_lhs, primary_rhs], | |
| }; | |
| } | |
| } else { | |
| break; | |
| } | |
| } | |
| return Ok(primary_lhs); | |
| } | |
| } | |
| #[cfg(test)] | |
| mod tests { | |
| use super::*; | |
| #[test] | |
| fn test_parse_expr_arithmetic() { | |
| // 1 + 2 * 3 / 4 | |
| let input = vec![ | |
| Token::NumberLit("1".to_string()), | |
| Token::Plus, | |
| Token::NumberLit("2".to_string()), | |
| Token::Star, | |
| Token::NumberLit("3".to_string()), | |
| Token::Slash, | |
| Token::NumberLit("4".to_string()), | |
| ]; | |
| let mut parser = Parser::new(input); | |
| let ast = parser.parse_expression(1).unwrap(); | |
| let ast_ref = Expression::FunctionCall { | |
| name: "__add".to_string(), | |
| args: vec![ | |
| Expression::IntConst { value: 1 }, | |
| Expression::FunctionCall { | |
| name: "__div".to_string(), | |
| args: vec![ | |
| Expression::FunctionCall { | |
| name: "__mul".to_string(), | |
| args: vec![ | |
| Expression::IntConst { value: 2 }, | |
| Expression::IntConst { value: 3 }, | |
| ], | |
| }, | |
| Expression::IntConst { value: 4 }, | |
| ], | |
| }, | |
| ], | |
| }; | |
| assert!(ast == ast_ref); | |
| } | |
| #[test] | |
| fn test_parse_expr_variable() { | |
| // x + 100 | |
| let input = vec![ | |
| Token::Ident("x".to_string()), | |
| Token::Plus, | |
| Token::NumberLit("100".to_string()), | |
| ]; | |
| let mut parser = Parser::new(input); | |
| let ast = parser.parse_expression(1).unwrap(); | |
| let ast_ref = Expression::FunctionCall { | |
| name: "__add".to_string(), | |
| args: vec![ | |
| Expression::Variable { | |
| name: "x".to_string(), | |
| }, | |
| Expression::IntConst { value: 100 }, | |
| ], | |
| }; | |
| assert!(ast == ast_ref); | |
| } | |
| #[test] | |
| fn test_parse_expr_function_call() { | |
| // fn(100, x) | |
| let input = vec![ | |
| Token::Ident("fn".to_string()), | |
| Token::LParen, | |
| Token::NumberLit("100".to_string()), | |
| Token::Comma, | |
| Token::Ident("x".to_string()), | |
| Token::RParen, | |
| ]; | |
| let mut parser = Parser::new(input); | |
| let ast = parser.parse_expression(1).unwrap(); | |
| dbg!(&ast); | |
| let ast_ref = Expression::FunctionCall { | |
| name: "fn".to_string(), | |
| args: vec![ | |
| Expression::IntConst { value: 100 }, | |
| Expression::Variable { | |
| name: "x".to_string(), | |
| }, | |
| ], | |
| }; | |
| assert!(ast == ast_ref); | |
| } | |
| #[test] | |
| fn test_parse_expr_if() { | |
| // if x < 100 then 100 else 200 end | |
| let input = vec![ | |
| Token::If, | |
| Token::Ident("x".to_string()), | |
| Token::Less, | |
| Token::NumberLit("100".to_string()), | |
| Token::Then, | |
| Token::NumberLit("100".to_string()), | |
| Token::Else, | |
| Token::NumberLit("200".to_string()), | |
| Token::End, | |
| ]; | |
| let mut parser = Parser::new(input); | |
| let ast = parser.parse_expression(1).unwrap(); | |
| let ast_ref = Expression::If { | |
| condition: Box::new(Expression::FunctionCall { | |
| name: "__less".to_string(), | |
| args: vec![ | |
| Expression::Variable { | |
| name: "x".to_string(), | |
| }, | |
| Expression::IntConst { value: 100 }, | |
| ], | |
| }), | |
| then: Box::new(Expression::IntConst { value: 100 }), | |
| else_: Some(Box::new(Expression::IntConst { value: 200 })), | |
| }; | |
| assert!(ast == ast_ref); | |
| } | |
| #[test] | |
| fn test_parse_expr_unary() { | |
| // -x | |
| let input = vec![Token::Minus, Token::Ident("x".to_string())]; | |
| let mut parser = Parser::new(input); | |
| let ast = parser.parse_expression(1).unwrap(); | |
| let ast_ref = Expression::FunctionCall { | |
| name: "__neg".to_string(), | |
| args: vec![Expression::Variable { | |
| name: "x".to_string(), | |
| }], | |
| }; | |
| assert!(ast == ast_ref); | |
| } | |
| #[test] | |
| fn test_parse_expr_assignment() { | |
| // x = 100 | |
| let input = vec![ | |
| Token::Ident("x".to_string()), | |
| Token::Assign, | |
| Token::NumberLit("100".to_string()), | |
| ]; | |
| let mut parser = Parser::new(input); | |
| let ast = parser.parse_expression(1).unwrap(); | |
| dbg!(&ast); | |
| let ast_ref = Expression::Assignment { | |
| name: "x".to_string(), | |
| value: Box::new(Expression::IntConst { value: 100 }), | |
| }; | |
| assert!(ast == ast_ref); | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment