Created
May 6, 2025 00:51
-
-
Save arockwell/5ce76c4f464f8c2c299ef4a62b72ef56 to your computer and use it in GitHub Desktop.
Complete refactor of number parsing for lexer - mega-mind edition
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // BEFORE: Complex, nested conditionals with inconsistent flow | |
| // AFTER: State machine approach with clear phases | |
| fn number(&mut self, graphemes: &[&str]) { | |
| let token_col = self.column - 2; | |
| let mut number = graphemes[self.column - 2].to_string(); | |
| // Use an enum to track parsing state | |
| enum NumberState { | |
| Digits, // Consuming integer digits | |
| AfterDecimal, // After decimal point, consuming fraction | |
| AfterE, // After 'e' or 'E', looking for sign or digits | |
| AfterSign, // After +/- in exponent, must have digit | |
| ExponentDigits, // Consuming exponent digits | |
| } | |
| let mut state = NumberState::Digits; | |
| let mut is_float = false; | |
| // Process until we complete the number | |
| while self.column - 1 < graphemes.len() { | |
| let grapheme = graphemes[self.column - 1]; | |
| let c = grapheme.chars().next().unwrap_or('\0'); | |
| match state { | |
| NumberState::Digits => { | |
| if c.is_ascii_digit() { | |
| number.push_str(grapheme); | |
| self.column += 1; | |
| } else if grapheme == "." { | |
| is_float = true; | |
| number.push_str(grapheme); | |
| self.column += 1; | |
| state = NumberState::AfterDecimal; | |
| } else if grapheme == "e" || grapheme == "E" { | |
| is_float = true; | |
| number.push_str(grapheme); | |
| self.column += 1; | |
| state = NumberState::AfterE; | |
| } else if grapheme == "_" { | |
| // Skip underscores in digits | |
| self.column += 1; | |
| } else { | |
| // Not part of the number | |
| break; | |
| } | |
| }, | |
| NumberState::AfterDecimal => { | |
| if c.is_ascii_digit() { | |
| number.push_str(grapheme); | |
| self.column += 1; | |
| } else if grapheme == "e" || grapheme == "E" { | |
| number.push_str(grapheme); | |
| self.column += 1; | |
| state = NumberState::AfterE; | |
| } else if grapheme == "_" { | |
| self.column += 1; | |
| } else if grapheme == "." { | |
| self.report_error( | |
| graphemes, | |
| "number may not contain multiple decimal points" | |
| ); | |
| return; | |
| } else { | |
| // Not part of the number | |
| break; | |
| } | |
| }, | |
| NumberState::AfterE => { | |
| if c.is_ascii_digit() { | |
| number.push_str(grapheme); | |
| self.column += 1; | |
| state = NumberState::ExponentDigits; | |
| } else if grapheme == "+" || grapheme == "-" { | |
| number.push_str(grapheme); | |
| self.column += 1; | |
| state = NumberState::AfterSign; | |
| } else { | |
| self.report_error( | |
| graphemes, | |
| "expected sign or digit after exponent marker" | |
| ); | |
| return; | |
| } | |
| }, | |
| NumberState::AfterSign => { | |
| if c.is_ascii_digit() { | |
| number.push_str(grapheme); | |
| self.column += 1; | |
| state = NumberState::ExponentDigits; | |
| } else { | |
| self.report_error( | |
| graphemes, | |
| "expected digit after exponent sign" | |
| ); | |
| return; | |
| } | |
| }, | |
| NumberState::ExponentDigits => { | |
| if c.is_ascii_digit() { | |
| number.push_str(grapheme); | |
| self.column += 1; | |
| } else if grapheme == "_" { | |
| self.column += 1; | |
| } else if grapheme == "e" || grapheme == "E" { | |
| self.report_error( | |
| graphemes, | |
| "number may not contain multiple exponents" | |
| ); | |
| return; | |
| } else { | |
| // Not part of the number | |
| break; | |
| } | |
| } | |
| } | |
| } | |
| // Validate final number state | |
| match state { | |
| NumberState::AfterDecimal if \!number.ends_with('.') => { | |
| // Valid - we have digits after decimal | |
| }, | |
| NumberState::AfterDecimal => { | |
| self.report_error( | |
| graphemes, | |
| "expected at least one digit after decimal point" | |
| ); | |
| return; | |
| }, | |
| NumberState::AfterE | NumberState::AfterSign => { | |
| self.report_error( | |
| graphemes, | |
| "incomplete exponent in number" | |
| ); | |
| return; | |
| }, | |
| _ => { | |
| // Other states are valid at end of number | |
| } | |
| } | |
| self.tokens.push(Token::new( | |
| if is_float { TokenType::Float } else { TokenType::Integer }, | |
| number, | |
| self.line, | |
| token_col, | |
| )); | |
| } | |
| // Helper for consistent error reporting | |
| fn report_error(&mut self, graphemes: &[&str], message: &str) { | |
| self.errors.push(LexError::new( | |
| graphemes.concat(), | |
| self.line, | |
| self.column - 1, | |
| message.to_owned(), | |
| )); | |
| } | |
| EOF < /dev/null |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment