Created
May 9, 2025 21:05
-
-
Save jeffersonmourak/0e59cf0da7a263e456918b3066c6ccb8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // | |
| // tokens.swift | |
| // compilers | |
| // | |
| // Created by Jefferson Oliveira on 5/8/25. | |
| // | |
| struct TokenContext { | |
| var input: [Character] | |
| var cursor: Int | |
| var output: [Token] | |
| var stack: [Character] | |
| var parenthesisStartCursors: [Int] | |
| // Move the parser cursor to next character | |
| func next( | |
| _ context: TokenContext | |
| ) -> TokenContext { | |
| .init(context.input, context.cursor + 1, context.output, context.stack, context.parenthesisStartCursors) | |
| } | |
| func next() -> TokenContext { | |
| self.next(self) | |
| } | |
| func next( | |
| input: [Character]? = nil, | |
| cursor: Int? = nil, | |
| output: [Token]? = nil, | |
| stack: [Character]? = nil, | |
| staleParenthesis: [Int]? = nil | |
| ) -> TokenContext { | |
| self.next( | |
| self.mutate(input, cursor, output, stack, staleParenthesis) | |
| ) | |
| } | |
| // Mutates the current context with new data | |
| func mutate( | |
| _ input: [Character]? = nil, | |
| _ cursor: Int? = nil, | |
| _ output: [Token]? = nil, | |
| _ stack: [Character]? = nil, | |
| _ staleParenthesis: [Int]? = nil | |
| ) -> TokenContext { | |
| .init( | |
| input ?? self.input, | |
| cursor ?? self.cursor, | |
| output ?? self.output, | |
| stack ?? self.stack, | |
| staleParenthesis ?? self.parenthesisStartCursors | |
| ) | |
| } | |
| func mutate( | |
| input: [Character]? = nil, | |
| cursor: Int? = nil, | |
| output: [Token]? = nil, | |
| stack: [Character]? = nil, | |
| staleParenthesis: [Int]? = nil | |
| ) -> TokenContext { | |
| self.mutate(input, cursor, output, stack, staleParenthesis) | |
| } | |
| // initialization helpers | |
| init( | |
| _ input: [Character] = [], | |
| _ cursor: Int = 0, | |
| _ output: [Token] = [], | |
| _ stack: [Character] = [], | |
| _ staleParenthesis: [Int] = [] | |
| ) { | |
| self.input = input | |
| self.cursor = cursor | |
| self.output = output | |
| self.stack = stack | |
| self.parenthesisStartCursors = staleParenthesis | |
| } | |
| init(_ input: [Character] = []) { | |
| self.init(input, 0, [], [], []) | |
| } | |
| } | |
| // MARK: Parsers type and definition | |
| // Categorizes the accepted token parsers | |
| enum ParserType: CaseIterable { | |
| case Numeral, Parenthesis, Operator, Space | |
| static func get(_ char: Character) -> (any ParseDefinition)? { | |
| switch ParserType.matchType(char) { | |
| case .Numeral: NumeralParser() | |
| case .Parenthesis: ParenthesisParser() | |
| case .Operator: OperatorParser() | |
| case .Space: SpaceParser() | |
| default: nil | |
| } | |
| } | |
| static func matchType(_ char: Character) -> ParserType? { | |
| for parser in ParserType.allCases { | |
| switch parser { | |
| case .Numeral: if NumeralParser.match(char) { return .Numeral }; continue | |
| case .Parenthesis: if ParenthesisParser.match(char) { return .Parenthesis }; continue | |
| case .Operator: if OperatorParser.match(char) { return .Operator }; continue | |
| case .Space: if SpaceParser.match(char) { return .Space }; continue | |
| } | |
| } | |
| return nil | |
| } | |
| } | |
| // Protocol for a Parser struct | |
| protocol ParseDefinition { | |
| var matchType: ParserType { get } | |
| static func match(_ char: Character) -> Bool | |
| func assert(_ context: TokenContext) throws -> Void | |
| func parse(_ context: TokenContext) -> TokenContext | |
| } | |
| // MARK: Space token parser | |
| struct SpaceParser: ParseDefinition { | |
| var matchType: ParserType { get { .Space } } | |
| static func match(_ char: Character) -> Bool { char == " " } | |
| func assert(_ context: TokenContext) throws {} | |
| func parse(_ context: TokenContext) -> TokenContext { context } | |
| } | |
| // MARK: Numeral parser | |
| struct NumeralParser: ParseDefinition { | |
| var matchType: ParserType { get { .Numeral } } | |
| static func match(_ char: Character) -> Bool { | |
| return String(char).firstMatch(of: /[0-9]/) != nil | |
| } | |
| func assert(_ context: TokenContext) throws {} | |
| func parse(_ context: TokenContext) -> TokenContext { | |
| var cursor = context.cursor | |
| var numberString = "\(context.input[cursor])" | |
| var output = context.output | |
| while cursor + 1 < context.input.count | |
| && Number(String(context.input[cursor + 1])) != nil | |
| { | |
| numberString = "\(numberString)\(context.input[cursor + 1])" | |
| cursor += 1 | |
| } | |
| output.append(.ValueToken(Number(numberString)!)) | |
| return context.mutate(cursor: cursor, output: output) | |
| } | |
| } | |
| // MARK: Parenthesis parser | |
| struct ParenthesisParser: ParseDefinition { | |
| var matchType: ParserType { get { .Parenthesis } } | |
| static func match(_ char: Character) -> Bool { | |
| return char == "(" || char == ")" | |
| } | |
| func assert(_ context: TokenContext) throws { | |
| let cursor = context.cursor | |
| let char = context.input[cursor] | |
| if char == "(" { | |
| if cursor > 0 { | |
| let offset = skipSpaces(context.input, from: cursor, to: 0) | |
| // check if previous non white-space character is a number | |
| if Number(String(context.input[cursor - offset])) != nil { | |
| // Opening parenthesis must come after an operation or begin of input | |
| throw TokenParsingError(cursor: [cursor - offset], input: context.input) | |
| } | |
| } | |
| return | |
| } | |
| // Check if there are no parenthesis opened | |
| if context.parenthesisStartCursors.count == 0 { | |
| // Cannot close a parenthesis when there's nothing to close | |
| throw TokenParsingError(cursor: [cursor], input: context.input) | |
| } | |
| if cursor < context.input.count - 1 { | |
| let offset = skipSpaces(context.input, from: cursor, to: context.input.count - 1) | |
| // check if next non white-space character is a number | |
| if Number(String(context.input[cursor + offset])) != nil { | |
| // Closing parenthesis must come after an operation or begin of input | |
| throw TokenParsingError(cursor: [cursor + offset], input: context.input) | |
| } | |
| } | |
| } | |
| func parse(_ context: TokenContext) -> TokenContext { | |
| let cursor = context.cursor | |
| let char = context.input[cursor] | |
| var stack = context.stack | |
| var output = context.output | |
| var parenthesisStartCursors = context.parenthesisStartCursors | |
| if char == "(" { | |
| stack.append(char) | |
| parenthesisStartCursors.append(cursor) | |
| return context.mutate(cursor: cursor, stack: stack, staleParenthesis: parenthesisStartCursors) | |
| } | |
| while stack.count != 0 && stack[stack.count - 1] != "(" { | |
| let op = Operator.getOperator(stack.popLast()!)! | |
| output.append(.OperatorToken(op)) | |
| } | |
| _ = stack.popLast() | |
| _ = parenthesisStartCursors.popLast() | |
| return context.mutate(cursor: cursor, output: output, stack: stack, staleParenthesis: parenthesisStartCursors) | |
| } | |
| } | |
| // MARK: Operator parser | |
| struct OperatorParser: ParseDefinition { | |
| var matchType: ParserType { get { .Operator } } | |
| static func match(_ char: Character) -> Bool { | |
| return Operator.getOperator(char) != nil | |
| } | |
| func assert(_ context: TokenContext) throws { | |
| let cursor = context.cursor | |
| if cursor == 0 || cursor == context.input.count - 1 { | |
| // Operators cannot be placed to begin or end of an input | |
| throw TokenParsingError(cursor: [cursor], input: context.input) | |
| } | |
| if cursor > 0 { | |
| let offset = skipSpaces(context.input, from: cursor, to: 0) | |
| let prevChar = context.input[cursor - offset] | |
| // checks if previous non white-space char is not an parenthesis closing neither a number | |
| if prevChar != ")" && Number(String(context.input[cursor - offset])) == nil { | |
| // Operators can only be placed between numbers or parenthesis | |
| throw TokenParsingError(cursor: [cursor], input: context.input) | |
| } | |
| } else if cursor < context.input.count { | |
| let offset = skipSpaces(context.input, from: cursor, to: context.input.count - 1) | |
| let nextChar = context.input[cursor + offset] | |
| // checks if previous non white-space char is not an parenthesis closing neither a number | |
| if nextChar != "(" && Number(String(context.input[cursor + offset])) == nil { | |
| // Operators can only be placed between numbers or parenthesis | |
| throw TokenParsingError(cursor: [cursor], input: context.input) | |
| } | |
| } | |
| } | |
| func parse(_ context: TokenContext) -> TokenContext { | |
| let cursor = context.cursor | |
| let char = context.input[cursor] | |
| var stack = context.stack | |
| var output = context.output | |
| while ( | |
| stack.count != 0 && | |
| Operator.getOperator(stack[stack.count - 1])?.getPrecedence() ?? 0 >= Operator.getOperator(char)?.getPrecedence() ?? 0 | |
| ) | |
| { | |
| output.append( | |
| .OperatorToken( | |
| Operator.getOperator(stack.popLast()!)! | |
| ) | |
| ) | |
| } | |
| stack.append(char) | |
| return context.mutate(cursor: cursor, output: output, stack: stack) | |
| } | |
| } | |
| struct TokenParsingError: Error { | |
| let cursor: [Int] | |
| let input: [Character] | |
| } | |
| func parseTokens(_ input: [Character], disableSpace: Bool = false, multipleErrors: Bool = false) throws -> [Token] { | |
| var context: TokenContext = .init(input) | |
| var errorCursors: [Int] = [] | |
| // Look all the tokens in the list | |
| while context.cursor < input.count { | |
| // gets what kind of parser matches the current token | |
| let parser = ParserType.get(input[context.cursor]) | |
| // Fails if no parser found or white-space when enabled | |
| if (disableSpace && parser?.matchType == .Space) || parser == nil { | |
| errorCursors.append(context.cursor) | |
| if !multipleErrors { break } | |
| } | |
| // Asserts the token is well placed | |
| try parser!.assert(context) | |
| // Updates the context with new data from the parser or just next cursor | |
| context = parser != nil ? parser!.parse(context).next() : context.next() | |
| } | |
| // Append any leftover parenthesis cursor that hasn't been closed | |
| errorCursors.append(contentsOf: context.parenthesisStartCursors) | |
| if errorCursors.count > 0 { | |
| // Throw all errors at once! | |
| throw TokenParsingError(cursor: errorCursors, input: input) | |
| } | |
| // joins the rest of the stack reversed with the output | |
| return context.output + context.stack.reversed().map { .OperatorToken(Operator.getOperator($0)!) } | |
| } | |
| let test = [Character]("3 + 4 * ((25 * 2) + (33 / 3))") | |
| let result = try parseTokens(test, multipleErrors: true) | |
| print(result) // [3, 4, 25, 2, *, 33, 3, /, +, *, +] | |
| /* | |
| ###### TESTS ###### | |
| (PASS) "1 + (3 * 5)" | |
| (PASS) "2 * (15 + 5)" | |
| (PASS) "(15 + 5) * 2" | |
| (PASS) "(15 + 5) * (1 * 2)" | |
| (PASS) "+ (3 * 5)" -> failed as expected | |
| (PASS) "1 + 3(3 * 5)" -> failed as expected | |
| (PASS) "1 + (3 * 5" -> failed as expected | |
| (PASS) "1 + 3 * 5)" -> failed as expected | |
| (PASS) "1 + 3 ** 5)" -> failed as expected | |
| (PASS) "((15 + 5) * (1 * 2)) * 3 + 4 * ((25 * 2) + (33 / 3))" | |
| (PASS) "(1 + 3 * 5)" -> failed as expected | |
| (PASS) "(1+3*5)" | |
| (PASS) "1+2" | |
| (PASS) "3+4*2" | |
| (PASS) "10+20" | |
| (PASS) "2^3" | |
| (FAIL) "2^3^2" -> Got "64" when expecting "512" | |
| (PASS) "((3))" | |
| (PASS) "(1+2)*3" | |
| (PASS) "(1+2)*(3+4)" | |
| (PASS) "((1+2)+3)" | |
| (PASS) "3+7/(2+3)*2" | |
| (PASS) "3 + 4 * 2 / (1 - 5)^2^3" | |
| */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment