Skip to content

Instantly share code, notes, and snippets.

@jeffersonmourak
Created May 9, 2025 21:05
Show Gist options
  • Select an option

  • Save jeffersonmourak/0e59cf0da7a263e456918b3066c6ccb8 to your computer and use it in GitHub Desktop.

Select an option

Save jeffersonmourak/0e59cf0da7a263e456918b3066c6ccb8 to your computer and use it in GitHub Desktop.
//
// tokens.swift
// compilers
//
// Created by Jefferson Oliveira on 5/8/25.
//
struct TokenContext {
var input: [Character]
var cursor: Int
var output: [Token]
var stack: [Character]
var parenthesisStartCursors: [Int]
// Move the parser cursor to next character
func next(
_ context: TokenContext
) -> TokenContext {
.init(context.input, context.cursor + 1, context.output, context.stack, context.parenthesisStartCursors)
}
func next() -> TokenContext {
self.next(self)
}
func next(
input: [Character]? = nil,
cursor: Int? = nil,
output: [Token]? = nil,
stack: [Character]? = nil,
staleParenthesis: [Int]? = nil
) -> TokenContext {
self.next(
self.mutate(input, cursor, output, stack, staleParenthesis)
)
}
// Mutates the current context with new data
func mutate(
_ input: [Character]? = nil,
_ cursor: Int? = nil,
_ output: [Token]? = nil,
_ stack: [Character]? = nil,
_ staleParenthesis: [Int]? = nil
) -> TokenContext {
.init(
input ?? self.input,
cursor ?? self.cursor,
output ?? self.output,
stack ?? self.stack,
staleParenthesis ?? self.parenthesisStartCursors
)
}
func mutate(
input: [Character]? = nil,
cursor: Int? = nil,
output: [Token]? = nil,
stack: [Character]? = nil,
staleParenthesis: [Int]? = nil
) -> TokenContext {
self.mutate(input, cursor, output, stack, staleParenthesis)
}
// initialization helpers
init(
_ input: [Character] = [],
_ cursor: Int = 0,
_ output: [Token] = [],
_ stack: [Character] = [],
_ staleParenthesis: [Int] = []
) {
self.input = input
self.cursor = cursor
self.output = output
self.stack = stack
self.parenthesisStartCursors = staleParenthesis
}
init(_ input: [Character] = []) {
self.init(input, 0, [], [], [])
}
}
// MARK: Parsers type and definition
// Categorizes the accepted token parsers
enum ParserType: CaseIterable {
case Numeral, Parenthesis, Operator, Space
static func get(_ char: Character) -> (any ParseDefinition)? {
switch ParserType.matchType(char) {
case .Numeral: NumeralParser()
case .Parenthesis: ParenthesisParser()
case .Operator: OperatorParser()
case .Space: SpaceParser()
default: nil
}
}
static func matchType(_ char: Character) -> ParserType? {
for parser in ParserType.allCases {
switch parser {
case .Numeral: if NumeralParser.match(char) { return .Numeral }; continue
case .Parenthesis: if ParenthesisParser.match(char) { return .Parenthesis }; continue
case .Operator: if OperatorParser.match(char) { return .Operator }; continue
case .Space: if SpaceParser.match(char) { return .Space }; continue
}
}
return nil
}
}
// Protocol for a Parser struct
protocol ParseDefinition {
var matchType: ParserType { get }
static func match(_ char: Character) -> Bool
func assert(_ context: TokenContext) throws -> Void
func parse(_ context: TokenContext) -> TokenContext
}
// MARK: Space token parser
struct SpaceParser: ParseDefinition {
var matchType: ParserType { get { .Space } }
static func match(_ char: Character) -> Bool { char == " " }
func assert(_ context: TokenContext) throws {}
func parse(_ context: TokenContext) -> TokenContext { context }
}
// MARK: Numeral parser
struct NumeralParser: ParseDefinition {
var matchType: ParserType { get { .Numeral } }
static func match(_ char: Character) -> Bool {
return String(char).firstMatch(of: /[0-9]/) != nil
}
func assert(_ context: TokenContext) throws {}
func parse(_ context: TokenContext) -> TokenContext {
var cursor = context.cursor
var numberString = "\(context.input[cursor])"
var output = context.output
while cursor + 1 < context.input.count
&& Number(String(context.input[cursor + 1])) != nil
{
numberString = "\(numberString)\(context.input[cursor + 1])"
cursor += 1
}
output.append(.ValueToken(Number(numberString)!))
return context.mutate(cursor: cursor, output: output)
}
}
// MARK: Parenthesis parser
struct ParenthesisParser: ParseDefinition {
var matchType: ParserType { get { .Parenthesis } }
static func match(_ char: Character) -> Bool {
return char == "(" || char == ")"
}
func assert(_ context: TokenContext) throws {
let cursor = context.cursor
let char = context.input[cursor]
if char == "(" {
if cursor > 0 {
let offset = skipSpaces(context.input, from: cursor, to: 0)
// check if previous non white-space character is a number
if Number(String(context.input[cursor - offset])) != nil {
// Opening parenthesis must come after an operation or begin of input
throw TokenParsingError(cursor: [cursor - offset], input: context.input)
}
}
return
}
// Check if there are no parenthesis opened
if context.parenthesisStartCursors.count == 0 {
// Cannot close a parenthesis when there's nothing to close
throw TokenParsingError(cursor: [cursor], input: context.input)
}
if cursor < context.input.count - 1 {
let offset = skipSpaces(context.input, from: cursor, to: context.input.count - 1)
// check if next non white-space character is a number
if Number(String(context.input[cursor + offset])) != nil {
// Closing parenthesis must come after an operation or begin of input
throw TokenParsingError(cursor: [cursor + offset], input: context.input)
}
}
}
func parse(_ context: TokenContext) -> TokenContext {
let cursor = context.cursor
let char = context.input[cursor]
var stack = context.stack
var output = context.output
var parenthesisStartCursors = context.parenthesisStartCursors
if char == "(" {
stack.append(char)
parenthesisStartCursors.append(cursor)
return context.mutate(cursor: cursor, stack: stack, staleParenthesis: parenthesisStartCursors)
}
while stack.count != 0 && stack[stack.count - 1] != "(" {
let op = Operator.getOperator(stack.popLast()!)!
output.append(.OperatorToken(op))
}
_ = stack.popLast()
_ = parenthesisStartCursors.popLast()
return context.mutate(cursor: cursor, output: output, stack: stack, staleParenthesis: parenthesisStartCursors)
}
}
// MARK: Operator parser
struct OperatorParser: ParseDefinition {
var matchType: ParserType { get { .Operator } }
static func match(_ char: Character) -> Bool {
return Operator.getOperator(char) != nil
}
func assert(_ context: TokenContext) throws {
let cursor = context.cursor
if cursor == 0 || cursor == context.input.count - 1 {
// Operators cannot be placed to begin or end of an input
throw TokenParsingError(cursor: [cursor], input: context.input)
}
if cursor > 0 {
let offset = skipSpaces(context.input, from: cursor, to: 0)
let prevChar = context.input[cursor - offset]
// checks if previous non white-space char is not an parenthesis closing neither a number
if prevChar != ")" && Number(String(context.input[cursor - offset])) == nil {
// Operators can only be placed between numbers or parenthesis
throw TokenParsingError(cursor: [cursor], input: context.input)
}
} else if cursor < context.input.count {
let offset = skipSpaces(context.input, from: cursor, to: context.input.count - 1)
let nextChar = context.input[cursor + offset]
// checks if previous non white-space char is not an parenthesis closing neither a number
if nextChar != "(" && Number(String(context.input[cursor + offset])) == nil {
// Operators can only be placed between numbers or parenthesis
throw TokenParsingError(cursor: [cursor], input: context.input)
}
}
}
func parse(_ context: TokenContext) -> TokenContext {
let cursor = context.cursor
let char = context.input[cursor]
var stack = context.stack
var output = context.output
while (
stack.count != 0 &&
Operator.getOperator(stack[stack.count - 1])?.getPrecedence() ?? 0 >= Operator.getOperator(char)?.getPrecedence() ?? 0
)
{
output.append(
.OperatorToken(
Operator.getOperator(stack.popLast()!)!
)
)
}
stack.append(char)
return context.mutate(cursor: cursor, output: output, stack: stack)
}
}
struct TokenParsingError: Error {
let cursor: [Int]
let input: [Character]
}
func parseTokens(_ input: [Character], disableSpace: Bool = false, multipleErrors: Bool = false) throws -> [Token] {
var context: TokenContext = .init(input)
var errorCursors: [Int] = []
// Look all the tokens in the list
while context.cursor < input.count {
// gets what kind of parser matches the current token
let parser = ParserType.get(input[context.cursor])
// Fails if no parser found or white-space when enabled
if (disableSpace && parser?.matchType == .Space) || parser == nil {
errorCursors.append(context.cursor)
if !multipleErrors { break }
}
// Asserts the token is well placed
try parser!.assert(context)
// Updates the context with new data from the parser or just next cursor
context = parser != nil ? parser!.parse(context).next() : context.next()
}
// Append any leftover parenthesis cursor that hasn't been closed
errorCursors.append(contentsOf: context.parenthesisStartCursors)
if errorCursors.count > 0 {
// Throw all errors at once!
throw TokenParsingError(cursor: errorCursors, input: input)
}
// joins the rest of the stack reversed with the output
return context.output + context.stack.reversed().map { .OperatorToken(Operator.getOperator($0)!) }
}
let test = [Character]("3 + 4 * ((25 * 2) + (33 / 3))")
let result = try parseTokens(test, multipleErrors: true)
print(result) // [3, 4, 25, 2, *, 33, 3, /, +, *, +]
/*
###### TESTS ######
(PASS) "1 + (3 * 5)"
(PASS) "2 * (15 + 5)"
(PASS) "(15 + 5) * 2"
(PASS) "(15 + 5) * (1 * 2)"
(PASS) "+ (3 * 5)" -> failed as expected
(PASS) "1 + 3(3 * 5)" -> failed as expected
(PASS) "1 + (3 * 5" -> failed as expected
(PASS) "1 + 3 * 5)" -> failed as expected
(PASS) "1 + 3 ** 5)" -> failed as expected
(PASS) "((15 + 5) * (1 * 2)) * 3 + 4 * ((25 * 2) + (33 / 3))"
(PASS) "(1 + 3 * 5)" -> failed as expected
(PASS) "(1+3*5)"
(PASS) "1+2"
(PASS) "3+4*2"
(PASS) "10+20"
(PASS) "2^3"
(FAIL) "2^3^2" -> Got "64" when expecting "512"
(PASS) "((3))"
(PASS) "(1+2)*3"
(PASS) "(1+2)*(3+4)"
(PASS) "((1+2)+3)"
(PASS) "3+7/(2+3)*2"
(PASS) "3 + 4 * 2 / (1 - 5)^2^3"
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment