Created
November 17, 2023 15:28
-
-
Save nobodywasishere/b18d196ed06e91d8afb72e41d1c8fdeb to your computer and use it in GitHub Desktop.
Work in progress EBNF language grammar for Crystal
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| (* Entry point of the grammar. *) | |
| document = expressions ; | |
| (* Helper naming: one_* matches exactly one, opt_* zero or more, all_* one or more. *) | |
| one_space = ' ' ; | |
| opt_space = { one_space } ; | |
| all_space = one_space, opt_space ; | |
| one_newline = '\n' | '\r\n' | '\r' ; | |
| opt_newline = { one_newline } ; | |
| all_newline = one_newline, opt_newline ; | |
| one_space_or_newline = one_space | one_newline ; | |
| opt_space_or_newline = { one_space_or_newline } ; | |
| all_space_or_newline = one_space_or_newline, opt_space_or_newline ; | |
| one_semicolon_or_newline = ';' | one_newline ; | |
| opt_semicolon_or_newline = { one_semicolon_or_newline } ; | |
| all_semicolon_or_newline = one_semicolon_or_newline, opt_semicolon_or_newline ; | |
| (* A statement ends with optional spaces and then one ';' or newline. *) | |
| statement_end = opt_space, one_semicolon_or_newline ; | |
| (* Zero or more statements, each followed by a statement terminator. *) | |
| expressions = { multi_assign, statement_end } ; | |
| (* Currently just a single expression; the multiple-assignment form is sketched in the commented-out rule below. *) | |
| multi_assign = expression ; | |
| (* todo: limit to assign or call *) | |
| (* | | |
| ( | |
| { multi_assign_part, ',' }, opt_space_or_newline, | |
| [ '*', multi_assign_part, ',' ], opt_space_or_newline, | |
| { multi_assign_part, ',' }, opt_space_or_newline, | |
| [ multi_assign_part ] | |
| ) ; *) | |
| (* One element of a multiple assignment: an expression with optional spaces on either side. *) | |
| multi_assign_part = opt_space, expression, opt_space ; | |
| (* An assignment, optionally followed by a trailing if / unless / rescue / ensure modifier. *) | |
| expression = assign, opt_space, [ expression_suffix ] ; | |
| (* todo *) | |
| expression_suffix = expression_suffix_if | expression_suffix_unless | expression_suffix_rescue | expression_suffix_ensure ; | |
| (* Each modifier is its keyword, at least one space, then an assign_no_control. *) | |
| expression_suffix_if = "if", all_space, assign_no_control ; | |
| expression_suffix_unless = "unless", all_space, assign_no_control ; | |
| expression_suffix_rescue = "rescue", all_space, assign_no_control ; | |
| expression_suffix_ensure = "ensure", all_space, assign_no_control ; | |
| (* Assignment: a ternary-level expression optionally followed by an assignment operator and its right-hand side. *) | |
| (* The two alternatives are grouped so the leading opt_space applies to both, and the right-hand side of '=' is *) | |
| (* explicitly either a bare proc type or an expression (concatenation binds tighter than '|' in EBNF). *) | |
| (* todo *) | |
| assign = question_colon, | |
| [ | |
| opt_space, | |
| ( | |
| ( assignment_operator, all_space_or_newline, assign_no_control ) | | |
| ( '=', all_space_or_newline, ( bare_proc_type | assign_no_control ) ) (* todo: differentiate '[]=' operator *) | |
| ) | |
| ] ; | |
| (* Compound assignment operators. *) | |
| assignment_operator = | |
| '+=' | '-=' | '*=' | '/=' | '//=' | '%=' | '|=' | '&=' | '^=' | | |
| '**=' | '<<=' | '>>=' | '||=' | '&&=' | '&+=' | '&-=' | '&*=' ; | |
| (* Ternary conditional: a range followed by zero or more '? a : b' parts, each introduced by at least one space. *) | |
| question_colon = range, | |
| { | |
| all_space, | |
| "?", all_space_or_newline, question_colon, all_space_or_newline, | |
| ":", all_space_or_newline, question_colon | |
| } ; | |
| (* todo: the precedence of this may be wrong, as the parser checks manually for end tokens *) | |
| (* Range: an optional leading range operator, an operand, then any number of operator/operand pairs. *) | |
| range = [ range_operator ], infix_or, { range_operator, infix_or } ; | |
| range_operator = '..' | '...' ; | |
| (* Binary operator chain: each rule refers to the next one down as its operand. *) | |
| (* Operator sets are alternations ('|'); a ',' would require all the operators to appear in sequence. *) | |
| (* todo: each level admits at most one operator application; chains such as a + b + c need repetition *) | |
| infix_or = infix_and, [ opt_space, "||", opt_space_or_newline, infix_and ] ; | |
| infix_and = infix_equality, [ opt_space, "&&", opt_space_or_newline, infix_equality ] ; | |
| infix_equality = infix_cmp, [ opt_space, ( "<" | "<=" | ">" | ">=" | "<=>" ), opt_space_or_newline, infix_cmp ] ; | |
| infix_cmp = logical_or, [ opt_space, ( "==" | "!=" | "=~" | "!~" | "===" ), opt_space_or_newline, logical_or ] ; | |
| logical_or = logical_and, [ opt_space, ( "|" | "^" ), opt_space_or_newline, logical_and ] ; | |
| logical_and = shift, [ opt_space, "&", opt_space_or_newline, shift ] ; | |
| shift = add_or_sub, [ opt_space, ( "<<" | ">>" ), opt_space_or_newline, add_or_sub ] ; | |
| (* todo: some number stuff *) | |
| add_or_sub = mul_or_div, [ opt_space, ( "+" | "-" | "&+" | "&-" ), opt_space_or_newline, mul_or_div ] ; | |
| mul_or_div = power, [ opt_space, ( "*" | "/" | "//" | "%" | "&*" ), opt_space_or_newline, power ] ; | |
| power = prefix, [ opt_space, ( "**" | "&**" ), opt_space_or_newline, prefix ] ; | |
| (* Prefix operators may be stacked; otherwise fall through to an atomic with an optional method suffix. *) | |
| prefix = ( ( "!" | "+" | "-" | "~" | "&+" | "&-" ), opt_space_or_newline, prefix ) | atomic_with_method ; | |
| (* todo: newline chaining can't happen for class/module/enum/fun/def *) | |
| atomic_with_method = atomic, [ opt_space, atomic_method_suffix ] ; | |
| (* todo: stuff *) | |
| atomic_method_suffix = opt_space_or_newline, ( is_a | as | as_question | responds_to | nil_question ) ; | |
| single_arg = [ '*', opt_space, assign_no_control ] ; | |
| (* Pseudo-method calls. The argument is either space-separated or parenthesized; both forms are grouped *) | |
| (* after the method name so that the name prefixes the parenthesized form as well. *) | |
| is_a = '.is_a?', ( ( all_space, union_type ) | ( '(', opt_space_or_newline, bare_proc_type, opt_space_or_newline, ')' ) ) ; | |
| as = '.as', ( ( all_space, union_type ) | ( '(', opt_space_or_newline, bare_proc_type, opt_space_or_newline, ')' ) ) ; | |
| as_question = '.as?', ( ( all_space, union_type ) | ( '(', opt_space_or_newline, bare_proc_type, opt_space_or_newline, ')' ) ) ; | |
| responds_to = '.responds_to?', ( ( all_space, symbol ) | ( '(', opt_space_or_newline, symbol, opt_space_or_newline, ')' ) ) ; | |
| nil_question = '.nil?', [ '(', opt_space_or_newline, ')' ] ; | |
| (* One or more suffixed atomic types separated by '|'. *) | |
| union_type = atomic_type_with_suffix, opt_space, { '|', opt_space_or_newline, atomic_type_with_suffix } ; | |
| atomic_type_with_suffix = atomic_type, type_suffix ; | |
| (* Zero or more suffixes: '?', '*', '**', '.class', or a bracketed type_arg, each followed by optional spaces. *) | |
| type_suffix = | |
| { ( | |
| '?' | '*' | '**' | | |
| ( '.', opt_space_or_newline, 'class' ) | | |
| ( '[', opt_space_or_newline, type_arg, opt_space_or_newline, ']' ) | |
| ), opt_space } ; | |
| (* A type argument: a number, a sizeof-style expression, or a union type. *) | |
| type_arg = number | sizeof | instance_sizeof | offsetof | union_type ; | |
| (* The smallest self-contained expression forms. *) | |
| atomic = | |
| parenthesized_expression | empty_array_literal | array_literal | hash_or_tuple_literal | | |
| percent_macro_expression | percent_macro_control | generic_or_global_call | fun_literal | | |
| annotation | number | char | delimiter | string_array | symbol_array | symbol | | |
| dollar_tilde | dollar_question | global_match_data_index | magic | | |
| identifier | constant | instance_var | class_var | underscore ; | |
| (* Parenthesized sequence of expressions; the last one may omit its terminator. *) | |
| parenthesized_expression = | |
| "(", opt_space_or_newline, | |
| { expression, statement_end }, [ expression ], | |
| ")" ; | |
| (* Empty array literal with its mandatory 'of' type. *) | |
| empty_array_literal = | |
| "[]", opt_space, "of", opt_space_or_newline, bare_proc_type ; | |
| (* Array literal: comma-terminated elements followed by an optional final element; any element may be splatted with '*'. *) | |
| array_literal = "[", opt_space_or_newline, | |
| { [ "*" ], opt_space_or_newline, assign_no_control, opt_space, ",", opt_space_or_newline }, | |
| [ [ "*" ], opt_space_or_newline, assign_no_control, opt_space_or_newline ], "]" ; | |
| hash_or_tuple_literal = | |
| "{", opt_space_or_newline, [ named_tuple (* todo: stuff *) ], "}" ; | |
| (* Proc literal: '->', an optional parenthesized parameter list (which may be empty), *) | |
| (* an optional ': return type', and an optional do/end or curly block. *) | |
| (* The 'fun' keyword introduces fun_def, not a proc literal. *) | |
| fun_literal = | |
| '->', opt_space_or_newline, | |
| [ "(", opt_space_or_newline, | |
| [ { fun_literal_param, ',', opt_space_or_newline }, | |
| fun_literal_param, opt_space_or_newline ], | |
| ")" ], opt_space_or_newline, | |
| [ ':', all_space_or_newline, bare_proc_type ], opt_space_or_newline, | |
| [ do_end_block | curly_block ] ; | |
| (* A proc literal parameter: a name with an optional ': type' restriction. *) | |
| fun_literal_param = ident, opt_newline, [ all_space_or_newline, ":", all_space_or_newline, bare_proc_type ] ; | |
| (* Block bodies: 'do' ... 'end' and '{' ... '}', each wrapping a sequence of expressions. *) | |
| do_end_block = 'do', statement_end, expressions, opt_space_or_newline, 'end' ; | |
| curly_block = '{', opt_space_or_newline, expressions, opt_space_or_newline, '}' ; | |
| (* Currently identical to assign. *) | |
| assign_no_control = assign ; | |
| magic = '__LINE__' | '__FILE__' | '__DIR__' ; | |
| underscore = '_' ; | |
| (* An identifier position holds either a keyword-introduced construct or a variable/call. *) | |
| identifier = keyword | var_or_call ; | |
| (* Each alternative is the rule for one keyword-introduced construct. *) | |
| keyword = begin | nil | true | false | yield | yield_with_scope | abstract | def | macro | require | | |
| case | select | if | unless | include | extend | class | struct | module | enum | | |
| while | until | return | next | break | lib | fun_def | alias | pointerof | sizeof | | |
| instance_sizeof | offsetof | typeof | private | protected | asm | annotation ; | |
| (* todo: handle blocks correctly, parse type declarations *) | |
| var_or_call = ( "!" | is_a | as | as_question | responds_to | nil_question | "super" | "initialize" | "previous_def" | call_args ) ; | |
| (* keywords not allowed in method definitions *) | |
| keyword_def_disallowed = abstract | def | macro | require | include | extend | class | | |
| struct | module | enum | lib | fun_def | alias | annotation ; | |
| constant = generic_or_custom_literal ; | |
| (* The base forms a type can take before any type_suffix is applied. *) | |
| atomic_type = "self" | "self?" | typeof | underscore | constant | named_type_args | union_types | proc_type_output (* todo: .op_lparen?? *) ; | |
| (* void_expression_keyword = ( break | next | return ), space, ":", space ; *) | |
| (* Symbol literal: ':' followed by an operator name, a quoted string, or an identifier with an optional '?', '!' or '=' suffix. *) | |
| (* todo: next char not '=' *) | |
| symbol = ":", ( | |
| "+" | "-" | "*" | "**" | "/" | "//" | | |
| "==" | "===" | "=~" | "!=" | "!~" | "!" | | |
| "<" | "<<" | "<=" | "<=>" | ">" | ">=" | ">>" | | |
| "&" | "&+" | "&-" | "&*" | "&**" | | |
| "|" | "^" | "~" | "%" | "[]=" | "[]?" | "[]" | | |
| string | | |
| ( ident_start, { ident_part }, [ '?' | '!' | '=' ] ) | |
| ) ; | |
| (* Identifier: a start character followed by any number of part characters. *) | |
| ident = ident_start, { ident_part } ; | |
| ident_start = ascii_letter | '_' | ord_0x9F ; | |
| ident_part = ident_start | ascii_digit ; | |
| (* Global variable: '$' followed by an identifier-shaped name. *) | |
| global = "$", ident_start, { ident_part } ; | |
| (* ASCII character classes used by identifiers and numbers. *) | |
| ascii_uppercase = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | | |
| "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | | |
| "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z" ; | |
| (* Any ASCII letter: an uppercase letter or one of the lowercase letters listed here. *) | |
| ascii_letter = ascii_uppercase | | |
| "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | | |
| "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | | |
| "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z" ; | |
| ascii_digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; | |
| (* Non-ASCII identifier characters, described informally as an EBNF special sequence. *) | |
| ord_0x9F = ? all characters with ord > 0x9F ? ; (* todo *) | |
| (* Digit classes for the prefixed number literals. *) | |
| binary_digit = "0" | "1" ; | |
| octal_digit = binary_digit | "2" | "3" | "4" | "5" | "6" | "7" ; | |
| hex_digit = ascii_digit | | |
| "a" | "b" | "c" | "d" | "e" | "f" | | |
| "A" | "B" | "C" | "D" | "E" | "F" ; | |
| (* Number literal: binary, octal, hexadecimal or decimal, with at most one type suffix. *) | |
| (* A decimal literal has at most one fractional part, which needs at least one digit after the '.'. *) | |
| (* todo: don't allow consecutive underscores or underscore end *) | |
| number = | |
| ( | |
| ( "0b", binary_digit, { binary_digit | '_' }, [ number_suffix ] ) | | |
| ( "0o", octal_digit, { octal_digit | '_' }, [ number_suffix ] ) | | |
| ( "0x", hex_digit, { hex_digit | '_' }, [ number_suffix ] ) | | |
| ( | |
| ascii_digit, { ascii_digit | '_' }, | |
| [ '.', ascii_digit, { ascii_digit | '_' } ], | |
| [ number_exponent ], | |
| [ number_suffix ] | |
| ) | |
| ) ; | |
| (* Exponent: 'e' or 'E', an optional sign, then one or more digits. *) | |
| number_exponent = ( "e" | "E" ), [ "+" | "-" ], ascii_digit, { ascii_digit } ; | |
| (* The width alternatives are grouped so that a bare '64' is not accepted as a suffix on its own. *) | |
| number_suffix = ( ( 'i' | 'u' ), ( '8' | '16' | '32' | '64' | '128' ) ) | ( 'f', ( '32' | '64' ) ) ; | |
| (* begin ... end block with optional rescue clauses (optionally followed by else) and an optional ensure clause. *) | |
| begin = "begin", statement_end, expressions, [ rescue, { rescue }, [ else ] ], [ ensure ], 'end' ; | |
| (* rescue clause: an optional identifier with an optional ': constant' part, then the handler expressions. *) | |
| rescue = "rescue", all_space, [ identifier, [ all_space, ':', all_space, constant ] ], one_semicolon_or_newline, expressions, statement_end ; | |
| (* else and ensure clauses: keyword, terminator, then a sequence of expressions. *) | |
| else = "else", statement_end, expressions ; | |
| ensure = "ensure", statement_end, expressions ; | |
| (* if ... end: condition, body, any number of elsif branches, an optional else branch. *) | |
| (* Each branch body is an 'expressions' sequence whose statements carry their own terminators, *) | |
| (* so no extra statement_end is required before the next branch keyword or 'end'. *) | |
| if = | |
| "if", all_space_or_newline, assign_no_control, statement_end, | |
| expressions, | |
| { elsif }, | |
| [ else ], | |
| 'end'; | |
| (* elsif branch: condition, terminator, then the branch body. *) | |
| elsif = | |
| "elsif", all_space_or_newline, | |
| assign_no_control, statement_end, | |
| expressions ; | |
| (* unless ... end: condition, body and an optional else branch. *) | |
| (* The else body already ends each statement with its terminator, so 'end' follows it directly. *) | |
| unless = | |
| "unless", all_space_or_newline, assign_no_control, statement_end, | |
| expressions, | |
| [ else ], | |
| 'end'; | |
| (* while / until loops: keyword, condition and body, closed by 'end' like the other block constructs. *) | |
| while = "while", all_space_or_newline, while_body, 'end' ; | |
| until = "until", all_space_or_newline, while_body, 'end' ; | |
| (* Loop condition, terminator, then the loop body. *) | |
| while_body = assign_no_control, statement_end, expressions ; | |
| (* Literal keywords. *) | |
| nil = 'nil' ; | |
| true = 'true' ; | |
| false = 'false' ; | |
| (* yield: the keyword followed by optional call arguments (same shape as return / next / break). *) | |
| yield = "yield", [ call_args ] ; | |
| (* 'abstract' prefix applied to a def, class or struct. *) | |
| abstract = "abstract", all_space_or_newline, ( def | class | struct ) ; | |
| (* Method definition header: name, optional '.'-qualified second name, optional parameter list. *) | |
| (* NOTE(review): the rule ends at an optional ':' with no return type, body or 'end' - looks unfinished. *) | |
| def = "def", opt_space_or_newline, def_or_macro_name, opt_space, [ ".", def_or_macro_name ], opt_space, | |
| [ "(", opt_space_or_newline, { param, ',' }, [ param ], ")" ], | |
| [ ":" ] ; | |
| (* A definable name: an operator symbol, a path, or an identifier that is not a pseudo-method, optionally ending in '='. *) | |
| def_or_macro_name = def_symbols | path | ( ident - pseudo_methods, [ '=' ] ) ; | |
| (* Operator names that can be used as method names. *) | |
| (* The final alternative is '&**'; it was previously a second copy of '&*'. *) | |
| def_symbols = '`' | '<<' | '<' | '<=' | '==' | '===' | '!=' | '=~' | '!~' | | |
| '>>' | '>' | '>=' | '+' | '-' | '*' | '/' | '//' | '!' | '~' | '%' | '&' | '|' | '^' | '**' | | |
| '[]' | '[]?' | '[]=' | '<=>' | '&+' | '&-' | '&*' | '&**' ; | |
| (* Names excluded from ident in def_or_macro_name. *) | |
| pseudo_methods = 'is_a?' | 'as' | 'as?' | 'responds_to?' | 'nil?' ; | |
| def_op_name_disallowed = '!' ; | |
| (* module definition: name, optional type variables, body, 'end'. *) | |
| (* The space after the name is optional (as in class_body), so a name directly followed by a newline is accepted. *) | |
| module = "module", all_space_or_newline, path, opt_space, [ type_vars ], statement_end, expressions, 'end' ; | |
| (* class and struct share the same body shape after their keyword. *) | |
| class = "class", class_body ; | |
| struct = "struct", class_body ; | |
| (* Name, optional type variables, optional '<' clause naming self or a generic, then the body up to 'end'. *) | |
| class_body = all_space_or_newline, path, opt_space, [ type_vars ], [ "<", all_space_or_newline, ( "self" | generic ) ], statement_end, expressions, 'end' ; | |
| (* Annotation usage: '@[' path, optional parenthesized arguments, ']'. *) | |
| annotation = "@[", opt_space, path, opt_space, [ "(", opt_space_or_newline, ( named_args | call_args ), ")" ], "]" ; | |
| (* enum definition: name, optional bare_proc_type, body, 'end'. *) | |
| enum = "enum", all_space_or_newline, path, opt_space, [ bare_proc_type ], statement_end, enum_body, 'end' ; | |
| (* Body of an enum: any number of the members listed here. *) | |
| (* Each member form is a separate alternative; def and macro are alternatives of each other, *) | |
| (* and a visibility keyword, when present, is separated from them by at least one space. *) | |
| enum_body = | |
| { | |
| ( const, opt_space, [ '=', opt_space_or_newline, logical_or ], statement_end ) | | |
| ( [ ( 'private' | 'protected' ), all_space ], ( def | macro ) ) | | |
| ( class_var, opt_space, '=', opt_space_or_newline, assign ) | | |
| percent_macro_expression | | |
| percent_macro_control | | |
| annotation | | |
| statement_end | |
| } ; | |
| (* Parenthesized type argument list: either named type arguments or a comma-separated list of *) | |
| (* one or more type_arg entries. The list may also be empty. *) | |
| type_args = | |
| "(", | |
| opt_space_or_newline, | |
| [ | |
| named_type_args | | |
| ( | |
| { type_arg, opt_space, ",", opt_space_or_newline }, | |
| type_arg, opt_space_or_newline | |
| ) | |
| ], | |
| ")" ; | |
| (* One or more comma-separated 'name: type' pairs; a name is an ident, const or string. *) | |
| named_type_args = | |
| { ( ident | const | string ), ':', all_space, bare_proc_type, opt_space, ',' }, | |
| ( ident | const | string ), ':', all_space, bare_proc_type; | |
| (* macro = "macro", space_or_newline, def_name, *) | |
| (* Constant path: an optional leading '::' followed by '::'-separated constants. *) | |
| path = [ "::", opt_space_or_newline ], const, { "::", const } ; | |
| (* Constant name: an uppercase ASCII letter followed by identifier characters. *) | |
| const = ascii_uppercase, { ident_part } ; | |
| require = "require", all_space, string ; | |
| (* case: optional subject expression, then when/in branches and an optional else, closed by 'end'. *) | |
| case = "case", opt_space_or_newline, { ';', opt_space }, [ all_space, assign_no_control, statement_end ], | |
| { ( when | in ), statement_end }, [ else, statement_end ], 'end' ; | |
| (* select: when branches and an optional else, closed by 'end'. *) | |
| (* statement_end already allows optional spaces, so no space is required between 'select' and the line end. *) | |
| select = "select", statement_end, { when, statement_end }, [ else, statement_end ], 'end' ; | |
| (* include / extend take exactly one target: either 'self' or a generic type. *) | |
| include = "include", all_space, include_body ; | |
| extend = "extend", all_space, include_body ; | |
| include_body = opt_space_or_newline, ( 'self' | generic ) ; | |
| (* alias: a path, '=', then the aliased type. *) | |
| alias = "alias", all_space_or_newline, path, opt_space, "=", opt_space_or_newline, bare_proc_type ; | |
| (* A path with an optional leading '::' and optional type arguments. *) | |
| generic = [ "::" ], path, [ type_args ] ; | |
| (* Instance ('@') and class ('@@') variables. *) | |
| instance_var = "@", ident; | |
| class_var = "@@", ident; | |
| (* Control-flow keywords, each followed by optional call arguments. *) | |
| return = "return", control_expression ; | |
| next = "next", control_expression ; | |
| break = "break", control_expression ; | |
| control_expression = [ call_args ]; | |
| (* lib definition: a path, a terminator, the lib body, 'end'. *) | |
| lib = "lib", all_space_or_newline, path, statement_end, lib_body, 'end' ; | |
| (* Body of a lib: any number of the declarations listed here, including constant assignments *) | |
| (* and typed global declarations with an optional '= name' part. *) | |
| lib_body = | |
| { | |
| annotation | | |
| alias | | |
| fun_def | | |
| type_def | | |
| c_struct | | |
| c_union | | |
| enum | | |
| ( const, opt_space_or_newline, '=', expression, statement_end ) | | |
| ( global, [ '=', opt_space, ( ident | const ) ], all_space, ':', all_space, bare_proc_type, statement_end ) | | |
| percent_macro_expression | | |
| percent_macro_control | |
| }; | |
| (* Character literal; currently limited to a single ASCII letter, digit or space between single quotes. *) | |
| char = "'", ( ascii_letter | ascii_digit | " " ), "'" ; (* todo: improve *) | |
| dollar_tilde = "$~" ; | |
| dollar_question = "$?" ; | |
| (* Visibility modifiers: keyword, at least one space, then an assign. *) | |
| private = "private", all_space, assign ; | |
| protected = "protected", all_space, assign ; | |
| (* Inline assembly: 'asm' followed by a parenthesized string. *) | |
| (* todo: asm operands, can't find any examples *) | |
| asm = "asm", opt_space, "(", opt_space_or_newline, string, opt_space_or_newline, ")" ; | |
| (* Compile-time pseudo-calls: keyword, optional spaces, then a parenthesized argument list. *) | |
| pointerof = "pointerof", opt_space, "(", opt_space_or_newline, assign, opt_space_or_newline, ")" ; | |
| sizeof = "sizeof", opt_space, "(", opt_space_or_newline, bare_proc_type, opt_space_or_newline, ")" ; | |
| (* Same shape as sizeof but with its own keyword; aliasing sizeof would match the wrong keyword. *) | |
| instance_sizeof = "instance_sizeof", opt_space, "(", opt_space_or_newline, bare_proc_type, opt_space_or_newline, ")" ; | |
| offsetof = "offsetof", opt_space, "(", opt_space_or_newline, | |
| bare_proc_type, opt_space, ',', opt_space_or_newline, | |
| ( number | instance_var ), opt_space_or_newline, ")" ; | |
| (* String literal: double quotes around content described informally by a special sequence. *) | |
| string = '"', ? characters, escaped if necessary ?, '"' ; | |
| (* Heredoc opener followed by a const; the remainder is left as an informal special sequence. *) | |
| heredoc = ( "<<-" | "<<~" ), const, ? too complex for EBNF ? ; | |
| (* Placeholders for rules that are not written yet. Each is an EBNF special sequence (? ... ?), *) | |
| (* not a quoted terminal, so the grammar does not match the literal text "? TODO ?". *) | |
| yield_with_scope = ? TODO ? ; | |
| macro = ? TODO ? ; | |
| when = ? TODO ? ; | |
| in = ? TODO ? ; | |
| percent_macro_expression = ? TODO ? ; | |
| percent_macro_control = ? TODO ? ; | |
| fun_def = ? TODO ? ; | |
| type_def = ? TODO ? ; | |
| c_struct = ? TODO ? ; | |
| c_union = ? TODO ? ; | |
| call_args = ? TODO ? ; | |
| type_vars = ? TODO ? ; | |
| named_args = ? TODO ? ; | |
| generic_or_custom_literal = ? TODO ? ; | |
| typeof = ? TODO ? ; | |
| bare_proc_type = ? TODO ? ; | |
| named_tuple = ? TODO ? ; | |
| union_types = ? TODO ? ; | |
| proc_type_output = ? TODO ? ; | |
| generic_or_global_call = ? TODO ? ; | |
| delimiter = ? TODO ? ; | |
| string_array = ? TODO ? ; | |
| symbol_array = ? TODO ? ; | |
| global_match_data_index = ? TODO ? ; | |
| param = ? TODO ? ; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment