Skip to content

Instantly share code, notes, and snippets.

@hdf
Last active March 16, 2026 06:54
Show Gist options
  • Select an option

  • Save hdf/6ecfa4754ddce29d8764ea67b4a98e62 to your computer and use it in GitHub Desktop.

Select an option

Save hdf/6ecfa4754ddce29d8764ea67b4a98e62 to your computer and use it in GitHub Desktop.
A small C# class that tokenizes Where-condition strings. The tokens may later be parsed into a predicate.
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Text.Encodings.Web;
using System.Text;
/// <summary>
/// System.Text.Json serializer context that registers
/// <c>List&lt;Dictionary&lt;string, string&gt;&gt;</c>, the shape that
/// <c>Tokenizer.SerializeTokens</c> passes to <c>JsonSerializer.Serialize</c>.
/// The class is declared partial and has no members in this file.
/// </summary>
[JsonSerializable(typeof(List<Dictionary<string, string>>))]
internal partial class SourceGenerationContext : JsonSerializerContext { }
public class Program
{
public static class Tokenizer
{
/// <summary>
/// Kinds of token that <c>Tokenize</c> emits for a Where-condition string.
/// <c>SerializeTokens</c> uses the member names as JSON keys.
/// </summary>
public enum TokenType
{
// '(' character.
ParenthesisOpen,
// ')' character.
ParenthesisClose,
// '[' character; the only token allowed after In / NotIn.
BracketOpen,
// ']' character that ends a bracketed section.
BracketClose,
// Opening apostrophe of a quoted string.
QuoteBegin,
// Closing apostrophe of a quoted string.
QuoteEnd,
// Identifier; the token value holds the name.
VariableName,
// Raw text of a quoted string, of a bracketed section, of a number, or the word "null".
ConstantValue,
// Keyword "equal".
Equal,
// Keyword "notequal".
NotEqual,
// Keyword "greaterthan".
GreaterThan,
// Keyword "lessthan".
LessThan,
// Keyword "greaterthanorequal".
GreaterThanOrEqual,
// Keyword "lessthanorequal".
LessThanOrEqual,
// Keyword "in".
In,
// Keyword "notin".
NotIn,
// Keyword "and".
And,
// Keyword "or".
Or
}
#region Private Constants
/// <summary>
/// Grammar table: maps each token type to the token types that may come directly after it.
/// <see cref="AddTokenToResultWithValidation"/> consults it before appending a token.
/// </summary>
private static readonly Dictionary<TokenType, TokenType[]> AllowedNextTokens = new()
{
    // Grouping and literal delimiters.
    [TokenType.ParenthesisOpen] = new[] { TokenType.VariableName, TokenType.ParenthesisOpen },
    [TokenType.ParenthesisClose] = new[] { TokenType.And, TokenType.Or, TokenType.ParenthesisClose },
    [TokenType.BracketOpen] = new[] { TokenType.QuoteBegin, TokenType.ConstantValue },
    [TokenType.BracketClose] = new[] { TokenType.And, TokenType.Or, TokenType.ParenthesisClose },
    [TokenType.QuoteBegin] = new[] { TokenType.ConstantValue },
    [TokenType.QuoteEnd] = new[] { TokenType.And, TokenType.Or, TokenType.ParenthesisClose },

    // Operands.
    [TokenType.VariableName] = new[]
    {
        TokenType.Equal, TokenType.NotEqual, TokenType.GreaterThan, TokenType.LessThan,
        TokenType.GreaterThanOrEqual, TokenType.LessThanOrEqual, TokenType.In, TokenType.NotIn
    },
    [TokenType.ConstantValue] = new[]
    {
        TokenType.And, TokenType.Or, TokenType.QuoteEnd, TokenType.BracketClose, TokenType.ParenthesisClose
    },

    // Comparison operators take a quoted string, a variable or a bare constant.
    [TokenType.Equal] = new[] { TokenType.QuoteBegin, TokenType.VariableName, TokenType.ConstantValue },
    [TokenType.NotEqual] = new[] { TokenType.QuoteBegin, TokenType.VariableName, TokenType.ConstantValue },
    [TokenType.GreaterThan] = new[] { TokenType.QuoteBegin, TokenType.VariableName, TokenType.ConstantValue },
    [TokenType.LessThan] = new[] { TokenType.QuoteBegin, TokenType.VariableName, TokenType.ConstantValue },
    [TokenType.GreaterThanOrEqual] = new[] { TokenType.QuoteBegin, TokenType.VariableName, TokenType.ConstantValue },
    [TokenType.LessThanOrEqual] = new[] { TokenType.QuoteBegin, TokenType.VariableName, TokenType.ConstantValue },

    // Membership operators must be followed by a bracketed section.
    [TokenType.In] = new[] { TokenType.BracketOpen },
    [TokenType.NotIn] = new[] { TokenType.BracketOpen },

    // Logical connectives start a new condition or group.
    [TokenType.And] = new[] { TokenType.ParenthesisOpen, TokenType.VariableName },
    [TokenType.Or] = new[] { TokenType.ParenthesisOpen, TokenType.VariableName }
};
/// <summary>
/// Previous-token types after which <c>Tokenize</c> reads the next bare word as a variable name.
/// </summary>
private static readonly HashSet<TokenType> InitialTokens = new()
{
    TokenType.ParenthesisOpen, TokenType.And, TokenType.Or
};
/// <summary>
/// Comparison and membership operators. After one of these, <c>Tokenize</c> reads the next
/// bare word as a number or <c>null</c>.
/// </summary>
private static readonly HashSet<TokenType> OperatorTokens = new()
{
    TokenType.Equal, TokenType.NotEqual,
    TokenType.GreaterThan, TokenType.LessThan,
    TokenType.GreaterThanOrEqual, TokenType.LessThanOrEqual,
    TokenType.In, TokenType.NotIn
};
/// <summary>
/// Previous-token types after which <c>Tokenize</c> reads the next bare word as an operator keyword.
/// <c>And</c> and <c>Or</c> are also listed in <c>InitialTokens</c>, which <c>Tokenize</c> tests first.
/// </summary>
private static readonly HashSet<TokenType> BeforeOperatorTokens = new()
{
    TokenType.VariableName, TokenType.ConstantValue,
    TokenType.QuoteEnd, TokenType.BracketClose, TokenType.ParenthesisClose,
    TokenType.And, TokenType.Or
};
/// <summary>
/// Lower-case operator keyword to token type. <c>Tokenize</c> lower-cases a word before looking it up here.
/// </summary>
private static readonly Dictionary<string, TokenType> TokenMap = new()
{
    ["equal"] = TokenType.Equal,
    ["notequal"] = TokenType.NotEqual,
    ["greaterthan"] = TokenType.GreaterThan,
    ["lessthan"] = TokenType.LessThan,
    ["greaterthanorequal"] = TokenType.GreaterThanOrEqual,
    ["lessthanorequal"] = TokenType.LessThanOrEqual,
    ["in"] = TokenType.In,
    ["notin"] = TokenType.NotIn,
    ["and"] = TokenType.And,
    ["or"] = TokenType.Or
};
/// <summary>
/// Text that <c>TokensToString</c> writes for every token type that carries no value of its own.
/// Keywords are padded with one space on each side.
/// </summary>
private static readonly Dictionary<TokenType, string> TokenToStringMap = new()
{
    [TokenType.ParenthesisOpen] = "(",
    [TokenType.ParenthesisClose] = ")",
    [TokenType.BracketOpen] = "[",
    [TokenType.BracketClose] = "]",
    [TokenType.QuoteBegin] = "'",
    [TokenType.QuoteEnd] = "'",
    [TokenType.Equal] = " equal ",
    [TokenType.NotEqual] = " notequal ",
    [TokenType.GreaterThan] = " greaterthan ",
    [TokenType.LessThan] = " lessthan ",
    [TokenType.GreaterThanOrEqual] = " greaterthanorequal ",
    [TokenType.LessThanOrEqual] = " lessthanorequal ",
    [TokenType.In] = " in ",
    [TokenType.NotIn] = " notin ",
    [TokenType.And] = " and ",
    [TokenType.Or] = " or "
};
#endregion
#region Private Methods
/// <summary>
/// Tells whether <paramref name="token"/> is an identifier: an ASCII letter or underscore
/// followed by any number of ASCII letters, digits or underscores.
/// </summary>
/// <param name="token">Candidate name; may be null or empty.</param>
/// <returns><c>true</c> for a well-formed name; <c>false</c> otherwise, including null or empty input.</returns>
private static bool IsValidVariableName(string token)
{
    static bool IsNameStart(char ch) =>
        ch is (>= 'a' and <= 'z') or (>= 'A' and <= 'Z') or '_';

    static bool IsNamePart(char ch) =>
        ch is (>= 'a' and <= 'z') or (>= 'A' and <= 'Z') or (>= '0' and <= '9') or '_';

    if (string.IsNullOrEmpty(token) || !IsNameStart(token[0]))
    {
        return false;
    }

    // Advance past every acceptable character; the name is valid only if that consumes it all.
    int index = 1;
    while (index < token.Length && IsNamePart(token[index]))
    {
        index++;
    }
    return index == token.Length;
}
/// <summary>
/// Finds the first occurrence of <paramref name="sectionEndStr"/> at or after
/// <paramref name="startIndex"/> that does not lie inside an apostrophe-quoted literal.
/// </summary>
/// <param name="str">Text to scan.</param>
/// <param name="startIndex">Index at which scanning starts.</param>
/// <param name="sectionEndStr">Terminator to look for (compared ordinally); defaults to <c>]</c>.</param>
/// <returns>The index where the terminator starts, or -1 when it is not found.</returns>
/// <exception cref="ArgumentException">The text ends while a quoted literal is still open.</exception>
private static int SkipOverQuoted(string str, int startIndex, string sectionEndStr = "]")
{
    bool insideLiteral = false;
    int pos = startIndex;
    while (pos < str.Length)
    {
        if (str[pos] != '\'')
        {
            bool terminatorFits = pos + sectionEndStr.Length <= str.Length;
            if (!insideLiteral && terminatorFits &&
                string.CompareOrdinal(str, pos, sectionEndStr, 0, sectionEndStr.Length) == 0)
            {
                return pos;
            }
            pos++;
            continue;
        }

        // Two consecutive apostrophes inside a literal are one escaped apostrophe.
        bool doubled = pos + 1 < str.Length && str[pos + 1] == '\'';
        if (insideLiteral && doubled)
        {
            pos += 2;
        }
        else
        {
            insideLiteral = !insideLiteral;
            pos++;
        }
    }

    if (insideLiteral)
    {
        throw new ArgumentException("Unmatched quote");
    }
    return -1;
}
/// <summary>
/// Locates the apostrophe that closes a quoted literal whose content starts at
/// <paramref name="startIndex"/>.
/// </summary>
/// <param name="str">Text to scan.</param>
/// <param name="startIndex">Index of the first character after the opening apostrophe.</param>
/// <returns>The index of the closing apostrophe.</returns>
/// <exception cref="ArgumentException">No closing apostrophe exists.</exception>
private static int FindQuoteEnd(string str, int startIndex)
{
    int cursor = startIndex;
    while (cursor < str.Length)
    {
        if (str[cursor] == '\'')
        {
            // Two consecutive apostrophes are one escaped apostrophe inside the literal.
            bool followedByApostrophe = cursor + 1 < str.Length && str[cursor + 1] == '\'';
            if (!followedByApostrophe)
            {
                return cursor;
            }
            cursor += 2;
        }
        else
        {
            cursor++;
        }
    }
    throw new ArgumentException("Unmatched quote");
}
/// <summary>
/// Appends a token to <paramref name="result"/> after checking that it may appear at this position.
/// </summary>
/// <remarks>
/// The first token must be an opening parenthesis or a variable name, the same set that
/// <c>AllowedNextTokens</c> permits after an opening parenthesis. Every later token must be
/// listed in <c>AllowedNextTokens</c> for the token that precedes it.
/// </remarks>
/// <param name="result">Token list built so far; the new token is added to its end.</param>
/// <param name="tokenType">Type of the token to append.</param>
/// <param name="tokenValue">Text carried by the token; empty for tokens without a value.</param>
/// <returns><paramref name="tokenType"/>, so the caller can record it as the previous token.</returns>
/// <exception cref="ArgumentException">The token is not allowed at this position.</exception>
private static TokenType AddTokenToResultWithValidation(List<Tuple<TokenType, string>> result, TokenType tokenType, string tokenValue = "")
{
    if (result.Count == 0)
    {
        // Without this check an input that starts with ')', a quoted string or a bracket
        // would be accepted, because there is no previous token to validate against.
        if (tokenType != TokenType.ParenthesisOpen && tokenType != TokenType.VariableName)
        {
            throw new ArgumentException($"Unexpected first token: {tokenType}!");
        }
    }
    else
    {
        var lastToken = result[^1];
        if (!AllowedNextTokens.TryGetValue(lastToken.Item1, out var allowedNext) || !allowedNext.Contains(tokenType))
        {
            // Diagnostic dump of what was tokenized so far, written before the failure is reported.
            Console.WriteLine($"Current tokens: {SerializeTokens(result)}");
            throw new ArgumentException($"Unexpected token: {tokenType} after {lastToken.Item1}!");
        }
    }
    result.Add(Tuple.Create(tokenType, tokenValue));
    return tokenType;
}
#endregion
#region Public Methods
/// <summary>
/// Splits a Where-condition string into a validated list of tokens.
/// </summary>
/// <remarks>
/// Recognized input: parentheses, apostrophe-quoted strings (<c>''</c> is an escaped apostrophe),
/// bracketed sections, variable names, the operator keywords of <c>TokenMap</c> (matched
/// case-insensitively) and numeric or <c>null</c> constants. Words are delimited by any
/// whitespace character. The content of a quoted string or of a bracketed section is stored
/// unmodified as a single <c>ConstantValue</c> token.
/// </remarks>
/// <param name="str">The condition text to tokenize.</param>
/// <returns>
/// The tokens in input order. Variable and constant tokens carry their text in <c>Item2</c>;
/// all other tokens carry an empty string.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
/// <exception cref="ArgumentException">
/// The input is malformed: unbalanced parentheses, brackets or quotes, an invalid variable name,
/// an unknown operator, an invalid number, a token in a position the grammar does not allow,
/// or an expression that stops after a variable name or an operator.
/// </exception>
public static List<Tuple<TokenType, string>> Tokenize(string str)
{
    ArgumentNullException.ThrowIfNull(str);

    // Index of the first whitespace character at or after 'start', or -1 when there is none.
    // Words must end on the same characters that the main loop skips as separators.
    static int IndexOfWhiteSpace(string text, int start)
    {
        for (int k = start; k < text.Length; k++)
        {
            if (char.IsWhiteSpace(text[k]))
            {
                return k;
            }
        }
        return -1;
    }

    List<Tuple<TokenType, string>> result = new List<Tuple<TokenType, string>>();
    int parenthesisDepth = 0;
    TokenType? prevToken = null;
    int i = 0;
    while (i < str.Length)
    {
        char c = str[i];
        if (char.IsWhiteSpace(c))
        {
            i++;
            continue;
        }
        if (c == '(')
        {
            prevToken = AddTokenToResultWithValidation(result, TokenType.ParenthesisOpen);
            parenthesisDepth++;
            i++;
            continue;
        }
        if (c == ')')
        {
            prevToken = AddTokenToResultWithValidation(result, TokenType.ParenthesisClose);
            parenthesisDepth--;
            // Fail as soon as a ')' has no matching '(' so a later '(' cannot bring the
            // counter back to zero and hide the imbalance.
            if (parenthesisDepth < 0)
            {
                throw new ArgumentException("Unmatched parenthesis");
            }
            i++;
            continue;
        }
        if (c == '\'')
        {
            prevToken = AddTokenToResultWithValidation(result, TokenType.QuoteBegin);
            i++;
            int quoteEndIndex = FindQuoteEnd(str, i);
            string value = str.Substring(i, quoteEndIndex - i);
            prevToken = AddTokenToResultWithValidation(result, TokenType.ConstantValue, value);
            prevToken = AddTokenToResultWithValidation(result, TokenType.QuoteEnd);
            i = quoteEndIndex + 1;
            continue;
        }
        if (c == '[')
        {
            prevToken = AddTokenToResultWithValidation(result, TokenType.BracketOpen);
            i++;
            int bracketEndIndex = SkipOverQuoted(str, i);
            if (bracketEndIndex == -1)
            {
                throw new ArgumentException("Unmatched bracket");
            }
            string value = str.Substring(i, bracketEndIndex - i);
            prevToken = AddTokenToResultWithValidation(result, TokenType.ConstantValue, value);
            prevToken = AddTokenToResultWithValidation(result, TokenType.BracketClose);
            i = bracketEndIndex + 1;
            continue;
        }
        if (prevToken == null || InitialTokens.Contains(prevToken.Value)) // Variable name
        {
            int tokenEnd = IndexOfWhiteSpace(str, i);
            if (tokenEnd == -1)
            {
                throw new ArgumentException("Unexpected end of input after variable!");
            }
            string tokenStr = str.Substring(i, tokenEnd - i);
            if (!IsValidVariableName(tokenStr))
            {
                throw new ArgumentException($"Invalid variable name: {tokenStr}");
            }
            prevToken = AddTokenToResultWithValidation(result, TokenType.VariableName, tokenStr);
            i = tokenEnd + 1;
            continue;
        }
        else if (BeforeOperatorTokens.Contains(prevToken.Value)) // Operator
        {
            int tokenEnd = IndexOfWhiteSpace(str, i);
            if (tokenEnd == -1)
            {
                throw new ArgumentException("Unexpected end of input after operator!");
            }
            // Invariant casing: culture-sensitive lower-casing can change 'I' (e.g. Turkish),
            // which would make "IN" miss the "in" key.
            string tokenStr = str.Substring(i, tokenEnd - i).ToLowerInvariant();
            if (!TokenMap.TryGetValue(tokenStr, out var tokenType))
            {
                throw new ArgumentException($"Unknown operator: {tokenStr}");
            }
            prevToken = AddTokenToResultWithValidation(result, tokenType);
            i = tokenEnd + 1;
            continue;
        }
        else if (OperatorTokens.Contains(prevToken.Value)) // After an operator: a number (or null). String and bracketed constants are handled above.
        {
            int tokenEnd = IndexOfWhiteSpace(str, i);
            if (tokenEnd == -1)
            {
                tokenEnd = str.Length;
            }
            // A constant may be directly followed by ')' without whitespace in between.
            bool delimitedByParenthesis = false;
            int nextParenthesisCloseIndex = str.IndexOf(')', i);
            if (nextParenthesisCloseIndex != -1 && nextParenthesisCloseIndex < tokenEnd)
            {
                tokenEnd = nextParenthesisCloseIndex;
                delimitedByParenthesis = true;
            }
            string tokenStr = str.Substring(i, tokenEnd - i);
            if (tokenStr.Equals("null", StringComparison.OrdinalIgnoreCase))
            {
                tokenStr = "null";
            }
            else if (!decimal.TryParse(tokenStr, System.Globalization.CultureInfo.InvariantCulture, out _))
            {
                throw new ArgumentException($"Invalid number: {tokenStr}");
            }
            prevToken = AddTokenToResultWithValidation(result, TokenType.ConstantValue, tokenStr);
            // Leave the ')' in place so the next iteration tokenizes it.
            i = delimitedByParenthesis ? tokenEnd : tokenEnd + 1;
            continue;
        }
        Console.WriteLine($"Current tokens: {SerializeTokens(result)}");
        throw new ArgumentException($"Unexpected character: '{c}' at position {i}");
    }

    // Trailing whitespace lets the loop finish right after a variable name or an operator;
    // report that the same way as when the input stops without the trailing whitespace.
    if (prevToken == TokenType.VariableName)
    {
        throw new ArgumentException("Unexpected end of input after variable!");
    }
    if (prevToken is TokenType last &&
        (OperatorTokens.Contains(last) || last == TokenType.And || last == TokenType.Or))
    {
        throw new ArgumentException("Unexpected end of input after operator!");
    }
    if (parenthesisDepth != 0)
    {
        throw new ArgumentException("Unmatched parenthesis");
    }
    return result;
}
/// <summary>
/// Rebuilds a condition string from a token list.
/// </summary>
/// <remarks>
/// Variable and constant tokens contribute their stored text; every other token contributes its
/// entry from <c>TokenToStringMap</c>. Keywords are written with exactly one space on each side.
/// </remarks>
/// <param name="tokens">Tokens to concatenate, in order.</param>
/// <returns>The concatenated text; an empty string for an empty list.</returns>
/// <exception cref="ArgumentNullException"><paramref name="tokens"/> is null.</exception>
/// <exception cref="ArgumentException">A token has a type with no text mapping.</exception>
public static string TokensToString(List<Tuple<TokenType, string>> tokens)
{
    ArgumentNullException.ThrowIfNull(tokens);

    StringBuilder sb = new StringBuilder();
    foreach (var token in tokens)
    {
        if (token.Item1 is TokenType.VariableName or TokenType.ConstantValue)
        {
            sb.Append(token.Item2);
        }
        else if (TokenToStringMap.TryGetValue(token.Item1, out var tokenStr))
        {
            sb.Append(tokenStr);
        }
        else
        {
            // A specific exception type instead of the bare System.Exception base class;
            // callers that catch Exception still catch this one.
            throw new ArgumentException($"No string mapping for token type: {token.Item1}");
        }
    }
    return sb.ToString();
}
#endregion
#region Serialization
/// <summary>
/// Serializer context shared by <see cref="SerializeTokens"/>, configured for indented output
/// with the <c>UnsafeRelaxedJsonEscaping</c> encoder.
/// </summary>
private static readonly SourceGenerationContext _serializationContext = new(
    new JsonSerializerOptions
    {
        Encoder = JavaScriptEncoder.UnsafeRelaxedJsonEscaping,
        WriteIndented = true
    });
/// <summary>
/// Renders a token list as JSON: an array with one single-entry object per token, keyed by the
/// token type name and holding the token text as its value.
/// </summary>
/// <param name="tokens">Tokens to render.</param>
/// <returns>The JSON text produced with <c>_serializationContext</c>.</returns>
public static string SerializeTokens(List<Tuple<TokenType, string>> tokens)
{
    var entries =
        from token in tokens
        select new Dictionary<string, string> { [token.Item1.ToString()] = token.Item2 };

    List<Dictionary<string, string>> shapedTokens = entries.ToList();
    return JsonSerializer.Serialize(shapedTokens, typeof(List<Dictionary<string, string>>), _serializationContext);
}
#endregion
}
/// <summary>
/// Demo entry point: tokenizes a sample condition, prints the tokens as JSON, rebuilds the
/// string from the tokens and reports whether it equals the original.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
public static void Main(string[] args)
{
    const string original = "((region in ['Nor]''th]Zone','South','A''B'] and years_experience lessthanorequal 1.5) or (_root equal 1 and score greaterthan 90)) and (name equal 'O]'']Connor' and level lessthan 10) and (department notin [HR,Finance , 'Legal' ] or (title notequal 'Intern' and _temp1 greaterthanorequal -5)) or nothing equal null or nothing notequal ''";

    List<Tuple<Tokenizer.TokenType, string>> tokens = Tokenizer.Tokenize(original);
    Console.WriteLine($"Tokens: {Tokenizer.SerializeTokens(tokens)}");

    string generated = Tokenizer.TokensToString(tokens);
    Console.WriteLine($"Tokens as String: {generated}");

    string mismatchPrefix = original != generated ? "DON'T " : "";
    Console.WriteLine($"Original and recreated strings {mismatchPrefix}MATCH!");
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment