Skip to content

Instantly share code, notes, and snippets.

@apemost
Created October 1, 2024 02:52
Show Gist options
  • Select an option

  • Save apemost/b64a3882716a269cad0f0ebe54edf085 to your computer and use it in GitHub Desktop.

Select an option

Save apemost/b64a3882716a269cad0f0ebe54edf085 to your computer and use it in GitHub Desktop.
AIP-160: Filtering example for Python
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from lark import Lark"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"parser = Lark.open(\"./filtering.lark\", rel_to=\".\", start=\"filter\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"filter\n",
" expression\n",
" sequence\n",
" factor\n",
" term\n",
" simple\n",
" restriction\n",
" comparable\n",
" member\ta\n",
" \n",
" factor\n",
" term\n",
" simple\n",
" restriction\n",
" comparable\n",
" member\tb\n",
" \n",
" AND\n",
" \n",
" sequence\n",
" factor\n",
" term\n",
" simple\n",
" restriction\n",
" comparable\n",
" member\tc\n",
" \n",
" AND\n",
" \n",
" sequence\n",
" factor\n",
" term\n",
" simple\n",
" restriction\n",
" comparable\n",
" member\td\n",
"\n"
]
}
],
"source": [
"print(parser.parse(\"a b AND c AND d\").pretty())"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"filter\n",
" expression\n",
" sequence\n",
" factor\n",
" term\n",
" simple\n",
" restriction\n",
" comparable\n",
" member\tNew\n",
" \n",
" factor\n",
" term\n",
" simple\n",
" restriction\n",
" comparable\n",
" member\tYork\n",
" \n",
" factor\n",
" term\n",
" simple\n",
" restriction\n",
" comparable\n",
" member\tGiants\n",
" \n",
" OR\n",
" \n",
" term\n",
" simple\n",
" restriction\n",
" comparable\n",
" member\tYankees\n",
"\n"
]
}
],
"source": [
"print(parser.parse(\"New York Giants OR Yankees\").pretty())"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"filter\n",
" expression\n",
" sequence\n",
" factor\n",
" term\n",
" simple\n",
" restriction\n",
" comparable\n",
" member\ta\n",
" comparator\t<\n",
" arg\n",
" comparable\n",
" member\t10\n",
" \n",
" OR\n",
" \n",
" term\n",
" simple\n",
" restriction\n",
" comparable\n",
" member\ta\n",
" comparator\t>=\n",
" arg\n",
" comparable\n",
" member\t100\n",
"\n"
]
}
],
"source": [
"print(parser.parse(\"a < 10 OR a >= 100\").pretty())"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"filter\n",
" expression\n",
" sequence\n",
" factor\n",
" term\n",
" NOT\n",
" \n",
" simple\n",
" composite\n",
" expression\n",
" sequence\n",
" factor\n",
" term\n",
" simple\n",
" restriction\n",
" comparable\n",
" member\ta\n",
" \n",
" OR\n",
" \n",
" term\n",
" simple\n",
" restriction\n",
" comparable\n",
" member\tb\n",
"\n"
]
}
],
"source": [
"print(parser.parse(\"NOT (a OR b)\").pretty())"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"filter\n",
" expression\n",
" sequence\n",
" factor\n",
" term\n",
" simple\n",
" restriction\n",
" comparable\n",
" member\tfile\n",
" comparator\t:\n",
" arg\n",
" comparable\n",
" member\t\".java\"\n",
"\n"
]
}
],
"source": [
"print(parser.parse('-file:\".java\"').pretty())"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"filter\n",
" expression\n",
" sequence\n",
" factor\n",
" term\n",
" simple\n",
" restriction\n",
" comparable\n",
" member\n",
" experiment\n",
" field\trollout\n",
" comparator\t<=\n",
" arg\n",
" comparable\n",
" function\n",
" name\tcohort\n",
" arglist\n",
" arg\n",
" comparable\n",
" member\n",
" request\n",
" field\tuser\n",
"\n"
]
}
],
"source": [
"print(parser.parse(\"experiment.rollout <= cohort(request.user)\").pretty())"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"filter\n",
" expression\n",
" sequence\n",
" factor\n",
" term\n",
" simple\n",
" restriction\n",
" comparable\n",
" function\n",
" name\tregex\n",
" arglist\n",
" arg\n",
" comparable\n",
" member\n",
" m\n",
" field\tkey\n",
" arg\n",
" comparable\n",
" member\t'^.*prod.*$'\n",
"\n"
]
}
],
"source": [
"print(parser.parse(\"regex(m.key, '^.*prod.*$')\").pretty())"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"filter\n",
" expression\n",
" sequence\n",
" factor\n",
" term\n",
" simple\n",
" composite\n",
" expression\n",
" sequence\n",
" factor\n",
" term\n",
" simple\n",
" restriction\n",
" comparable\n",
" function\n",
" name\tmsg\n",
" name\tendsWith\n",
" arglist\n",
" arg\n",
" comparable\n",
" member\t'world'\n",
" \n",
" AND\n",
" \n",
" sequence\n",
" factor\n",
" term\n",
" simple\n",
" restriction\n",
" comparable\n",
" member\tretries\n",
" comparator\t<\n",
" arg\n",
" comparable\n",
" member\t10\n",
"\n"
]
}
],
"source": [
"print(parser.parse(\"(msg.endsWith('world') AND retries < 10)\").pretty())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "apemost",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
// Lark grammar for AIP-160 list filtering.
// Specification:  https://google.aip.dev/160
// Reference EBNF: https://google.aip.dev/assets/misc/ebnf-filtering.txt
//
// Filter, possibly empty: the single `expression` is optional, so this rule
// can match without any expression at all.
filter: expression?
// Expressions may either be a conjunction (AND) of sequences or a simple
// sequence.
//
// Note, the AND is case-sensitive: the AND terminal is the exact literal
// "AND", and whitespace (WS) is required on both sides of it.
//
// AND separates whole sequences, so it binds more loosely than both
// whitespace sequencing and OR.
//
// Example: `a b AND c AND d`
//
// The expression `(a b) AND c AND d` is equivalent to the example.
expression: sequence (WS AND WS sequence)*
// Sequence is composed of one or more whitespace (WS) separated factors.
//
// A sequence expresses a logical relationship between 'factors' where
// the ranking of a filter result may be scored according to the number
// of factors that match and other such criteria as the proximity of
// factors to each other within a document.
//
// When filters are used with exact match semantics rather than fuzzy
// match semantics, a sequence is equivalent to AND.
//
// Example: `New York Giants OR Yankees`
//
// The expression `New York (Giants OR Yankees)` is equivalent to the
// example.
sequence: factor (WS factor)*
// Factors may either be a disjunction (OR) of terms or a simple term.
//
// Note, the OR is case-sensitive: the OR terminal is the exact literal
// "OR", and whitespace (WS) is required on both sides of it.
//
// OR joins individual terms inside one factor, so it binds more tightly
// than sequencing and AND: `a b OR c` groups as `a (b OR c)`.
//
// Example: `a < 10 OR a >= 100`
factor: term (WS OR WS term)*
// Terms may either be unary or simple expressions.
//
// Unary expressions negate the simple expression, either mathematically `-`
// or logically `NOT`. The negation styles may be used interchangeably.
//
// Note, the `NOT` is case-sensitive and must be followed by at least one
// whitespace (WS).
//
// Examples:
// * logical not : `NOT (a OR b)`
// * alternative not : `-file:".java"`
// * negation : `-30`
//
// The `-` form is matched by the named terminal MINUS instead of an inline
// "-" literal. Lark filters anonymous string literals out of the parse tree,
// so with an inline literal `-file:".java"` and `file:".java"` produced
// identical trees and the negation was silently lost. A named terminal is
// kept as a child of `term`, exactly like NOT already is.
term: (NOT WS | MINUS)? simple
MINUS: "-"
// A simple expression is one of two things: a single restriction, or a
// parenthesized (composite) expression nested inside the current one.
simple: restriction | composite
// Restrictions express a relationship between a comparable value and a
// single argument. When the restriction only specifies a comparable
// without an operator, this is a global restriction.
//
// Note, restrictions are not whitespace sensitive: the rule itself mentions
// no whitespace, and whitespace around the comparator is skipped by the
// grammar-wide `%ignore WS` directive.
//
// Examples:
// * equality : `package=com.google`
// * inequality : `msg != 'hello'`
// * greater than : `1 > 0`
// * greater or equal : `2.5 >= 2.4`
// * less than : `yesterday < request.time`
// * less or equal : `experiment.rollout <= cohort(request.user)`
// * has : `map:key`
// * global : `prod`
//
// In addition to the global, equality, and ordering operators, filters
// also support the has (`:`) operator. The has operator is unique in
// that it can test for presence or value based on the proto3 type of
// the `comparable` value. The has operator is useful for validating the
// structure and contents of complex values.
//
// The `comparator arg` pair is optional as a unit: either both are present
// or neither is.
restriction: comparable (comparator arg)?
// The left-hand side of a restriction: either a (possibly dotted) member
// reference or a function call.
comparable: member | function
// Member expressions are either value or DOT qualified field references.
//
// Example: `expr.type_map.1.type`
//
// The "." separators are anonymous literals, so only the leading value and
// the `field` subtrees appear as children of `member`.
//
// Because the TEXT terminal cannot contain `.`, a decimal literal such as
// `2.5` is parsed by this rule as the value `2` followed by the field `5`;
// consumers that want numbers must reassemble them from the member parts.
member: value ("." field)*
// Function calls may use simple or qualified names with zero or more
// arguments.
//
// All functions declared within the list filter, apart from the special
// `arguments` function, must be provided by the host service.
//
// Examples:
// * `regex(m.key, '^.*prod.*$')`
// * `math.mem('30mb')`
//
// Antipattern: simple and qualified function names may include keywords:
// NOT, AND, OR. It is not recommended that any of these names be used
// within functions exposed by a service that supports list filters.
//
// A qualified name is a run of `name` nodes separated by ".", and the
// bracketed `[arglist]` is optional so an empty call such as `f()` is
// accepted. The ".", "(" and ")" punctuation are anonymous literals and are
// not kept in the parse tree.
function: name ("." name)* "(" [arglist] ")"
// The operators a restriction may use between its comparable and its
// argument. Every alternative is a named terminal, so the operator text that
// was matched is kept as the single child of the `comparator` node.
comparator: LESS_EQUALS | LESS_THAN | GREATER_EQUALS | GREATER_THAN | NOT_EQUALS | EQUALS | HAS
// Composite is a parenthesized expression, commonly used to group
// terms or clarify operator precedence.
//
// Example: `(msg.endsWith('world') AND retries < 10)`
//
// The parentheses are anonymous literals, so the `composite` node holds only
// the nested `expression` subtree.
composite: "(" expression ")"
// A value is a single TEXT or STRING token.
//
// TEXT is an unquoted run of characters with no whitespace (WS) and no
// `.` (DOT). It may stand for a variable, string, number, boolean, or other
// literal, and must be interpreted in a manner consistent with the
// service's intention. In this grammar the TEXT terminal is narrower than
// "free-form": it only admits ASCII letters, digits and underscores.
//
// STRING is a quoted string. It may carry the special wildcard character
// `*` at its beginning or end to request a prefix or suffix-based search
// within a restriction; the grammar itself does not interpret the wildcard.
//
// The leading `?` asks Lark to inline this rule when it has a single child,
// so the token appears directly under the parent node rather than under a
// `value` node.
?value: TEXT | STRING
// A field is what follows a "." in a member expression. Besides any value
// (TEXT or STRING) it may also be one of the reserved keywords.
field: value | keyword
// Names may either be a NAME token or a keyword.
//
// NAME is stricter than TEXT (it cannot start with a digit), which keeps
// function names identifier-like while still permitting the keyword
// antipattern described on the `function` rule.
name: NAME
| keyword
// Argument list of a function call: one or more `arg` nodes separated by
// commas. The "," separators are anonymous literals and are not kept in the
// parse tree.
arglist: arg ("," arg)*
// A single argument, used both on the right-hand side of a restriction and
// inside a function's argument list: a comparable or a parenthesized
// expression.
arg: comparable | composite
// The reserved words of the filter language, wrapped in a rule so that
// `field` and `name` can accept them where plain TEXT/NAME would refuse.
keyword: NOT | AND | OR
// Keyword terminals. Each is an exact upper-case literal, which is what
// makes the logical operators case-sensitive.
NOT: "NOT"
AND: "AND"
OR: "OR"
// Comparator terminals, referenced by the `comparator` rule.
LESS_EQUALS: "<="
LESS_THAN: "<"
GREATER_EQUALS: ">="
GREATER_THAN: ">"
NOT_EQUALS: "!="
EQUALS: "="
// The has operator (`:`), see the `restriction` rule.
HAS: ":"
// NAME: an identifier-like token used for function names. It starts with an
// ASCII letter or underscore and continues with letters, digits or
// underscores; the negative lookahead rejects a token that is exactly the
// keyword NOT, AND or OR.
//
// The leading class is spelled `[a-zA-Z_]` on purpose. The earlier spelling
// `[a-zA-z]` used the range `A-z`, which in ASCII also spans the punctuation
// `[`, `\`, `]`, `^`, `_` and the backtick. Underscore is kept explicitly so
// names that already parsed (e.g. `_helper`) continue to do so; the other
// punctuation characters are no longer admitted.
NAME: /\b(?!(NOT|AND|OR)\b)[a-zA-Z_][_a-zA-Z0-9]*\b/
// TEXT: one or more ASCII letters, digits or underscores, delimited by word
// boundaries. The negative lookahead rejects a token that is exactly the
// keyword NOT, AND or OR; text that merely begins with a keyword (for
// example `ORDER`) is still accepted because the lookahead requires a word
// boundary right after the keyword.
TEXT: /\b(?!(NOT|AND|OR)\b)[_a-zA-Z0-9]+\b/
// STRING: a double-quoted or single-quoted string whose body is matched by
// `_STRING_ESC_INNER` from Lark's `common` grammar. The opening and closing
// quote must be of the same kind, and both quote characters are part of the
// resulting token text.
STRING: "\"" _STRING_ESC_INNER "\""
| "'" _STRING_ESC_INNER "'"
// Terminals reused from Lark's bundled `common` grammar.
%import common._STRING_ESC_INNER
%import common.WS
// Whitespace is skipped wherever a rule does not ask for it explicitly.
// WS is also referenced directly by `expression`, `sequence`, `factor` and
// `term`, where it acts as a mandatory separator.
// NOTE(review): mixing `%ignore WS` with explicit WS presumably relies on
// the Earley parser's dynamic lexer (Lark's default) -- confirm before
// switching this grammar to `parser="lalr"`.
%ignore WS
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment