Created
August 6, 2025 06:35
-
-
Save vanhtuan0409/35952bde26df603a30853cd3aecf876b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from pprint import pprint | |
| import sqlglot | |
| from sqlglot.expressions import Select, Subquery, Table | |
| def get_acl_permission(table: Table) -> str: | |
| # given a specific table | |
| # should query gondor for a list of accessible row for this table | |
| # after that, transform the result into duckdb temp table to query `IN` | |
| # this function should be cached | |
| return "temp_permission_table" | |
| # The idea is to transform every `Table` identifier to a subquery | |
| # doing filter on the original table | |
| # For example `select * from X` should be rewritten to `select * from (select * from X where ...)` | |
| # . | |
| # This approach heavily rely on the reliable of sqlglot parser | |
| # Need to comeup with a complex test suite | |
| def transform_node(e: sqlglot.Expression) -> sqlglot.Expression: | |
| if not isinstance(e, Table): | |
| return e | |
| orig_alias = e.alias | |
| e.set("alias", None) # strip down alias from originial table | |
| perm_table = get_acl_permission(e) | |
| res = Subquery( | |
| this=Select() | |
| .select("*") | |
| .from_(e) | |
| .where(f"perm_key IN (SELECT allowed_key FROM {perm_table})") | |
| ) | |
| if not orig_alias: | |
| return res | |
| return res.as_(orig_alias) | |
| def transform_select(e: sqlglot.Expression) -> sqlglot.Expression: | |
| if not isinstance(e, Select): | |
| # Only handle rewrite for read query | |
| # Write query should be perform through file import and checking ingestion data | |
| raise ValueError(f"only support `SELECT` query: {e}") | |
| return e.transform(transform_node, copy=True) | |
| def main(): | |
| query = "select * from anduin.foo.bar as t1 join anduin.bar.foo as t2 on t1.id = t2.id where x in (select name from anduin.x.y)" | |
| expressions = sqlglot.parse(query, dialect="duckdb") | |
| for expr in expressions: | |
| if expr is None: | |
| continue | |
| print("=====") | |
| print(expr.sql(pretty=True)) | |
| print("---") | |
| new_expression = transform_select(expr) | |
| print(new_expression.sql(pretty=True)) | |
| if __name__ == "__main__": | |
| main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment