Last active
October 3, 2024 09:37
-
-
Save lawrenceadams/8e259ebebf5ed5986020a9185fe50dc6 to your computer and use it in GitHub Desktop.
Quick script to convert OMOP vocab into parquet files with valid date types
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import duckdb | |
| from glob import glob | |
| from pathlib import Path | |
| conn = duckdb.connect() | |
| for file_path in glob("*.csv"): | |
| file = Path(file_path) | |
| print(f" {file.stem}") | |
| query = f""" | |
| CREATE TABLE {file.stem} AS ( | |
| SELECT * FROM read_csv('{file}', delim='\t', quote='') | |
| ); | |
| """ | |
| conn.execute(query) | |
| inspect_query = f""" | |
| DESCRIBE {file.stem}; | |
| """ | |
| columns = conn.query(inspect_query) | |
| columns = columns.fetchall() | |
| for col in columns: | |
| col = col[0] | |
| if "date" in col: | |
| alter_query = f""" | |
| ALTER TABLE {file.stem} ALTER COLUMN {col} | |
| SET DATA TYPE DATE | |
| USING strptime({col}::VARCHAR, '%Y%m%d'); | |
| """ | |
| conn.execute(alter_query) | |
| export_query = f"COPY (SELECT * FROM {file.stem}) TO '{file.stem.lower()}.parquet';" | |
| conn.query(export_query) | |
| conn.close() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment