Skip to content

Instantly share code, notes, and snippets.

View philerooski's full-sized avatar

Phil Snyder philerooski

  • San Francisco, CA
View GitHub Profile
@philerooski
philerooski / load_snapshot_data.py
Created December 3, 2025 22:54
An updated version of the script used to load RDS snapshot data into Snowflake
"""
Load snapshot data from Snowflake stage into tables.
This script dynamically lists all prefixes under the specified prefix_base
from the stage, derives table names, creates tables using INFER_SCHEMA,
and logs all operations to LOAD_LOG.
See `python load_snapshot_data.py --help`
"""
"""
Analyze errors from the LOAD_LOG table.
This script queries the LOAD_LOG table for failed operations,
categorizes errors by type, and groups data types by error category.
This is a complementary script to https://gist.github.com/philerooski/a740b25f066f1ad205344637160aa969
"""
import snowflake.connector
"""
Load snapshot data from Snowflake stage into tables.
This script processes prefixes from PREFIX_LIST table, derives table names,
creates tables using INFER_SCHEMA, and logs all operations to LOAD_LOG.
See `--help` for optional parameter `--only-affected`
"""
import snowflake.connector
@philerooski
philerooski / create_confluence_pages.py
Created June 25, 2025 17:12
A rough draft of a script which pulls Snowflake table/column comments into Confluence
import os
import random
import logging
import argparse
import toml
import snowflake.connector
from atlassian import Confluence
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
### Does not work
expectation_suite_name = "my_expectation_suite"
checkpoint_name = "my_checkpoint"
context = gx.get_context()
# # Initialize expectation suite
def init_expectation_suite():
expectation_suite = context.add_expectation_suite(
"""
Run this script from within the unzipped directory `JMV_fitbit_dta`
Download zipped data here: https://www.synapse.org/Synapse:syn62667431
"""
import pandas as pd
import json
import os
"""
A script which uploads validation results and a data validation
report to S3 for the FitbitSleepLogs data type. This was run in
Glue 4.0 while specifying --additional-python-modules great_expectations==0.18.11,boto3==1.24.70
"""
import json
import logging
import os
import subprocess
import sys
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
import bridgeclient # https://github.com/larssono/bridgeclient
import functools
def get_bridge_client(email, password, study):
    """Create and return a Bridge connector for the given account and study.

    Thin wrapper around ``bridgeclient.bridgeConnector``.

    Args:
        email: Account email, passed positionally to the connector.
        password: Account password, passed positionally to the connector.
        study: Study identifier, forwarded as the ``study`` keyword argument.

    Returns:
        The object returned by ``bridgeclient.bridgeConnector``.
    """
    # NOTE(review): presumably the connector authenticates on construction;
    # confirm against the bridgeclient package before relying on it.
    return bridgeclient.bridgeConnector(email, password, study=study)
"""
import synapseclient
import argparse
import pandas
import json
import os
DUMMY_ARG = "dummy" # A 'None' like string we can pass to boto