This guide explains how to set up a dbt environment using Docker Compose with PostgreSQL as the database. It includes a SQL model and a Python model for transforming data, a script that fetches data from an external API (ASPIRE) into a staging table, and an export step that writes the transformed data to AWS S3.
The project is laid out as follows:

```text
my_dbt_project/
├── docker-compose.yml
├── Dockerfile
├── dbt_project/
│   ├── dbt_project.yml
│   ├── profiles.yml
│   ├── models/
│   │   ├── sql_model.sql
│   │   └── python_script.py
│   └── seeds/
│       └── raw_table.csv
└── scripts/
    ├── fetch_data.py
    └── export_to_s3.py
```
docker-compose.yml defines two services: a PostgreSQL 13 database and a dbt container built from the local Dockerfile. The dbt container mounts both the dbt project and the helper scripts, and is kept running so that dbt and Python commands can be executed with `docker exec`:

```yaml
version: '3.8'

services:
  postgres:
    image: postgres:13
    container_name: postgres
    environment:
      POSTGRES_USER: dbt_user
      POSTGRES_PASSWORD: dbt_password
      POSTGRES_DB: dbt_db
    ports:
      - "5432:5432"
    volumes:
      - postgres_data:/var/lib/postgresql/data

  dbt:
    build:
      context: .
    container_name: dbt
    environment:
      # Point dbt at the directory that contains profiles.yml (not at the file itself)
      DBT_PROFILES_DIR: /usr/app/dbt
    volumes:
      - ./dbt_project:/usr/app/dbt
      # Make the helper scripts available inside the container
      - ./scripts:/usr/app/dbt/scripts
    # Keep the container alive so commands can be run via docker exec
    command: tail -f /dev/null
    depends_on:
      - postgres

volumes:
  postgres_data:
```
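To make sure the database service is healthy before wiring up dbt, you can bring up just Postgres and open a `psql` session; the user, password, and database names below come from the compose file above:

```bash
# Start only the database service in the background
docker-compose up -d postgres

# Open psql inside the container and list the available databases
docker-compose exec postgres psql -U dbt_user -d dbt_db -c '\l'
```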
The Dockerfile builds a Python 3.9 image with dbt, the Postgres adapter, and the libraries used by the helper scripts:

```dockerfile
# Base image with Python
FROM python:3.9-slim

# Set environment variables
ENV DBT_HOME=/usr/app/dbt
ENV PATH="$DBT_HOME:$PATH"

# Install system dependencies
RUN apt-get update && apt-get install -y \
    git \
    build-essential \
    libpq-dev \
    && rm -rf /var/lib/apt/lists/*

# Create working directory
WORKDIR $DBT_HOME

# Install dbt and required modules
RUN pip install --no-cache-dir \
    dbt-core dbt-postgres boto3 pandas requests psycopg2

# Copy project files
COPY . $DBT_HOME

# Print the dbt version by default; docker-compose overrides this to keep the container running
CMD ["dbt", "--version"]
```

dbt_project.yml names the project, points it at the `default` profile, and materializes all models as tables:

```yaml
name: my_dbt_project
version: "1.0"
profile: default
config-version: 2
# Define where models are located
models:
my_dbt_project:
+materialized: tabledefault:
  outputs:
    dev:
      type: postgres
      host: postgres
      user: dbt_user
      password: dbt_password
      port: 5432
      dbname: dbt_db
      schema: public
  target: dev
```
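Once the services are up (see the run steps below), you can confirm that dbt can read this profile and reach Postgres with dbt's built-in connection check:

```bash
# Verify the profile, project, and database connection from inside the dbt container
docker exec -it dbt dbt debug
```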
models/sql_model.sql aggregates the raw events per user and event type. Note that dbt models should not end with a semicolon, because dbt wraps the query when materializing the model:

```sql
SELECT
    user_id,
    event_type,
    COUNT(*) AS event_count,
    MAX(event_time) AS last_event_time
FROM {{ ref('raw_table') }}
GROUP BY user_id, event_type
```
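Optionally, you can document and test the model with a schema file next to it. The file name models/schema.yml and the specific tests below are suggestions, not part of the original project:

```yaml
# models/schema.yml (optional, suggested addition)
version: 2

models:
  - name: sql_model
    description: "Event counts and last event time per user and event type"
    columns:
      - name: user_id
        tests:
          - not_null
      - name: event_type
        tests:
          - not_null
```

The tests can then be run with `docker exec -it dbt dbt test`.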
models/python_script.py is a dbt Python model that reads the SQL model's output, writes it to a CSV file, and uploads it to S3. Keep in mind that dbt Python models only run on adapters that support them (such as Snowflake, Databricks, or BigQuery); dbt-postgres does not execute Python models, so on this stack the same logic is typically run as a standalone script (see the sketch of scripts/export_to_s3.py below):

```python
import pandas as pd
import boto3


def model(dbt, session):
    # Fetch data from an upstream model
    df = dbt.ref("sql_model").to_pandas()

    # Save the DataFrame as a CSV file
    output_file = "/tmp/exported_data.csv"
    df.to_csv(output_file, index=False)

    # Upload to S3 (prefer IAM roles or environment variables over hard-coded keys)
    s3 = boto3.client(
        "s3",
        aws_access_key_id="<your_aws_access_key>",
        aws_secret_access_key="<your_aws_secret_key>",
    )
    s3.upload_file(output_file, "your-s3-bucket", "data/exported_data.csv")
    print("File uploaded to S3!")

    return df
```
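The project tree lists scripts/export_to_s3.py, but the guide does not show its contents. Below is a minimal sketch, assuming it mirrors the Python model above: it reads the materialized `sql_model` table from Postgres with pandas and uploads a CSV to S3. The connection values come from docker-compose.yml; the bucket name is a placeholder:

```python
import boto3
import pandas as pd
import psycopg2

# Connection settings for the Postgres service defined in docker-compose.yml
DB_CONFIG = {
    "host": "postgres",
    "database": "dbt_db",
    "user": "dbt_user",
    "password": "dbt_password",
    "port": 5432,
}

S3_BUCKET = "your-s3-bucket"
S3_KEY = "data/exported_data.csv"


def export_to_s3():
    # Read the table materialized by sql_model (schema "public" per profiles.yml)
    conn = psycopg2.connect(**DB_CONFIG)
    try:
        df = pd.read_sql("SELECT * FROM public.sql_model", conn)
    finally:
        conn.close()

    # Write it to a local CSV file and upload it to S3
    output_file = "/tmp/exported_data.csv"
    df.to_csv(output_file, index=False)

    # Credentials are taken from the environment or an attached IAM role
    s3 = boto3.client("s3")
    s3.upload_file(output_file, S3_BUCKET, S3_KEY)
    print(f"Uploaded {output_file} to s3://{S3_BUCKET}/{S3_KEY}")


if __name__ == "__main__":
    export_to_s3()
```

It can be run inside the dbt container with `docker exec -it dbt python /usr/app/dbt/scripts/export_to_s3.py`.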
scripts/fetch_data.py pulls records from the ASPIRE API and loads them into a staging table in Postgres. When run inside the dbt container, the connection placeholders should match the values in docker-compose.yml (host `postgres`, database `dbt_db`, user `dbt_user`):

```python
import requests
import psycopg2

# ASPIRE API details
ASPIRE_API_URL = "https://api.aspire-system.com/data"
API_KEY = "your_api_key"

# Database connection details (match docker-compose.yml when running inside the dbt container)
DB_CONFIG = {
    "host": "<your_db_host>",
    "database": "<your_db_name>",
    "user": "<your_db_user>",
    "password": "<your_db_password>",
    "port": 5432
}


def fetch_data_from_aspire():
    headers = {"Authorization": f"Bearer {API_KEY}"}
    response = requests.get(ASPIRE_API_URL, headers=headers)
    response.raise_for_status()
    return response.json()


def insert_data_into_staging(data):
    conn = psycopg2.connect(**DB_CONFIG)
    cursor = conn.cursor()
    # Insert each record into the staging table (it must already exist; see the DDL sketch below)
    for record in data:
        cursor.execute(
            "INSERT INTO staging_table (column1, column2) VALUES (%s, %s)",
            (record['field1'], record['field2'])
        )
    conn.commit()
    cursor.close()
    conn.close()


def main():
    data = fetch_data_from_aspire()
    insert_data_into_staging(data)
    print("Data loaded into the staging table.")


if __name__ == "__main__":
    main()
```
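The script assumes that `staging_table` already exists. The guide does not define it, so the DDL below is only a sketch with assumed column names and types; adapt it to the actual ASPIRE payload and run it against `dbt_db` before executing fetch_data.py:

```sql
-- Column names and types are assumptions; adjust to the real ASPIRE fields
CREATE TABLE IF NOT EXISTS staging_table (
    column1 TEXT,
    column2 TEXT
);
```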
The seed file (saved as seeds/raw_table.csv so that `ref('raw_table')` in the SQL model resolves to it) contains the raw events; `dbt seed` loads it into PostgreSQL:

```csv
user_id,event_type,event_time
1,login,2025-01-01 10:00:00
1,logout,2025-01-01 10:05:00
2,login,2025-01-01 11:00:00
2,logout,2025-01-01 11:15:00
```

With everything in place, build the image, start the services, load the data, and run the models:

```bash
# Build the dbt image
docker-compose build

# Start PostgreSQL and the dbt container in the background
docker-compose up -d

# Fetch data from the ASPIRE API into the staging table
docker exec -it dbt python /usr/app/dbt/scripts/fetch_data.py

# Load the seed file (raw_table.csv) into PostgreSQL
docker exec -it dbt dbt seed

# Run the dbt models
docker exec -it dbt dbt run
```

Afterwards:

- Check the transformed data in PostgreSQL (see the verification commands after this list).
- Confirm the CSV file upload in your S3 bucket (also covered by the verification commands below).
- Use an orchestration tool such as Apache Airflow or Prefect to automate the dbt pipeline (a minimal Airflow sketch follows below).
- Configure S3 event notifications to trigger Snowpipe after the upload.
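For the verification steps above, the following commands query the transformed model in PostgreSQL and list the exported object in S3. They assume the AWS CLI is installed and configured, and that the bucket name matches the placeholder used in the export code:

```bash
# Inspect the transformed model in PostgreSQL
docker-compose exec postgres psql -U dbt_user -d dbt_db -c "SELECT * FROM public.sql_model LIMIT 10;"

# Confirm the exported CSV landed in the bucket
aws s3 ls s3://your-s3-bucket/data/
```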
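If you automate the pipeline with Apache Airflow, the steps map naturally onto a small DAG. This is a minimal sketch, assuming Airflow 2.x runs on the Docker host and can call `docker exec` against the running dbt container; the DAG id, schedule, and task names are illustrative only:

```python
from datetime import datetime

from airflow import DAG
from airflow.operators.bash import BashOperator

with DAG(
    dag_id="dbt_pipeline",          # illustrative name
    start_date=datetime(2025, 1, 1),
    schedule_interval="@daily",
    catchup=False,
) as dag:
    # Pull fresh data from the ASPIRE API into the staging table
    fetch = BashOperator(
        task_id="fetch_aspire_data",
        bash_command="docker exec dbt python /usr/app/dbt/scripts/fetch_data.py",
    )

    # Load the seed file and run the dbt models
    seed = BashOperator(
        task_id="dbt_seed",
        bash_command="docker exec dbt dbt seed",
    )
    run = BashOperator(
        task_id="dbt_run",
        bash_command="docker exec dbt dbt run",
    )

    fetch >> seed >> run
```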