Forked from imankulov/sqlalchemy_with_pydantic.py
Last active: November 8, 2025 14:43
Using pydantic models as SQLAlchemy JSON fields (convert between JSON and pydantic.BaseModel subclasses) with modern syntax
import datetime
from typing import TYPE_CHECKING, Any, Optional, final

import sqlalchemy as sa
from pydantic import BaseModel, Field
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped, declarative_base, mapped_column, sessionmaker
from typing_extensions import override

if TYPE_CHECKING:
    from sqlalchemy import Dialect
    from sqlalchemy.sql.type_api import TypeEngine


# --------------------------------------------------------------------------------------
# Define pydantic-alchemy specific types (once per application)
# --------------------------------------------------------------------------------------
@final
class PydanticType(sa.types.TypeDecorator["BaseModel"]):
    """Pydantic type.

    SAVING:
    - Uses SQLAlchemy JSON type under the hood.
    - Accepts the pydantic model and converts it to a dict on save.
    - SQLAlchemy engine JSON-encodes the dict to a string.

    RETRIEVING:
    - Pulls the string from the database.
    - SQLAlchemy engine JSON-decodes the string to a dict.
    - Uses the dict to create a pydantic model.
    """

    # If you intend to use this class with one dialect only,
    # you could pick a type from that specific dialect as `impl`
    # for simplicity's sake.
    #
    # E.g., if you work with PostgreSQL, you can consider using
    # sqlalchemy.dialects.postgresql.JSONB instead of the
    # generic sa.types.JSON.
    # Ref: https://www.postgresql.org/docs/13/datatype-json.html
    #
    # Otherwise, you should implement the `load_dialect_impl`
    # method to handle different dialects. In this case, the
    # impl variable can reference TypeEngine as a placeholder.
    impl = sa.types.JSON

    def __init__(self, pydantic_type: type["BaseModel"]) -> None:
        super().__init__()
        self.pydantic_type = pydantic_type

    @override
    def load_dialect_impl(self, dialect: "Dialect") -> "TypeEngine[JSONB | sa.JSON]":
        # You should implement this method to handle different dialects
        # if you intend to use this class with more than one.
        # E.g., use JSONB for PostgreSQL and the generic JSON type for
        # other databases.
        if dialect.name == "postgresql":
            return dialect.type_descriptor(JSONB())
        else:
            return dialect.type_descriptor(sa.JSON())

    @override
    def process_bind_param(
        self,
        value: "BaseModel | None",
        dialect: "Dialect",
    ) -> "dict[str, Any] | None":
        if value is None:
            return None
        if not isinstance(value, BaseModel):  # dynamic typing.
            raise TypeError(f'The value "{value!r}" is not a pydantic model')
        # Method .dict() is deprecated as of pydantic V2.
        #
        # Think before setting exclude_unset to True. It can reduce the size
        # of the dump, but unset values might mean something depending on your context.
        #
        # Setting mode to "json" means you won't need to define a custom JSON
        # serializer for the engine.
        return value.model_dump(mode="json", exclude_unset=True)

    @override
    def process_result_value(
        self,
        value: "dict[str, Any] | None",
        dialect: "Dialect",
    ) -> "BaseModel | None":
        # `parse_obj_as` is deprecated as of pydantic V2.
        # We're assuming that the value will be a dictionary here.
        return self.pydantic_type(**value) if value else None


# --------------------------------------------------------------------------------------
# Configure SQLAlchemy engine, session and declarative base (once per application)
# --------------------------------------------------------------------------------------
engine = sa.create_engine("sqlite:///:memory:")
Session = sessionmaker(bind=engine, expire_on_commit=False)
Base = declarative_base()


# --------------------------------------------------------------------------------------
# Define your Pydantic and SQLAlchemy models (as many as needed)
# --------------------------------------------------------------------------------------
class UserSettings(BaseModel):
    notify_at: datetime.datetime = Field(default_factory=datetime.datetime.now)


@final
class User(Base):
    __tablename__ = "users"

    # Using Annotated Declarative Table.
    # docs.sqlalchemy.org/en/20/orm/declarative_tables.html#using-annotated-declarative-table-type-annotated-forms-for-mapped-column
    id: Mapped[int] = mapped_column(primary_key=True)
    name: Mapped[str] = mapped_column(doc="User name", comment="User name")
    settings: Mapped[Optional[UserSettings]] = mapped_column(
        PydanticType(UserSettings), nullable=True
    )


# --------------------------------------------------------------------------------------
# Create tables (once per application)
# --------------------------------------------------------------------------------------
Base.metadata.create_all(engine)

# --------------------------------------------------------------------------------------
# Usage example (we use 2.0 querying style with selects)
# Ref: https://docs.sqlalchemy.org/en/20/orm/session_basics.html
# --------------------------------------------------------------------------------------
session = Session()
user = User(name="user", settings=UserSettings())
session.add(user)
session.commit()
same_user = session.execute(sa.select(User)).scalars().first()
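As a quick sanity check of the round trip, a sketch assuming the snippet above has just run:

# The JSON column comes back as a UserSettings instance, rebuilt by PydanticType.
assert same_user is not None
assert isinstance(same_user.settings, UserSettings)
print(same_user.settings.notify_at)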
Author: Thank you very much for the clarification! :)
Great snippet, thank you.
Some suggestions to be a bit more idiomatic and improve typing DX:
- use `from __future__ import annotations` to avoid quoted typing
- return `self.pydantic_type.model_construct(**value) if value else None` in `process_result_value` if you don't want to revalidate on load, or `return self.pydantic_type.model_validate(value) if value else None` if you do want revalidation on load (a quick sketch of the difference follows this list)
- use `MappedAsDataclass`
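A quick sketch of that revalidation trade-off, using a throwaway model that is not part of the gist: `model_validate` checks and coerces the loaded dict, while `model_construct` skips validation entirely and trusts whatever was stored.

from pydantic import BaseModel, ValidationError


class Example(BaseModel):  # hypothetical model, for illustration only
    count: int


row = {"count": "3"}  # the kind of dict a JSON column might hand back

validated = Example.model_validate(row)      # coerces "3" -> 3; raises on bad data
unchecked = Example.model_construct(**row)   # no validation; count stays the str "3"

print(type(validated.count))  # <class 'int'>
print(type(unchecked.count))  # <class 'str'>

try:
    Example.model_validate({"count": "not a number"})
except ValidationError:
    print("model_validate rejects invalid data")

The full improved version applying these suggestions: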
from __future__ import annotations

import datetime
from typing import TYPE_CHECKING, Any, final, override

import sqlalchemy as sa
from pydantic import BaseModel, Field, TypeAdapter
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import DeclarativeBase, Mapped, MappedAsDataclass, mapped_column, sessionmaker

if TYPE_CHECKING:
    from sqlalchemy import Dialect
    from sqlalchemy.sql.type_api import TypeEngine


# --------------------------------------------------------------------------------------
# Define pydantic-alchemy specific types (once per application)
# --------------------------------------------------------------------------------------
@final
class PydanticType(sa.types.TypeDecorator[BaseModel]):
    """Pydantic type.

    SAVING:
    - Uses SQLAlchemy JSON type under the hood.
    - Accepts the pydantic model and converts it to a dict on save.
    - SQLAlchemy engine JSON-encodes the dict to a string.

    RETRIEVING:
    - Pulls the string from the database.
    - SQLAlchemy engine JSON-decodes the string to a dict.
    - Uses the dict to create a pydantic model.
    """

    # If you intend to use this class with one dialect only,
    # you could pick a type from that specific dialect as `impl`
    # for simplicity's sake.
    #
    # E.g., if you work with PostgreSQL, you can consider using
    # sqlalchemy.dialects.postgresql.JSONB instead of the
    # generic sa.types.JSON.
    # Ref: https://www.postgresql.org/docs/13/datatype-json.html
    #
    # Otherwise, you should implement the `load_dialect_impl`
    # method to handle different dialects. In this case, the
    # impl variable can reference TypeEngine as a placeholder.
    impl = sa.types.JSON

    def __init__(self, pydantic_type: type[BaseModel]) -> None:
        super().__init__()
        self.pydantic_type = pydantic_type

    @override
    def load_dialect_impl(self, dialect: Dialect) -> TypeEngine[JSONB | sa.JSON]:
        # You should implement this method to handle different dialects
        # if you intend to use this class with more than one.
        # E.g., use JSONB for PostgreSQL and the generic JSON type for
        # other databases.
        if dialect.name == "postgresql":
            return dialect.type_descriptor(JSONB())
        else:
            return dialect.type_descriptor(sa.JSON())

    @override
    def process_bind_param(
        self,
        value: BaseModel | None,
        dialect: Dialect,
    ) -> dict[str, Any] | None:
        if value is None:
            return None
        if not isinstance(value, BaseModel):  # dynamic typing.
            raise TypeError(f'The value "{value!r}" is not a pydantic model')
        # Method .dict() is deprecated as of pydantic V2.
        #
        # Think before setting exclude_unset to True. It can reduce the size
        # of the dump, but unset values might mean something depending on your context.
        #
        # Setting mode to "json" means you won't need to define a custom JSON
        # serializer for the engine.
        return value.model_dump(mode="json")

    @override
    def process_result_value(
        self,
        value: dict[str, Any] | None,
        dialect: Dialect,
    ) -> BaseModel | None:
        # We're assuming that the value will be a dictionary here.
        validate_on_load = True
        if validate_on_load:
            return self.pydantic_type.model_validate(value) if value else None
        else:
            return self.pydantic_type.model_construct(**value) if value else None


# --------------------------------------------------------------------------------------
# Configure SQLAlchemy engine, session and declarative base (once per application)
# --------------------------------------------------------------------------------------
engine = sa.create_engine("sqlite:///:memory:")
Session = sessionmaker(bind=engine, expire_on_commit=False)


# --------------------------------------------------------------------------------------
# Define your Pydantic and SQLAlchemy models (as many as needed)
# --------------------------------------------------------------------------------------
class UserSettings(BaseModel):
    notify_at: datetime.datetime = Field(default_factory=datetime.datetime.now)


class Base(MappedAsDataclass, DeclarativeBase):
    """Subclasses will be converted to dataclasses, offering better type hints and IDE support.

    See: https://docs.sqlalchemy.org/en/20/orm/dataclasses.html
    """


@final
class User(Base):
    __tablename__ = "users"

    # Using Annotated Declarative Table.
    # docs.sqlalchemy.org/en/20/orm/declarative_tables.html#using-annotated-declarative-table-type-annotated-forms-for-mapped-column
    id: Mapped[int] = mapped_column(init=False, primary_key=True)
    name: Mapped[str] = mapped_column(doc="User name", comment="User name")
    settings: Mapped[UserSettings] = mapped_column(PydanticType(UserSettings))


type_adapter = TypeAdapter(User)

# --------------------------------------------------------------------------------------
# Create tables (once per application)
# --------------------------------------------------------------------------------------
Base.metadata.create_all(engine)

# --------------------------------------------------------------------------------------
# Usage example (we use 2.0 querying style with selects)
# Ref: https://docs.sqlalchemy.org/en/20/orm/session_basics.html
# --------------------------------------------------------------------------------------
session = Session()
user = User(name="user", settings=UserSettings())
session.add(user)
session.commit()
same_user = session.execute(sa.select(User)).scalars().first()
if same_user:
    print(type_adapter.dump_json(same_user, indent=2).decode("utf-8"))
No, you aren't misinterpreting.
Method `load_dialect_impl` is useful only if you intend to use the class with more than one dialect. If you'll be using it with only one dialect, you could pick a type from that specific dialect as the value of `impl` for simplicity's sake.
Anyway, `impl` is a required attribute, so it should always be defined. If you're defining the `load_dialect_impl` method, "the impl variable can reference TypeEngine as a placeholder", as stated by the docs.
I'll edit the code to improve clarity.
Thanks for the feedback!
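To illustrate the single-dialect case described above, a minimal sketch (the `PostgresPydanticType` name and the trimmed-down hooks are illustrative, not part of the gist): with only PostgreSQL in play, `impl` can point directly at `JSONB` and `load_dialect_impl` can be omitted.

import sqlalchemy as sa
from pydantic import BaseModel
from sqlalchemy.dialects.postgresql import JSONB


class PostgresPydanticType(sa.types.TypeDecorator[BaseModel]):
    """PostgreSQL-only variant: the dialect-specific type is used directly as impl."""

    impl = JSONB

    def __init__(self, pydantic_type: type[BaseModel]) -> None:
        super().__init__()
        self.pydantic_type = pydantic_type

    def process_bind_param(self, value, dialect):
        # Dump the pydantic model to a JSON-compatible dict for the JSONB column.
        return value.model_dump(mode="json") if value is not None else None

    def process_result_value(self, value, dialect):
        # Rebuild (and revalidate) the pydantic model from the stored dict.
        return self.pydantic_type.model_validate(value) if value else None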