The Monolith Repository Pattern, when applied to abstract database access, is less about hiding database specifics and more about enforcing a specific order of operations that’s often overlooked.
Imagine you have a user registration system. In a naive approach, your UserService might directly call UserRepository.save(user).
# services/user_service.py
from models.user import User
from repositories.user_repository import UserRepository
class UserService:
def __init__(self):
self.user_repo = UserRepository()
def register_user(self, username, email, password):
if self.user_repo.find_by_email(email):
raise ValueError("Email already exists")
user = User(username=username, email=email, password=password)
self.user_repo.save(user)
return user
# repositories/user_repository.py
from models.user import User
from db.database import get_db_connection # Assuming a DB connection helper
class UserRepository:
def find_by_email(self, email: str) -> User | None:
conn = get_db_connection()
cursor = conn.cursor()
cursor.execute("SELECT id, username, email FROM users WHERE email = %s", (email,))
row = cursor.fetchone()
conn.close()
if row:
return User(id=row[0], username=row[1], email=row[2])
return None
def save(self, user: User):
conn = get_db_connection()
cursor = conn.cursor()
cursor.execute(
"INSERT INTO users (username, email) VALUES (%s, %s) RETURNING id",
(user.username, user.email)
)
user.id = cursor.fetchone()[0]
conn.commit()
conn.close()
# models/user.py
class User:
def __init__(self, id: int = None, username: str = None, email: str = None, password: str = None):
self.id = id
self.username = username
self.email = email
self.password = password # In a real app, this would be a hash
This works. But what happens when you need to perform multiple related database operations? For instance, if registering a user also requires creating an associated profile entry?
# services/user_service.py (modified)
from models.user import User
from models.profile import Profile # New model
from repositories.user_repository import UserRepository
from repositories.profile_repository import ProfileRepository # New repo
class UserService:
def __init__(self):
self.user_repo = UserRepository()
self.profile_repo = ProfileRepository() # Initialize new repo
def register_user_with_profile(self, username, email, password, display_name):
if self.user_repo.find_by_email(email):
raise ValueError("Email already exists")
user = User(username=username, email=email, password=password)
self.user_repo.save(user) # Save user first
# What if the profile save fails AFTER the user save?
profile = Profile(user_id=user.id, display_name=display_name)
self.profile_repo.save(profile) # Save profile
return user
# repositories/profile_repository.py (new)
from models.profile import Profile
from db.database import get_db_connection
class ProfileRepository:
def save(self, profile: Profile):
conn = get_db_connection()
cursor = conn.cursor()
cursor.execute(
"INSERT INTO profiles (user_id, display_name) VALUES (%s, %s) RETURNING id",
(profile.user_id, profile.display_name)
)
profile.id = cursor.fetchone()[0]
conn.commit()
conn.close()
# models/profile.py (new)
class Profile:
def __init__(self, id: int = None, user_id: int = None, display_name: str = None):
self.id = id
self.user_id = user_id
self.display_name = display_name
Now, if self.profile_repo.save(profile) throws an error after self.user_repo.save(user) has already committed, you have a user record in the database with no corresponding profile. This is a data integrity issue.
The Repository Pattern, in this context, isn’t just about abstracting SQL. It’s about providing a unit of work interface. Instead of individual save calls, you might have a UnitOfWork that orchestrates these operations, ensuring they either all succeed or all fail together.
Here’s how a UnitOfWork might look, managing transactions and multiple repositories:
# db/unit_of_work.py
import contextlib
from db.database import get_db_connection
from repositories.user_repository import UserRepository
from repositories.profile_repository import ProfileRepository
class UnitOfWork:
def __init__(self):
self.conn = get_db_connection()
self.user_repo = UserRepository(self.conn) # Pass connection to repos
self.profile_repo = ProfileRepository(self.conn) # Pass connection to repos
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
if exc_type:
self.conn.rollback()
else:
self.conn.commit()
self.conn.close()
def commit(self):
self.conn.commit()
def rollback(self):
self.conn.rollback()
# repositories/user_repository.py (modified to accept connection)
class UserRepository:
def __init__(self, conn):
self.conn = conn
def find_by_email(self, email: str) -> User | None:
cursor = self.conn.cursor()
cursor.execute("SELECT id, username, email FROM users WHERE email = %s", (email,))
row = cursor.fetchone()
if row:
return User(id=row[0], username=row[1], email=row[2])
return None
def save(self, user: User):
cursor = self.conn.cursor()
cursor.execute(
"INSERT INTO users (username, email) VALUES (%s, %s) RETURNING id",
(user.username, user.email)
)
user.id = cursor.fetchone()[0]
# No conn.commit() here!
# repositories/profile_repository.py (modified to accept connection)
class ProfileRepository:
def __init__(self, conn):
self.conn = conn
def save(self, profile: Profile):
cursor = self.conn.cursor()
cursor.execute(
"INSERT INTO profiles (user_id, display_name) VALUES (%s, %s) RETURNING id",
(profile.user_id, profile.display_name)
)
profile.id = cursor.fetchone()[0]
# No conn.commit() here!
# services/user_service.py (using UnitOfWork)
class UserService:
def register_user_with_profile(self, username, email, password, display_name):
with UnitOfWork() as uow:
if uow.user_repo.find_by_email(email):
raise ValueError("Email already exists")
user = User(username=username, email=email, password=password)
uow.user_repo.save(user) # Operates within the UoW's transaction
profile = Profile(user_id=user.id, display_name=display_name)
uow.profile_repo.save(profile) # Operates within the UoW's transaction
# The commit/rollback happens automatically on exiting the 'with' block
return user
The key insight is that the "Repository Pattern" in this context is often a precursor to or an integral part of a larger "Unit of Work" pattern. The repositories themselves don’t commit; they just perform operations on an underlying data source, and the UnitOfWork is responsible for managing the transaction boundary across multiple repository calls. This ensures atomicity for related operations, preventing partial updates and maintaining data consistency.
The real power comes when you need to manage complex, multi-step data modifications that must succeed or fail as a single logical unit. The pattern structures your code so that the decision to commit or roll back is centralized, typically at the service layer or a dedicated transaction management layer.
When you start using a Unit of Work, you’ll often realize that your repositories need to be aware of the connection and transaction context managed by the UoW, rather than managing their own connections and commits. This leads to repositories becoming more like data mappers operating within a broader transactional scope.
The next hurdle is often handling concurrency issues within this transactional model, especially when multiple units of work might be trying to modify the same data.