Push your Django app beyond basic queries. Master B-tree vs GIN indexes, JSONB for semi-structured data, full-text search with ranking, and window functions for analytics.
PostgreSQL is more than a SQL database — it's a Swiss Army knife that can replace Elasticsearch, Redis, and specialized document stores for many workloads. Learn its advanced features and simplify your stack.
B-tree indexes (Django's default) are good for equality and range queries:
from django.contrib.auth.models import User
from django.db import models

class Order(models.Model):
    user = models.ForeignKey(User, on_delete=models.CASCADE, db_index=True)
    created_at = models.DateTimeField(db_index=True)
    status = models.CharField(max_length=20, db_index=True)
When filtering on multiple columns together:
class Meta:
indexes = [
models.Index(fields=['user', 'created_at']),
models.Index(fields=['status', '-created_at']), # descending
]
Column order matters: an index on (user, created_at) helps queries that filter by user alone or by user and created_at together, but not queries that filter by created_at alone.
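For example, assuming hypothetical some_user and last_week values, the composite index above can serve the first two queries but not the third:

# Can use the (user, created_at) composite index
Order.objects.filter(user=some_user)
Order.objects.filter(user=some_user, created_at__gte=last_week)

# Cannot use it efficiently: the leading column is missing
Order.objects.filter(created_at__gte=last_week)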
Partial indexes cover only a subset of rows, so they're smaller and faster:
from django.db.models import Q
class Meta:
indexes = [
models.Index(
fields=['created_at'],
condition=Q(status='pending'),
name='pending_orders_idx'
),
]
GIN indexes handle arrays, JSONB, and full-text search:
from django.contrib.postgres.fields import ArrayField
from django.contrib.postgres.indexes import GinIndex
from django.contrib.postgres.search import SearchVectorField
from django.db import models
class Post(models.Model):
tags = ArrayField(models.CharField(max_length=50))
metadata = models.JSONField()
search_vector = SearchVectorField(null=True)
class Meta:
indexes = [
GinIndex(fields=['tags']),
GinIndex(fields=['metadata']),
GinIndex(fields=['search_vector']),
]
EXPLAIN ANALYZE
SELECT * FROM blog_post WHERE status = 'published' ORDER BY created_at DESC LIMIT 10;
Look for Index Scan vs Seq Scan. Sequential scans on large tables = trouble.
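You can also get the plan without leaving Python via QuerySet.explain(); note that analyze=True actually executes the query. A sketch, assuming a Post model with status and created_at fields behind the blog_post table:

qs = Post.objects.filter(status='published').order_by('-created_at')
print(qs.explain(analyze=True))  # runs EXPLAIN ANALYZE and returns the plan as text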
JSONB gives you flexibility without leaving your relational database.
class Event(models.Model):
event_type = models.CharField(max_length=50)
payload = models.JSONField()
created_at = models.DateTimeField(auto_now_add=True)
class Meta:
indexes = [GinIndex(fields=['payload'])]
# Key lookup
Event.objects.filter(payload__user_id=42)
# Nested lookup
Event.objects.filter(payload__metadata__source='mobile')
# Contains
Event.objects.filter(payload__contains={'tier': 'premium'})
# Has key
Event.objects.filter(payload__has_key='referrer')
# Array contains
Event.objects.filter(payload__tags__contains=['signup'])
# Extract a JSON value and cast it so it can be compared numerically
from django.db import models
from django.db.models.functions import Cast

Event.objects.annotate(
    amount=Cast('payload__amount', models.DecimalField(max_digits=12, decimal_places=2))
).filter(amount__gt=100)
| Use JSONB when... | Use columns when... |
|---|---|
| Schema varies per row | Fixed schema |
| Fields rarely filtered | Frequent filtering |
| Deep nesting needed | Flat, relational data |
| Third-party payloads | Your own domain model |
Rule of thumb: if you're filtering on a JSONB key in every request, promote it to a real column.
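As a hypothetical sketch, promoting a hot tier key from the Event.payload example above to an indexed column could look like this (the tier field and the backfill are illustrative, not part of the original model):

from django.db import models
from django.db.models.fields.json import KeyTextTransform

class Event(models.Model):
    event_type = models.CharField(max_length=50)
    payload = models.JSONField()
    tier = models.CharField(max_length=20, null=True, blank=True, db_index=True)

# One-off backfill from the existing JSONB payloads
Event.objects.update(tier=KeyTextTransform('tier', 'payload'))

# The hot-path filter now hits a plain B-tree index instead of a JSONB lookup
Event.objects.filter(tier='premium')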
Ditch Elasticsearch for 80% of use cases — PostgreSQL full-text search is fast, integrated, and good enough.
from django.contrib.postgres.indexes import GinIndex
from django.contrib.postgres.search import (
    SearchQuery, SearchRank, SearchVector, SearchVectorField,
)
class Article(models.Model):
title = models.CharField(max_length=200)
body = models.TextField()
search_vector = SearchVectorField(null=True)
class Meta:
indexes = [GinIndex(fields=['search_vector'])]
# In a migration or signal
from django.contrib.postgres.search import SearchVector
Article.objects.update(
search_vector=SearchVector('title', weight='A') + SearchVector('body', weight='B')
)
Better: use a database trigger so it's always up to date:
# Data migration
operations = [
migrations.RunSQL(
sql='''
CREATE TRIGGER article_search_update
BEFORE INSERT OR UPDATE ON blog_article
FOR EACH ROW EXECUTE FUNCTION
tsvector_update_trigger(search_vector, 'pg_catalog.english', title, body);
''',
reverse_sql='DROP TRIGGER article_search_update ON blog_article;',
),
]
from django.contrib.postgres.search import SearchQuery, SearchRank
query = SearchQuery('python tutorial')
articles = (Article.objects
.annotate(rank=SearchRank('search_vector', query))
.filter(search_vector=query)
.order_by('-rank')[:10])
# Phrase
SearchQuery('django channels', search_type='phrase')
# Prefix: 'pyth:*' matches 'python'
SearchQuery('pyth:*', search_type='raw')  # raw is passed straight to to_tsquery; never interpolate raw user input
# Language-specific stemming
SearchVector('body', config='english') # stems 'running' → 'run'
-- Enable the pg_trgm extension (run once, e.g. via RunSQL in a migration)
CREATE EXTENSION IF NOT EXISTS pg_trgm;
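Django also ships a ready-made migration operation for this, so the raw-SQL step isn't strictly needed; a minimal sketch (the dependency name is hypothetical):

from django.contrib.postgres.operations import TrigramExtension
from django.db import migrations

class Migration(migrations.Migration):
    dependencies = [('blog', '0005_article_search_vector')]  # hypothetical previous migration
    operations = [TrigramExtension()]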
from django.contrib.postgres.search import TrigramSimilarity
# Find articles where title is similar to 'djanjo' (misspelled)
Article.objects.annotate(
similarity=TrigramSimilarity('title', 'djanjo')
).filter(similarity__gt=0.3).order_by('-similarity')
Compute aggregates without collapsing rows:
from django.db.models import F, Window
from django.db.models.functions import Lag, Rank, TruncMonth

# Rank each order by amount within its month
Order.objects.annotate(
    created_month=TruncMonth('created_at'),
).annotate(
    rank_in_month=Window(
        expression=Rank(),
        partition_by=[F('created_month')],
        order_by=F('amount').desc(),
    ),
)
# Difference from the previous order's amount
Order.objects.annotate(
    prev_amount=Window(
        expression=Lag('amount'),
        order_by=F('created_at').asc(),
    ),
).annotate(
    delta=F('amount') - F('prev_amount'),
)
Common table expressions (CTEs) make complex queries readable:
# Using the django-cte package (models queried through with_cte need a CTEManager)
from datetime import timedelta
from django.db.models import Sum
from django.utils import timezone
from django_cte import With

last_week = timezone.now() - timedelta(days=7)
recent = With(Order.objects.filter(created_at__gte=last_week), 'recent')
top_customers = (recent.join(User, id=recent.col.user_id)
                 .annotate(total=Sum(recent.col.amount))
                 .with_cte(recent)
                 .order_by('-total')[:10])
Store lists natively:
from django.contrib.postgres.fields import ArrayField
class Post(models.Model):
tags = ArrayField(models.CharField(max_length=50), size=10, blank=True)
# Query
Post.objects.filter(tags__contains=['python']) # has this tag
Post.objects.filter(tags__contained_by=['python', 'django']) # subset
Post.objects.filter(tags__overlap=['python', 'web']) # any match
Post.objects.filter(tags__len=3) # exactly 3 tags
Django's default is autocommit. For multi-step operations:
from django.db import transaction
@transaction.atomic
def transfer_funds(from_user, to_user, amount):
from_user.balance -= amount
from_user.save()
to_user.balance += amount
to_user.save()
# Both saves succeed or both roll back
Prevent race conditions:
with transaction.atomic():
account = Account.objects.select_for_update().get(pk=account_id)
account.balance -= amount
account.save()
Don't make every request open a new DB connection:
# Requires Django 5.1+ and psycopg 3: pip install "psycopg[pool]"
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.postgresql',
'NAME': 'myapp',
...
'OPTIONS': {
'pool': {
'min_size': 4,
'max_size': 20,
},
},
},
}
Or use PgBouncer in front of PostgreSQL for serious workloads.
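One caveat worth flagging: in PgBouncer's transaction pooling mode, server-side cursors don't work, so Django should be told not to use them. A sketch of the relevant setting (the host name is hypothetical):

DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.postgresql',
        'NAME': 'myapp',
        'HOST': 'pgbouncer.internal',  # hypothetical PgBouncer endpoint
        'DISABLE_SERVER_SIDE_CURSORS': True,  # required with transaction pooling
    },
}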
DATABASES = {
'default': {...},
'replica': {
'ENGINE': 'django.db.backends.postgresql',
'HOST': 'replica.internal',
...
},
}
DATABASE_ROUTERS = ['myapp.routers.PrimaryReplicaRouter']
# Force read from replica
User.objects.using('replica').filter(...)
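The router referenced in DATABASE_ROUTERS is a small class; a minimal sketch that reads from the replica and writes to the primary might look like this:

# myapp/routers.py
class PrimaryReplicaRouter:
    def db_for_read(self, model, **hints):
        return 'replica'

    def db_for_write(self, model, **hints):
        return 'default'

    def allow_relation(self, obj1, obj2, **hints):
        # Primary and replica hold the same data, so relations are fine
        return True

    def allow_migrate(self, db, app_label, model_name=None, **hints):
        # Only migrate the primary; the replica follows via replication
        return db == 'default'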
-- Slow queries (requires the pg_stat_statements extension;
-- on PostgreSQL 12 and older the columns are total_time / mean_time)
SELECT query, calls, total_exec_time, mean_exec_time
FROM pg_stat_statements
ORDER BY mean_exec_time DESC
LIMIT 20;
-- Unused indexes
SELECT schemaname, relname, indexrelname, idx_scan
FROM pg_stat_user_indexes
WHERE idx_scan = 0
ORDER BY pg_relation_size(indexrelid) DESC;
-- Biggest tables by total size (including indexes and TOAST)
SELECT pg_size_pretty(pg_total_relation_size(relid)) AS total,
relname
FROM pg_catalog.pg_statio_user_tables
ORDER BY pg_total_relation_size(relid) DESC;
PostgreSQL can:

- Full-text search → replace Elasticsearch for most cases
- JSONB → replace MongoDB for flexible documents
- LISTEN/NOTIFY → replace Redis pub/sub
- Advisory locks → coordinate workers (see the sketch below)
- Materialized views → cache expensive aggregates
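As one sketch of the advisory-lock idea, a worker could take a session lock before running a job so only one instance runs at a time (pg_try_advisory_lock is built into PostgreSQL; the lock key and generate_report are hypothetical):

from django.db import connection

REPORT_LOCK_KEY = 812345  # hypothetical application-defined lock id

def run_nightly_report():
    with connection.cursor() as cursor:
        cursor.execute("SELECT pg_try_advisory_lock(%s)", [REPORT_LOCK_KEY])
        if not cursor.fetchone()[0]:
            return  # another worker already holds the lock
        try:
            generate_report()  # hypothetical expensive job
        finally:
            cursor.execute("SELECT pg_advisory_unlock(%s)", [REPORT_LOCK_KEY])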
Know what it offers, and you'll build simpler, more maintainable systems with one database instead of three.