I have two models like this:
class User(models.Model):
email = models.EmailField()
class Report(models.Model):
user = models.ForeignKey(User)
Alasdair's answer is helpful, but I don't like using distinct(). It can sometimes be useful, but it's usually a code smell telling you that you messed up your joins.
Luckily, Django's queryset lets you filter on subqueries. With Django 3.0, you can also use an exists clause.
Here are a few ways to run the queries from your question:
# Tested with Django 3.0 and Python 3.6
import logging
import sys
import django
from django.apps import apps
from django.apps.config import AppConfig
from django.conf import settings
from django.db import connections, models, DEFAULT_DB_ALIAS
from django.db.models import Exists, OuterRef
from django.db.models.base import ModelBase
NAME = 'udjango'
DB_FILE = NAME + '.db'
def main():
setup()
class User(models.Model):
email = models.EmailField()
def __repr__(self):
return 'User({!r})'.format(self.email)
class Report(models.Model):
user = models.ForeignKey(User, on_delete=models.CASCADE)
syncdb(User)
syncdb(Report)
anne = User.objects.create(email='anne@example.com')
User.objects.create(email='adam@example.com')
alice = User.objects.create(email='alice@example.com')
User.objects.create(email='bob@example.com')
Report.objects.create(user=anne)
Report.objects.create(user=alice)
Report.objects.create(user=alice)
logging.info('users without reports')
logging.info(User.objects.filter(report__isnull=True, email__startswith='a'))
logging.info('users with reports (allows duplicates)')
logging.info(User.objects.filter(report__isnull=False, email__startswith='a'))
logging.info('users with reports (no duplicates)')
logging.info(User.objects.exclude(report__isnull=True).filter(email__startswith='a'))
logging.info('users with reports (no duplicates, simpler SQL)')
report_user_ids = Report.objects.values('user_id')
logging.info(User.objects.filter(id__in=report_user_ids, email__startswith='a'))
logging.info('users with reports (EXISTS clause, Django 3.0)')
logging.info(User.objects.filter(
Exists(Report.objects.filter(user_id=OuterRef('id'))),
email__startswith='a'))
logging.info('Done.')
def setup():
with open(DB_FILE, 'w'):
pass # wipe the database
settings.configure(
DEBUG=True,
DATABASES={
DEFAULT_DB_ALIAS: {
'ENGINE': 'django.db.backends.sqlite3',
'NAME': DB_FILE}},
LOGGING={'version': 1,
'disable_existing_loggers': False,
'formatters': {
'debug': {
'format': '%(asctime)s[%(levelname)s]'
'%(name)s.%(funcName)s(): %(message)s',
'datefmt': '%Y-%m-%d %H:%M:%S'}},
'handlers': {
'console': {
'level': 'DEBUG',
'class': 'logging.StreamHandler',
'formatter': 'debug'}},
'root': {
'handlers': ['console'],
'level': 'INFO'},
'loggers': {
"django.db": {"level": "DEBUG"}}})
app_config = AppConfig(NAME, sys.modules['__main__'])
apps.populate([app_config])
django.setup()
original_new_func = ModelBase.__new__
@staticmethod
def patched_new(cls, name, bases, attrs):
if 'Meta' not in attrs:
class Meta:
app_label = NAME
attrs['Meta'] = Meta
return original_new_func(cls, name, bases, attrs)
ModelBase.__new__ = patched_new
def syncdb(model):
""" Standard syncdb expects models to be in reliable locations.
Based on https://github.com/django/django/blob/1.9.3
/django/core/management/commands/migrate.py#L285
"""
connection = connections[DEFAULT_DB_ALIAS]
with connection.schema_editor() as editor:
editor.create_model(model)
main()
If you put that into a Python file and run it, you should see something like this:
2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) PRAGMA foreign_keys = OFF; args=None
2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) PRAGMA foreign_keys; args=None
2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) BEGIN; args=None
2019-12-06 11:45:17[DEBUG]django.db.backends.schema.execute(): CREATE TABLE "udjango_user" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "email" varchar(254) NOT NULL); (params None)
2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) CREATE TABLE "udjango_user" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "email" varchar(254) NOT NULL); args=None
2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) PRAGMA foreign_key_check; args=None
2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) PRAGMA foreign_keys = ON; args=None
2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) PRAGMA foreign_keys = OFF; args=None
2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) PRAGMA foreign_keys; args=None
2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) BEGIN; args=None
2019-12-06 11:45:17[DEBUG]django.db.backends.schema.execute(): CREATE TABLE "udjango_report" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "user_id" integer NOT NULL REFERENCES "udjango_user" ("id") DEFERRABLE INITIALLY DEFERRED); (params None)
2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) CREATE TABLE "udjango_report" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "user_id" integer NOT NULL REFERENCES "udjango_user" ("id") DEFERRABLE INITIALLY DEFERRED); args=None
2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) PRAGMA foreign_key_check; args=None
2019-12-06 11:45:17[DEBUG]django.db.backends.schema.execute(): CREATE INDEX "udjango_report_user_id_60bc619c" ON "udjango_report" ("user_id"); (params ())
2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) CREATE INDEX "udjango_report_user_id_60bc619c" ON "udjango_report" ("user_id"); args=()
2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) PRAGMA foreign_keys = ON; args=None
2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.017) INSERT INTO "udjango_user" ("email") VALUES ('anne@example.com'); args=['anne@example.com']
2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.023) INSERT INTO "udjango_user" ("email") VALUES ('adam@example.com'); args=['adam@example.com']
2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.022) INSERT INTO "udjango_user" ("email") VALUES ('alice@example.com'); args=['alice@example.com']
2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.022) INSERT INTO "udjango_user" ("email") VALUES ('bob@example.com'); args=['bob@example.com']
2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.029) INSERT INTO "udjango_report" ("user_id") VALUES (1); args=[1]
2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.033) INSERT INTO "udjango_report" ("user_id") VALUES (3); args=[3]
2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.033) INSERT INTO "udjango_report" ("user_id") VALUES (3); args=[3]
2019-12-06 11:45:17[INFO]root.main(): users without reports
2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) SELECT "udjango_user"."id", "udjango_user"."email" FROM "udjango_user" LEFT OUTER JOIN "udjango_report" ON ("udjango_user"."id" = "udjango_report"."user_id") WHERE ("udjango_user"."email" LIKE 'a%' ESCAPE '\' AND "udjango_report"."id" IS NULL) LIMIT 21; args=('a%',)
2019-12-06 11:45:17[INFO]root.main():
2019-12-06 11:45:17[INFO]root.main(): users with reports (allows duplicates)
2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) SELECT "udjango_user"."id", "udjango_user"."email" FROM "udjango_user" INNER JOIN "udjango_report" ON ("udjango_user"."id" = "udjango_report"."user_id") WHERE ("udjango_user"."email" LIKE 'a%' ESCAPE '\' AND "udjango_report"."id" IS NOT NULL) LIMIT 21; args=('a%',)
2019-12-06 11:45:17[INFO]root.main():
2019-12-06 11:45:17[INFO]root.main(): users with reports (no duplicates)
2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) SELECT "udjango_user"."id", "udjango_user"."email" FROM "udjango_user" WHERE (NOT ("udjango_user"."id" IN (SELECT U0."id" FROM "udjango_user" U0 LEFT OUTER JOIN "udjango_report" U1 ON (U0."id" = U1."user_id") WHERE U1."id" IS NULL)) AND "udjango_user"."email" LIKE 'a%' ESCAPE '\') LIMIT 21; args=('a%',)
2019-12-06 11:45:17[INFO]root.main():
2019-12-06 11:45:17[INFO]root.main(): users with reports (no duplicates, simpler SQL)
2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) SELECT "udjango_user"."id", "udjango_user"."email" FROM "udjango_user" WHERE ("udjango_user"."email" LIKE 'a%' ESCAPE '\' AND "udjango_user"."id" IN (SELECT U0."user_id" FROM "udjango_report" U0)) LIMIT 21; args=('a%',)
2019-12-06 11:45:17[INFO]root.main():
2019-12-06 11:45:17[INFO]root.main(): users with reports (EXISTS clause, Django 3.0)
2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) SELECT "udjango_user"."id", "udjango_user"."email" FROM "udjango_user" WHERE (EXISTS(SELECT U0."id", U0."user_id" FROM "udjango_report" U0 WHERE U0."user_id" = "udjango_user"."id") AND "udjango_user"."email" LIKE 'a%' ESCAPE '\') LIMIT 21; args=('a%',)
2019-12-06 11:45:17[INFO]root.main():
2019-12-06 11:45:17[INFO]root.main(): Done.
You can see that the final query uses all inner joins.