In a Django QuerySet, how to filter for “not exists” in a many-to-one relationship

后端 未结 7 1925
失恋的感觉
失恋的感觉 2020-12-23 19:23

I have two models like this:

class User(models.Model):
    """User record holding a single e-mail address field."""
    email = models.EmailField()

class Report(models.Model):
    """Report that belongs to exactly one User; a User may have many."""
    # on_delete is a required argument since Django 2.0. CASCADE matches the
    # implicit default of earlier versions and the model used in the answer.
    user = models.ForeignKey(User, on_delete=models.CASCADE)
         


        
7条回答
  •  臣服心动
    2020-12-23 19:45

    Alasdair's answer is helpful, but I don't like using distinct(). It can sometimes be useful, but it's usually a code smell telling you that you messed up your joins.

    Luckily, Django's queryset lets you filter on subqueries. With Django 3.0, you can also use an exists clause.

    Here are a few ways to run the queries from your question:

    # Tested with Django 3.0 and Python 3.6
    import logging
    import sys
    
    import django
    from django.apps import apps
    from django.apps.config import AppConfig
    from django.conf import settings
    from django.db import connections, models, DEFAULT_DB_ALIAS
    from django.db.models import Exists, OuterRef
    from django.db.models.base import ModelBase
    
    # Label under which this script registers itself as a Django app.
    NAME = 'udjango'
    # SQLite database file used by the script, named after the app label.
    DB_FILE = f'{NAME}.db'
    
    
    def main():
        """Create a small User/Report schema, seed it, and log sample queries.

        Every queryset is handed to ``logging.info`` unevaluated; the sample
        output shows its SQL being logged right before the result line.
        """
        setup()

        class User(models.Model):
            email = models.EmailField()

            def __repr__(self):
                return f'User({self.email!r})'

        class Report(models.Model):
            user = models.ForeignKey(User, on_delete=models.CASCADE)

        for model in (User, Report):
            syncdb(model)

        # Four users; only anne and alice own reports (alice owns two).
        create_user = User.objects.create
        anne = create_user(email='anne@example.com')
        create_user(email='adam@example.com')
        alice = create_user(email='alice@example.com')
        create_user(email='bob@example.com')

        for owner in (anne, alice, alice):
            Report.objects.create(user=owner)

        # Subquery of user ids that have at least one report.
        report_user_ids = Report.objects.values('user_id')
        has_report = Exists(Report.objects.filter(user_id=OuterRef('id')))

        demos = [
            ('users without reports',
             User.objects.filter(report__isnull=True,
                                 email__startswith='a')),
            ('users with reports (allows duplicates)',
             User.objects.filter(report__isnull=False,
                                 email__startswith='a')),
            ('users with reports (no duplicates)',
             User.objects.exclude(report__isnull=True)
                         .filter(email__startswith='a')),
            ('users with reports (no duplicates, simpler SQL)',
             User.objects.filter(id__in=report_user_ids,
                                 email__startswith='a')),
            ('users with reports (EXISTS clause, Django 3.0)',
             User.objects.filter(has_report, email__startswith='a')),
        ]
        for title, queryset in demos:
            logging.info(title)
            logging.info(queryset)

        logging.info('Done.')
    
    
    def setup():
        """Configure a minimal stand-alone Django environment for this script.

        Steps, in order:

        * truncate the SQLite file named by ``DB_FILE``;
        * configure settings with that database and console logging, with the
          ``django.db`` logger at DEBUG so executed SQL is printed;
        * register the ``__main__`` module as an app labelled ``NAME``;
        * patch ``ModelBase.__new__`` so that models declared without a
          ``Meta`` class receive ``app_label = NAME``.
        """
        with open(DB_FILE, 'w'):
            pass  # wipe the database
        settings.configure(
            DEBUG=True,
            DATABASES={
                DEFAULT_DB_ALIAS: {
                    'ENGINE': 'django.db.backends.sqlite3',
                    'NAME': DB_FILE}},
            LOGGING={'version': 1,
                     'disable_existing_loggers': False,
                     'formatters': {
                        'debug': {
                            'format': '%(asctime)s[%(levelname)s]'
                                      '%(name)s.%(funcName)s(): %(message)s',
                            'datefmt': '%Y-%m-%d %H:%M:%S'}},
                     'handlers': {
                        'console': {
                            'level': 'DEBUG',
                            'class': 'logging.StreamHandler',
                            'formatter': 'debug'}},
                     'root': {
                        'handlers': ['console'],
                        'level': 'INFO'},
                     'loggers': {
                        "django.db": {"level": "DEBUG"}}})
        app_config = AppConfig(NAME, sys.modules['__main__'])
        apps.populate([app_config])
        django.setup()
        original_new_func = ModelBase.__new__

        @staticmethod
        def patched_new(cls, name, bases, attrs, **kwargs):
            # Give every model that does not declare its own Meta an
            # app_label of NAME.
            if 'Meta' not in attrs:
                class Meta:
                    app_label = NAME
                attrs['Meta'] = Meta
            # Forward class keyword arguments untouched so that a model
            # declared with keywords in its class statement does not fail
            # with a TypeError inside this wrapper.
            return original_new_func(cls, name, bases, attrs, **kwargs)
        ModelBase.__new__ = patched_new
    
    
    def syncdb(model):
        """Create the database table for ``model`` on the default connection.

        Standard syncdb expects models to be in reliable locations, so the
        table is created directly through a schema editor instead.

        Based on https://github.com/django/django/blob/1.9.3
        /django/core/management/commands/migrate.py#L285
        """
        with connections[DEFAULT_DB_ALIAS].schema_editor() as schema_editor:
            schema_editor.create_model(model)
    
    
    # Run the demonstration; setup() registers the __main__ module as the app,
    # so this file is meant to be executed directly as a script.
    main()
    

    If you put that into a Python file and run it, you should see something like this:

    2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) PRAGMA foreign_keys = OFF; args=None
    2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) PRAGMA foreign_keys; args=None
    2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) BEGIN; args=None
    2019-12-06 11:45:17[DEBUG]django.db.backends.schema.execute(): CREATE TABLE "udjango_user" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "email" varchar(254) NOT NULL); (params None)
    2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) CREATE TABLE "udjango_user" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "email" varchar(254) NOT NULL); args=None
    2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) PRAGMA foreign_key_check; args=None
    2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) PRAGMA foreign_keys = ON; args=None
    2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) PRAGMA foreign_keys = OFF; args=None
    2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) PRAGMA foreign_keys; args=None
    2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) BEGIN; args=None
    2019-12-06 11:45:17[DEBUG]django.db.backends.schema.execute(): CREATE TABLE "udjango_report" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "user_id" integer NOT NULL REFERENCES "udjango_user" ("id") DEFERRABLE INITIALLY DEFERRED); (params None)
    2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) CREATE TABLE "udjango_report" ("id" integer NOT NULL PRIMARY KEY AUTOINCREMENT, "user_id" integer NOT NULL REFERENCES "udjango_user" ("id") DEFERRABLE INITIALLY DEFERRED); args=None
    2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) PRAGMA foreign_key_check; args=None
    2019-12-06 11:45:17[DEBUG]django.db.backends.schema.execute(): CREATE INDEX "udjango_report_user_id_60bc619c" ON "udjango_report" ("user_id"); (params ())
    2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) CREATE INDEX "udjango_report_user_id_60bc619c" ON "udjango_report" ("user_id"); args=()
    2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) PRAGMA foreign_keys = ON; args=None
    2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.017) INSERT INTO "udjango_user" ("email") VALUES ('anne@example.com'); args=['anne@example.com']
    2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.023) INSERT INTO "udjango_user" ("email") VALUES ('adam@example.com'); args=['adam@example.com']
    2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.022) INSERT INTO "udjango_user" ("email") VALUES ('alice@example.com'); args=['alice@example.com']
    2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.022) INSERT INTO "udjango_user" ("email") VALUES ('bob@example.com'); args=['bob@example.com']
    2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.029) INSERT INTO "udjango_report" ("user_id") VALUES (1); args=[1]
    2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.033) INSERT INTO "udjango_report" ("user_id") VALUES (3); args=[3]
    2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.033) INSERT INTO "udjango_report" ("user_id") VALUES (3); args=[3]
    2019-12-06 11:45:17[INFO]root.main(): users without reports
    2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) SELECT "udjango_user"."id", "udjango_user"."email" FROM "udjango_user" LEFT OUTER JOIN "udjango_report" ON ("udjango_user"."id" = "udjango_report"."user_id") WHERE ("udjango_user"."email" LIKE 'a%' ESCAPE '\' AND "udjango_report"."id" IS NULL) LIMIT 21; args=('a%',)
    2019-12-06 11:45:17[INFO]root.main(): <QuerySet [User('adam@example.com')]>
    2019-12-06 11:45:17[INFO]root.main(): users with reports (allows duplicates)
    2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) SELECT "udjango_user"."id", "udjango_user"."email" FROM "udjango_user" INNER JOIN "udjango_report" ON ("udjango_user"."id" = "udjango_report"."user_id") WHERE ("udjango_user"."email" LIKE 'a%' ESCAPE '\' AND "udjango_report"."id" IS NOT NULL) LIMIT 21; args=('a%',)
    2019-12-06 11:45:17[INFO]root.main(): <QuerySet [User('anne@example.com'), User('alice@example.com'), User('alice@example.com')]>
    2019-12-06 11:45:17[INFO]root.main(): users with reports (no duplicates)
    2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) SELECT "udjango_user"."id", "udjango_user"."email" FROM "udjango_user" WHERE (NOT ("udjango_user"."id" IN (SELECT U0."id" FROM "udjango_user" U0 LEFT OUTER JOIN "udjango_report" U1 ON (U0."id" = U1."user_id") WHERE U1."id" IS NULL)) AND "udjango_user"."email" LIKE 'a%' ESCAPE '\') LIMIT 21; args=('a%',)
    2019-12-06 11:45:17[INFO]root.main(): <QuerySet [User('anne@example.com'), User('alice@example.com')]>
    2019-12-06 11:45:17[INFO]root.main(): users with reports (no duplicates, simpler SQL)
    2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) SELECT "udjango_user"."id", "udjango_user"."email" FROM "udjango_user" WHERE ("udjango_user"."email" LIKE 'a%' ESCAPE '\' AND "udjango_user"."id" IN (SELECT U0."user_id" FROM "udjango_report" U0)) LIMIT 21; args=('a%',)
    2019-12-06 11:45:17[INFO]root.main(): <QuerySet [User('anne@example.com'), User('alice@example.com')]>
    2019-12-06 11:45:17[INFO]root.main(): users with reports (EXISTS clause, Django 3.0)
    2019-12-06 11:45:17[DEBUG]django.db.backends.debug_sql(): (0.000) SELECT "udjango_user"."id", "udjango_user"."email" FROM "udjango_user" WHERE (EXISTS(SELECT U0."id", U0."user_id" FROM "udjango_report" U0 WHERE U0."user_id" = "udjango_user"."id") AND "udjango_user"."email" LIKE 'a%' ESCAPE '\') LIMIT 21; args=('a%',)
    2019-12-06 11:45:17[INFO]root.main(): <QuerySet [User('anne@example.com'), User('alice@example.com')]>
    2019-12-06 11:45:17[INFO]root.main(): Done.
    

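    You can see that the last two queries need no joins at all: they use an IN subquery and a correlated EXISTS subquery instead, so each matching user is returned only once.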

提交回复
热议问题