4

如果 A 包含一组 B 而 B 包含一组 C,那么我正在寻找一种从 A 开始并以 C 的查询集结束的方法。

一个简单的例子:

class Book(models.Model):
    """Top-level model of the example; Page rows reference it by foreign key."""
    name = models.CharField(max_length=64)


class Page(models.Model):
    """A numbered page that belongs to exactly one Book."""
    number = models.IntegerField()
    # on_delete is a required argument from Django 2.0 on; CASCADE is what
    # earlier versions applied implicitly, so behavior is unchanged.
    book = models.ForeignKey(Book, on_delete=models.CASCADE)


class Paragraph(models.Model):
    """A numbered paragraph that belongs to exactly one Page."""
    number = models.IntegerField()
    # on_delete is a required argument from Django 2.0 on; CASCADE is what
    # earlier versions applied implicitly, so behavior is unchanged.
    page = models.ForeignKey(Page, on_delete=models.CASCADE)


def query():
    """Illustrate the question: reach each Book's Paragraphs without N+1 queries.

    The body lists the author's three attempts in order. Per the inline
    comments, the first is invalid, the second still yields Pages, and the
    third works but issues one query per book.
    """
    books = Book.objects.all()\
       .prefetch_related('page_set', 'page_set__paragraph_set')

    for book in books:
        pages = book.page_set

        # I need to do something like this
        paragraphs = pages.all().paragraph_set
        # invalid

        # or
        paragraphs = book.page_set.select_related('paragraph_set')
        # valid, but paragraphs is still a QuerySet of Pages

        # this works, but results in one query for EVERY book,
        # which is what I need to avoid
        paragraphs = Paragraph.objects.filter(page__book=book)


        # do stuff with the book
        #...


        # do stuff with the paragraphs in the book
        # ...

如何从 Book 实例获取其所有 Paragraph(段落)的查询集?

Django 查询的关键字参数语法支持集合/外键关系的无限嵌套,但我找不到利用 ORM 映射自上而下实际获取相关查询集的方法。

而自下而上获取查询集又会抵消 prefetch_related/select_related 的好处。

上面的示例是我需要在我的应用程序中执行的操作的简化版本。数据库有成千上万的“书籍”,必须避免任何 n + 1 查询。

我找到了一个关于跨多个层级使用预取的问题,但其答案没有说明如何实际取得预取到的查询集以供使用。

4

2 回答 2

1

完成预取后,访问子记录的唯一低成本方式似乎是通过 all()。任何过滤操作似乎都会触发另一次数据库查询。

对于“获取一本书中所有段落”这个问题,简短的回答是使用两层的列表推导式:

    # Flatten the book's pages into a single list of paragraphs. Going
    # through .all() (rather than a filter) is what allows the prefetched
    # results to be reused instead of hitting the database again.
    paragraphs = [paragraph
                  for page in book.page_set.all()
                  for paragraph in page.paragraph_set.all()]

这是一个可运行的示例:

# Tested with Django 1.11.13
from __future__ import print_function
import os
import sys

import django
from django.apps import apps
from django.apps.config import AppConfig
from django.conf import settings
from django.core.files.base import ContentFile, File
from django.db import connections, models, DEFAULT_DB_ALIAS
from django.db.models.base import ModelBase

from django_mock_queries.mocks import MockSet, mocked_relations

# Shared name for the demo: used as the app label, the AppConfig name and
# the stem of the SQLite file (NAME + '.db').
NAME = 'udjango'


def main():
    """Run the demo: build Book/Page/Paragraph data and list paragraphs per book.

    Models are declared inside the function because they can only be created
    after ``setup()`` has configured Django. The final loop reads everything
    through ``.all()`` on the prefetched relations.
    """
    setup()

    class Book(models.Model):
        name = models.CharField(max_length=64)

    class Page(models.Model):
        number = models.IntegerField()
        # on_delete is required from Django 2.0 on; CASCADE matches the
        # implicit default of earlier versions, so behavior is unchanged.
        book = models.ForeignKey(Book, on_delete=models.CASCADE)

    class Paragraph(models.Model):
        number = models.IntegerField()
        page = models.ForeignKey(Page, on_delete=models.CASCADE)

    # Create the tables directly; there are no migrations in this script.
    syncdb(Book)
    syncdb(Page)
    syncdb(Paragraph)

    # Fixture: one book with a single paragraph, one with 2 pages x 2 paragraphs.
    b = Book.objects.create(name='Gone With The Wind')
    p = b.page_set.create(number=1)
    p.paragraph_set.create(number=1)
    b = Book.objects.create(name='The Three Body Problem')
    p = b.page_set.create(number=1)
    p.paragraph_set.create(number=1)
    p.paragraph_set.create(number=2)
    p = b.page_set.create(number=2)
    p.paragraph_set.create(number=1)
    p.paragraph_set.create(number=2)

    books = Book.objects.all().prefetch_related('page_set',
                                                'page_set__paragraph_set')

    for book in books:
        print(book.name)
        # Two-level comprehension over the prefetched relations.
        paragraphs = [paragraph
                      for page in book.page_set.all()
                      for paragraph in page.paragraph_set.all()]
        for paragraph in paragraphs:
            print(paragraph.page.number, paragraph.number)


def setup():
    """Configure a throwaway, single-file Django environment.

    Steps, in order:
      1. Truncate the SQLite file ``NAME + '.db'`` so each run starts empty.
      2. Configure settings with DEBUG on and the ``django.db`` logger at
         DEBUG level, routed to a ``logging.StreamHandler``.
      3. Register the ``__main__`` module as an app named ``NAME`` and
         initialise Django.
      4. Patch ``ModelBase.__new__`` so that a model declared without a
         ``Meta`` class gets one with ``app_label = NAME``.
    """
    db_file = NAME + '.db'
    with open(db_file, 'w'):
        pass  # wipe the database
    settings.configure(
        DEBUG=True,
        DATABASES={
            DEFAULT_DB_ALIAS: {
                'ENGINE': 'django.db.backends.sqlite3',
                'NAME': db_file}},
        LOGGING={'version': 1,
                 'disable_existing_loggers': False,
                 'formatters': {
                    'debug': {
                        'format': '%(asctime)s[%(levelname)s]'
                                  '%(name)s.%(funcName)s(): %(message)s',
                        'datefmt': '%Y-%m-%d %H:%M:%S'}},
                 'handlers': {
                    'console': {
                        'level': 'DEBUG',
                        'class': 'logging.StreamHandler',
                        'formatter': 'debug'}},
                 'root': {
                    'handlers': ['console'],
                    'level': 'WARN'},
                 'loggers': {
                    "django.db": {"level": "DEBUG"}}})
    app_config = AppConfig(NAME, sys.modules['__main__'])
    apps.populate([app_config])
    django.setup()
    original_new_func = ModelBase.__new__

    @staticmethod
    def patched_new(cls, name, bases, attrs, **kwargs):
        # Only inject a Meta when the model did not declare its own.
        if 'Meta' not in attrs:
            class Meta:
                app_label = NAME
            attrs['Meta'] = Meta
        # Forward class keyword arguments untouched: newer Django versions
        # accept them in ModelBase.__new__, and an empty dict is harmless on
        # versions that do not.
        return original_new_func(cls, name, bases, attrs, **kwargs)
    ModelBase.__new__ = patched_new


def syncdb(model):
    """Create the table for ``model`` on the default database connection.

    The regular syncdb/migrate machinery expects models to live in
    conventional app locations, so the schema editor is driven directly.

    Based on https://github.com/django/django/blob/1.9.3
    /django/core/management/commands/migrate.py#L285
    """
    with connections[DEFAULT_DB_ALIAS].schema_editor() as schema:
        schema.create_model(model)

if __name__ == '__main__':
    # Run the demo only when executed as a script, not when imported.
    main()

这是输出的结尾。您可以看到它只为每个表运行一个查询。

2018-10-30 15:58:25[DEBUG]django.db.backends.execute(): (0.000) SELECT "udjango_book"."id", "udjango_book"."name" FROM "udjango_book"; args=()
2018-10-30 15:58:25[DEBUG]django.db.backends.execute(): (0.000) SELECT "udjango_page"."id", "udjango_page"."number", "udjango_page"."book_id" FROM "udjango_page" WHERE "udjango_page"."book_id" IN (1, 2); args=(1, 2)
2018-10-30 15:58:25[DEBUG]django.db.backends.execute(): (0.000) SELECT "udjango_paragraph"."id", "udjango_paragraph"."number", "udjango_paragraph"."page_id" FROM "udjango_paragraph" WHERE "udjango_paragraph"."page_id" IN (1, 2, 3); args=(1, 2, 3)
Gone With The Wind
1 1
The Three Body Problem
1 1
1 2
2 1
2 2
于 2018-10-30T23:02:44.990 回答
1

在唐的回答基础上更进一步,你可以使用 Prefetch 对象来应用任何想要的过滤条件,例如:

from django.db import models, connection

def query():
    """Print every paragraph numbered above 1, book by book, then the query log.

    A ``Prefetch`` object substitutes a filtered queryset for the nested
    ``page_set__paragraph_set`` lookup, so the filter is applied as part of
    the prefetch itself.
    """
    filtered_paragraphs = Paragraph.objects.filter(number__gt=1)
    paragraph_prefetch = models.Prefetch(
        'page_set__paragraph_set', queryset=filtered_paragraphs)

    library = Book.objects.all().prefetch_related(
        'page_set', paragraph_prefetch)

    for book in library:
        matching = (
            paragraph
            for page in book.page_set.all()
            for paragraph in page.paragraph_set.all()
        )
        for paragraph in matching:
            print(paragraph)

    # Dump the statements recorded on the connection for inspection.
    print(connection.queries)

Django 会确保用少量查询加载所有相关对象(每个表一次查询,因此总共会得到三次查询)。

于 2018-10-30T23:18:33.440 回答