0

我有超过 100,000,000 个页面 URL。每个站点地图类只包含 10,000 个唯一 URL,我怎样才能动态地生成对应的 QuerySet 切片,而不必手动编写 10,000 个类并逐个硬编码切片的整数范围?


# sitemap.py account_

from django.contrib.sitemaps import Sitemap
from django.shortcuts import reverse
from appname.models import Page
import datetime

from appname.sitemaps import Page000001
from appname.sitemaps import Page000002

# Maps each sitemap section key to the Sitemap class registered under it.
ps_dict_01 = {
    "ps_file_000001": Page000001,
    "ps_file_000002": Page000002,
}

class Page000001(Sitemap):
    """Sitemap section listing the first 10,000 ``Page`` rows (slice ``[:10000]``)."""

    # NOTE: this expression runs once, when the class body is executed at
    # import time, so the same timestamp is reused for the life of the process.
    lastmod = datetime.datetime.now()
    changefreq = 'hourly'
    priority = 1.0
    protocol = 'http'

    def items(self):
        """Return the slice of pages that belongs to this section."""
        # ``Page`` is the model this module imports; the previously used name
        # ``Passage`` is not defined anywhere in the module.
        # NOTE(review): the queryset has no explicit ordering here -- confirm
        # the model declares a default ordering so that sibling slices do not
        # overlap or skip rows.
        return Page.objects.all()[:10000]

class Page000002(Sitemap):
    """Sitemap section listing ``Page`` rows 10,000-19,999 (slice ``[10000:20000]``)."""

    # NOTE: this expression runs once, when the class body is executed at
    # import time, so the same timestamp is reused for the life of the process.
    lastmod = datetime.datetime.now()
    changefreq = 'hourly'
    priority = 1.0
    protocol = 'http'

    def items(self):
        """Return the slice of pages that belongs to this section."""
        # ``Page`` is the model this module imports; the previously used name
        # ``Passage`` is not defined anywhere in the module.
        # NOTE(review): the queryset has no explicit ordering here -- confirm
        # the model declares a default ordering so that sibling slices do not
        # overlap or skip rows.
        return Page.objects.all()[10000:20000]

4

1 回答 1

0

您应该能够将查询集拆分为块并生成站点地图。

下面是一个对查询集进行分块的示例(取自这个 Gist):

# utils.py

def queryset_iterator(queryset, chunk_size=1000):
    """
    Yield the rows of a Django QuerySet one by one, in primary-key order.

    Rows are fetched in batches: every query is sliced to at most
    ``chunk_size`` rows and the next batch resumes after the last primary key
    seen, so the full result set is never requested in a single query.

    Any ordering already applied to ``queryset`` is replaced by ordering on
    ``pk``, so pre-ordered querysets are not supported.

    The highest primary key is read once, up front; iteration stops as soon as
    that key has been reached or a batch comes back empty.

    :param queryset: the QuerySet to walk through.
    :param chunk_size: maximum number of rows fetched per query.
    :return: a generator yielding individual model instances (not batches).
    """
    # Local import: the snippet this helper lives in has no import block.
    import gc

    # ``first()`` returns None for an empty queryset, so no exception handling
    # (and no extra import) is needed for the "nothing to iterate" case.
    newest = queryset.order_by('-pk').first()
    if newest is None:
        return
    last_pk = newest.pk

    ordered = queryset.order_by('pk')
    pk = None  # None means "no row seen yet": the first batch is unfiltered.
    while pk is None or pk < last_pk:
        batch = ordered if pk is None else ordered.filter(pk__gt=pk)
        fetched = False
        for row in batch[:chunk_size]:
            pk = row.pk
            fetched = True
            yield row
        if not fetched:
            # Nothing left past ``pk`` (e.g. the newest row disappeared after
            # ``last_pk`` was read); stop instead of re-querying forever.
            break
        # Drop references to the finished batch before fetching the next one.
        gc.collect()

您应该能够采用这种方法来生成站点地图

# sitemaps.py

from django.contrib.sitemaps import Sitemap

from appname.models import Page

from .utils import queryset_iterator


def generate_sitemaps(chunk_size=10000):
    """
    Build the ``{"page_<n>": PageSitemap}`` mapping, one entry per chunk of pages.

    ``queryset_iterator`` yields individual rows, so the rows are regrouped
    here into lists of at most ``chunk_size`` items; each list backs one
    ``PageSitemap``. Keys are numbered from 1 in primary-key order.

    NOTE: every chunk is materialised as a list, so all rows are held in
    memory for as long as the returned mapping is alive.

    :param chunk_size: number of pages per sitemap section (default 10,000).
    :return: dict mapping section names to ``PageSitemap`` instances; empty
        when there are no pages.
    :raises ValueError: if ``chunk_size`` is smaller than 1.
    """
    # Local import keeps this block self-contained.
    from itertools import islice

    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    rows = iter(queryset_iterator(Page.objects.all(), chunk_size=chunk_size))
    sitemaps = {}
    index = 1
    while True:
        # Pull the next ``chunk_size`` rows off the row-by-row iterator.
        chunk = list(islice(rows, chunk_size))
        if not chunk:
            break
        sitemaps[f"page_{index}"] = PageSitemap(items=chunk)
        index += 1
    return sitemaps


class PageSitemap(Sitemap):
    """Sitemap section backed either by a fixed chunk of pages or by all pages."""

    changefreq = "never"
    priority = 0.5

    def __init__(self, items=None):
        """
        :param items: optional collection of pages this section should list;
            when omitted (``None``), the section lists every ``Page``.
        """
        super().__init__()
        # Stored under a separate name: assigning to ``self.items`` would
        # replace the ``items()`` method with the data on this instance.
        self._chunk = items

    def items(self):
        """Return the chunk given to the constructor, or all pages if none was given."""
        # ``is not None`` rather than truthiness, so an explicitly empty chunk
        # stays empty and a queryset is not evaluated just to test it.
        if self._chunk is not None:
            return self._chunk
        return Page.objects.all()
# urls

from django.contrib.sitemaps.views import sitemap
from django.urls import path
from django.views.decorators.cache import cache_page

from .sitemaps import generate_sitemaps


# Serves ``sitemap.xml`` through the sitemap view wrapped in ``cache_page``
# (one-hour timeout, cache alias 'pages').
# ``generate_sitemaps()`` is called once, when this URLconf module is
# imported; the resulting mapping is reused for every request.
urlpatterns = [
    path(
        'sitemap.xml',
        cache_page(timeout=60 * 60, cache='pages')(sitemap),
        {'sitemaps': generate_sitemaps()},
    ),
]
于 2021-07-09T09:00:28.503 回答