If you use Django Activity Stream, you’re likely to run into query performance problems when querying Action
instances to implement a “most recent activity” style stream.
The reason for this performance hit is that the Action
model has actor
, action_object
and target
generic foreign keys. For each action
queried, you’ll also end up querying the User
table (Actor), the ContentType
table for the GFK types (action_object
and target
) and then their respective tables. This isn’t an N+1
query, it’s closer to an (N + 1) * (K + 1)
query, where N
is the number of Action
instances and K
is the number of distinct ContentType
used.
With some prefetching, we should be able to get this down to K + 1
queries.
This is a solution derived from this old Django snippet, specifically for Django Activity Stream.
First off, let’s start with an implementation to prefetch relations for all Generic Foreign Keys of a queryset. This process goes as follows:
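Find the GenericForeignKey fields on the queryset's model, gather ContentType
information for the distinct types referenced through the GFK fields, fetch the referenced objects with one query per type, and assign them back onto the queryset's instances.
import collections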
from django.contrib.contenttypes.fields import GenericForeignKey
from django.contrib.contenttypes.models import ContentType
def prefetch_relations(queryset):
    """
    Prefetch content type relations for GenericForeignKeys to reduce N+1 queries.

    The objects referenced through the queryset's GenericForeignKey fields are
    loaded with one ``pk__in`` query per distinct content type and assigned
    onto the queryset's model instances with ``setattr``. Each fetched batch
    is itself passed through ``prefetch_relations``, so generic foreign keys
    declared on the referenced models are resolved the same way.

    :param queryset: queryset (anything iterable that exposes ``.model``)
        whose instances should have their generic relations populated. It is
        iterated by this call unless its model has no GenericForeignKey.
    :returns: the very ``queryset`` object that was passed in.
    """
    # Get all GenericForeignKey fields on the model of the queryset.
    gfks = _queryset_generic_foreign_keys(queryset)
    if not gfks:
        # Nothing to resolve; leave the queryset untouched.
        return queryset

    # Get mapping of GFK content_type -> list of GFK object_pks for all GFK's on the queryset.
    gfks_data = _content_type_to_content_mapping_for_gfks(queryset, gfks)

    # Index the referring instances once by (content_type, object_pk) so each
    # fetched object can be assigned with a dict lookup instead of re-scanning
    # the whole queryset for every fetched object.
    referrers = {}
    for qs_model, gfk_field_name, gfk_content_type, gfk_object_pk in _queryset_gfk_content_generator(queryset, gfks):
        referrers.setdefault((gfk_content_type, gfk_object_pk), []).append(
            (qs_model, gfk_field_name)
        )

    for content_type, object_pks in gfks_data.items():
        model_class = content_type.model_class()
        if model_class is None:
            # Stale content type whose model can no longer be resolved; there
            # is nothing to fetch for it.
            continue

        # Get all model instances referenced through a GFK. The pks are
        # de-duplicated (order preserved) before being sent to the database.
        unique_pks = list(dict.fromkeys(object_pks))
        gfk_models = prefetch_relations(
            model_class.objects.filter(pk__in=unique_pks).select_related()
        )

        for gfk_model in gfk_models:
            # str compare otherwise UUID PK's puke. :(
            key = (content_type, str(gfk_model.pk))
            for qs_model, gfk_field_name in referrers.get(key, ()):
                setattr(qs_model, gfk_field_name, gfk_model)

    return queryset
def _queryset_generic_foreign_keys(queryset):
    """
    Collect the GenericForeignKey attributes of the queryset's model.

    Only the model class's own namespace (``queryset.model.__dict__``) is
    inspected.

    :param queryset: object exposing the model class as ``.model``.
    :returns: dict mapping attribute name -> GenericForeignKey instance.
    """
    return {
        attr_name: attr
        for attr_name, attr in vars(queryset.model).items()
        if isinstance(attr, GenericForeignKey)
    }
def _queryset_gfk_content_generator(queryset, gfks):
    """
    Generator function that yields information about all GenericForeignKey fields for all models of a queryset.

    :param queryset: iterable of model instances.
    :param gfks: mapping of field name -> GenericForeignKey, as produced by
        ``_queryset_generic_foreign_keys``.
    :returns: yields ``(instance, field_name, content_type, object_pk)``
        tuples, where ``object_pk`` is the referenced primary key as ``str``.
        Rows whose content type id is empty or whose object id is ``None``
        are skipped.
    """
    # The attribute holding the content type id depends only on the field, so
    # resolve it once per GFK instead of once per row.
    ct_attnames = {
        field_name: field.model._meta.get_field(field.ct_field).get_attname()
        for field_name, field in gfks.items()
    }

    for model in queryset:
        for field_name, field in gfks.items():
            content_type_id = getattr(model, ct_attnames[field_name])
            if not content_type_id:
                continue

            raw_pk = getattr(model, field.fk_field)
            if raw_pk is None:
                # str(None) would yield "None", which callers would then try
                # to look up as a primary key.
                continue

            content_type = ContentType.objects.get_for_id(content_type_id)
            yield (model, field_name, content_type, str(raw_pk))
def _content_type_to_content_mapping_for_gfks(queryset, gfks):
    """
    Group the primary keys referenced by the queryset's generic foreign keys
    by content type.

    :param queryset: iterable of model instances.
    :param gfks: mapping of field name -> GenericForeignKey.
    :returns: ``defaultdict(list)`` mapping content_type -> [object_pk, ...],
        with one entry per tuple yielded by
        ``_queryset_gfk_content_generator`` (duplicates are kept).
    """
    pks_by_type = collections.defaultdict(list)
    # Only the last two items of each yielded tuple are needed here.
    for *_, content_type, object_pk in _queryset_gfk_content_generator(queryset, gfks):
        pks_by_type[content_type].append(object_pk)
    return pks_by_type
Now that we have functionality for prefetching generic relation values, we need to modify the QuerySet
and Manager
implementations we use for the Django Activity Stream models.
from actstream import gfk, managers
from .prefetch import prefetch_relations
class PrefetchGFKQuerySet(gfk.GFKQuerySet):
    """
    :class:`~actstream.gfk.GFKQuerySet` subclass that resolves generic
    relations with :func:`~activity.prefetch.prefetch_relations` rather than
    the inherited implementation.
    """

    def fetch_generic_relations(self, *args):
        """
        Populate this queryset's generic foreign keys via ``prefetch_relations``.

        Any positional arguments are accepted and ignored.

        :returns: whatever ``prefetch_relations(self)`` returns.
        """
        prefetched = prefetch_relations(self)
        return prefetched
class ActionManager(managers.ActionManager):
    """
    Manager for :class:`~activity.models.Action`.

    Builds its querysets from :class:`PrefetchGFKQuerySet`.
    """

    def get_queryset(self):
        """
        Return a :class:`PrefetchGFKQuerySet` for this manager's model.

        The manager's database alias is forwarded so that a manager obtained
        through ``db_manager(...)`` keeps querying the selected database.
        """
        return PrefetchGFKQuerySet(self.model, using=self._db)
class FollowManager(managers.FollowManager):
    """
    Manager for :class:`~activity.models.Follow`.

    Builds its querysets from :class:`PrefetchGFKQuerySet`.
    """

    def get_queryset(self):
        """
        Return a :class:`PrefetchGFKQuerySet` for this manager's model.

        The manager's database alias is forwarded so that a manager obtained
        through ``db_manager(...)`` keeps querying the selected database.
        """
        return PrefetchGFKQuerySet(self.model, using=self._db)
Now we’ll just set up some proxy models for the activity stream base models so we can override the default manager.
from actstream import models
class Action(models.Action):
    """
    Proxy for the activity stream ``Action`` model that swaps in a custom
    ``objects`` manager.
    """

    # NOTE(review): ``managers`` is presumably the project's own managers
    # module (the one defining the prefetching ActionManager), not
    # ``actstream.managers`` -- confirm the import in this module.
    objects = managers.ActionManager()

    class Meta:
        proxy = True
class Follow(models.Follow):
    """
    Proxy object to activity stream follow model.

    Replaces the ``objects`` manager on the proxied ``Follow`` model.
    """

    # NOTE(review): ``managers`` is presumably the project's own managers
    # module (the one defining the prefetching FollowManager), not
    # ``actstream.managers`` -- confirm the import in this module.
    objects = managers.FollowManager()

    class Meta:
        proxy = True
Now queries like Action.objects.any
, Action.objects.user
, and Follow.objects.followers_qs
will all prefetch objects through their generic foreign keys.