django-json-api · jarekwg · Jul 22, 2021 · Jul 22, 2021 · Jul 22, 2021 · Jul 22, 2021
diff --git a/AUTHORS b/AUTHORS
@@ -11,6 +11,7 @@ David Vogt <[email protected]>
 Felix Viernickel <[email protected]>
 Greg Aker <[email protected]>
 Jamie Bliss <[email protected]>
+Jarek Głowacki <[email protected]>
 Jason Housley <[email protected]>
 Jeppe Fihl-Pearson <[email protected]>
 Jerel Unruh <[email protected]>

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -13,6 +13,9 @@ any parts of the framework not mentioned in the documentation should generally b
 ### Changed
 
 * Moved resolving of `included_serialzers` and `related_serializers` classes to serializer's meta class.
+* `AutoPrefetchMixin` updated to be more clever about how relationships are prefetched, with recursion all the way down.
+* Expensive relations (reverse relationships and M2Ms) are now automatically excluded from responses unless explicitly named in sparse fieldsets. Set `JSON_API_INCLUDE_EXPENSVE_FIELDS` to `True` to revert to the old behaviour.
+* Removed `PreloadIncludesMixin`, as its logic did not work with nested includes and the laborious manual configuration it required was unnecessary. This removes support for `prefetch_for_includes` and `select_for_includes`.
 
 ### Deprecated
 

diff --git a/docs/usage.md b/docs/usage.md
@@ -226,8 +226,10 @@ from models import MyModel
 class MyViewset(ModelViewSet):
     queryset = MyModel.objects.all()
     serializer_class = MyModelSerializer
-    filter_backends = (filters.QueryParameterValidationFilter, filters.OrderingFilter,
-	                   django_filters.DjangoFilterBackend, SearchFilter)
+    filter_backends = (
+      filters.QueryParameterValidationFilter, filters.OrderingFilter,
+      django_filters.DjangoFilterBackend, SearchFilter
+    )
     filterset_fields = {
         'id': ('exact', 'lt', 'gt', 'gte', 'lte', 'in'),
         'descriptuon': ('icontains', 'iexact', 'contains'),
@@ -387,7 +389,7 @@ Example without format conversion:
 
 ``` js
 {
-	"data": [{
+    "data": [{
         "type": "blog_identity",
         "id": "3",
         "attributes": {
@@ -410,7 +412,7 @@ When set to dasherize:
 
 ``` js
 {
-	"data": [{
+    "data": [{
         "type": "blog-identity",
         "id": "3",
         "attributes": {
@@ -436,7 +438,7 @@ Example without pluralization:
 
 ``` js
 {
-	"data": [{
+    "data": [{
         "type": "identity",
         "id": "3",
         "attributes": {
@@ -459,7 +461,7 @@ When set to pluralize:
 
 ``` js
 {
-	"data": [{
+    "data": [{
         "type": "identities",
         "id": "3",
         "attributes": {
@@ -643,7 +645,7 @@ and increase performance.
 
 #### SerializerMethodResourceRelatedField
 
-`relations.SerializerMethodResourceRelatedField` combines behaviour of DRF `SerializerMethodField` and 
+`relations.SerializerMethodResourceRelatedField` combines behaviour of DRF `SerializerMethodField` and
 `ResourceRelatedField`, so it accepts `method_name` together with `model` and links-related arguments.
 `data` is rendered in `ResourceRelatedField` manner.
 
@@ -940,28 +942,12 @@ class QuestSerializer(serializers.ModelSerializer):
 
 #### Performance improvements
 
-Be aware that using included resources without any form of prefetching **WILL HURT PERFORMANCE** as it will introduce m\*(n+1) queries.
+Be aware that reverse relationships and M2Ms can be expensive to prepare.
 
-A viewset helper was therefore designed to automatically preload data when possible. Such is automatically available when subclassing `ModelViewSet` or `ReadOnlyModelViewSet`.
+As a result, these are excluded by default unless explicitly requested via sparse fieldsets (the `fields[<resource-type>]=<field>,...` query parameter).
 
-It also allows to define custom `select_related` and `prefetch_related` for each requested `include` when needed in special cases:
+You can opt out of this auto-exclusion by setting `JSON_API_INCLUDE_EXPENSVE_FIELDS` to `True`.
 
-`rest_framework_json_api.views.ModelViewSet`:
-```python
-from rest_framework_json_api import views
-
-# When MyViewSet is called with ?include=author it will dynamically prefetch author and author.bio
-class MyViewSet(views.ModelViewSet):
-    queryset = Book.objects.all()
-    select_for_includes = {
-        'author': ['author__bio'],
-    }
-    prefetch_for_includes = {
-        '__all__': [],
-        'all_authors': [Prefetch('all_authors', queryset=Author.objects.select_related('bio'))],
-        'category.section': ['category']
-    }
-```
 
 An additional convenience DJA class exists for read-only views, just as it does in DRF.
 ```python
@@ -971,31 +957,6 @@ class MyReadOnlyViewSet(views.ReadOnlyModelViewSet):
     # ...
 ```
 
-The special keyword `__all__` can be used to specify a prefetch which should be done regardless of the include, similar to making the prefetch yourself on the QuerySet.
-
-Using the helper to prefetch, rather than attempting to minimise queries via `select_related` might give you better performance depending on the characteristics of your data and database.
-
-For example:
-
-If you have a single model, e.g. Book, which has four relations e.g. Author, Publisher, CopyrightHolder, Category.
-
-To display 25 books and related models, you would need to either do:
-
-a) 1 query via selected_related, e.g. SELECT * FROM books LEFT JOIN author LEFT JOIN publisher LEFT JOIN CopyrightHolder LEFT JOIN Category
-
-b) 4 small queries via prefetch_related.
-
-If you have 1M books, 50k authors, 10k categories, 10k copyrightholders
-in the `select_related` scenario, you've just created a in-memory table
-with 1e18 rows which will likely exhaust any available memory and
-slow your database to crawl.
-
-The `prefetch_related` case will issue 4 queries, but they will be small and fast queries.
-<!--
-### Relationships
-### Errors
--->
-
 ## Generating an OpenAPI Specification (OAS) 3.0 schema document
 
 DRF >= 3.12 has a [new OAS schema functionality](https://www.django-rest-framework.org/api-guide/schemas/) to generate an
@@ -1115,4 +1076,3 @@ urlpatterns = [
     ...
 ]
 ```
-
diff --git a/example/tests/unit/test_filter_schema_params.py b/example/tests/unit/test_filter_schema_params.py
@@ -20,8 +20,6 @@ class DummyEntryViewSet(EntryViewSet):
     }
 
     def __init__(self, **kwargs):
-        # dummy up self.request since PreloadIncludesMixin expects it to be defined
-        self.request = None
         super(DummyEntryViewSet, self).__init__(**kwargs)
 
 

diff --git a/example/views.py b/example/views.py
@@ -236,11 +236,6 @@ def get_serializer_class(self):
 class CommentViewSet(ModelViewSet):
     queryset = Comment.objects.all()
     serializer_class = CommentSerializer
-    select_for_includes = {"writer": ["author__bio"]}
-    prefetch_for_includes = {
-        "__all__": [],
-        "author": ["author__bio", "author__entries"],
-    }
 
     def get_queryset(self, *args, **kwargs):
         entry_pk = self.kwargs.get("entry_pk", None)
@@ -285,7 +280,3 @@ class AuthorRelationshipView(RelationshipView):
 class LabResultViewSet(ReadOnlyModelViewSet):
     queryset = LabResults.objects.all()
     serializer_class = LabResultsSerializer
-    prefetch_for_includes = {
-        "__all__": [],
-        "author": ["author__bio", "author__entries"],
-    }
diff --git a/rest_framework_json_api/serializers.py b/rest_framework_json_api/serializers.py
@@ -29,6 +29,9 @@
     get_resource_type_from_serializer,
 )
 
+from .settings import json_api_settings
+from .utils.serializers import get_expensive_relational_fields
+
 
 class ResourceIdentifierObjectSerializer(BaseSerializer):
     default_error_messages = {
@@ -153,6 +156,43 @@ def validate_path(serializer_class, field_path, path):
         super(IncludedResourcesValidationMixin, self).__init__(*args, **kwargs)
 
 
+class OnDemandFieldsMixin:
+    """
+    Automatically excludes certain fields from the serializer that have been
+    deemed expensive. To see these fields, the client must explicitly request them.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        # Pop any fields off the serializer that shouldn't come through.
+        for field in self.get_excluded_ondemand_fields():
+            self.fields.pop(field, None)
+
+    def get_excluded_ondemand_fields(self) -> set:
+        """
+        Determine which fields should be popped off if not explicitly asked for.
+        Fields designated as `demanded_fields` in context are never nominated.
+        On-demand fields are those automatically determined to be expensive, and
+        thus removed from the default offering: currently M2Ms and reverse FKs.
+        """
+        if json_api_settings.INCLUDE_EXPENSVE_FIELDS:
+            return set()
+
+        # If we've instantiated the serializer ourselves, we'll have fed
+        # `demanded_fields` into its context. If it's happened as part of DRF render
+        # internals, the view has provided the entire sparse-fields context instead.
+        if "demanded_fields" in self.context:
+            demanded_fields = set(self.context.get("demanded_fields"))
+        else:
+            resource_name = get_resource_type_from_serializer(type(self))
+            sparsefields = self.context.get("all_sparsefields", {})
+            demanded_fields = set(sparsefields.get(resource_name, []))
+
+        # Only exclude those on-demand fields that haven't been explicitly requested.
+        return set(get_expensive_relational_fields(type(self))) - demanded_fields
+
+
 class LazySerializersDict(Mapping):
     """
     A dictionary of serializers which lazily import dotted class path and self.
@@ -207,6 +247,7 @@ def __new__(cls, name, bases, attrs):
 # If user imports serializer from here we can catch class definition and check
 # nested serializers for depricated use.
 class Serializer(
+    OnDemandFieldsMixin,
     IncludedResourcesValidationMixin,
     SparseFieldsetsMixin,
     Serializer,
@@ -230,6 +271,7 @@ class Serializer(
 
 
 class HyperlinkedModelSerializer(
+    OnDemandFieldsMixin,
     IncludedResourcesValidationMixin,
     SparseFieldsetsMixin,
     HyperlinkedModelSerializer,
@@ -250,6 +292,7 @@ class HyperlinkedModelSerializer(
 
 
 class ModelSerializer(
+    OnDemandFieldsMixin,
     IncludedResourcesValidationMixin,
     SparseFieldsetsMixin,
     ModelSerializer,

diff --git a/rest_framework_json_api/settings.py b/rest_framework_json_api/settings.py
@@ -15,6 +15,7 @@
     "FORMAT_RELATED_LINKS": False,
     "PLURALIZE_TYPES": False,
     "UNIFORM_EXCEPTIONS": False,
+    "INCLUDE_EXPENSVE_FIELDS": False,
 }
 
 

diff --git a/rest_framework_json_api/utils.py → rest_framework_json_api/utils/__init__.py b/rest_framework_json_api/utils.py → rest_framework_json_api/utils/__init__.py
@@ -16,7 +16,7 @@
 from rest_framework import exceptions
 from rest_framework.exceptions import APIException
 
-from .settings import json_api_settings
+from ..settings import json_api_settings
 
 # Generic relation descriptor from django.contrib.contenttypes.
 if "django.contrib.contenttypes" not in settings.INSTALLED_APPS:  # pragma: no cover
@@ -472,3 +472,33 @@ def format_errors(data):
     if len(data) > 1 and isinstance(data, list):
         data.sort(key=lambda x: x.get("source", {}).get("pointer", ""))
     return {"errors": data}
+
+
+def includes_to_dict(includes: list[str]) -> dict:
+    """
+    Build a nested dict (a tree keyed by relation name) from JSON:API include paths.
+
+    Each dotted path is walked segment by segment; shared prefixes are merged, so
+        [
+            "property.client",
+            "property.client.clientgroup",
+            "property.client.task_set.branch",
+            "property.branch",
+        ]
+    becomes
+        {
+            "property": {
+                "client": {"clientgroup": {}, "task_set": {"branch": {}}},
+                "branch": {},
+            },
+        }
+    An empty `includes` yields an empty dict.
+    """
+    tree = {}
+    for path in includes:
+        node = tree
+        for segment in path.split("."):
+            if segment not in node:
+                node[segment] = {}
+            node = node[segment]
+    return tree