From 293180127d066640b7cfefcec6dbff75aa1bf24e Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Mon, 23 Jun 2025 16:58:42 -0400 Subject: [PATCH 1/3] WIP --- mongoengine/base/datastructures.py | 57 ++++++++++++++++++++++++++ mongoengine/fields.py | 64 ++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+) diff --git a/mongoengine/base/datastructures.py b/mongoengine/base/datastructures.py index dcb8438c7..e6d9a42c8 100644 --- a/mongoengine/base/datastructures.py +++ b/mongoengine/base/datastructures.py @@ -472,3 +472,60 @@ def __getattr__(self, name): def __repr__(self): return f"" + + +class RawDict: + def __init__(self, data, deserialize_method): + self._data = data + self.deserialize_method = deserialize_method + + def deserialize(self): + return self.deserialize_method(self._data) + + def __setitem__(self, key, item): + self._data[key] = item + + def __getitem__(self, key): + return self._data[key] + + def __repr__(self): + return repr(self._data) + + def __len__(self): + return len(self._data) + + def __delitem__(self, key): + del self._data[key] + + def clear(self): + return self._data.clear() + + def copy(self): + return self._data.copy() + + def has_key(self, k): + return k in self._data + + def update(self, *args, **kwargs): + return self._data.update(*args, **kwargs) + + def keys(self): + return self._data.keys() + + def values(self): + return self._data.values() + + def items(self): + return self._data.items() + + def pop(self, *args): + return self._data.pop(*args) + + def __cmp__(self, dict_): + return self.__cmp__(self._data, dict_) + + def __contains__(self, item): + return item in self._data + + def __iter__(self): + return iter(self._data) diff --git a/mongoengine/fields.py b/mongoengine/fields.py index 980098dfb..f0c0d5f58 100644 --- a/mongoengine/fields.py +++ b/mongoengine/fields.py @@ -16,6 +16,8 @@ from bson.decimal128 import Decimal128, create_decimal128_context from pymongo import ReturnDocument +from mongoengine.base.datastructures import RawDict + try: import dateutil except ImportError: @@ -81,6 +83,7 @@ "SortedListField", "EmbeddedDocumentListField", "DictField", + "LazyDictField", "MapField", "ReferenceField", "CachedReferenceField", @@ -1035,6 +1038,7 @@ def key_starts_with_dollar(d): return True +# TODO: make a LazyDictField that lazily deferences on access class DictField(ComplexBaseField): """A dictionary field that wraps a standard Python dictionary. This is similar to an embedded document, but the structure is not defined. @@ -1089,6 +1093,66 @@ def prepare_query_value(self, op, value): return super().prepare_query_value(op, value) +class LazyDictField(ComplexBaseField): + """A lazy dictionary field that wraps a standard Python dictionary. + Unlike the :class:`~mongoengine.fields.DictField`, it will + **not** be automatically deserialized. Manual deserialization must be triggered + using the ``deserialize()`` method. + + .. note:: + Required means it cannot be empty - as the default for DictFields is {} + """ + + def __init__(self, field=None, *args, **kwargs): + kwargs.setdefault("default", dict) + super().__init__(*args, field=field, **kwargs) + self.set_auto_dereferencing(False) + + def validate(self, value): + """Make sure that a list of valid fields is being used.""" + if not isinstance(value, dict): + self.error("Only dictionaries may be used in a DictField") + + if key_not_string(value): + msg = "Invalid dictionary key - documents must have only string keys" + self.error(msg) + + # Following condition applies to MongoDB >= 3.6 + # older Mongo has stricter constraints but + # it will be rejected upon insertion anyway + # Having a validation that depends on the MongoDB version + # is not straightforward as the field isn't aware of the connected Mongo + if key_starts_with_dollar(value): + self.error( + 'Invalid dictionary key name - keys may not startswith "$" characters' + ) + super().validate(value) + + def lookup_member(self, member_name): + return DictField(db_field=member_name) + + def prepare_query_value(self, op, value): + match_operators = [*STRING_OPERATORS] + + if op in match_operators and isinstance(value, str): + return StringField().prepare_query_value(op, value) + + if hasattr( + self.field, "field" + ): # Used for instance when using DictField(ListField(IntField())) + if op in ("set", "unset") and isinstance(value, dict): + return { + k: self.field.prepare_query_value(op, v) for k, v in value.items() + } + return self.field.prepare_query_value(op, value) + + return super().prepare_query_value(op, value) + + def to_python(self, value): + self._data = RawDict(value, super().to_python) + return self._data + + class MapField(DictField): """A field that maps a name to a specified field type. Similar to a DictField, except the 'value' of each item must match the specified From d4619bd337902111495ed535bf7c7691e9c42656 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 24 Jun 2025 10:27:05 -0400 Subject: [PATCH 2/3] INTPYTHON-617 - Improve DictField to_python performance --- mongoengine/fields.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mongoengine/fields.py b/mongoengine/fields.py index f0c0d5f58..9c0b13203 100644 --- a/mongoengine/fields.py +++ b/mongoengine/fields.py @@ -1038,7 +1038,6 @@ def key_starts_with_dollar(d): return True -# TODO: make a LazyDictField that lazily deferences on access class DictField(ComplexBaseField): """A dictionary field that wraps a standard Python dictionary. This is similar to an embedded document, but the structure is not defined. @@ -1092,6 +1091,14 @@ def prepare_query_value(self, op, value): return super().prepare_query_value(op, value) + def to_python(self, value): + to_python = getattr(self.field, "to_python", None) + return ( + {k: to_python(v) for k, v in value.items()} + if to_python and value + else value or None + ) + class LazyDictField(ComplexBaseField): """A lazy dictionary field that wraps a standard Python dictionary. From 40aa9a5d4a6b130932fc9c9df1e8f9da397f7d40 Mon Sep 17 00:00:00 2001 From: Noah Stapp Date: Tue, 24 Jun 2025 10:28:16 -0400 Subject: [PATCH 3/3] Cleanup --- mongoengine/base/datastructures.py | 57 --------------------------- mongoengine/fields.py | 63 ------------------------------ 2 files changed, 120 deletions(-) diff --git a/mongoengine/base/datastructures.py b/mongoengine/base/datastructures.py index e6d9a42c8..dcb8438c7 100644 --- a/mongoengine/base/datastructures.py +++ b/mongoengine/base/datastructures.py @@ -472,60 +472,3 @@ def __getattr__(self, name): def __repr__(self): return f"" - - -class RawDict: - def __init__(self, data, deserialize_method): - self._data = data - self.deserialize_method = deserialize_method - - def deserialize(self): - return self.deserialize_method(self._data) - - def __setitem__(self, key, item): - self._data[key] = item - - def __getitem__(self, key): - return self._data[key] - - def __repr__(self): - return repr(self._data) - - def __len__(self): - return len(self._data) - - def __delitem__(self, key): - del self._data[key] - - def clear(self): - return self._data.clear() - - def copy(self): - return self._data.copy() - - def has_key(self, k): - return k in self._data - - def update(self, *args, **kwargs): - return self._data.update(*args, **kwargs) - - def keys(self): - return self._data.keys() - - def values(self): - return self._data.values() - - def items(self): - return self._data.items() - - def pop(self, *args): - return self._data.pop(*args) - - def __cmp__(self, dict_): - return self.__cmp__(self._data, dict_) - - def __contains__(self, item): - return item in self._data - - def __iter__(self): - return iter(self._data) diff --git a/mongoengine/fields.py b/mongoengine/fields.py index 9c0b13203..e005fe0a5 100644 --- a/mongoengine/fields.py +++ b/mongoengine/fields.py @@ -16,8 +16,6 @@ from bson.decimal128 import Decimal128, create_decimal128_context from pymongo import ReturnDocument -from mongoengine.base.datastructures import RawDict - try: import dateutil except ImportError: @@ -83,7 +81,6 @@ "SortedListField", "EmbeddedDocumentListField", "DictField", - "LazyDictField", "MapField", "ReferenceField", "CachedReferenceField", @@ -1100,66 +1097,6 @@ def to_python(self, value): ) -class LazyDictField(ComplexBaseField): - """A lazy dictionary field that wraps a standard Python dictionary. - Unlike the :class:`~mongoengine.fields.DictField`, it will - **not** be automatically deserialized. Manual deserialization must be triggered - using the ``deserialize()`` method. - - .. note:: - Required means it cannot be empty - as the default for DictFields is {} - """ - - def __init__(self, field=None, *args, **kwargs): - kwargs.setdefault("default", dict) - super().__init__(*args, field=field, **kwargs) - self.set_auto_dereferencing(False) - - def validate(self, value): - """Make sure that a list of valid fields is being used.""" - if not isinstance(value, dict): - self.error("Only dictionaries may be used in a DictField") - - if key_not_string(value): - msg = "Invalid dictionary key - documents must have only string keys" - self.error(msg) - - # Following condition applies to MongoDB >= 3.6 - # older Mongo has stricter constraints but - # it will be rejected upon insertion anyway - # Having a validation that depends on the MongoDB version - # is not straightforward as the field isn't aware of the connected Mongo - if key_starts_with_dollar(value): - self.error( - 'Invalid dictionary key name - keys may not startswith "$" characters' - ) - super().validate(value) - - def lookup_member(self, member_name): - return DictField(db_field=member_name) - - def prepare_query_value(self, op, value): - match_operators = [*STRING_OPERATORS] - - if op in match_operators and isinstance(value, str): - return StringField().prepare_query_value(op, value) - - if hasattr( - self.field, "field" - ): # Used for instance when using DictField(ListField(IntField())) - if op in ("set", "unset") and isinstance(value, dict): - return { - k: self.field.prepare_query_value(op, v) for k, v in value.items() - } - return self.field.prepare_query_value(op, value) - - return super().prepare_query_value(op, value) - - def to_python(self, value): - self._data = RawDict(value, super().to_python) - return self._data - - class MapField(DictField): """A field that maps a name to a specified field type. Similar to a DictField, except the 'value' of each item must match the specified