Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bson/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1009,7 +1009,7 @@ def _dict_to_bson(
try:
elements.append(_element_to_bson(key, value, check_keys, opts))
except InvalidDocument as err:
raise InvalidDocument(f"Invalid document {doc} | {err}") from err
raise InvalidDocument(f"Invalid document: {err}", doc) from err
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this InvalidDocument except clause include the encoding of _id too?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would be a pre-existing bug, no? Happy to fix if we identify why the code currently looks like this. Maybe we assume that _id will be valid if it reaches this far?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct it would be a pre-existing bug. Could you open a ticket for it?

>>>> encode({'_id': set()})
Traceback (most recent call last):
  File "<python-input-6>", line 1, in <module>
    encode({'_id': set()})
  File "/Users/shane/git/mongo-python-driver/bson/__init__.py", line 1053, in encode
    return _dict_to_bson(document, check_keys, codec_options)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/shane/git/mongo-python-driver/bson/__init__.py", line 1006, in _dict_to_bson
    elements.append(_name_value_to_bson(b"_id\x00", doc["_id"], check_keys, opts))
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/shane/git/mongo-python-driver/bson/__init__.py", line 980, in _name_value_to_bson
    raise InvalidDocument(f"cannot encode object: {value!r}, of type: {type(value)!r}")
bson.errors.InvalidDocument: cannot encode object: set(), of type: <class 'set'>

vs the expected behavior:

>>>> encode({'_id': 1, 'foo': set()})
Traceback (most recent call last):
  File "/Users/shane/git/mongo-python-driver/bson/__init__.py", line 1010, in _dict_to_bson
    elements.append(_element_to_bson(key, value, check_keys, opts))
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/shane/git/mongo-python-driver/bson/__init__.py", line 994, in _element_to_bson
    return _name_value_to_bson(name, value, check_keys, opts)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/shane/git/mongo-python-driver/bson/__init__.py", line 980, in _name_value_to_bson
    raise InvalidDocument(f"cannot encode object: {value!r}, of type: {type(value)!r}")
bson.errors.InvalidDocument: cannot encode object: set(), of type: <class 'set'>

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "<python-input-7>", line 1, in <module>
    encode({'_id': 1, 'foo': set()})
  File "/Users/shane/git/mongo-python-driver/bson/__init__.py", line 1053, in encode
    return _dict_to_bson(document, check_keys, codec_options)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/shane/git/mongo-python-driver/bson/__init__.py", line 1012, in _dict_to_bson
    raise InvalidDocument(f"Invalid document {doc} | {err}") from err
bson.errors.InvalidDocument: Invalid document {'_id': 1, 'foo': set()} | cannot encode object: set(), of type: <class 'set'>

Copy link
Contributor Author

@NoahStapp NoahStapp Sep 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Opened PYTHON-5560 to track.

except AttributeError:
raise TypeError(f"encoder expected a mapping type but got: {doc!r}") from None

Expand Down
15 changes: 4 additions & 11 deletions bson/_cbsonmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -1645,7 +1645,7 @@ static int write_raw_doc(buffer_t buffer, PyObject* raw, PyObject* _raw_str) {
}


/* Update Invalid Document error message to include doc.
/* Update Invalid Document error to include doc as a property.
*/
void handle_invalid_doc_error(PyObject* dict) {
PyObject *etype = NULL, *evalue = NULL, *etrace = NULL;
Expand All @@ -1659,20 +1659,13 @@ void handle_invalid_doc_error(PyObject* dict) {
if (evalue && PyErr_GivenExceptionMatches(etype, InvalidDocument)) {
PyObject *msg = PyObject_Str(evalue);
if (msg) {
// Prepend doc to the existing message
PyObject *dict_str = PyObject_Str(dict);
if (dict_str == NULL) {
goto cleanup;
}
const char * dict_str_utf8 = PyUnicode_AsUTF8(dict_str);
if (dict_str_utf8 == NULL) {
goto cleanup;
}
// Add doc to the error class as a property.
PyObject_SetAttrString(InvalidDocument, "document", dict);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this need to be "_document" now?

Copy link
Member

@ShaneHarvey ShaneHarvey Sep 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This also highlights that this PR needs to add a test to ensure .document is the expected dict.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interestingly the tests fail if this is _document.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are the two modified tests in this PR not sufficient to ensure that .document behaves as expected?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line is setting a class attribute on the InvalidDocument class so the logic is incorrect, eg:

InvalidDocument.document = dict

Copy link
Contributor Author

@NoahStapp NoahStapp Sep 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah sorry I misunderstood what you meant. You're right that _error returns a class, not an instance.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed. Had to do some C API delving to figure this out 😅

const char * msg_utf8 = PyUnicode_AsUTF8(msg);
if (msg_utf8 == NULL) {
goto cleanup;
}
PyObject *new_msg = PyUnicode_FromFormat("Invalid document %s | %s", dict_str_utf8, msg_utf8);
PyObject *new_msg = PyUnicode_FromFormat("Invalid document: %s", msg_utf8);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For an invalid field in a nested doc, does this yield errors like "Invalid document: Invalid document: Invalid document: Invalid document: Invalid document: ...."?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it works as expected:

encode({"t": {"f": Wrapper(1)}})

# Produces
Invalid document: cannot encode object: 1, of type: <class 'test.test_bson.TestBSON.test_doc_in_invalid_document_error_as_property.<locals>.Wrapper'>

Py_DECREF(evalue);
Py_DECREF(etype);
etype = InvalidDocument;
Expand Down
16 changes: 16 additions & 0 deletions bson/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
"""Exceptions raised by the BSON package."""
from __future__ import annotations

from typing import Any, Optional


class BSONError(Exception):
"""Base class for all BSON exceptions."""
Expand All @@ -31,6 +33,20 @@ class InvalidStringData(BSONError):
class InvalidDocument(BSONError):
"""Raised when trying to create a BSON object from an invalid document."""

def __init__(self, message: str, document: Optional[Any] = None) -> None:
super().__init__(message)
self._document = document

@property
def document(self) -> Any:
"""The invalid document that caused the error.
..versionadded:: 4.16"""
return self._document

def _set_document(self, value: Any) -> None:
self._document = value


class InvalidId(BSONError):
"""Raised when trying to create an ObjectId from invalid data."""
9 changes: 9 additions & 0 deletions doc/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
Changelog
=========

Changes in Version 4.16.0 (XXXX/XX/XX)
--------------------------------------

PyMongo 4.16 brings a number of changes including:

- Removed invalid documents from :class:`bson.errors.InvalidDocument` error messages as
doing so may leak sensitive user data.
Instead, invalid documents are stored in :attr:`bson.errors.InvalidDocument.document`.

Changes in Version 4.15.1 (2025/09/16)
--------------------------------------

Expand Down
15 changes: 11 additions & 4 deletions test/test_bson.py
Original file line number Diff line number Diff line change
Expand Up @@ -1163,7 +1163,7 @@ def __repr__(self):
):
encode({"t": Wrapper(1)})

def test_doc_in_invalid_document_error_message(self):
def test_doc_in_invalid_document_error_as_property(self):
class Wrapper:
def __init__(self, val):
self.val = val
Expand All @@ -1173,10 +1173,11 @@ def __repr__(self):

self.assertEqual("1", repr(Wrapper(1)))
doc = {"t": Wrapper(1)}
with self.assertRaisesRegex(InvalidDocument, f"Invalid document {doc}"):
with self.assertRaisesRegex(InvalidDocument, "Invalid document:") as cm:
encode(doc)
self.assertEqual(cm.exception.document, doc)

def test_doc_in_invalid_document_error_message_mapping(self):
def test_doc_in_invalid_document_error_as_property_mapping(self):
class MyMapping(abc.Mapping):
def keys(self):
return ["t"]
Expand All @@ -1192,6 +1193,11 @@ def __len__(self):
def __iter__(self):
return iter(["t"])

def __eq__(self, other):
if isinstance(other, MyMapping):
return True
return False

class Wrapper:
def __init__(self, val):
self.val = val
Expand All @@ -1201,8 +1207,9 @@ def __repr__(self):

self.assertEqual("1", repr(Wrapper(1)))
doc = MyMapping()
with self.assertRaisesRegex(InvalidDocument, f"Invalid document {doc}"):
with self.assertRaisesRegex(InvalidDocument, "Invalid document:") as cm:
encode(doc)
self.assertEqual(cm.exception.document, doc)


class TestCodecOptions(unittest.TestCase):
Expand Down
Loading