Skip to content

Commit 1a2d984

Browse files
committed
Tighten up the logic that bypasses normal log formatting in order to improve compatibility
1 parent 7e68bc9 commit 1a2d984

File tree

2 files changed

+61
-40
lines changed

2 files changed

+61
-40
lines changed

opentelemetry-sdk/src/opentelemetry/sdk/_logs/_internal/__init__.py

Lines changed: 15 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -504,51 +504,26 @@ def _translate(self, record: logging.LogRecord) -> LogRecord:
504504
observered_timestamp = time_ns()
505505
span_context = get_current_span().get_span_context()
506506
attributes = self._get_attributes(record)
507-
# This comment is taken from GanyedeNil's PR #3343, I have redacted it
508-
# slightly for clarity:
509-
# According to the definition of the Body field type in the
510-
# OTel 1.22.0 Logs Data Model article, the Body field should be of
511-
# type 'any' and should not use the str method to directly translate
512-
# the msg. This is because str only converts non-text types into a
513-
# human-readable form, rather than a standard format, which leads to
514-
# the need for additional operations when collected through a log
515-
# collector.
516-
# Considering that he Body field should be of type 'any' and should not
517-
# use the str method but record.msg is also a string type, then the
518-
# difference is just the self.args formatting?
519-
# The primary consideration depends on the ultimate purpose of the log.
520-
# Converting the default log directly into a string is acceptable as it
521-
# will be required to be presented in a more readable format. However,
522-
# this approach might not be as "standard" when hoping to aggregate
523-
# logs and perform subsequent data analysis. In the context of log
524-
# extraction, it would be more appropriate for the msg to be
525-
# converted into JSON format or remain unchanged, as it will eventually
526-
# be transformed into JSON. If the final output JSON data contains a
527-
# structure that appears similar to JSON but is not, it may confuse
528-
# users. This is particularly true for operation and maintenance
529-
# personnel who need to deal with log data in various languages.
530-
# Where is the JSON converting occur? and what about when the msg
531-
# represents something else but JSON, the expected behavior change?
532-
# For the ConsoleLogExporter, it performs the to_json operation in
533-
# opentelemetry.sdk._logs._internal.export.ConsoleLogExporter.__init__,
534-
# so it can handle any type of input without problems. As for the
535-
# OTLPLogExporter, it also handles any type of input encoding in
536-
# _encode_log located in
537-
# opentelemetry.exporter.otlp.proto.common._internal._log_encoder.
538-
# Therefore, no extra operation is needed to support this change.
539-
# The only thing to consider is the users who have already been using
540-
# this SDK. If they upgrade the SDK after this change, they will need
541-
# to readjust their logging collection rules to adapt to the latest
542-
# output format. Therefore, this change is considered a breaking
543-
# change and needs to be upgraded at an appropriate time.
544507
severity_number = std_to_otel(record.levelno)
545508
if self.formatter:
546509
body = self.format(record)
547510
else:
548-
if isinstance(record.msg, str) and record.args:
549-
body = record.getMessage()
550-
else:
511+
# `record.getMessage()` uses `record.msg` as a template to format
512+
# `record.args` into. There is a special case in `record.getMessage()`
513+
# where it will only attempt formatting if args are provided,
514+
# otherwise, it just stringifies `record.msg`.
515+
#
516+
# Since the OTLP body field has a type of 'any' and the logging module
517+
# is sometimes used in such a way that objects incorrectly end up
518+
# set as record.msg, in those cases we would like to bypass
519+
# `record.getMessage()` completely and set the body to the object
520+
# itself instead of its string representation.
521+
# For more background, see: https://github.com/open-telemetry/opentelemetry-python/pull/4216
522+
if not record.args and not isinstance(record.msg, str):
523+
# no args are provided so it's *mostly* safe to use the message template as the body
551524
body = record.msg
525+
else:
526+
body = record.getMessage()
552527

553528
# related to https://github.com/open-telemetry/opentelemetry-python/issues/3548
554529
# Severity Text = WARN as defined in https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/logs/data-model.md#displaying-severity.

opentelemetry-sdk/tests/logs/test_export.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,52 @@ def test_simple_log_record_processor_different_msg_types(self):
230230
item.instrumentation_scope.name, "different_msg_types"
231231
)
232232

233+
def test_simple_log_record_processor_custom_single_obj(self):
234+
"""
235+
Tests that special-case handling for logging a single non-string object
236+
is correctly applied.
237+
"""
238+
exporter = InMemoryLogExporter()
239+
log_record_processor = BatchLogRecordProcessor(exporter)
240+
241+
provider = LoggerProvider()
242+
provider.add_log_record_processor(log_record_processor)
243+
244+
logger = logging.getLogger("single_obj")
245+
logger.addHandler(LoggingHandler(logger_provider=provider))
246+
247+
# NOTE: the behaviour of `record.getMessage` is detailed in the
248+
# `logging.Logger.debug` documentation:
249+
# > The msg is the message format string, and the args are the arguments
250+
# > which are merged into msg using the string formatting operator. [...]
251+
# > No % formatting operation is performed on msg when no args are supplied.
252+
253+
# This test uses the presence of '%s' in the first arg to determine if
254+
# formatting was applied
255+
256+
# string msg with no args - getMessage bypasses formatting and sets the string directly
257+
logger.warning("a string with a percent-s: %s")
258+
# string msg with args - getMessage formats args into the msg
259+
logger.warning("a string with a percent-s: %s", "and arg")
260+
# non-string msg with args - getMessage stringifies msg and formats args into it
261+
logger.warning(["a non-string with a percent-s", "%s"], "and arg")
262+
# non-string msg with no args:
263+
# - normally getMessage would stringify the object and bypass formatting
264+
# - SPECIAL CASE: bypass stringification as well to keep the raw object
265+
logger.warning(["a non-string with a percent-s", "%s"])
266+
log_record_processor.shutdown()
267+
268+
finished_logs = exporter.get_finished_logs()
269+
expected = [
270+
("a string with a percent-s: %s"),
271+
("a string with a percent-s: and arg"),
272+
("['a non-string with a percent-s', 'and arg']"),
273+
(["a non-string with a percent-s", "%s"]),
274+
]
275+
for emitted, expected in zip(finished_logs, expected):
276+
self.assertEqual(emitted.log_record.body, expected)
277+
self.assertEqual(emitted.instrumentation_scope.name, "single_obj")
278+
233279
def test_simple_log_record_processor_different_msg_types_with_formatter(
234280
self,
235281
):

0 commit comments

Comments
 (0)