From f581f220cda8b270644a413568cdc79e553a748c Mon Sep 17 00:00:00 2001
From: Paul Fischer
Date: Fri, 1 Aug 2025 15:22:15 +0200
Subject: [PATCH 1/2] feat(http): add error handling for exporting

---
 .../exporter/otlp/proto/http/_log_exporter/__init__.py   | 6 +++++-
 .../exporter/otlp/proto/http/metric_exporter/__init__.py | 8 +++++++-
 .../exporter/otlp/proto/http/trace_exporter/__init__.py  | 6 +++++-
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py
index 2afdf66002..7122775762 100644
--- a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py
+++ b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py
@@ -184,7 +184,11 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult:
         serialized_data = encode_logs(batch).SerializeToString()
         deadline_sec = time() + self._timeout
         for retry_num in range(_MAX_RETRYS):
-            resp = self._export(serialized_data, deadline_sec - time())
+            try:
+                resp = self._export(serialized_data, deadline_sec - time())
+            except Exception as error:
+                _logger.error("Failed to export logs batch reason: %s", error)
+                return LogExportResult.FAILURE
             if resp.ok:
                 return LogExportResult.SUCCESS
             # multiplying by a random number between .8 and 1.2 introduces a +/-20% jitter to each backoff.
diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py
index c6d657e7ae..81801d1a51 100644
--- a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py
+++ b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py
@@ -231,7 +231,13 @@ def export(
         serialized_data = encode_metrics(metrics_data).SerializeToString()
         deadline_sec = time() + self._timeout
         for retry_num in range(_MAX_RETRYS):
-            resp = self._export(serialized_data, deadline_sec - time())
+            try:
+                resp = self._export(serialized_data, deadline_sec - time())
+            except Exception as error:
+                _logger.error(
+                    "Failed to export metrics batch reason: %s", error
+                )
+                return MetricExportResult.FAILURE
             if resp.ok:
                 return MetricExportResult.SUCCESS
             # multiplying by a random number between .8 and 1.2 introduces a +/-20% jitter to each backoff.
diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py
index 055e829dab..f47eccaa8e 100644
--- a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py
+++ b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py
@@ -179,7 +179,11 @@ def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
         serialized_data = encode_spans(spans).SerializePartialToString()
         deadline_sec = time() + self._timeout
         for retry_num in range(_MAX_RETRYS):
-            resp = self._export(serialized_data, deadline_sec - time())
+            try:
+                resp = self._export(serialized_data, deadline_sec - time())
+            except Exception as error:
+                _logger.error("Failed to export span batch reason: %s", error)
+                return SpanExportResult.FAILURE
             if resp.ok:
                 return SpanExportResult.SUCCESS
             # multiplying by a random number between .8 and 1.2 introduces a +/-20% jitter to each backoff.
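
Note: with patch 1 alone, any exception raised by self._export (for example a
requests.exceptions.ConnectionError while the collector is briefly
unreachable) fails the batch immediately, before a single retry happens. The
standalone sketch below approximates that control flow; requests.post against
a hypothetical local endpoint stands in for self._export, and the _MAX_RETRYS
value is assumed:

    # Standalone sketch of the patch-1 control flow, not library code.
    import requests

    _MAX_RETRYS = 6  # name matches the constant in the diff; value assumed


    def export_once(serialized_data: bytes) -> str:
        for retry_num in range(_MAX_RETRYS):
            try:
                # Stand-in for self._export(); the endpoint is hypothetical.
                resp = requests.post(
                    "http://localhost:4318/v1/logs",
                    data=serialized_data,
                    timeout=10,
                )
            except Exception as error:  # e.g. ConnectionError on a flaky link
                # Patch 1 logs and returns FAILURE here, so the remaining
                # retries are never used for connection-level errors.
                print(f"Failed to export logs batch reason: {error}")
                return "FAILURE"
            if resp.ok:
                return "SUCCESS"
            # HTTP-level errors still reach the backoff/retry logic below.
        return "FAILURE"

Patch 2 below reworks the loop so that these exceptions fall through to the
same backoff path as retryable HTTP responses.
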
From 2b50d49d7dc232343166d2ae24fffb046ff67c6e Mon Sep 17 00:00:00 2001
From: Paul Fischer
Date: Mon, 27 Oct 2025 13:39:31 +0100
Subject: [PATCH 2/2] feat(http_exporter): allow the retry loop to run on
 connection errors

---
 .../otlp/proto/http/_log_exporter/__init__.py | 28 +++++++++++--------
 .../proto/http/metric_exporter/__init__.py    | 28 +++++++++++--------
 .../proto/http/trace_exporter/__init__.py     | 28 +++++++++++--------
 3 files changed, 48 insertions(+), 36 deletions(-)

diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py
index 7122775762..23fe1020d6 100644
--- a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py
+++ b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/_log_exporter/__init__.py
@@ -184,30 +184,34 @@ def export(self, batch: Sequence[LogData]) -> LogExportResult:
         serialized_data = encode_logs(batch).SerializeToString()
         deadline_sec = time() + self._timeout
         for retry_num in range(_MAX_RETRYS):
+            # multiplying by a random number between .8 and 1.2 introduces a +/-20% jitter to each backoff.
+            backoff_seconds = 2**retry_num * random.uniform(0.8, 1.2)
             try:
                 resp = self._export(serialized_data, deadline_sec - time())
+                if resp.ok:
+                    return LogExportResult.SUCCESS
+                if not _is_retryable(resp):
+                    _logger.error(
+                        "Failed to export logs batch code: %s, reason: %s",
+                        resp.status_code,
+                        resp.text,
+                    )
+                    return LogExportResult.FAILURE
             except Exception as error:
                 _logger.error("Failed to export logs batch reason: %s", error)
-                return LogExportResult.FAILURE
-            if resp.ok:
-                return LogExportResult.SUCCESS
-            # multiplying by a random number between .8 and 1.2 introduces a +/-20% jitter to each backoff.
-            backoff_seconds = 2**retry_num * random.uniform(0.8, 1.2)
+
             if (
-                not _is_retryable(resp)
-                or retry_num + 1 == _MAX_RETRYS
+                retry_num + 1 == _MAX_RETRYS
                 or backoff_seconds > (deadline_sec - time())
                 or self._shutdown
             ):
                 _logger.error(
-                    "Failed to export logs batch code: %s, reason: %s",
-                    resp.status_code,
-                    resp.text,
+                    "Failed to export logs batch due to timeout, "
+                    "max retries or shutdown."
                 )
                 return LogExportResult.FAILURE
             _logger.warning(
-                "Transient error %s encountered while exporting logs batch, retrying in %.2fs.",
-                resp.reason,
+                "Transient error encountered while exporting logs batch, retrying in %.2fs.",
                 backoff_seconds,
             )
             shutdown = self._shutdown_is_occuring.wait(backoff_seconds)
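
Note: computing backoff_seconds at the top of the loop lets the HTTP-error
and raised-exception paths share a single give-up check (max retries,
deadline exhausted, or shutdown). A rough sketch of the resulting schedule,
assuming _MAX_RETRYS = 6 and a default self._timeout of 10 seconds (both
values assumed, not taken from the diff):

    # Rough sketch of the backoff schedule produced by the loop above.
    import random
    from time import sleep, time

    _MAX_RETRYS = 6             # assumed value
    deadline_sec = time() + 10  # assumed default timeout of 10 seconds

    for retry_num in range(_MAX_RETRYS):
        # nominal waits: ~1s, ~2s, ~4s, ~8s, ~16s, each with +/-20% jitter
        backoff_seconds = 2**retry_num * random.uniform(0.8, 1.2)
        if backoff_seconds > (deadline_sec - time()):
            print(f"giving up: {backoff_seconds:.2f}s exceeds remaining budget")
            break
        sleep(backoff_seconds)  # the exporter waits on an Event instead

With these numbers the exporter waits roughly 1 s, 2 s and 4 s, then gives up
when the next wait (about 8 s) would overshoot the roughly 3 s of budget left.
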
diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py
index 81801d1a51..0d71a6ed16 100644
--- a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py
+++ b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/metric_exporter/__init__.py
@@ -231,32 +231,36 @@ def export(
         serialized_data = encode_metrics(metrics_data).SerializeToString()
         deadline_sec = time() + self._timeout
         for retry_num in range(_MAX_RETRYS):
+            # multiplying by a random number between .8 and 1.2 introduces a +/-20% jitter to each backoff.
+            backoff_seconds = 2**retry_num * random.uniform(0.8, 1.2)
             try:
                 resp = self._export(serialized_data, deadline_sec - time())
+                if resp.ok:
+                    return MetricExportResult.SUCCESS
+                if not _is_retryable(resp):
+                    _logger.error(
+                        "Failed to export metrics batch code: %s, reason: %s",
+                        resp.status_code,
+                        resp.text,
+                    )
+                    return MetricExportResult.FAILURE
             except Exception as error:
                 _logger.error(
                     "Failed to export metrics batch reason: %s", error
                 )
-                return MetricExportResult.FAILURE
-            if resp.ok:
-                return MetricExportResult.SUCCESS
-            # multiplying by a random number between .8 and 1.2 introduces a +/-20% jitter to each backoff.
-            backoff_seconds = 2**retry_num * random.uniform(0.8, 1.2)
+
             if (
-                not _is_retryable(resp)
-                or retry_num + 1 == _MAX_RETRYS
+                retry_num + 1 == _MAX_RETRYS
                 or backoff_seconds > (deadline_sec - time())
                 or self._shutdown
             ):
                 _logger.error(
-                    "Failed to export metrics batch code: %s, reason: %s",
-                    resp.status_code,
-                    resp.text,
+                    "Failed to export metrics batch due to timeout, "
+                    "max retries or shutdown."
                 )
                 return MetricExportResult.FAILURE
             _logger.warning(
-                "Transient error %s encountered while exporting metrics batch, retrying in %.2fs.",
-                resp.reason,
+                "Transient error encountered while exporting metrics batch, retrying in %.2fs.",
                 backoff_seconds,
             )
             shutdown = self._shutdown_in_progress.wait(backoff_seconds)
diff --git a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py
index f47eccaa8e..91583da5a6 100644
--- a/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py
+++ b/exporter/opentelemetry-exporter-otlp-proto-http/src/opentelemetry/exporter/otlp/proto/http/trace_exporter/__init__.py
@@ -179,30 +179,34 @@ def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
         serialized_data = encode_spans(spans).SerializePartialToString()
         deadline_sec = time() + self._timeout
         for retry_num in range(_MAX_RETRYS):
+            # multiplying by a random number between .8 and 1.2 introduces a +/-20% jitter to each backoff.
+            backoff_seconds = 2**retry_num * random.uniform(0.8, 1.2)
             try:
                 resp = self._export(serialized_data, deadline_sec - time())
+                if resp.ok:
+                    return SpanExportResult.SUCCESS
+                if not _is_retryable(resp):
+                    _logger.error(
+                        "Failed to export span batch code: %s, reason: %s",
+                        resp.status_code,
+                        resp.text,
+                    )
+                    return SpanExportResult.FAILURE
             except Exception as error:
                 _logger.error("Failed to export span batch reason: %s", error)
-                return SpanExportResult.FAILURE
-            if resp.ok:
-                return SpanExportResult.SUCCESS
-            # multiplying by a random number between .8 and 1.2 introduces a +/-20% jitter to each backoff.
-            backoff_seconds = 2**retry_num * random.uniform(0.8, 1.2)
+
             if (
-                not _is_retryable(resp)
-                or retry_num + 1 == _MAX_RETRYS
+                retry_num + 1 == _MAX_RETRYS
                 or backoff_seconds > (deadline_sec - time())
                 or self._shutdown
             ):
                 _logger.error(
-                    "Failed to export span batch code: %s, reason: %s",
-                    resp.status_code,
-                    resp.text,
+                    "Failed to export span batch due to timeout, "
+                    "max retries or shutdown."
                 )
                 return SpanExportResult.FAILURE
             _logger.warning(
-                "Transient error %s encountered while exporting span batch, retrying in %.2fs.",
-                resp.reason,
+                "Transient error encountered while exporting span batch, retrying in %.2fs.",
                 backoff_seconds,
             )
             shutdown = self._shutdown_in_progress.wait(backoff_seconds)
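
Note: an illustrative regression test for the new behavior, assuming the
internals shown above (_export, the retry loop, and the SDK's
LogExportResult); a ConnectionError on the first attempt should now be
retried after one backoff instead of failing the batch:

    # Illustrative test sketch; assumes the internals shown in the diff above.
    from unittest.mock import Mock, patch

    import requests

    from opentelemetry.exporter.otlp.proto.http._log_exporter import (
        OTLPLogExporter,
    )
    from opentelemetry.sdk._logs.export import LogExportResult


    def test_connection_error_is_retried():
        exporter = OTLPLogExporter()
        ok_response = Mock(ok=True)

        with patch.object(
            exporter,
            "_export",
            side_effect=[
                requests.exceptions.ConnectionError("connection refused"),
                ok_response,
            ],
        ) as mock_export:
            result = exporter.export([])  # empty batch keeps the sketch small

        # The first attempt raises and is retried after ~1s of backoff;
        # the second attempt succeeds.
        assert mock_export.call_count == 2
        assert result is LogExportResult.SUCCESS
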