Skip to content

Commit da5cdba

Browse files
committed
Align metrics with workflow semconv: use duration{state=pending}, move trigger.type to workflow-level metrics, add workflow.count
1 parent 5655113 commit da5cdba

File tree

10 files changed

+369
-175
lines changed

10 files changed

+369
-175
lines changed

src/OpenTelemetry.Instrumentation.Hangfire/HangfireInstrumentationOptions.cs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,14 @@ public class HangfireInstrumentationOptions
4747
public Func<BackgroundJob, bool>? Filter { get; set; }
4848

4949
/// <summary>
50-
/// Gets or sets a value indicating whether to record queue latency metrics.
50+
/// Gets or sets a value indicating whether to record the pending state duration in metrics.
5151
/// </summary>
5252
/// <remarks>
53-
/// When enabled, records the time jobs spend waiting in the queue before execution.
53+
/// When enabled, records workflow.execution.duration with state="pending", representing
54+
/// the time jobs spend waiting in the queue before execution starts.
5455
/// This requires an additional database call per job execution to retrieve the enqueue timestamp.
5556
/// Default is <see langword="false"/> to avoid performance impact in high-throughput scenarios.
57+
/// When disabled, only execution duration (state="executing") is recorded.
5658
/// </remarks>
5759
public bool RecordQueueLatency { get; set; }
5860
}

src/OpenTelemetry.Instrumentation.Hangfire/Implementation/HangfireMetrics.cs

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,18 @@ namespace OpenTelemetry.Instrumentation.Hangfire.Implementation;
1212
/// </summary>
1313
internal static class HangfireMetrics
1414
{
15-
private static readonly Assembly Assembly = typeof(HangfireMetrics).Assembly;
16-
private static readonly AssemblyName AssemblyName = Assembly.GetName();
17-
internal static readonly string MeterName = AssemblyName.Name!;
18-
private static readonly string InstrumentationVersion = Assembly.GetPackageVersion();
19-
2015
// Metric name constants
2116
internal const string ExecutionCountMetricName = "workflow.execution.count";
2217
internal const string ExecutionDurationMetricName = "workflow.execution.duration";
2318
internal const string ExecutionStatusMetricName = "workflow.execution.status";
2419
internal const string ExecutionErrorsMetricName = "workflow.execution.errors";
20+
internal const string WorkflowCountMetricName = "workflow.count";
2521

26-
internal const string QueueLatencyMetricName = "hangfire.queue.latency";
22+
internal static readonly Assembly Assembly = typeof(HangfireMetrics).Assembly;
23+
internal static readonly AssemblyName AssemblyName = Assembly.GetName();
24+
internal static readonly string MeterName = AssemblyName.Name!;
25+
26+
private static readonly string InstrumentationVersion = Assembly.GetPackageVersion();
2727

2828
/// <summary>
2929
/// The meter instance for all Hangfire metrics.
@@ -35,38 +35,52 @@ internal static class HangfireMetrics
3535
/// Follows OpenTelemetry workflow semantic conventions.
3636
/// </summary>
3737
public static readonly Counter<long> ExecutionCount =
38-
Meter.CreateCounter<long>(ExecutionCountMetricName, unit: "{executions}",
38+
Meter.CreateCounter<long>(
39+
ExecutionCountMetricName,
40+
unit: "{executions}",
3941
description: "The number of task executions which have been initiated.");
4042

4143
/// <summary>
4244
/// Histogram for duration of an execution grouped by task, type and result.
4345
/// Follows OpenTelemetry workflow semantic conventions.
46+
/// Records the duration of each execution phase, distinguished by the workflow.execution.state attribute:
47+
/// - state=pending: Time spent waiting in queue before execution.
48+
/// - state=executing: Time spent in actual execution.
4449
/// </summary>
4550
public static readonly Histogram<double> ExecutionDuration =
46-
Meter.CreateHistogram<double>(ExecutionDurationMetricName, unit: "s",
51+
Meter.CreateHistogram<double>(
52+
ExecutionDurationMetricName,
53+
unit: "s",
4754
description: "Duration of an execution grouped by task, type and result.");
4855

4956
/// <summary>
5057
/// UpDownCounter for the number of actively running tasks grouped by task and state.
5158
/// Follows OpenTelemetry workflow semantic conventions.
5259
/// </summary>
5360
public static readonly UpDownCounter<long> ExecutionStatus =
54-
Meter.CreateUpDownCounter<long>(ExecutionStatusMetricName, unit: "{executions}",
61+
Meter.CreateUpDownCounter<long>(
62+
ExecutionStatusMetricName,
63+
unit: "{executions}",
5564
description: "The number of actively running tasks grouped by task, type and the current state.");
5665

5766
/// <summary>
5867
/// Counter for the number of errors encountered in task runs (e.g., compile or test failures).
5968
/// Follows OpenTelemetry workflow semantic conventions.
6069
/// </summary>
6170
public static readonly Counter<long> ExecutionErrors =
62-
Meter.CreateCounter<long>(ExecutionErrorsMetricName, unit: "{error}",
71+
Meter.CreateCounter<long>(
72+
ExecutionErrorsMetricName,
73+
unit: "{error}",
6374
description: "The number of errors encountered in task runs (eg. compile, test failures).");
6475

6576
/// <summary>
66-
/// Histogram for time tasks spend waiting in queue before execution.
67-
/// Hangfire-specific metric (not part of standard workflow conventions).
77+
/// Counter for the number of workflow instances which have been initiated.
78+
/// Follows OpenTelemetry workflow semantic conventions.
79+
/// In Hangfire, each background job is counted as one workflow instance, and the count is recorded when the job finishes performing. For batch workflows, this would track batch completion.
6880
/// </summary>
69-
public static readonly Histogram<double> QueueLatency =
70-
Meter.CreateHistogram<double>(QueueLatencyMetricName, unit: "s",
71-
description: "Time tasks spend waiting in queue before execution starts.");
81+
public static readonly Counter<long> WorkflowCount =
82+
Meter.CreateCounter<long>(
83+
WorkflowCountMetricName,
84+
unit: "{workflows}",
85+
description: "The number of workflow instances which have been initiated.");
7286
}

src/OpenTelemetry.Instrumentation.Hangfire/Implementation/HangfireMetricsInstrumentation.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,10 @@ public HangfireMetricsInstrumentation(HangfireInstrumentationOptions options)
1919
this.AddFilter(new HangfireMetricsStateFilter());
2020
this.AddFilter(new HangfireMetricsErrorFilterAttribute());
2121

22-
// Only register queue latency filter if enabled (requires DB call per job)
22+
// Only register pending duration filter if enabled (requires DB call per job)
2323
if (options.RecordQueueLatency)
2424
{
25-
this.AddFilter(new HangfireQueueLatencyFilterAttribute());
25+
this.AddFilter(new HangfirePendingDurationFilterAttribute());
2626
}
2727
}
2828

src/OpenTelemetry.Instrumentation.Hangfire/Implementation/HangfireMetricsJobFilterAttribute.cs

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,46 @@ public void OnPerforming(PerformingContext performingContext)
2121

2222
public void OnPerformed(PerformedContext performedContext)
2323
{
24-
var executionTags = HangfireTagBuilder.BuildExecutionTags(performedContext.BackgroundJob.Job, performedContext.Exception);
24+
// Get recurring job ID if this job was triggered by a recurring job
25+
string? recurringJobId = null;
26+
try
27+
{
28+
recurringJobId = performedContext.Connection.GetJobParameter(
29+
performedContext.BackgroundJob.Id,
30+
"RecurringJobId");
31+
}
32+
catch
33+
{
34+
// If we can't get the recurring job ID, treat it as a non-recurring job
35+
}
2536

26-
HangfireMetrics.ExecutionCount.Add(1, executionTags);
37+
// Record execution count (without state attribute per semantic conventions)
38+
var countTags = HangfireTagBuilder.BuildExecutionCountTags(
39+
performedContext.BackgroundJob.Job,
40+
performedContext.Exception);
2741

42+
HangfireMetrics.ExecutionCount.Add(1, countTags);
43+
44+
// Record execution duration (with state="executing" to differentiate from pending phase)
2845
if (performedContext.Items.TryGetValue(StopwatchKey, out var stopwatchObj) && stopwatchObj is Stopwatch stopwatch)
2946
{
3047
stopwatch.Stop();
3148
var duration = stopwatch.Elapsed.TotalSeconds;
3249

33-
HangfireMetrics.ExecutionDuration.Record(duration, executionTags);
50+
var durationTags = HangfireTagBuilder.BuildExecutionTags(
51+
performedContext.BackgroundJob.Job,
52+
performedContext.Exception,
53+
workflowState: HangfireTagBuilder.StateExecuting);
54+
55+
HangfireMetrics.ExecutionDuration.Record(duration, durationTags);
3456
}
57+
58+
// Record workflow-level metrics (includes trigger type)
59+
var workflowTags = HangfireTagBuilder.BuildWorkflowTags(
60+
performedContext.BackgroundJob.Job,
61+
performedContext.Exception,
62+
recurringJobId);
63+
64+
HangfireMetrics.WorkflowCount.Add(1, workflowTags);
3565
}
3666
}

src/OpenTelemetry.Instrumentation.Hangfire/Implementation/HangfireMetricsStateFilter.cs

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,10 @@ public void OnStateApplied(ApplyStateContext context, IWriteOnlyTransaction tran
2121
}
2222

2323
var errorType = GetErrorTypeFromNewState(context.NewState);
24-
var recurringJobId = GetRecurringJobId(context);
2524
var tags = HangfireTagBuilder.BuildStateTags(
2625
context.BackgroundJob.Job,
2726
workflowState,
28-
errorType,
29-
recurringJobId);
27+
errorType);
3028

3129
HangfireMetrics.ExecutionStatus.Add(1, tags);
3230
}
@@ -40,12 +38,10 @@ public void OnStateUnapplied(ApplyStateContext context, IWriteOnlyTransaction tr
4038
}
4139

4240
var errorType = GetErrorTypeFromOldState(context);
43-
var recurringJobId = GetRecurringJobId(context);
4441
var tags = HangfireTagBuilder.BuildStateTags(
4542
context.BackgroundJob.Job,
4643
workflowState,
47-
errorType,
48-
recurringJobId);
44+
errorType);
4945

5046
HangfireMetrics.ExecutionStatus.Add(-1, tags);
5147
}
@@ -91,17 +87,4 @@ public void OnStateUnapplied(ApplyStateContext context, IWriteOnlyTransaction tr
9187

9288
return null;
9389
}
94-
95-
private static string? GetRecurringJobId(ApplyStateContext context)
96-
{
97-
try
98-
{
99-
return context.Connection.GetJobParameter(context.BackgroundJob.Id, "RecurringJobId");
100-
}
101-
catch
102-
{
103-
// Parameter doesn't exist or couldn't be retrieved
104-
return null;
105-
}
106-
}
10790
}
Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,14 @@
99
namespace OpenTelemetry.Instrumentation.Hangfire.Implementation;
1010

1111
/// <summary>
12-
/// Hangfire filter that records queue latency metrics.
12+
/// Hangfire filter that records workflow.execution.duration metric for the pending state.
1313
/// </summary>
1414
/// <remarks>
1515
/// This filter captures the EnqueuedAt timestamp when a job enters the Enqueued state
16-
/// and calculates queue latency when the job starts executing.
16+
/// and records workflow.execution.duration{state="pending"} when the job starts executing.
17+
/// This represents the time a job spent waiting in the queue before execution started.
1718
/// </remarks>
18-
internal sealed class HangfireQueueLatencyFilterAttribute : JobFilterAttribute, IServerFilter, IElectStateFilter
19+
internal sealed class HangfirePendingDurationFilterAttribute : JobFilterAttribute, IServerFilter, IElectStateFilter
1920
{
2021
private const string EnqueuedAtParameter = "OpenTelemetry.EnqueuedAt";
2122

@@ -52,10 +53,14 @@ public void OnPerforming(PerformingContext performingContext)
5253
if (!string.IsNullOrEmpty(enqueuedAtStr))
5354
{
5455
var enqueuedAt = JobHelper.DeserializeDateTime(enqueuedAtStr);
55-
var queueLatency = (DateTime.UtcNow - enqueuedAt).TotalSeconds;
56+
var pendingDuration = (DateTime.UtcNow - enqueuedAt).TotalSeconds;
5657

57-
var tags = HangfireTagBuilder.BuildCommonTags(performingContext.BackgroundJob.Job);
58-
HangfireMetrics.QueueLatency.Record(queueLatency, tags);
58+
// Record workflow.execution.duration with state="pending"
59+
var tags = HangfireTagBuilder.BuildExecutionTags(
60+
performingContext.BackgroundJob.Job,
61+
exception: null,
62+
workflowState: HangfireTagBuilder.StatePending);
63+
HangfireMetrics.ExecutionDuration.Record(pendingDuration, tags);
5964
}
6065
}
6166
catch
@@ -66,6 +71,6 @@ public void OnPerforming(PerformingContext performingContext)
6671

6772
public void OnPerformed(PerformedContext performedContext)
6873
{
69-
// No-op: This filter only handles queue latency in OnPerforming
74+
// No-op: This filter only handles pending state duration in OnPerforming
7075
}
7176
}

0 commit comments

Comments
 (0)