Skip to content

fix(instr-undici): fix user agent extraction and handle of multiple values on headers #2875

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jun 4, 2025
Merged
5 changes: 3 additions & 2 deletions plugins/node/instrumentation-undici/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,10 @@ export interface UndiciRequest {
path: string;
/**
* Serialized string of headers in the form `name: value\r\n` for v5
* Array of strings v6
* Array of strings `[key1, value1, key2, value2]`, where values are
* `string | string[]` for v6
*/
headers: string | string[];
headers: string | (string | string[])[];
/**
* Helper method to add headers (from v6)
*/
Expand Down
90 changes: 55 additions & 35 deletions plugins/node/instrumentation-undici/src/undici.ts
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,50 @@
});
}

private parseRequestHeaders(request: UndiciRequest) {
const result = new Map<string, string | string[]>();

if (Array.isArray(request.headers)) {
// headers are an array [k1, v2, k2, v2] (undici v6+)
// values could be string or a string[] for multiple values
for (let i = 0; i < request.headers.length; i += 2) {
const key = request.headers[i];
const value = request.headers[i + 1];

// Key should always be a string, but the types don't know that, and let's be safe
if (typeof key === 'string') {
result.set(key.toLowerCase(), value);
}
}
} else if (typeof request.headers === 'string') {
// headers are a raw string (undici v5)
// headers could be repeated in several lines for multiple values
const headers = request.headers.split('\r\n');
for (const line of headers) {
if (!line) {
continue;
}
const colonIndex = line.indexOf(':');
if (colonIndex === -1) {
// Invalid header? Probably this can't happen, but again let's be safe.
continue;

Check warning on line 194 in plugins/node/instrumentation-undici/src/undici.ts

View check run for this annotation

Codecov / codecov/patch

plugins/node/instrumentation-undici/src/undici.ts#L194

Added line #L194 was not covered by tests
}
const key = line.substring(0, colonIndex).toLowerCase();
const value = line.substring(colonIndex + 1).trim();
const allValues = result.get(key);

if (allValues && Array.isArray(allValues)) {
allValues.push(value);

Check warning on line 201 in plugins/node/instrumentation-undici/src/undici.ts

View check run for this annotation

Codecov / codecov/patch

plugins/node/instrumentation-undici/src/undici.ts#L201

Added line #L201 was not covered by tests
} else if (allValues) {
result.set(key, [allValues, value]);
} else {
result.set(key, value);
}
}
}
return result;
}

// This is the 1st message we receive for each request (fired after request creation). Here we will
// create the span and populate some atttributes, then link the span to the request for further
// span processing
Expand Down Expand Up @@ -218,24 +262,13 @@
}

// Get user agent from headers
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: Not for this PR, but https://opentelemetry.io/docs/specs/semconv/http/http-spans/ says that user_agent.original is Opt-In. I.e. collecting it should be off by default. We should do that in a separate (breaking) PR change.

let userAgent;
if (Array.isArray(request.headers)) {
const idx = request.headers.findIndex(
h => h.toLowerCase() === 'user-agent'
);
if (idx >= 0) {
userAgent = request.headers[idx + 1];
}
} else if (typeof request.headers === 'string') {
const headers = request.headers.split('\r\n');
const uaHeader = headers.find(h =>
h.toLowerCase().startsWith('user-agent')
);
userAgent =
uaHeader && uaHeader.substring(uaHeader.indexOf(':') + 1).trim();
}
const headersMap = this.parseRequestHeaders(request);
const userAgentValues = headersMap.get('user-agent');

if (userAgent) {
if (userAgentValues) {
const userAgent = Array.isArray(userAgentValues)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Choosing the last user-agent value is an "interesting thing" to have a comment on, I think. Could point to the curl example as justification.

? userAgentValues[userAgentValues.length - 1]
: userAgentValues;
attributes[SemanticAttributes.USER_AGENT_ORIGINAL] = userAgent;
}

Expand Down Expand Up @@ -329,27 +362,14 @@
const headersToAttribs = new Set(
config.headersToSpanAttributes.requestHeaders.map(n => n.toLowerCase())
);
const headersMap = this.parseRequestHeaders(request);

// headers could be in form
// ['name: value', ...] for v5
// ['name', 'value', ...] for v6
const rawHeaders = Array.isArray(request.headers)
? request.headers
: request.headers.split('\r\n');
rawHeaders.forEach((h, idx) => {
const sepIndex = h.indexOf(':');
const hasSeparator = sepIndex !== -1;
const name = (
hasSeparator ? h.substring(0, sepIndex) : h
).toLowerCase();
const value = hasSeparator
? h.substring(sepIndex + 1)
: rawHeaders[idx + 1];

for (const [name, value] of headersMap.entries()) {
if (headersToAttribs.has(name)) {
spanAttributes[`http.request.header.${name}`] = value.trim();
const attrValue = Array.isArray(value) ? value.join(', ') : value;
spanAttributes[`http.request.header.${name}`] = attrValue.trim();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I don't think the .trim() is necessary now because parseRequestHeaders does that.

}
});
}
}

span.setAttributes(spanAttributes);
Expand Down
108 changes: 76 additions & 32 deletions plugins/node/instrumentation-undici/test/undici.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -797,43 +797,87 @@ describe('UndiciInstrumentation `undici` tests', function () {
});
});

it('should not report an user-agent if it was not defined', async function () {
let spans = memoryExporter.getFinishedSpans();
assert.strictEqual(spans.length, 0);
const userAgentRequests: Array<{
name: string;
headers: Record<string, string | string[] | undefined>;
expectedUserAgent: string | undefined;
}> = [
{
name: 'no user-agent',
headers: { 'foo-client': 'bar' },
expectedUserAgent: undefined,
},
{
name: 'a user-agent',
headers: { 'foo-client': 'bar', 'user-agent': 'custom' },
expectedUserAgent: 'custom',
},
{
name: 'explicitly-undefined user-agent',
headers: { 'foo-client': 'bar', 'user-agent': undefined },
expectedUserAgent: undefined,
},
// contra the spec, but we shouldn't crash
{
name: 'multiple user-agents',
headers: {
'foo-client': 'bar',
'user-agent': ['agent', 'other-agent'],
},
expectedUserAgent: 'other-agent',
},
{
name: 'another header with value user-agent',
headers: { 'foo-client': 'user-agent', 'user-agent': 'custom' },
expectedUserAgent: 'custom',
},
{
name: 'another header with multiple values',
headers: { 'foo-client': ['one', 'two'], 'user-agent': 'custom' },
expectedUserAgent: 'custom',
},
{
name: 'another header with explicitly-undefined value',
headers: { 'foo-client': undefined, 'user-agent': 'custom' },
expectedUserAgent: 'custom',
},
];

// Do some requests
const headers = {
'foo-client': 'bar',
};
for (const testCase of userAgentRequests) {
it(`should report the correct user-agent when the request has ${testCase.name}`, async function () {
let spans = memoryExporter.getFinishedSpans();
assert.strictEqual(spans.length, 0);

const queryRequestUrl = `${protocol}://${hostname}:${mockServer.port}/?query=test`;
const queryResponse = await undici.request(queryRequestUrl, { headers });
await consumeResponseBody(queryResponse.body);
const queryRequestUrl = `${protocol}://${hostname}:${mockServer.port}/?query=test`;
const queryResponse = await undici.request(queryRequestUrl, {
headers: testCase.headers,
});
await consumeResponseBody(queryResponse.body);

assert.ok(
queryResponse.headers['propagation-error'] == null,
'propagation is set for instrumented requests'
);
assert.ok(
queryResponse.headers['propagation-error'] == null,
'propagation is set for instrumented requests'
);

spans = memoryExporter.getFinishedSpans();
const span = spans[0];
assert.ok(span, 'a span is present');
assert.strictEqual(spans.length, 1);
assertSpan(span, {
hostname: 'localhost',
httpStatusCode: queryResponse.statusCode,
httpMethod: 'GET',
path: '/',
query: '?query=test',
reqHeaders: headers,
resHeaders: queryResponse.headers,
spans = memoryExporter.getFinishedSpans();
const span = spans[0];
assert.ok(span, 'a span is present');
assert.strictEqual(spans.length, 1);
assertSpan(span, {
hostname: 'localhost',
httpStatusCode: queryResponse.statusCode,
httpMethod: 'GET',
path: '/',
query: '?query=test',
reqHeaders: testCase.headers,
resHeaders: queryResponse.headers,
});
assert.strictEqual(
span.attributes['user_agent.original'],
testCase.expectedUserAgent
);
});
assert.strictEqual(
span.attributes['user_agent.original'],
undefined,
'user-agent is undefined'
);
});
}

it('should create valid span if request.path is a full URL', async function () {
let spans = memoryExporter.getFinishedSpans();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ export const assertSpan = (
if (userAgent) {
assert.strictEqual(
span.attributes[SemanticAttributes.USER_AGENT_ORIGINAL],
userAgent
Array.isArray(userAgent) ? userAgent[userAgent.length - 1] : userAgent
);
}
}
Expand Down