-
Notifications
You must be signed in to change notification settings - Fork 581
fix(instr-undici): fix user agent extraction and handle of multiple values on headers #2875
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
8534336
f41050d
c5c1a77
3b18bf8
9a7f8d7
a263b4d
72fee91
96880ea
01c1c86
48ed223
64eaaba
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -165,6 +165,42 @@ | |
}); | ||
} | ||
|
||
/** | ||
* Yield an object { key, value } for each header in the request. Skips | ||
* likely-invalid headers. Multi-valued headers are passed through. | ||
*/ | ||
private *parseRequestHeaders( | ||
request: UndiciRequest | ||
): Generator<{ key: string; value: string | string[] }> { | ||
if (Array.isArray(request.headers)) { | ||
// headers are an array [k1, v2, k2, v2] (undici v6+) | ||
for (let i = 0; i < request.headers.length; i += 2) { | ||
const key = request.headers[i]; | ||
if (typeof key !== 'string') { | ||
// Shouldn't happen, but the types don't know that, and let's be safe | ||
continue; | ||
} | ||
yield { key, value: request.headers[i + 1] }; | ||
} | ||
} else if (typeof request.headers === 'string') { | ||
// headers are a raw string (undici v5) | ||
const headers = request.headers.split('\r\n'); | ||
for (const line of headers) { | ||
if (!line) { | ||
continue; | ||
} | ||
const colonIndex = line.indexOf(':'); | ||
if (colonIndex === -1) { | ||
// Invalid header? Probably this can't happen, but again let's be safe. | ||
continue; | ||
} | ||
const key = line.substring(0, colonIndex); | ||
const value = line.substring(colonIndex + 1).trim(); | ||
yield { key, value }; | ||
} | ||
} | ||
} | ||
|
||
// This is the 1st message we receive for each request (fired after request creation). Here we will | ||
// create the span and populate some atttributes, then link the span to the request for further | ||
// span processing | ||
|
@@ -218,25 +254,14 @@ | |
} | ||
|
||
// Get user agent from headers | ||
let userAgent; | ||
if (Array.isArray(request.headers)) { | ||
const idx = request.headers.findIndex( | ||
h => h.toLowerCase() === 'user-agent' | ||
); | ||
if (idx >= 0) { | ||
userAgent = request.headers[idx + 1]; | ||
for (const { key, value } of this.parseRequestHeaders(request)) { | ||
if (key.toLowerCase() === 'user-agent') { | ||
// user-agent should only appear once per the spec, but the library doesn't | ||
// prevent passing it multiple times, so we handle that to be safe. | ||
const userAgent = Array.isArray(value) ? value[0] : value; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (non-blocking) FWIW, from the
So we could consider taking the last value if it is an array. I am fine with the current code, though. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a similar conversation in #2781 (comment) but we did not get into a conclusion. IMO is not a bad idea to follow what other well known tools like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done in 9a7f8d7 |
||
attributes[SemanticAttributes.USER_AGENT_ORIGINAL] = userAgent; | ||
break; | ||
} | ||
} else if (typeof request.headers === 'string') { | ||
const headers = request.headers.split('\r\n'); | ||
const uaHeader = headers.find(h => | ||
h.toLowerCase().startsWith('user-agent') | ||
); | ||
userAgent = | ||
uaHeader && uaHeader.substring(uaHeader.indexOf(':') + 1).trim(); | ||
} | ||
|
||
if (userAgent) { | ||
attributes[SemanticAttributes.USER_AGENT_ORIGINAL] = userAgent; | ||
} | ||
|
||
// Get attributes from the hook if present | ||
|
@@ -330,26 +355,14 @@ | |
config.headersToSpanAttributes.requestHeaders.map(n => n.toLowerCase()) | ||
); | ||
|
||
// headers could be in form | ||
// ['name: value', ...] for v5 | ||
// ['name', 'value', ...] for v6 | ||
const rawHeaders = Array.isArray(request.headers) | ||
? request.headers | ||
: request.headers.split('\r\n'); | ||
rawHeaders.forEach((h, idx) => { | ||
const sepIndex = h.indexOf(':'); | ||
const hasSeparator = sepIndex !== -1; | ||
const name = ( | ||
hasSeparator ? h.substring(0, sepIndex) : h | ||
).toLowerCase(); | ||
const value = hasSeparator | ||
? h.substring(sepIndex + 1) | ||
: rawHeaders[idx + 1]; | ||
|
||
for (const { key, value } of this.parseRequestHeaders(request)) { | ||
const name = key.toLowerCase(); | ||
if (headersToAttribs.has(name)) { | ||
spanAttributes[`http.request.header.${name}`] = value.trim(); | ||
spanAttributes[`http.request.header.${name}`] = value | ||
.toString() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: To be extremely spec picky, https://www.rfc-editor.org/rfc/rfc9110.html#section-5.3 says (emphasis mine):
So I would say it is slightly preferred to have the joining of an array of values use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's go with the preferred separator There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
.trim(); | ||
} | ||
}); | ||
} | ||
} | ||
|
||
span.setAttributes(spanAttributes); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note: Not for this PR, but https://opentelemetry.io/docs/specs/semconv/http/http-spans/ says that
user_agent.original
is Opt-In. I.e. collecting it should be off by default. We should do that in a separate (breaking) PR change.