diff --git a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts
index d2bead618..20df66228 100644
--- a/js/packages/openinference-instrumentation-openai/src/instrumentation.ts
+++ b/js/packages/openinference-instrumentation-openai/src/instrumentation.ts
@@ -70,6 +70,12 @@ const INSTRUMENTATION_NAME = "@arizeai/openinference-instrumentation-openai";
  */
 let _isOpenInferencePatched = false;
 
+/**
+ * Map to store URL information for each request using trace context
+ * Uses trace ID + span ID as the key to avoid concurrent request overwrites
+ */
+const requestUrlMap = new Map<string, { url: string; baseUrl?: string }>();
+
 /**
  * function to check if instrumentation is enabled / disabled
  */
@@ -95,6 +101,128 @@ function getExecContext(span: Span) {
   return execContext;
 }
 
+/**
+ * Extracts URL attributes for debugging purposes (especially useful for Azure)
+ * @param fullUrl The complete URL of the request
+ * @param baseUrl The base URL of the client
+ * @returns Object containing URL attributes for debugging
+ */
+function getUrlAttributes(
+  fullUrl: string,
+  baseUrl?: string,
+): Record<string, string> {
+  const attributes: Record<string, string> = {};
+
+  try {
+    const url = new URL(fullUrl);
+
+    // Always include the full URL for complete debugging context
+    attributes["url.full"] = fullUrl;
+
+    // Extract the path component
+    if (baseUrl) {
+      try {
+        const baseUrlObj = new URL(baseUrl);
+        const fullUrlObj = new URL(fullUrl);
+
+        // If the hosts match, calculate the path difference
+        if (baseUrlObj.hostname === fullUrlObj.hostname) {
+          // For Azure OpenAI, we want to reconstruct the deployment path
+          // baseUrl example: "https://example.openai.azure.com/openai/deployments/gpt-4"
+          // fullUrl example: "https://example.openai.azure.com/chat/completions"
+          // We want to extract the deployment info from baseUrl and combine with the endpoint
+
+          const basePath = baseUrlObj.pathname;
+          const fullPath = fullUrlObj.pathname;
+
+          // Extract deployment information from the base URL
+          if (basePath.includes("/deployments/")) {
+            // Extract the deployment part: "deployments/model-name"
+            const deploymentMatch = basePath.match(/\/deployments\/([^/]+)/);
+            if (deploymentMatch) {
+              const deploymentName = deploymentMatch[1];
+              const endpoint = fullPath.startsWith("/")
+                ? fullPath.substring(1)
+                : fullPath;
+              attributes["url.path"] =
+                `deployments/${deploymentName}/${endpoint}`;
+            } else {
+              // Fallback to just the endpoint
+              attributes["url.path"] = fullPath.startsWith("/")
+                ? fullPath.substring(1)
+                : fullPath;
+            }
+          } else {
+            // Not a deployment URL, use the full path
+            attributes["url.path"] = fullPath.startsWith("/")
+              ? fullPath.substring(1)
+              : fullPath;
+          }
+        } else {
+          // Different hosts, use pathname without leading slash
+          const pathname = url.pathname.startsWith("/")
+            ? url.pathname.substring(1)
+            : url.pathname;
+          attributes["url.path"] = pathname || "/";
+        }
+      } catch {
+        // If URL parsing fails, use the pathname
+        const pathname = url.pathname.startsWith("/")
+          ? url.pathname.substring(1)
+          : url.pathname;
+        attributes["url.path"] = pathname || "/";
+      }
+    } else {
+      const pathname = url.pathname.startsWith("/")
+        ? url.pathname.substring(1)
+        : url.pathname;
+      attributes["url.path"] = pathname || "/";
+    }
+
+    // Safely extract api_version query parameter for Azure
+    if (url.search) {
+      const queryParams = new URLSearchParams(url.search);
+      const apiVersion = queryParams.get("api-version");
+      if (apiVersion) {
+        attributes["url.query.api_version"] = apiVersion;
+      }
+    }
+  } catch (error) {
+    diag.debug("Failed to extract URL attributes", error);
+  }
+
+  return attributes;
+}
+
+/**
+ * Gets URL attributes for a request from stored request information
+ * @param span The span to get URL attributes for
+ * @returns URL attributes object
+ */
+function getStoredUrlAttributes(span: Span): Record<string, string> {
+  try {
+    const spanContext = span.spanContext();
+    const contextKey = `${spanContext.traceId}-${spanContext.spanId}`;
+    const urlInfo = requestUrlMap.get(contextKey);
+    if (urlInfo) {
+      diag.debug("Retrieved URL info from requestUrlMap", {
+        urlInfo,
+        contextKey,
+      });
+      // Clean up after use to prevent memory leaks
+      requestUrlMap.delete(contextKey);
+      return getUrlAttributes(urlInfo.url, urlInfo.baseUrl);
+    } else {
+      diag.debug("No URL info found in requestUrlMap for this span", {
+        contextKey,
+      });
+    }
+  } catch (error) {
+    diag.debug("Failed to get stored URL attributes", error);
+  }
+  return {};
+}
+
 /**
  * Gets the appropriate LLM provider based on the OpenAI client instance
  * Follows the same logic as the Python implementation by checking the baseURL host
@@ -256,6 +384,87 @@ export class OpenAIInstrumentation extends InstrumentationBase {
     // eslint-disable-next-line @typescript-eslint/no-this-alias
     const instrumentation: OpenAIInstrumentation = this;
 
+    // Patch the post method to capture URL information
+    this._wrap(
+      module.OpenAI.prototype,
+      "post",
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      (original: any): any => {
+        return function patchedPost(
+          this: unknown,
+          path: string,
+          // eslint-disable-next-line @typescript-eslint/no-explicit-any
+          body?: any,
+          // eslint-disable-next-line @typescript-eslint/no-explicit-any
+          options?: any,
+        ) {
+          // Store URL information for this specific request
+          try {
+            const clientInstance = this as {
+              baseURL?: string;
+              _client?: { baseURL?: string };
+            };
+
+            let baseUrl: string | undefined;
+            if (
+              clientInstance.baseURL &&
+              typeof clientInstance.baseURL === "string"
+            ) {
+              baseUrl = clientInstance.baseURL;
+            } else if (
+              clientInstance._client?.baseURL &&
+              typeof clientInstance._client.baseURL === "string"
+            ) {
+              baseUrl = clientInstance._client.baseURL;
+            }
+
+            if (baseUrl) {
+              // Construct the full URL with query parameters if available
+              let fullUrl = new URL(path, baseUrl).toString();
+
+              // Add query parameters if they exist in options
+              if (options?.query && typeof options.query === "object") {
+                const url = new URL(fullUrl);
+                Object.entries(options.query).forEach(([key, value]) => {
+                  if (value !== undefined && value !== null) {
+                    url.searchParams.set(key, String(value));
+                  }
+                });
+                fullUrl = url.toString();
+              }
+
+              // Store URL info using the current active span context
+              const activeSpan = trace.getActiveSpan();
+              if (activeSpan) {
+                const spanContext = activeSpan.spanContext();
+                const contextKey = `${spanContext.traceId}-${spanContext.spanId}`;
+                requestUrlMap.set(contextKey, { url: fullUrl, baseUrl });
+                diag.debug("Stored URL info for request", {
+                  fullUrl,
+                  baseUrl,
+                  contextKey,
+                });
+                // Clean up old entries to prevent memory leaks
+                if (requestUrlMap.size > 1000) {
+                  const oldestKey = requestUrlMap.keys().next().value;
+                  if (oldestKey) {
+                    requestUrlMap.delete(oldestKey);
+                  }
+                }
+              }
+            }
+          } catch (error) {
+            diag.debug(
+              "Failed to capture URL information in post method",
+              error,
+            );
+          }
+
+          return original.apply(this, [path, body, options]);
+        };
+      },
+    );
+
     // Patch create chat completions
     type ChatCompletionCreateType =
       typeof module.OpenAI.Chat.Completions.prototype.create;
@@ -324,6 +533,8 @@ export class OpenAIInstrumentation extends InstrumentationBase {
               [SemanticConventions.LLM_MODEL_NAME]: result.model,
               ...getChatCompletionLLMOutputMessagesAttributes(result),
               ...getUsageAttributes(result),
+              // Add URL attributes now that the request has completed
+              ...getStoredUrlAttributes(span),
             });
             span.setStatus({ code: SpanStatusCode.OK });
             span.end();
@@ -410,6 +621,8 @@ export class OpenAIInstrumentation extends InstrumentationBase {
               [SemanticConventions.LLM_MODEL_NAME]: result.model,
               ...getCompletionOutputValueAndMimeType(result),
               ...getUsageAttributes(result),
+              // Add URL attributes now that the request has completed
+              ...getStoredUrlAttributes(span),
             });
             span.setStatus({ code: SpanStatusCode.OK });
             span.end();
@@ -481,6 +694,8 @@ export class OpenAIInstrumentation extends InstrumentationBase {
            span.setAttributes({
              // Do not record the output data as it can be large
              ...getEmbeddingEmbeddingsAttributes(result),
+             // Add URL attributes now that the request has completed
+             ...getStoredUrlAttributes(span),
            });
          }
          span.setStatus({ code: SpanStatusCode.OK });
@@ -566,6 +781,8 @@ export class OpenAIInstrumentation extends InstrumentationBase {
               [SemanticConventions.LLM_MODEL_NAME]: result.model,
               ...getResponsesOutputMessagesAttributes(result),
               ...getResponsesUsageAttributes(result),
+              // Add URL attributes now that the request has completed
+              ...getStoredUrlAttributes(span),
             });
             span.setStatus({ code: SpanStatusCode.OK });
             span.end();
@@ -614,6 +831,7 @@ export class OpenAIInstrumentation extends InstrumentationBase {
     moduleVersion?: string,
   ) {
     diag.debug(`Removing patch for ${MODULE_NAME}@${moduleVersion}`);
+    this._unwrap(moduleExports.OpenAI.prototype, "post");
     this._unwrap(moduleExports.OpenAI.Chat.Completions.prototype, "create");
     this._unwrap(moduleExports.OpenAI.Completions.prototype, "create");
     this._unwrap(moduleExports.OpenAI.Embeddings.prototype, "create");
diff --git a/js/packages/openinference-instrumentation-openai/test/openai.test.ts b/js/packages/openinference-instrumentation-openai/test/openai.test.ts
index 3d31305f5..aa4ace03e 100644
--- a/js/packages/openinference-instrumentation-openai/test/openai.test.ts
+++ b/js/packages/openinference-instrumentation-openai/test/openai.test.ts
@@ -1650,4 +1650,17 @@ describe("OpenAIInstrumentation with a custom tracer provider", () => {
       expect(span.attributes["llm.model_name"]).toBe("gpt-3.5-turbo-0613");
     });
   });
+
+  describe("URL extraction", () => {
+    it("should detect Azure provider correctly", () => {
+      const azureClient = new OpenAI({
+        apiKey: "test-key",
+        baseURL:
+          "https://test-resource.openai.azure.com/openai/deployments/gpt-4",
+      });
+
+      // Just verify the client was created with Azure base URL
+      expect(azureClient.baseURL).toContain("openai.azure.com");
+    });
+  });
 });
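
Illustrative note (not part of the patch): for the Azure example quoted in the getUrlAttributes comments, the helper is expected to emit a deployment-qualified path plus the api-version query value. The sketch below reproduces just that URL arithmetic with the standard WHATWG URL API; the api-version value is invented for illustration, and the attribute keys mirror the ones set in the diff above.

// Standalone sketch (runs under Node 18+, where URL is a global); the URLs
// mirror the Azure example in the diff's comments, and "2024-02-01" is a
// made-up api-version used only for illustration.
const baseURL = new URL(
  "https://example.openai.azure.com/openai/deployments/gpt-4",
);
const requestURL = new URL(
  "https://example.openai.azure.com/chat/completions?api-version=2024-02-01",
);

// "gpt-4" -- the deployment segment pulled from the client's baseURL
const deployment = baseURL.pathname.match(/\/deployments\/([^/]+)/)?.[1];
// "chat/completions" -- the endpoint path without its leading slash
const endpoint = requestURL.pathname.replace(/^\//, "");

console.log({
  "url.full": requestURL.toString(),
  "url.path": `deployments/${deployment}/${endpoint}`, // "deployments/gpt-4/chat/completions"
  "url.query.api_version": requestURL.searchParams.get("api-version"), // "2024-02-01"
});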