Skip to content

Commit d2ec162

Browse files
authored
Merge pull request #925 from anthhub/feat/mcp-audio-comp
Add audio URL detection and processing in markdown plugins
2 parents 7578ab2 + e56a738 commit d2ec162

File tree

2 files changed

+330
-6
lines changed

2 files changed

+330
-6
lines changed

packages/ai-workspace-common/src/components/markdown/plugins/mcp-call/rehypePlugin.ts

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ const BASE64_IMAGE_URL_REGEX =
2020
const HTTP_IMAGE_URL_REGEX =
2121
/https?:\/\/[^\s"'<>]+\.(?<format>png|jpeg|jpg|gif|webp|svg)[^\s"'<>]*/i;
2222

23+
/**
 * Case-insensitive pattern for HTTP/HTTPS links that point at an audio file.
 *
 * A match starts at `http://` or `https://`, runs over characters that are not
 * whitespace, quotes or angle brackets, and must contain a dot followed by one
 * of the listed extensions. The extension is exposed through the named group
 * `format`; any trailing non-delimiter characters (for example a query string)
 * remain part of the matched URL.
 */
const HTTP_AUDIO_URL_REGEX = /https?:\/\/[^\s"'<>]+\.(?<format>mp3|wav|ogg|flac|m4a|aac)[^\s"'<>]*/i;
26+
2327
/**
2428
* Utility function to safely extract content from regex matches
2529
* @param content The content to extract from
@@ -65,6 +69,27 @@ const extractImageUrl = (
6569
return { url: undefined, format: undefined, isHttp: false };
6670
};
6771

72+
/**
 * Locate an HTTP(S) audio link inside a piece of text.
 * @param str The text to scan
 * @returns On a hit: the matched URL, the extension captured by the regex's
 * `format` group, and `isHttp: true`. Otherwise `url` and `format` are
 * undefined and `isHttp` is false.
 */
const extractAudioUrl = (
  str: string,
): { url: string | undefined; format: string | undefined; isHttp: boolean } => {
  const found = HTTP_AUDIO_URL_REGEX.exec(str);
  const matchedUrl = found?.[0];
  const namedGroups = found?.groups;

  // Bail out early when nothing matched (or the match carries no named groups)
  if (!namedGroups || !matchedUrl) {
    return { url: undefined, format: undefined, isHttp: false };
  }

  return { url: matchedUrl, format: namedGroups.format, isHttp: true };
};
92+
6893
/**
6994
* Rehype plugin to process tool_use tags in markdown
7095
* When parsing <tool_use> tags, if a <result> exists, extract both <arguments> and <result> and put them on the same node property.
@@ -106,6 +131,12 @@ function rehypePlugin() {
106131
let isHttpUrl = false;
107132
let imageNameFromArgs = 'image'; // Default image name
108133

134+
// Attempt to find and process audio data in the result
135+
let audioUrlFromDetails: string | undefined;
136+
let audioFormatFromDetails: string | undefined;
137+
let _isAudioHttpUrl = false;
138+
let audioNameFromArgs = 'audio'; // Default audio name
139+
109140
// 1. Directly search for image URL in the result string
110141
const { url, format, isHttp } = extractImageUrl(resultStr);
111142
if (url) {
@@ -129,6 +160,30 @@ function rehypePlugin() {
129160
}
130161
}
131162

163+
// 1. Directly search for audio URL in the result string
164+
const audioResult = extractAudioUrl(resultStr);
165+
if (audioResult.url) {
166+
audioUrlFromDetails = audioResult.url;
167+
audioFormatFromDetails = audioResult.format;
168+
// The _isAudioHttpUrl variable is not needed here; it is kept only for structural consistency
169+
_isAudioHttpUrl = true;
170+
} else {
171+
// 2. If direct search fails, try to parse JSON and search in the stringified JSON result
172+
try {
173+
const resultObj = JSON.parse(resultStr);
174+
const resultJsonStr = JSON.stringify(resultObj);
175+
const jsonAudioResult = extractAudioUrl(resultJsonStr);
176+
177+
if (jsonAudioResult.url) {
178+
audioUrlFromDetails = jsonAudioResult.url;
179+
audioFormatFromDetails = jsonAudioResult.format;
180+
_isAudioHttpUrl = jsonAudioResult.isHttp;
181+
}
182+
} catch (_e) {
183+
// Not a JSON result, or JSON parsing failed
184+
}
185+
}
186+
132187
if (imageUrlFromDetails && imageFormatFromDetails) {
133188
// Set different attributes based on whether it's an HTTP link or not
134189
if (isHttpUrl) {
@@ -165,6 +220,40 @@ function rehypePlugin() {
165220
attributes['data-tool-image-name'] =
166221
`${imageNameFromArgs}.${imageFormatFromDetails}`;
167222
}
223+
224+
// Handle audio URL if found
225+
if (audioUrlFromDetails && audioFormatFromDetails) {
226+
// Set audio URL attribute
227+
attributes['data-tool-audio-http-url'] = audioUrlFromDetails;
228+
229+
// Attempt to get audio name from arguments
230+
if (argsStr) {
231+
try {
232+
const argsObj = JSON.parse(argsStr);
233+
if (typeof argsObj.params === 'string') {
234+
const paramsObj = JSON.parse(argsObj.params);
235+
if (paramsObj && typeof paramsObj.name === 'string') {
236+
const trimmedName = paramsObj.name.trim();
237+
if (trimmedName) {
238+
// Ensure non-empty name after trimming
239+
audioNameFromArgs = trimmedName;
240+
}
241+
}
242+
} else if (argsObj && typeof argsObj.name === 'string') {
243+
const trimmedName = argsObj.name.trim();
244+
if (trimmedName) {
245+
// Ensure non-empty name after trimming
246+
audioNameFromArgs = trimmedName;
247+
}
248+
}
249+
} catch (_e) {
250+
// Argument parsing failed
251+
}
252+
}
253+
attributes['data-tool-audio-name'] =
254+
`${audioNameFromArgs}.${audioFormatFromDetails}`;
255+
attributes['data-tool-audio-format'] = audioFormatFromDetails;
256+
}
168257
}
169258

170259
// Create a new node with the extracted data for tool_use
@@ -250,6 +339,12 @@ function rehypePlugin() {
250339
let isHttpUrl = false;
251340
let imageNameFromArgs = 'image'; // Default image name
252341

342+
// Attempt to find and process audio data in the result
343+
let audioUrlFromDetails: string | undefined;
344+
let audioFormatFromDetails: string | undefined;
345+
let _isAudioHttpUrl = false;
346+
let audioNameFromArgs = 'audio'; // Default audio name
347+
253348
// Directly search for image URL in the result string
254349
const { url, format, isHttp } = extractImageUrl(resultStr);
255350
if (url) {
@@ -273,6 +368,30 @@ function rehypePlugin() {
273368
}
274369
}
275370

371+
// Directly search for audio URL in the result string
372+
const audioResult = extractAudioUrl(resultStr);
373+
if (audioResult.url) {
374+
audioUrlFromDetails = audioResult.url;
375+
audioFormatFromDetails = audioResult.format;
376+
// The _isAudioHttpUrl variable is not needed here; it is kept only for structural consistency
377+
_isAudioHttpUrl = true;
378+
} else {
379+
// If direct search fails, try to parse JSON and search in the stringified JSON result
380+
try {
381+
const resultObj = JSON.parse(resultStr);
382+
const resultJsonStr = JSON.stringify(resultObj);
383+
const jsonAudioResult = extractAudioUrl(resultJsonStr);
384+
385+
if (jsonAudioResult.url) {
386+
audioUrlFromDetails = jsonAudioResult.url;
387+
audioFormatFromDetails = jsonAudioResult.format;
388+
_isAudioHttpUrl = jsonAudioResult.isHttp;
389+
}
390+
} catch (_e) {
391+
// Not a JSON result, or JSON parsing failed
392+
}
393+
}
394+
276395
if (imageUrlFromDetails && imageFormatFromDetails) {
277396
// Set different attributes based on whether it's an HTTP link or not
278397
if (isHttpUrl) {
@@ -307,6 +426,40 @@ function rehypePlugin() {
307426
attributes['data-tool-image-name'] =
308427
`${imageNameFromArgs}.${imageFormatFromDetails}`;
309428
}
429+
430+
// Handle audio URL if found
431+
if (audioUrlFromDetails && audioFormatFromDetails) {
432+
// Set audio URL attribute
433+
attributes['data-tool-audio-http-url'] = audioUrlFromDetails;
434+
435+
// Attempt to get audio name from arguments
436+
if (argsStr) {
437+
try {
438+
const argsObj = JSON.parse(argsStr);
439+
if (typeof argsObj.params === 'string') {
440+
const paramsObj = JSON.parse(argsObj.params);
441+
if (paramsObj && typeof paramsObj.name === 'string') {
442+
const trimmedName = paramsObj.name.trim();
443+
if (trimmedName) {
444+
// Ensure non-empty name after trimming
445+
audioNameFromArgs = trimmedName;
446+
}
447+
}
448+
} else if (argsObj && typeof argsObj.name === 'string') {
449+
const trimmedName = argsObj.name.trim();
450+
if (trimmedName) {
451+
// Ensure non-empty name after trimming
452+
audioNameFromArgs = trimmedName;
453+
}
454+
}
455+
} catch (_e) {
456+
// Argument parsing failed
457+
}
458+
}
459+
attributes['data-tool-audio-name'] =
460+
`${audioNameFromArgs}.${audioFormatFromDetails}`;
461+
attributes['data-tool-audio-format'] = audioFormatFromDetails;
462+
}
310463
}
311464

312465
// Create a new node with the extracted data for tool_use

0 commit comments

Comments
 (0)