From 7b3049b44bfc451433caca0074dd55eec3502171 Mon Sep 17 00:00:00 2001 From: leq6c Date: Sun, 15 Jun 2025 10:37:24 +0900 Subject: [PATCH] frontend: fix transcript glitch --- frontend_vapi/lib/oto-websocket.ts | 33 ++++++++++ frontend_vapi/lib/time.ts | 10 +++ frontend_vapi/lib/transcript.ts | 97 ++++++++++++++++++++++++++++++ frontend_vapi/pages/record.tsx | 69 ++++++++++++++------- 4 files changed, 186 insertions(+), 23 deletions(-) create mode 100644 frontend_vapi/lib/oto-websocket.ts create mode 100644 frontend_vapi/lib/time.ts create mode 100644 frontend_vapi/lib/transcript.ts diff --git a/frontend_vapi/lib/oto-websocket.ts b/frontend_vapi/lib/oto-websocket.ts new file mode 100644 index 0000000..51c77a4 --- /dev/null +++ b/frontend_vapi/lib/oto-websocket.ts @@ -0,0 +1,33 @@ +import { TranscriptBeautifyData, TranscriptSegment } from "./transcript"; + +export interface WebSocketMessage { + type: "transcribe" | "transcript-beautify" | "detect-action" | "error"; + data?: any; + message?: string; +} + +export const handleOtoWsTranscribe = (message: WebSocketMessage) => { + const segment: TranscriptSegment = { + audioStart: message.data.audioStart || 0, + audioEnd: message.data.audioEnd || 0, + transcript: message.data.transcript, + finalized: message.data.finalized, + beautified: false, + id: message.data.finalized + ? `${message.data.audioStart || 0}-${message.data.audioEnd || 0}` + : "partial-current", // Use simple ID for partial transcripts + }; + + return segment; +}; + +export const handleOtoWsTranscriptBeautify = (message: WebSocketMessage) => { + const beautifyData: TranscriptBeautifyData = { + audioStart: message.data.audioStart, + audioEnd: message.data.audioEnd, + transcript: message.data.transcript, + segments: message.data.segments, + }; + + return beautifyData; +}; diff --git a/frontend_vapi/lib/time.ts b/frontend_vapi/lib/time.ts new file mode 100644 index 0000000..5a47f06 --- /dev/null +++ b/frontend_vapi/lib/time.ts @@ -0,0 +1,10 @@ +export const formatTimestamp = (milliseconds: number): string => { + const totalSeconds = Math.floor(milliseconds / 1000); + const hours = Math.floor(totalSeconds / 3600); + const minutes = Math.floor((totalSeconds % 3600) / 60); + const seconds = totalSeconds % 60; + + return `${hours.toString().padStart(2, "0")}:${minutes + .toString() + .padStart(2, "0")}:${seconds.toString().padStart(2, "0")}`; +}; diff --git a/frontend_vapi/lib/transcript.ts b/frontend_vapi/lib/transcript.ts new file mode 100644 index 0000000..b29402a --- /dev/null +++ b/frontend_vapi/lib/transcript.ts @@ -0,0 +1,97 @@ +import { formatTimestamp } from "./time"; + +export interface TranscriptSegment { + audioStart: number; + audioEnd: number; + transcript: string; + finalized: boolean; + beautified: boolean; + id?: string; // Add unique identifier for tracking partial updates +} + +export interface TranscriptBeautifyData { + audioStart: number; + audioEnd: number; + transcript: string; + segments: TranscriptSegment[]; +} + +export const handleTranscriptSegment = ( + prev: TranscriptSegment[], + segment: TranscriptSegment +): TranscriptSegment[] => { + if (segment.finalized) { + // For finalized segments, remove any existing partial transcript and add the finalized one + const filteredSegments = prev.filter((s) => s.finalized); // Remove partial transcripts + return [ + ...filteredSegments, + { ...segment, id: `${segment.audioStart}-${segment.audioEnd}` }, + ]; + } else { + // For partial segments, replace any existing partial transcript + const finalizedSegments = prev.filter((s) => s.finalized); // Keep only finalized segments + return [ + ...finalizedSegments, + { + ...segment, + id: "partial-current", + audioStart: 999999999, + audioEnd: 999999999, + }, + ]; // Add current partial + } +}; + +export const handleTranscriptBeautify = ( + prev: TranscriptSegment[], + beautifyData: TranscriptBeautifyData +): TranscriptSegment[] => { + // Filter out segments that fall within the beautified range + const filteredSegments = prev.filter( + (segment) => + segment.audioEnd <= beautifyData.audioStart || + segment.audioStart >= beautifyData.audioEnd + ); + + const beautifiedSegments = beautifyData.segments.map((segment) => ({ + audioStart: segment.audioStart, + audioEnd: segment.audioEnd, + transcript: segment.transcript, + finalized: true, + beautified: true, + })); + + // Insert in chronological order + const newSegments = [...filteredSegments, ...beautifiedSegments]; + return newSegments.sort((a, b) => a.audioStart - b.audioStart); +}; + +export const handleFormatTranscript = (transcriptSegments: TranscriptSegment[]): string => { + return transcriptSegments.sort((a, b) => a.audioStart - b.audioStart) + .map(segment => { + let ret = ''; + + // For partial transcripts, don't show timing (might not be meaningful) + if (segment.finalized) { + const startTime = formatTimestamp(segment.audioStart); + const endTime = formatTimestamp(segment.audioEnd); + ret = `[${startTime}-${endTime}]`; + } else { + ret = '[Live]'; // Show "Live" for partial transcripts + } + + // Add status indicators + if (segment.beautified) { + ret += ' ✓'; // Beautified + } else if (segment.finalized) { + ret += ' *'; // Finalized but not beautified + } else { + ret += ' ~'; // Partial/interim transcript + } + + ret += `\n${segment.transcript}`; + + return ret; + }) + .join('\n\n'); +}; \ No newline at end of file diff --git a/frontend_vapi/pages/record.tsx b/frontend_vapi/pages/record.tsx index 71d718d..56af6aa 100644 --- a/frontend_vapi/pages/record.tsx +++ b/frontend_vapi/pages/record.tsx @@ -13,6 +13,8 @@ import { sendRealtimeAudioData, validateUUID, } from "../lib/oto-api"; +import { handleFormatTranscript, handleTranscriptBeautify, handleTranscriptSegment, TranscriptSegment } from "../lib/transcript"; +import { handleOtoWsTranscribe, handleOtoWsTranscriptBeautify } from "../lib/oto-websocket"; /** * 日常会話録音画面 - リアルタイム音声ストリーミング版 @@ -30,6 +32,7 @@ export default function RecordPage() { const [isStreaming, setIsStreaming] = useState(false); const [hasPermission, setHasPermission] = useState(false); const [volume, setVolume] = useState(0); + const [lastVolumeSetDateTime, setLastVolumeSetDateTime] = useState(null); // Audio streaming references const streamRef = useRef(null); @@ -38,6 +41,15 @@ export default function RecordPage() { const audioContextRef = useRef(null); const animationFrameRef = useRef(null); + // transcript segments + const [transcriptSegments, setTranscriptSegments] = useState([]); + const transcriptContentRef = useRef(null); + useEffect(() => { + if (transcriptContentRef.current && transcriptSegments.length > 0) { + transcriptContentRef.current.scrollTop = transcriptContentRef.current.scrollHeight; + } + }, [transcriptSegments]); + // Audio streaming statistics const [audioStats, setAudioStats] = useState({ totalChunks: 0, @@ -91,7 +103,9 @@ export default function RecordPage() { } const rms = Math.sqrt(sum / bufferLength); const volumeLevel = Math.round((rms / 255) * 100); + setVolume(volumeLevel); + setLastVolumeSetDateTime(new Date()); // Continue monitoring animationFrameRef.current = requestAnimationFrame(monitorVolume); @@ -261,6 +275,7 @@ export default function RecordPage() { }, 10000); let authTimeout: NodeJS.Timeout | null = null; + let transcriptSegments: TranscriptSegment[] = []; ws.onopen = () => { clearTimeout(connectionTimeout); @@ -422,14 +437,18 @@ export default function RecordPage() { break; case "transcribe": console.log("📝 Transcription:", message.data?.transcript); - if (message.data?.transcript) { - setTranscript((prev) => prev + message.data.transcript); + if (message.data) { + const segment = handleOtoWsTranscribe(message); + transcriptSegments = handleTranscriptSegment(transcriptSegments, segment); + setTranscriptSegments(transcriptSegments); } break; case "transcript-beautify": console.log("✨ Beautified transcript:", message.data?.transcript); - if (message.data?.transcript) { - setTranscript(message.data.transcript); + if (message.data) { + const beautifyData = handleOtoWsTranscriptBeautify(message); + transcriptSegments = handleTranscriptBeautify(transcriptSegments, beautifyData); + setTranscriptSegments(transcriptSegments); } break; case "detect-action": @@ -706,7 +725,7 @@ export default function RecordPage() { const wsState = websocketRef.current?.readyState; if (wsState === WebSocket.OPEN) { try { - console.log(`🎤 Sending audio chunk (${event.data.size} bytes) - WebSocket state: ${wsState}`); + //console.log(`🎤 Sending audio chunk (${event.data.size} bytes) - WebSocket state: ${wsState}`); // Send audio data in JSON format (not binary) for server compatibility sendRealtimeAudioData(websocketRef.current, event.data, false); } catch (error) { @@ -942,11 +961,26 @@ export default function RecordPage() { {/* Recording Controls */}
{/* Large Microphone Button - Click to Start/Stop */} -
+
+ {/* Volume Ring Indicator */} + {isStreaming && volume > 0 && ( +
+
+
+ )} - - {/* Volume Ring Indicator */} - {isStreaming && volume > 0 && ( -
- )}
{/* Status Text */} @@ -1002,7 +1025,7 @@ export default function RecordPage() {
{isStreaming - ? "Click the microphone to stop streaming" + ? "Click the button to stop streaming" : connectionStatus === "authenticated" ? "Click the microphone to start streaming" : connectionStatus === "connecting" @@ -1015,7 +1038,7 @@ export default function RecordPage() { {!isStreaming ? (