@@ -13,6 +13,8 @@ import {
13
13
sendRealtimeAudioData ,
14
14
validateUUID ,
15
15
} from "../lib/oto-api" ;
16
+ import { handleFormatTranscript , handleTranscriptBeautify , handleTranscriptSegment , TranscriptSegment } from "../lib/transcript" ;
17
+ import { handleOtoWsTranscribe , handleOtoWsTranscriptBeautify } from "../lib/oto-websocket" ;
16
18
17
19
/**
18
20
* 日常会話録音画面 - リアルタイム音声ストリーミング版
@@ -30,6 +32,7 @@ export default function RecordPage() {
30
32
const [ isStreaming , setIsStreaming ] = useState ( false ) ;
31
33
const [ hasPermission , setHasPermission ] = useState ( false ) ;
32
34
const [ volume , setVolume ] = useState ( 0 ) ;
35
+ const [ lastVolumeSetDateTime , setLastVolumeSetDateTime ] = useState < Date | null > ( null ) ;
33
36
34
37
// Audio streaming references
35
38
const streamRef = useRef < MediaStream | null > ( null ) ;
@@ -38,6 +41,15 @@ export default function RecordPage() {
38
41
const audioContextRef = useRef < AudioContext | null > ( null ) ;
39
42
const animationFrameRef = useRef < number | null > ( null ) ;
40
43
44
+ // Transcript segments held in React state, plus a ref to the scrollable transcript container.
45
+ const [ transcriptSegments , setTranscriptSegments ] = useState < TranscriptSegment [ ] > ( [ ] ) ;
46
+ const transcriptContentRef = useRef < HTMLDivElement > ( null ) ;
47
+ useEffect ( ( ) => { // Auto-scroll: runs whenever transcriptSegments changes.
48
+ if ( transcriptContentRef . current && transcriptSegments . length > 0 ) { // Skip when the container is not mounted or there are no segments yet.
49
+ transcriptContentRef . current . scrollTop = transcriptContentRef . current . scrollHeight ; // Jump to the bottom of the container.
50
+ }
51
+ } , [ transcriptSegments ] ) ;
52
+
41
53
// Audio streaming statistics
42
54
const [ audioStats , setAudioStats ] = useState ( {
43
55
totalChunks : 0 ,
@@ -91,7 +103,9 @@ export default function RecordPage() {
91
103
}
92
104
const rms = Math . sqrt ( sum / bufferLength ) ;
93
105
const volumeLevel = Math . round ( ( rms / 255 ) * 100 ) ;
106
+
94
107
setVolume ( volumeLevel ) ;
108
+ setLastVolumeSetDateTime ( new Date ( ) ) ; // Stores the time of this volume update. NOTE(review): a fresh Date object is stored on every pass, and this routine reschedules itself with requestAnimationFrame right after, so this looks like one state update per animation frame. lastVolumeSetDateTime is not read anywhere in the visible part of the file; confirm it is needed, or consider a ref.
95
109
96
110
// Continue monitoring
97
111
animationFrameRef . current = requestAnimationFrame ( monitorVolume ) ;
@@ -261,6 +275,7 @@ export default function RecordPage() {
261
275
} , 10000 ) ;
262
276
263
277
let authTimeout : NodeJS . Timeout | null = null ;
278
+ let transcriptSegments : TranscriptSegment [ ] = [ ] ; // Running list of segments, reassigned by the "transcribe" and "transcript-beautify" message cases. NOTE(review): it has the same name as the `transcriptSegments` React state declared in this component and appears to shadow it in this scope; confirm this is intentional, or rename for clarity.
264
279
265
280
ws . onopen = ( ) => {
266
281
clearTimeout ( connectionTimeout ) ;
@@ -422,14 +437,18 @@ export default function RecordPage() {
422
437
break ;
423
438
case "transcribe" : // Incoming transcription message: log it, then fold it into the segment list.
424
439
console . log ( "📝 Transcription:" , message . data ?. transcript ) ;
425
- if ( message . data ?. transcript ) {
426
- setTranscript ( ( prev ) => prev + message . data . transcript ) ;
440
+ if ( message . data ) {
441
+ const segment = handleOtoWsTranscribe ( message ) ; // Presumably converts the raw message into a TranscriptSegment; verify in ../lib/oto-websocket.
442
+ transcriptSegments = handleTranscriptSegment ( transcriptSegments , segment ) ; // Reassigns the `let transcriptSegments` list (not the React state binding) with the helper's result.
443
+ setTranscriptSegments ( transcriptSegments ) ; // Publishes the updated list to React state.
427
444
}
428
445
break ;
429
446
case "transcript-beautify" : // Incoming beautified-transcript message: log it, then apply it to the segment list.
430
447
console . log ( "✨ Beautified transcript:" , message . data ?. transcript ) ;
431
- if ( message . data ?. transcript ) {
432
- setTranscript ( message . data . transcript ) ;
448
+ if ( message . data ) {
449
+ const beautifyData = handleOtoWsTranscriptBeautify ( message ) ; // Presumably extracts the beautify payload from the message; verify in ../lib/oto-websocket.
450
+ transcriptSegments = handleTranscriptBeautify ( transcriptSegments , beautifyData ) ; // Reassigns the `let transcriptSegments` list with the helper's result.
451
+ setTranscriptSegments ( transcriptSegments ) ; // Publishes the updated list to React state.
433
452
}
434
453
break ;
435
454
case "detect-action" :
@@ -706,7 +725,7 @@ export default function RecordPage() {
706
725
const wsState = websocketRef . current ?. readyState ;
707
726
if ( wsState === WebSocket . OPEN ) {
708
727
try {
709
- console . log ( `🎤 Sending audio chunk (${ event . data . size } bytes) - WebSocket state: ${ wsState } ` ) ;
728
+ // console.log(`🎤 Sending audio chunk (${event.data.size} bytes) - WebSocket state: ${wsState}`);
710
729
// Send audio data in JSON format (not binary) for server compatibility
711
730
sendRealtimeAudioData ( websocketRef . current , event . data , false ) ;
712
731
} catch ( error ) {
@@ -942,11 +961,26 @@ export default function RecordPage() {
942
961
{ /* Recording Controls */ }
943
962
< div className = "text-center mb-8" >
944
963
{ /* Large Microphone Button - Click to Start/Stop */ }
945
- < div className = "relative" >
964
+ < div className = "relative mb-4" >
965
+ { /* Volume Ring Indicator */ }
966
+ { isStreaming && volume > 0 && (
967
+ < div className = "absolute w-full h-full flex items-center justify-center top-0 left-0" >
968
+ < div
969
+ className = "rounded-full border-4 border-red-200 bg-red-200"
970
+ style = { {
971
+ opacity : Math . min ( volume / 50 , 1 ) ,
972
+ //animationDuration: `${Math.max(0.5, 2 - volume / 50)}s`,
973
+ width : `auto` ,
974
+ height : `calc(100% + ${ volume * 0.7 } px)` ,
975
+ aspectRatio : "1/1" ,
976
+ } }
977
+ />
978
+ </ div >
979
+ ) }
946
980
< button
947
981
onClick = { isStreaming ? stopRecording : startRecording }
948
982
disabled = { connectionStatus === "connecting" }
949
- className = { `inline-flex items-center justify-center w-32 h-32 rounded-full mb-4 transition-all duration-300 transform hover:scale-105 active:scale-95 focus:outline-none focus:ring-4 focus:ring-opacity-50 ${
983
+ className = { `inline-flex items-center justify-center w-32 h-32 rounded-full transition-all duration-300 transform hover:scale-105 active:scale-95 focus:outline-none focus:ring-4 focus:ring-opacity-50 ${
950
984
isStreaming
951
985
? "bg-red-500 hover:bg-red-600 animate-pulse focus:ring-red-300"
952
986
: connectionStatus === "authenticated"
@@ -963,17 +997,6 @@ export default function RecordPage() {
963
997
< Mic size = { 40 } className = "text-white" />
964
998
) }
965
999
</ button >
966
-
967
- { /* Volume Ring Indicator */ }
968
- { isStreaming && volume > 0 && (
969
- < div
970
- className = "absolute inset-0 rounded-full border-4 border-green-400 animate-ping"
971
- style = { {
972
- opacity : Math . min ( volume / 50 , 1 ) ,
973
- animationDuration : `${ Math . max ( 0.5 , 2 - volume / 50 ) } s` ,
974
- } }
975
- />
976
- ) }
977
1000
</ div >
978
1001
979
1002
{ /* Status Text */ }
@@ -1002,7 +1025,7 @@ export default function RecordPage() {
1002
1025
1003
1026
< div className = "text-sm text-gray-500" >
1004
1027
{ isStreaming
1005
- ? "Click the microphone to stop streaming"
1028
+ ? "Click the button to stop streaming"
1006
1029
: connectionStatus === "authenticated"
1007
1030
? "Click the microphone to start streaming"
1008
1031
: connectionStatus === "connecting"
@@ -1015,7 +1038,7 @@ export default function RecordPage() {
1015
1038
{ ! isStreaming ? (
1016
1039
< Button
1017
1040
onClick = { startRecording }
1018
- className = "px-6 py-2 text-sm"
1041
+ className = "px-6 py-2 text-sm hidden "
1019
1042
size = "sm"
1020
1043
disabled = { connectionStatus === "connecting" }
1021
1044
variant = "outline"
@@ -1026,7 +1049,7 @@ export default function RecordPage() {
1026
1049
< Button
1027
1050
onClick = { stopRecording }
1028
1051
variant = "destructive"
1029
- className = "px-6 py-2 text-sm"
1052
+ className = "px-6 py-2 text-sm hidden "
1030
1053
size = "sm"
1031
1054
>
1032
1055
< Square size = { 16 } className = "mr-1" />
@@ -1043,8 +1066,8 @@ export default function RecordPage() {
1043
1066
< h3 className = "text-lg font-semibold text-gray-900 mb-3" >
1044
1067
Real-time Transcription
1045
1068
</ h3 >
1046
- < div className = "text-gray-700 leading-relaxed whitespace-pre-wrap" >
1047
- { transcript }
1069
+ < div className = "text-gray-700 leading-relaxed whitespace-pre-wrap overflow-y-auto max-h-[320px] scroll-smooth" ref = { transcriptContentRef } >
1070
+ { handleFormatTranscript ( transcriptSegments ) }
1048
1071
</ div >
1049
1072
</ div >
1050
1073
) }
0 commit comments