1
1
package org .myrobotlab .service ;
2
2
3
- import org .myrobotlab .framework .Service ;
4
- import org .myrobotlab .service .config .ServiceConfig ;
3
+ import io .github .givimad .whisperjni .WhisperContext ;
4
+ import io .github .givimad .whisperjni .WhisperFullParams ;
5
+ import io .github .givimad .whisperjni .WhisperJNI ;
6
+ import org .myrobotlab .framework .Platform ;
7
+ import org .myrobotlab .service .abstracts .AbstractSpeechRecognizer ;
8
+ import org .myrobotlab .service .config .LlamaConfig ;
9
+ import org .myrobotlab .service .config .WhisperConfig ;
10
+ import org .myrobotlab .service .data .Locale ;
11
+
12
+ import javax .sound .sampled .AudioFormat ;
13
+ import javax .sound .sampled .AudioSystem ;
14
+ import javax .sound .sampled .Line ;
15
+ import javax .sound .sampled .LineUnavailableException ;
16
+ import javax .sound .sampled .Mixer ;
17
+ import javax .sound .sampled .TargetDataLine ;
18
+ import java .io .File ;
19
+ import java .io .FileOutputStream ;
20
+ import java .io .IOException ;
21
+ import java .net .URL ;
22
+ import java .nio .ByteBuffer ;
23
+ import java .nio .ByteOrder ;
24
+ import java .nio .ShortBuffer ;
25
+ import java .nio .channels .Channels ;
26
+ import java .nio .channels .FileChannel ;
27
+ import java .nio .channels .ReadableByteChannel ;
28
+ import java .nio .file .Path ;
29
+ import java .util .Map ;
30
+
31
+ public class Whisper extends AbstractSpeechRecognizer <WhisperConfig > {
32
+ private transient WhisperJNI whisper ;
33
+
34
+ private transient WhisperContext ctx ;
35
+
36
+ private transient WhisperFullParams params ;
37
+
38
+ private transient Thread listeningThread = new Thread ();
39
+
5
40
6
- public class Whisper extends Service <ServiceConfig > {
7
41
/**
8
42
* Constructor of service, reservedkey typically is a services name and inId
9
43
* will be its process id
@@ -14,4 +48,144 @@ public class Whisper extends Service<ServiceConfig> {
14
48
public Whisper(String reservedKey, String inId) {
  // No service-specific construction work: both arguments are handed
  // straight to the superclass constructor.
  super(reservedKey, inId);
}
51
+
52
+ public void loadModel (String modelPath ) {
53
+ try {
54
+ whisper = new WhisperJNI ();
55
+ WhisperJNI .loadLibrary ();
56
+ ctx = whisper .init (Path .of (modelPath ));
57
+ } catch (IOException e ) {
58
+ throw new RuntimeException (e );
59
+ }
60
+
61
+ params = new WhisperFullParams ();
62
+ params .nThreads = Platform .getLocalInstance ().getNumPhysicalProcessors ();
63
+ params .printRealtime = true ;
64
+ params .printProgress = true ;
65
+
66
+ }
67
+
68
+ public String findModelPath (String modelName ) {
69
+ // First, we loop over all user-defined
70
+ // model directories
71
+ for (String dir : config .modelPaths ) {
72
+ File path = new File (dir + fs + modelName );
73
+ if (path .exists ()) {
74
+ return path .getAbsolutePath ();
75
+ }
76
+ }
77
+
78
+ // Now, we check our data directory for any downloaded models
79
+ File path = new File (getDataDir () + fs + modelName );
80
+ if (path .exists ()) {
81
+ return path .getAbsolutePath ();
82
+ } else if (config .modelUrls .containsKey (modelName )) {
83
+ // Model was not in data but we do have a URL for it
84
+ try (FileOutputStream fileOutputStream = new FileOutputStream (path )) {
85
+ ReadableByteChannel readableByteChannel = Channels .newChannel (new URL (config .modelUrls .get (modelName )).openStream ());
86
+ FileChannel fileChannel = fileOutputStream .getChannel ();
87
+ info ("Downloading model %s to path %s from URL %s" , modelName , path , config .modelUrls .get (modelName ));
88
+ fileChannel .transferFrom (readableByteChannel , 0 , Long .MAX_VALUE );
89
+ } catch (IOException e ) {
90
+ throw new RuntimeException (e );
91
+ }
92
+ return path .getAbsolutePath ();
93
+ }
94
+ // Cannot find the model anywhere
95
+ error ("Could not locate model {}, add its URL to download it or add a directory where it is located" , modelName );
96
+ return null ;
97
+ }
98
+
99
+ @ Override
100
+ public void startListening () {
101
+
102
+ listeningThread = new Thread (() -> {
103
+ AudioFormat format = new AudioFormat (16000.0f , 16 , 1 , true , false );
104
+ TargetDataLine microphone = null ;
105
+
106
+ Mixer .Info [] mixerInfos = AudioSystem .getMixerInfo ();
107
+ for (Mixer .Info info : mixerInfos ){
108
+ Mixer m = AudioSystem .getMixer (info );
109
+ Line .Info [] lineInfos = m .getTargetLineInfo ();
110
+ for (Line .Info lineInfo :lineInfos ){
111
+ System .out .println (info .getName ()+"---" +lineInfo );
112
+ // Hard-code for my mic right now
113
+ if (info .getName ().contains ("U0x46d0x825" )) {
114
+ try {
115
+ microphone = (TargetDataLine ) m .getLine (lineInfo );
116
+ microphone .open (format );
117
+ System .out .println ("Sample rate: " + format .getSampleRate ());
118
+ } catch (LineUnavailableException e ) {
119
+ throw new RuntimeException (e );
120
+ }
121
+ }
122
+
123
+ }
124
+
125
+ }
126
+
127
+ int numBytesRead ;
128
+
129
+ microphone .start ();
130
+ while (config .listening ) {
131
+ int CHUNK_SIZE = (int )((format .getFrameSize () * format .getFrameRate ())) * 5 ;
132
+ ByteBuffer captureBuffer = ByteBuffer .allocate (CHUNK_SIZE );
133
+ captureBuffer .order (ByteOrder .LITTLE_ENDIAN );
134
+ numBytesRead = microphone .read (captureBuffer .array (), 0 , CHUNK_SIZE );
135
+ System .out .println ("Num bytes read=" + numBytesRead );
136
+ ShortBuffer shortBuffer = captureBuffer .asShortBuffer ();
137
+ // transform the samples to f32 samples
138
+ float [] samples = new float [captureBuffer .capacity () / 2 ];
139
+ int index = 0 ;
140
+ shortBuffer .position (0 );
141
+ while (shortBuffer .hasRemaining ()) {
142
+ samples [index ++] = Float .max (-1f , Float .min (((float ) shortBuffer .get ()) / (float ) Short .MAX_VALUE , 1f ));
143
+ }
144
+ int result = whisper .full (ctx , params , samples , samples .length );
145
+ if (result != 0 ) {
146
+ throw new RuntimeException ("Transcription failed with code " + result );
147
+ }
148
+ int numSegments = whisper .fullNSegments (ctx );
149
+ System .out .println ("Inference done, numSegments=" + numSegments );
150
+ for (int i = 0 ; i < numSegments ; i ++) {
151
+ System .out .println (whisper .fullGetSegmentText (ctx , i ));
152
+ invoke ("publishRecognized" , whisper .fullGetSegmentText (ctx , i ));
153
+ }
154
+
155
+ }
156
+ microphone .close ();
157
+ });
158
+ super .startListening ();
159
+
160
+ listeningThread .start ();
161
+ }
162
+
163
+ @ Override
164
+ public WhisperConfig apply (WhisperConfig c ) {
165
+ super .apply (c );
166
+
167
+ if (config .selectedModel != null && !config .selectedModel .isEmpty ()) {
168
+ String modelPath = findModelPath (config .selectedModel );
169
+ if (modelPath != null ) {
170
+ loadModel (modelPath );
171
+ } else {
172
+ error ("Could not find selected model {}" , config .selectedModel );
173
+ }
174
+ }
175
+
176
+ return config ;
177
+ }
178
+
179
+ /**
180
+ * locales this service supports - implementation can simply get
181
+ * runtime.getLocales() if acceptable or create their own locales
182
+ *
183
+ * @return map of string to locale
184
+ */
185
+ @ Override
186
+ public Map <String , Locale > getLocales () {
187
+ return null ;
188
+ }
189
+
190
+
17
191
}
0 commit comments