Skip to content

Commit 1bae408

Browse files
authored
Add speaker diarization API for HarmonyOS. (#1609)
1 parent 14944d8 commit 1bae408

18 files changed

+279
-79
lines changed
Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,14 @@
1-
export {
2-
listRawfileDir,
3-
readWave,
4-
readWaveFromBinary,
5-
} from "libsherpa_onnx.so";
1+
export { listRawfileDir, readWave, readWaveFromBinary, } from "libsherpa_onnx.so";
62

7-
export {
8-
CircularBuffer,
3+
export { CircularBuffer,
94
SileroVadConfig,
105
SpeechSegment,
116
Vad,
127
VadConfig,
138
} from './src/main/ets/components/Vad';
149

1510

16-
export {
17-
Samples,
11+
export { Samples,
1812
OfflineStream,
1913
FeatureConfig,
2014
OfflineTransducerModelConfig,
@@ -31,8 +25,7 @@ export {
3125
OfflineRecognizer,
3226
} from './src/main/ets/components/NonStreamingAsr';
3327

34-
export {
35-
OnlineStream,
28+
export { OnlineStream,
3629
OnlineTransducerModelConfig,
3730
OnlineParaformerModelConfig,
3831
OnlineZipformer2CtcModelConfig,
@@ -43,17 +36,23 @@ export {
4336
OnlineRecognizer,
4437
} from './src/main/ets/components/StreamingAsr';
4538

46-
export {
47-
OfflineTtsVitsModelConfig,
39+
export { OfflineTtsVitsModelConfig,
4840
OfflineTtsModelConfig,
4941
OfflineTtsConfig,
5042
OfflineTts,
5143
TtsOutput,
5244
TtsInput,
5345
} from './src/main/ets/components/NonStreamingTts';
5446

55-
export {
56-
SpeakerEmbeddingExtractorConfig,
47+
export { SpeakerEmbeddingExtractorConfig,
5748
SpeakerEmbeddingExtractor,
5849
SpeakerEmbeddingManager,
5950
} from './src/main/ets/components/SpeakerIdentification';
51+
52+
export { OfflineSpeakerSegmentationPyannoteModelConfig,
53+
OfflineSpeakerSegmentationModelConfig,
54+
OfflineSpeakerDiarizationConfig,
55+
OfflineSpeakerDiarizationSegment,
56+
OfflineSpeakerDiarization,
57+
FastClusteringConfig,
58+
} from './src/main/ets/components/NonStreamingSpeakerDiarization';

harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-speaker-diarization.cc

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,17 @@ static SherpaOnnxFastClusteringConfig GetFastClusteringConfig(
101101
static Napi::External<SherpaOnnxOfflineSpeakerDiarization>
102102
CreateOfflineSpeakerDiarizationWrapper(const Napi::CallbackInfo &info) {
103103
Napi::Env env = info.Env();
104+
105+
#if __OHOS__
106+
if (info.Length() != 2) {
107+
std::ostringstream os;
108+
os << "Expect only 2 arguments. Given: " << info.Length();
109+
110+
Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
111+
112+
return {};
113+
}
114+
#else
104115
if (info.Length() != 1) {
105116
std::ostringstream os;
106117
os << "Expect only 1 argument. Given: " << info.Length();
@@ -109,6 +120,7 @@ CreateOfflineSpeakerDiarizationWrapper(const Napi::CallbackInfo &info) {
109120

110121
return {};
111122
}
123+
#endif
112124

113125
if (!info[0].IsObject()) {
114126
Napi::TypeError::New(env, "Expect an object as the argument")
@@ -129,8 +141,18 @@ CreateOfflineSpeakerDiarizationWrapper(const Napi::CallbackInfo &info) {
129141
SHERPA_ONNX_ASSIGN_ATTR_FLOAT(min_duration_on, minDurationOn);
130142
SHERPA_ONNX_ASSIGN_ATTR_FLOAT(min_duration_off, minDurationOff);
131143

144+
#if __OHOS__
145+
std::unique_ptr<NativeResourceManager,
146+
decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
147+
mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]),
148+
&OH_ResourceManager_ReleaseNativeResourceManager);
149+
150+
const SherpaOnnxOfflineSpeakerDiarization *sd =
151+
SherpaOnnxCreateOfflineSpeakerDiarizationOHOS(&c, mgr.get());
152+
#else
132153
const SherpaOnnxOfflineSpeakerDiarization *sd =
133154
SherpaOnnxCreateOfflineSpeakerDiarization(&c);
155+
#endif
134156

135157
if (c.segmentation.pyannote.model) {
136158
delete[] c.segmentation.pyannote.model;
@@ -224,9 +246,17 @@ static Napi::Array OfflineSpeakerDiarizationProcessWrapper(
224246

225247
Napi::Float32Array samples = info[1].As<Napi::Float32Array>();
226248

249+
#if __OHOS__
250+
// Note(fangjun): For unknown reasons on HarmonyOS, we need to divide it by
251+
// sizeof(float) here
252+
const SherpaOnnxOfflineSpeakerDiarizationResult *r =
253+
SherpaOnnxOfflineSpeakerDiarizationProcess(
254+
sd, samples.Data(), samples.ElementLength() / sizeof(float));
255+
#else
227256
const SherpaOnnxOfflineSpeakerDiarizationResult *r =
228257
SherpaOnnxOfflineSpeakerDiarizationProcess(sd, samples.Data(),
229258
samples.ElementLength());
259+
#endif
230260

231261
int32_t num_segments =
232262
SherpaOnnxOfflineSpeakerDiarizationResultGetNumSegments(r);

harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/types/libsherpa_onnx/Index.d.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,3 +62,8 @@ export const speakerEmbeddingManagerVerify: (handle: object, obj: {name: string,
6262
export const speakerEmbeddingManagerContains: (handle: object, name: string) => boolean;
6363
export const speakerEmbeddingManagerNumSpeakers: (handle: object) => number;
6464
export const speakerEmbeddingManagerGetAllSpeakers: (handle: object) => Array<string>;
65+
66+
export const createOfflineSpeakerDiarization: (config: object, mgr?: object) => object;
67+
export const getOfflineSpeakerDiarizationSampleRate: (handle: object) => number;
68+
export const offlineSpeakerDiarizationProcess: (handle: object, samples: Float32Array) => object;
69+
export const offlineSpeakerDiarizationSetConfig: (handle: object, config: object) => void;

harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/wave-writer.cc

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,15 @@ static Napi::Boolean WriteWaveWrapper(const Napi::CallbackInfo &info) {
6767

6868
Napi::Float32Array samples = obj.Get("samples").As<Napi::Float32Array>();
6969
int32_t sample_rate = obj.Get("sampleRate").As<Napi::Number>().Int32Value();
70-
70+
#if __OHOS__
71+
int32_t ok = SherpaOnnxWriteWave(
72+
samples.Data(), samples.ElementLength() / sizeof(float), sample_rate,
73+
info[0].As<Napi::String>().Utf8Value().c_str());
74+
#else
7175
int32_t ok =
7276
SherpaOnnxWriteWave(samples.Data(), samples.ElementLength(), sample_rate,
7377
info[0].As<Napi::String>().Utf8Value().c_str());
78+
#endif
7479

7580
return Napi::Boolean::New(env, ok);
7681
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
import {
2+
createOfflineSpeakerDiarization,
3+
getOfflineSpeakerDiarizationSampleRate,
4+
offlineSpeakerDiarizationProcess,
5+
offlineSpeakerDiarizationSetConfig,
6+
} from 'libsherpa_onnx.so';
7+
8+
import { SpeakerEmbeddingExtractorConfig } from './SpeakerIdentification';
9+
10+
// Settings for the pyannote speaker-segmentation model.
export class OfflineSpeakerSegmentationPyannoteModelConfig {
11+
// Defaults to an empty string.
// NOTE(review): presumably the path of the pyannote segmentation model
// file — confirm against the native config reader.
public model: string = '';
12+
}
13+
14+
// Settings for the speaker-segmentation stage; wraps the model-specific
// (pyannote) configuration together with generic runtime options.
export class OfflineSpeakerSegmentationModelConfig {
15+
// Model-specific settings; a default-constructed pyannote config is used
// unless the caller replaces it.
public pyannote: OfflineSpeakerSegmentationPyannoteModelConfig = new OfflineSpeakerSegmentationPyannoteModelConfig();
16+
// Defaults to 1.
// NOTE(review): presumably the number of inference threads — confirm.
public numThreads: number = 1;
17+
// Defaults to false.
// NOTE(review): presumably forwarded to the native segmentation config,
// where a set debug flag makes the C API log the whole configuration — confirm.
public debug: boolean = false;
18+
// Defaults to 'cpu'.
// NOTE(review): presumably the inference provider name — confirm the accepted values.
public provider: string = 'cpu';
19+
}
20+
21+
// Settings for the clustering stage of speaker diarization.
export class FastClusteringConfig {
22+
// Defaults to -1.
// NOTE(review): -1 presumably means "number of speakers unknown", in which
// case `threshold` decides the clustering — confirm against the native side.
public numClusters: number = -1;
23+
// Defaults to 0.5.
// NOTE(review): presumably the clustering distance threshold — confirm.
public threshold: number = 0.5;
24+
}
25+
26+
// Top-level configuration passed to OfflineSpeakerDiarization. Bundles the
// segmentation, speaker-embedding and clustering settings; every field has a
// default so callers only need to override what differs.
export class OfflineSpeakerDiarizationConfig {
27+
// Speaker-segmentation model settings.
public segmentation: OfflineSpeakerSegmentationModelConfig = new OfflineSpeakerSegmentationModelConfig();
28+
// Speaker-embedding extractor settings (type imported from ./SpeakerIdentification).
public embedding: SpeakerEmbeddingExtractorConfig = new SpeakerEmbeddingExtractorConfig();
29+
// Clustering settings.
public clustering: FastClusteringConfig = new FastClusteringConfig();
30+
// Defaults to 0.2.
// NOTE(review): presumably a minimum duration in seconds — confirm units.
public minDurationOn: number = 0.2;
31+
// Defaults to 0.5.
// NOTE(review): presumably a minimum duration in seconds — confirm units.
public minDurationOff: number = 0.5;
32+
}
33+
34+
/**
 * One diarization result entry: a time span attributed to a single speaker.
 *
 * The three fields were previously collapsed onto one line, which hid `end`
 * and `speaker` inside the first `//` comment and left a stray property
 * named `0`. Each field is now declared on its own line.
 */
export class OfflineSpeakerDiarizationSegment {
  public start: number = 0; // in seconds
  public end: number = 0; // in seconds
  public speaker: number = 0; // ID of the speaker; count from 0
}
38+
39+
/**
 * Non-streaming speaker diarization backed by the native `libsherpa_onnx.so`
 * addon.
 */
export class OfflineSpeakerDiarization {
  // The configuration this instance was created with (see setConfig()).
  public config: OfflineSpeakerDiarizationConfig;
  // Sample rate reported by the native diarizer for this handle.
  public sampleRate: number;
  // Opaque native handle returned by createOfflineSpeakerDiarization().
  private handle: object;

  /**
   * @param config Diarization configuration; forwarded to the native addon
   *               and kept as `this.config`.
   * @param mgr    Optional object forwarded unchanged to the native addon.
   *               NOTE(review): presumably the HarmonyOS resource manager
   *               used to read models from rawfile — confirm with callers.
   */
  constructor(config: OfflineSpeakerDiarizationConfig, mgr?: object) {
    this.handle = createOfflineSpeakerDiarization(config, mgr);
    this.config = config;

    this.sampleRate = getOfflineSpeakerDiarizationSampleRate(this.handle);
  }

  /**
   * samples is a 1-d float32 array. Each element of the array should be
   * in the range [-1, 1].
   *
   * We assume its sample rate equals this.sampleRate.
   *
   * Returns an array of objects, where each object is
   *
   *  {
   *    "start": start_time_in_seconds,
   *    "end": end_time_in_seconds,
   *    "speaker": an_integer,
   *  }
   */
  process(samples: Float32Array): OfflineSpeakerDiarizationSegment[] {
    // The native call yields an array of segments, so the result is typed as
    // an array rather than a single segment.
    return offlineSpeakerDiarizationProcess(this.handle, samples) as OfflineSpeakerDiarizationSegment[];
  }

  /**
   * Passes `config` to the native diarizer and mirrors its clustering
   * settings into `this.config`. Only `clustering` is copied here; the other
   * fields of `this.config` are left untouched.
   */
  setConfig(config: OfflineSpeakerDiarizationConfig) {
    offlineSpeakerDiarizationSetConfig(this.handle, config);
    this.config.clustering = config.clustering;
  }
}

harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/SpeakerIdentification.ets

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,17 +35,15 @@ export class SpeakerEmbeddingExtractor {
3535
}
3636

3737
createStream(): OnlineStream {
38-
return new OnlineStream(
39-
speakerEmbeddingExtractorCreateStream(this.handle));
38+
return new OnlineStream(speakerEmbeddingExtractorCreateStream(this.handle));
4039
}
4140

4241
isReady(stream: OnlineStream): boolean {
4342
return speakerEmbeddingExtractorIsReady(this.handle, stream.handle);
4443
}
4544

4645
compute(stream: OnlineStream, enableExternalBuffer: boolean = true): Float32Array {
47-
return speakerEmbeddingExtractorComputeEmbedding(
48-
this.handle, stream.handle, enableExternalBuffer);
46+
return speakerEmbeddingExtractorComputeEmbedding(this.handle, stream.handle, enableExternalBuffer);
4947
}
5048
}
5149

@@ -106,9 +104,7 @@ export class SpeakerEmbeddingManager {
106104

107105
addMulti(speaker: SpeakerNameWithEmbeddingList): boolean {
108106
const c: SpeakerNameWithEmbeddingN = {
109-
name: speaker.name,
110-
vv: flatten(speaker.v),
111-
n: speaker.v.length,
107+
name: speaker.name, vv: flatten(speaker.v), n: speaker.v.length,
112108
};
113109
return speakerEmbeddingManagerAddListFlattened(this.handle, c);
114110
}

harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/StreamingAsr.ets

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,8 +125,7 @@ export class OnlineRecognizer {
125125
}
126126

127127
getResult(stream: OnlineStream): OnlineRecognizerResult {
128-
const jsonStr: string =
129-
getOnlineStreamResultAsJson(this.handle, stream.handle);
128+
const jsonStr: string = getOnlineStreamResultAsJson(this.handle, stream.handle);
130129

131130
let o = JSON.parse(jsonStr) as OnlineRecognizerResultJson;
132131

harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/Vad.ets

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,7 @@ export class CircularBuffer {
6262

6363
// return a float32 array
6464
get(startIndex: number, n: number, enableExternalBuffer: boolean = true): Float32Array {
65-
return circularBufferGet(
66-
this.handle, startIndex, n, enableExternalBuffer);
65+
return circularBufferGet(this.handle, startIndex, n, enableExternalBuffer);
6766
}
6867

6968
pop(n: number) {
@@ -93,8 +92,7 @@ export class Vad {
9392
private handle: object;
9493

9594
constructor(config: VadConfig, bufferSizeInSeconds?: number, mgr?: object) {
96-
this.handle =
97-
createVoiceActivityDetector(config, bufferSizeInSeconds, mgr);
95+
this.handle = createVoiceActivityDetector(config, bufferSizeInSeconds, mgr);
9896
this.config = config;
9997
}
10098

scripts/node-addon-api/lib/non-streaming-speaker-diarization.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ class OfflineSpeakerDiarization {
2727
}
2828

2929
setConfig(config) {
30-
addon.offlineSpeakerDiarizationSetConfig(config);
30+
addon.offlineSpeakerDiarizationSetConfig(this.handle, config);
3131
this.config.clustering = config.clustering;
3232
}
3333
}

sherpa-onnx/c-api/c-api.cc

Lines changed: 45 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1784,8 +1784,8 @@ struct SherpaOnnxOfflineSpeakerDiarizationResult {
17841784
sherpa_onnx::OfflineSpeakerDiarizationResult impl;
17851785
};
17861786

1787-
const SherpaOnnxOfflineSpeakerDiarization *
1788-
SherpaOnnxCreateOfflineSpeakerDiarization(
1787+
static sherpa_onnx::OfflineSpeakerDiarizationConfig
1788+
GetOfflineSpeakerDiarizationConfig(
17891789
const SherpaOnnxOfflineSpeakerDiarizationConfig *config) {
17901790
sherpa_onnx::OfflineSpeakerDiarizationConfig sd_config;
17911791

@@ -1820,6 +1820,22 @@ SherpaOnnxCreateOfflineSpeakerDiarization(
18201820

18211821
sd_config.min_duration_off = SHERPA_ONNX_OR(config->min_duration_off, 0.5);
18221822

1823+
if (sd_config.segmentation.debug || sd_config.embedding.debug) {
1824+
#if __OHOS__
1825+
SHERPA_ONNX_LOGE("%{public}s\n", sd_config.ToString().c_str());
1826+
#else
1827+
SHERPA_ONNX_LOGE("%s\n", sd_config.ToString().c_str());
1828+
#endif
1829+
}
1830+
1831+
return sd_config;
1832+
}
1833+
1834+
const SherpaOnnxOfflineSpeakerDiarization *
1835+
SherpaOnnxCreateOfflineSpeakerDiarization(
1836+
const SherpaOnnxOfflineSpeakerDiarizationConfig *config) {
1837+
auto sd_config = GetOfflineSpeakerDiarizationConfig(config);
1838+
18231839
if (!sd_config.Validate()) {
18241840
SHERPA_ONNX_LOGE("Errors in config");
18251841
return nullptr;
@@ -1831,10 +1847,6 @@ SherpaOnnxCreateOfflineSpeakerDiarization(
18311847
sd->impl =
18321848
std::make_unique<sherpa_onnx::OfflineSpeakerDiarization>(sd_config);
18331849

1834-
if (sd_config.segmentation.debug || sd_config.embedding.debug) {
1835-
SHERPA_ONNX_LOGE("%s\n", sd_config.ToString().c_str());
1836-
}
1837-
18381850
return sd;
18391851
}
18401852

@@ -2029,5 +2041,32 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
20292041
}
20302042

20312043
#endif // #if SHERPA_ONNX_ENABLE_TTS == 1
2044+
//
2045+
#if SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION == 1
2046+
// Creates an offline speaker diarization object for HarmonyOS.
//
// If `mgr` is null, this simply delegates to
// SherpaOnnxCreateOfflineSpeakerDiarization(). Otherwise the C config is
// converted, validated, and used together with `mgr` to construct the
// underlying implementation.
//
// Returns nullptr (after logging) when the converted config fails validation.
// NOTE(review): the returned object is allocated with `new`; presumably the
// caller releases it through the matching destroy function — confirm.
const SherpaOnnxOfflineSpeakerDiarization *
SherpaOnnxCreateOfflineSpeakerDiarizationOHOS(
    const SherpaOnnxOfflineSpeakerDiarizationConfig *config,
    NativeResourceManager *mgr) {
  // No resource manager: nothing OHOS-specific to do.
  if (mgr == nullptr) {
    return SherpaOnnxCreateOfflineSpeakerDiarization(config);
  }

  auto diarization_config = GetOfflineSpeakerDiarizationConfig(config);
  if (!diarization_config.Validate()) {
    SHERPA_ONNX_LOGE("Errors in config");
    return nullptr;
  }

  auto *wrapper = new SherpaOnnxOfflineSpeakerDiarization;
  wrapper->impl = std::make_unique<sherpa_onnx::OfflineSpeakerDiarization>(
      mgr, diarization_config);

  return wrapper;
}
2069+
2070+
#endif // #if SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION == 1
20322071

20332072
#endif // #ifdef __OHOS__

0 commit comments

Comments
 (0)