Skip to content

Commit 314545f

Browse files
authored
Add speaker identification APIs for HarmonyOS (#1607)
* Add speaker embedding extractor API for HarmonyOS
* Add ArkTS API for speaker identification
1 parent a743a44 commit 314545f

19 files changed

+374
-60
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,3 +123,5 @@ sherpa-onnx-online-punct-en-2024-08-06
123123
sherpa-onnx-pyannote-segmentation-3-0
124124
sherpa-onnx-moonshine-tiny-en-int8
125125
sherpa-onnx-moonshine-base-en-int8
126+
harmony-os/SherpaOnnxHar/sherpa_onnx/LICENSE
127+
harmony-os/SherpaOnnxHar/sherpa_onnx/CHANGELOG.md

harmony-os/SherpaOnnxHar/sherpa_onnx/Index.ets

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,9 @@ export {
5151
TtsOutput,
5252
TtsInput,
5353
} from './src/main/ets/components/NonStreamingTts';
54+
55+
export {
56+
SpeakerEmbeddingExtractorConfig,
57+
SpeakerEmbeddingExtractor,
58+
SpeakerEmbeddingManager,
59+
} from './src/main/ets/components/SpeakerIdentification';

harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/speaker-identification.cc

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,17 @@
1111
static Napi::External<SherpaOnnxSpeakerEmbeddingExtractor>
1212
CreateSpeakerEmbeddingExtractorWrapper(const Napi::CallbackInfo &info) {
1313
Napi::Env env = info.Env();
14+
15+
#if __OHOS__
16+
if (info.Length() != 2) {
17+
std::ostringstream os;
18+
os << "Expect only 2 arguments. Given: " << info.Length();
19+
20+
Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
21+
22+
return {};
23+
}
24+
#else
1425
if (info.Length() != 1) {
1526
std::ostringstream os;
1627
os << "Expect only 1 argument. Given: " << info.Length();
@@ -19,6 +30,7 @@ CreateSpeakerEmbeddingExtractorWrapper(const Napi::CallbackInfo &info) {
1930

2031
return {};
2132
}
33+
#endif
2234

2335
if (!info[0].IsObject()) {
2436
Napi::TypeError::New(env, "You should pass an object as the only argument.")
@@ -46,8 +58,18 @@ CreateSpeakerEmbeddingExtractorWrapper(const Napi::CallbackInfo &info) {
4658

4759
SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider);
4860

61+
#if __OHOS__
62+
std::unique_ptr<NativeResourceManager,
63+
decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
64+
mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]),
65+
&OH_ResourceManager_ReleaseNativeResourceManager);
66+
67+
const SherpaOnnxSpeakerEmbeddingExtractor *extractor =
68+
SherpaOnnxCreateSpeakerEmbeddingExtractorOHOS(&c, mgr.get());
69+
#else
4970
const SherpaOnnxSpeakerEmbeddingExtractor *extractor =
5071
SherpaOnnxCreateSpeakerEmbeddingExtractor(&c);
72+
#endif
5173

5274
if (c.model) {
5375
delete[] c.model;

harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/types/libsherpa_onnx/Index.d.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,18 @@ export type TtsOutput = {
4747

4848
export const offlineTtsGenerate: (handle: object, input: object) => TtsOutput;
4949
export const offlineTtsGenerateAsync: (handle: object, input: object) => Promise<TtsOutput>;
50+
51+
// Native bindings for the speaker embedding extractor. Every `handle` and
// `stream` argument is an opaque object returned by one of these bindings.
// NOTE(review): `mgr` is typed as optional, but the OHOS build of the native
// wrapper throws a TypeError unless it receives exactly two arguments, so
// omitting `mgr` entirely is presumably rejected there - confirm.
export const createSpeakerEmbeddingExtractor: (config: object, mgr?: object) => object;
52+
export const speakerEmbeddingExtractorDim: (handle: object) => number;
53+
export const speakerEmbeddingExtractorCreateStream: (handle: object) => object;
54+
export const speakerEmbeddingExtractorIsReady: (handle: object, stream: object) => boolean;
55+
export const speakerEmbeddingExtractorComputeEmbedding: (handle: object, stream: object, enableExternalBuffer: boolean) => Float32Array;
56+
// Native bindings for the speaker embedding manager. `handle` is the opaque
// object returned by createSpeakerEmbeddingManager.
export const createSpeakerEmbeddingManager: (dim: number) => object;
57+
export const speakerEmbeddingManagerAdd: (handle: object, speaker: {name: string, v: Float32Array}) => boolean;
58+
// `vv` carries several embeddings back to back in one buffer and `n` is how
// many embeddings it contains.
export const speakerEmbeddingManagerAddListFlattened: (handle: object, speaker: {name: string, vv: Float32Array, n: number}) => boolean;
59+
export const speakerEmbeddingManagerRemove: (handle: object, name: string) => boolean;
60+
export const speakerEmbeddingManagerSearch: (handle: object, obj: {v: Float32Array, threshold: number}) => string;
61+
export const speakerEmbeddingManagerVerify: (handle: object, obj: {name: string, v: Float32Array, threshold: number}) => boolean;
62+
export const speakerEmbeddingManagerContains: (handle: object, name: string) => boolean;
63+
export const speakerEmbeddingManagerNumSpeakers: (handle: object) => number;
64+
export const speakerEmbeddingManagerGetAllSpeakers: (handle: object) => Array<string>;

harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/ets/components/NonStreamingTts.ets

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import {
44
getOfflineTtsSampleRate,
55
offlineTtsGenerate,
66
offlineTtsGenerateAsync,
7-
} from "libsherpa_onnx.so";
7+
} from 'libsherpa_onnx.so';
88

99
export class OfflineTtsVitsModelConfig {
1010
public model: string = '';
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
import {
2+
createSpeakerEmbeddingExtractor,
3+
createSpeakerEmbeddingManager,
4+
speakerEmbeddingExtractorComputeEmbedding,
5+
speakerEmbeddingExtractorCreateStream,
6+
speakerEmbeddingExtractorDim,
7+
speakerEmbeddingExtractorIsReady,
8+
speakerEmbeddingManagerAdd,
9+
speakerEmbeddingManagerAddListFlattened,
10+
speakerEmbeddingManagerContains,
11+
speakerEmbeddingManagerGetAllSpeakers,
12+
speakerEmbeddingManagerNumSpeakers,
13+
speakerEmbeddingManagerRemove,
14+
speakerEmbeddingManagerSearch,
15+
speakerEmbeddingManagerVerify
16+
} from 'libsherpa_onnx.so';
17+
import { OnlineStream } from './StreamingAsr';
18+
19+
/**
 * Options handed to the native layer when a SpeakerEmbeddingExtractor is
 * constructed. Every field has a default, so callers only override what they
 * need.
 */
export class SpeakerEmbeddingExtractorConfig {
20+
// Empty by default; presumably the location of the embedding model and
// therefore something the caller has to set - confirm.
public model: string = '';
21+
// Defaults to a single thread.
public numThreads: number = 1;
22+
// Off by default. NOTE(review): the C API logs the parsed config when its
// debug flag is set; presumably this field maps onto that flag - confirm.
public debug: boolean = false;
23+
// Defaults to 'cpu'.
public provider: string = 'cpu';
24+
}
25+
26+
/**
 * ArkTS wrapper around a native speaker embedding extractor.
 *
 * The instance keeps the opaque native handle private and exposes the
 * configuration it was built from together with the embedding dimension
 * reported by the native side.
 */
export class SpeakerEmbeddingExtractor {
  public config: SpeakerEmbeddingExtractorConfig = new SpeakerEmbeddingExtractorConfig();
  public dim: number;
  private handle: object;

  /**
   * @param config Extractor options; stored on the instance as `config`.
   * @param mgr Optional resource manager object. It is always forwarded to the
   *   native binding as the second argument, even when it is undefined.
   */
  constructor(config: SpeakerEmbeddingExtractorConfig, mgr?: object) {
    const nativeHandle = createSpeakerEmbeddingExtractor(config, mgr);

    this.handle = nativeHandle;
    this.config = config;
    // The dimension is queried once from the freshly created native object.
    this.dim = speakerEmbeddingExtractorDim(nativeHandle);
  }

  /**
   * Asks the native extractor for a new stream and wraps it.
   *
   * @returns An OnlineStream built from the native stream object.
   */
  createStream(): OnlineStream {
    const nativeStream = speakerEmbeddingExtractorCreateStream(this.handle);
    return new OnlineStream(nativeStream);
  }

  /**
   * Forwards the readiness query for `stream` to the native extractor.
   *
   * @param stream A stream whose native handle is passed through.
   * @returns The boolean reported by the native binding.
   */
  isReady(stream: OnlineStream): boolean {
    const ready = speakerEmbeddingExtractorIsReady(this.handle, stream.handle);
    return ready;
  }

  /**
   * Computes the embedding for `stream` through the native binding.
   *
   * @param stream A stream whose native handle is passed through.
   * @param enableExternalBuffer Forwarded unchanged to the native binding;
   *   defaults to true.
   * @returns The embedding returned by the native binding.
   */
  compute(stream: OnlineStream, enableExternalBuffer: boolean = true): Float32Array {
    const embedding = speakerEmbeddingExtractorComputeEmbedding(
      this.handle,
      stream.handle,
      enableExternalBuffer,
    );
    return embedding;
  }
}
51+
52+
/**
 * Concatenates a list of Float32Arrays into one contiguous Float32Array.
 *
 * The chunks are copied in list order, so the result holds arrayList[0]
 * followed by arrayList[1], and so on. An empty list yields an empty array.
 *
 * @param arrayList Arrays to join; they may have different lengths.
 * @returns A newly allocated array whose length is the sum of the input
 *   lengths.
 */
function flatten(arrayList: Float32Array[]): Float32Array {
  // First pass: size the destination so it is allocated exactly once.
  let total = 0;
  for (const chunk of arrayList) {
    total += chunk.length;
  }

  const ans = new Float32Array(total);

  // Second pass: copy every chunk behind the previous one.
  let offset = 0;
  for (const chunk of arrayList) {
    ans.set(chunk, offset);
    offset += chunk.length;
  }
  return ans;
}
66+
67+
// Argument of SpeakerEmbeddingManager.add(): a speaker name paired with a
// single embedding.
interface SpeakerNameWithEmbedding {
68+
name: string;
69+
v: Float32Array;
70+
}
71+
72+
// Argument of SpeakerEmbeddingManager.addMulti(): a speaker name paired with
// a list of embeddings.
interface SpeakerNameWithEmbeddingList {
73+
name: string;
74+
v: Float32Array[];
75+
}
76+
77+
// Payload that addMulti() builds for the native binding: `vv` is the
// flattened concatenation of the embeddings and `n` is how many there were.
interface SpeakerNameWithEmbeddingN {
78+
name: string;
79+
vv: Float32Array;
80+
n: number;
81+
}
82+
83+
// Argument of SpeakerEmbeddingManager.search(): the query embedding plus a
// threshold. NOTE(review): presumably a similarity cutoff - confirm against
// the native implementation.
interface EmbeddingWithThreshold {
84+
v: Float32Array;
85+
threshold: number;
86+
}
87+
88+
// Argument of SpeakerEmbeddingManager.verify(): the claimed speaker name, the
// embedding to check and a threshold. NOTE(review): presumably a similarity
// cutoff - confirm against the native implementation.
interface SpeakerNameEmbeddingThreshold {
89+
name: string;
90+
v: Float32Array;
91+
threshold: number;
92+
}
93+
94+
/**
 * ArkTS wrapper around a native speaker embedding manager.
 *
 * The manager is created for a fixed embedding dimension and keeps the opaque
 * native handle private. Every method forwards to the matching native binding
 * and returns what that binding reports.
 */
export class SpeakerEmbeddingManager {
  public dim: number;
  private handle: object;

  /**
   * @param dim Embedding dimension passed to the native manager and stored on
   *   the instance.
   */
  constructor(dim: number) {
    this.handle = createSpeakerEmbeddingManager(dim);
    this.dim = dim;
  }

  /**
   * Registers one embedding under a speaker name.
   *
   * @param speaker Name and embedding to register.
   * @returns The boolean reported by the native binding.
   */
  add(speaker: SpeakerNameWithEmbedding): boolean {
    return speakerEmbeddingManagerAdd(this.handle, speaker);
  }

  /**
   * Registers several embeddings under one speaker name.
   *
   * The embeddings are concatenated into a single buffer and only their count
   * is sent along, so the native side cannot recover per-embedding lengths.
   * Every embedding must therefore contain exactly `dim` values; otherwise
   * nothing is sent to the native layer and false is returned.
   *
   * @param speaker Name and list of embeddings to register.
   * @returns false when any embedding has a length other than `dim`; otherwise
   *   the boolean reported by the native binding.
   */
  addMulti(speaker: SpeakerNameWithEmbeddingList): boolean {
    // Reject ragged or wrongly sized input before it is flattened: after
    // flattening, the boundaries between embeddings are lost.
    for (const embedding of speaker.v) {
      if (embedding.length !== this.dim) {
        return false;
      }
    }

    const c: SpeakerNameWithEmbeddingN = {
      name: speaker.name,
      vv: flatten(speaker.v),
      n: speaker.v.length,
    };
    return speakerEmbeddingManagerAddListFlattened(this.handle, c);
  }

  /**
   * Removes a speaker by name.
   *
   * @param name Speaker name to remove.
   * @returns The boolean reported by the native binding.
   */
  remove(name: string): boolean {
    return speakerEmbeddingManagerRemove(this.handle, name);
  }

  /**
   * Looks up the speaker matching an embedding.
   *
   * @param obj Query embedding and threshold.
   * @returns The string reported by the native binding.
   *   NOTE(review): the value returned when nothing matches is not visible
   *   here - confirm against the native implementation.
   */
  search(obj: EmbeddingWithThreshold): string {
    return speakerEmbeddingManagerSearch(this.handle, obj);
  }

  /**
   * Checks an embedding against a named speaker.
   *
   * @param obj Speaker name, embedding and threshold.
   * @returns The boolean reported by the native binding.
   */
  verify(obj: SpeakerNameEmbeddingThreshold): boolean {
    return speakerEmbeddingManagerVerify(this.handle, obj);
  }

  /**
   * @param name Speaker name to look for.
   * @returns The boolean reported by the native binding.
   */
  contains(name: string): boolean {
    return speakerEmbeddingManagerContains(this.handle, name);
  }

  /**
   * @returns The speaker count reported by the native binding.
   */
  getNumSpeakers(): number {
    return speakerEmbeddingManagerNumSpeakers(this.handle);
  }

  /**
   * @returns The speaker names reported by the native binding.
   */
  getAllSpeakerNames(): string[] {
    return speakerEmbeddingManagerGetAllSpeakers(this.handle);
  }
}

sherpa-onnx/c-api/c-api.cc

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1328,8 +1328,8 @@ struct SherpaOnnxSpeakerEmbeddingExtractor {
13281328
std::unique_ptr<sherpa_onnx::SpeakerEmbeddingExtractor> impl;
13291329
};
13301330

1331-
const SherpaOnnxSpeakerEmbeddingExtractor *
1332-
SherpaOnnxCreateSpeakerEmbeddingExtractor(
1331+
static sherpa_onnx::SpeakerEmbeddingExtractorConfig
1332+
GetSpeakerEmbeddingExtractorConfig(
13331333
const SherpaOnnxSpeakerEmbeddingExtractorConfig *config) {
13341334
sherpa_onnx::SpeakerEmbeddingExtractorConfig c;
13351335
c.model = SHERPA_ONNX_OR(config->model, "");
@@ -1342,9 +1342,21 @@ SherpaOnnxCreateSpeakerEmbeddingExtractor(
13421342
}
13431343

13441344
if (config->debug) {
1345+
#if __OHOS__
1346+
SHERPA_ONNX_LOGE("%{public}s\n", c.ToString().c_str());
1347+
#else
13451348
SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str());
1349+
#endif
13461350
}
13471351

1352+
return c;
1353+
}
1354+
1355+
const SherpaOnnxSpeakerEmbeddingExtractor *
1356+
SherpaOnnxCreateSpeakerEmbeddingExtractor(
1357+
const SherpaOnnxSpeakerEmbeddingExtractorConfig *config) {
1358+
auto c = GetSpeakerEmbeddingExtractorConfig(config);
1359+
13481360
if (!c.Validate()) {
13491361
SHERPA_ONNX_LOGE("Errors in config!");
13501362
return nullptr;
@@ -1983,6 +1995,23 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetectorOHOS(
19831995
return p;
19841996
}
19851997

1998+
// Creates a speaker embedding extractor for HarmonyOS.
//
// When `mgr` is null the call is delegated to
// SherpaOnnxCreateSpeakerEmbeddingExtractor(). Otherwise the C config is
// converted and the extractor is constructed with the resource manager.
// The caller owns the returned object.
//
// NOTE(review): unlike the delegated path, this path does not call
// Validate() on the converted config - presumably because the model is read
// through the resource manager rather than the file system; confirm.
const SherpaOnnxSpeakerEmbeddingExtractor *
SherpaOnnxCreateSpeakerEmbeddingExtractorOHOS(
    const SherpaOnnxSpeakerEmbeddingExtractorConfig *config,
    NativeResourceManager *mgr) {
  if (mgr == nullptr) {
    return SherpaOnnxCreateSpeakerEmbeddingExtractor(config);
  }

  auto extractor_config = GetSpeakerEmbeddingExtractorConfig(config);

  auto *extractor = new SherpaOnnxSpeakerEmbeddingExtractor;
  extractor->impl = std::make_unique<sherpa_onnx::SpeakerEmbeddingExtractor>(
      mgr, extractor_config);

  return extractor;
}
2014+
19862015
#if SHERPA_ONNX_ENABLE_TTS == 1
19872016
SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
19882017
const SherpaOnnxOfflineTtsConfig *config, NativeResourceManager *mgr) {

sherpa-onnx/c-api/c-api.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1572,6 +1572,11 @@ SherpaOnnxCreateVoiceActivityDetectorOHOS(
15721572

15731573
SHERPA_ONNX_API SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
15741574
const SherpaOnnxOfflineTtsConfig *config, NativeResourceManager *mgr);
1575+
1576+
// Creates a speaker embedding extractor for HarmonyOS from `config`.
// `mgr` is the native resource manager; when it is NULL the implementation
// delegates to SherpaOnnxCreateSpeakerEmbeddingExtractor().
SHERPA_ONNX_API const SherpaOnnxSpeakerEmbeddingExtractor *
1577+
SherpaOnnxCreateSpeakerEmbeddingExtractorOHOS(
1578+
const SherpaOnnxSpeakerEmbeddingExtractorConfig *config,
1579+
NativeResourceManager *mgr);
15751580
#endif
15761581

15771582
#if defined(__GNUC__)

sherpa-onnx/csrc/offline-tts-vits-impl.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,9 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
6262
for (const auto &f : files) {
6363
if (config.model.debug) {
6464
#if __OHOS__
65-
SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
66-
#else
6765
SHERPA_ONNX_LOGE("rule far: %{public}s", f.c_str());
66+
#else
67+
SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
6868
#endif
6969
}
7070
std::unique_ptr<fst::FarReader<fst::StdArc>> reader(

sherpa-onnx/csrc/speaker-embedding-extractor-general-impl.h

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,10 @@ class SpeakerEmbeddingExtractorGeneralImpl
2222
const SpeakerEmbeddingExtractorConfig &config)
2323
: model_(config) {}
2424

25-
#if __ANDROID_API__ >= 9
25+
template <typename Manager>
2626
SpeakerEmbeddingExtractorGeneralImpl(
27-
AAssetManager *mgr, const SpeakerEmbeddingExtractorConfig &config)
27+
Manager *mgr, const SpeakerEmbeddingExtractorConfig &config)
2828
: model_(mgr, config) {}
29-
#endif
3029

3130
int32_t Dim() const override { return model_.GetMetaData().output_dim; }
3231

@@ -46,9 +45,15 @@ class SpeakerEmbeddingExtractorGeneralImpl
4645
std::vector<float> Compute(OnlineStream *s) const override {
4746
int32_t num_frames = s->NumFramesReady() - s->GetNumProcessedFrames();
4847
if (num_frames <= 0) {
48+
#if __OHOS__
49+
SHERPA_ONNX_LOGE(
50+
"Please make sure IsReady(s) returns true. num_frames: %{public}d",
51+
num_frames);
52+
#else
4953
SHERPA_ONNX_LOGE(
5054
"Please make sure IsReady(s) returns true. num_frames: %d",
5155
num_frames);
56+
#endif
5257
return {};
5358
}
5459

@@ -64,8 +69,13 @@ class SpeakerEmbeddingExtractorGeneralImpl
6469
if (meta_data.feature_normalize_type == "global-mean") {
6570
SubtractGlobalMean(features.data(), num_frames, feat_dim);
6671
} else {
72+
#if __OHOS__
73+
SHERPA_ONNX_LOGE("Unsupported feature_normalize_type: %{public}s",
74+
meta_data.feature_normalize_type.c_str());
75+
#else
6776
SHERPA_ONNX_LOGE("Unsupported feature_normalize_type: %s",
6877
meta_data.feature_normalize_type.c_str());
78+
#endif
6979
exit(-1);
7080
}
7181
}

0 commit comments

Comments
 (0)