|
19 | 19 |
|
20 | 20 | wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx |
21 | 21 |
|
| 22 | +Alternatively, you can download ten-vad.onnx, for instance: |
| 23 | +
|
| 24 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/ten-vad.onnx |
| 25 | +
|
| 26 | +To use ten-vad, replace --silero-vad-model with --ten-vad-model in the commands below. |
| 27 | +
|
22 | 28 | (1) For paraformer |
23 | 29 |
|
24 | 30 | ./python-api-examples/generate-subtitles.py \ |
@@ -124,8 +130,13 @@ def get_args(): |
124 | 130 | parser.add_argument( |
125 | 131 | "--silero-vad-model", |
126 | 132 | type=str, |
127 | | - required=True, |
128 | | - help="Path to silero_vad.onnx", |
| 133 | + help="Path to silero_vad.onnx.", |
| 134 | + ) |
| 135 | + |
| 136 | + parser.add_argument( |
| 137 | + "--ten-vad-model", |
| 138 | + type=str, |
| 139 | + help="Path to ten-vad.onnx", |
129 | 140 | ) |
130 | 141 |
|
131 | 142 | parser.add_argument( |
@@ -499,7 +510,12 @@ def __str__(self): |
499 | 510 | def main(): |
500 | 511 | args = get_args() |
501 | 512 | assert_file_exists(args.tokens) |
502 | | - assert_file_exists(args.silero_vad_model) |
| 513 | + if args.silero_vad_model: |
| 514 | + assert_file_exists(args.silero_vad_model) |
| 515 | + elif args.ten_vad_model: |
| 516 | + assert_file_exists(args.ten_vad_model) |
| 517 | + else: |
| 518 | + raise ValueError("You need to supply one vad model") |
503 | 519 |
|
504 | 520 | assert args.num_threads > 0, args.num_threads |
505 | 521 |
|
@@ -536,18 +552,34 @@ def main(): |
536 | 552 | stream = recognizer.create_stream() |
537 | 553 |
|
538 | 554 | config = sherpa_onnx.VadModelConfig() |
539 | | - config.silero_vad.model = args.silero_vad_model |
540 | | - config.silero_vad.threshold = 0.5 |
541 | | - config.silero_vad.min_silence_duration = 0.25 # seconds |
542 | | - config.silero_vad.min_speech_duration = 0.25 # seconds |
543 | | - |
544 | | - # If the current segment is larger than this value, then it increases |
545 | | - # the threshold to 0.9 internally. After detecting this segment, |
546 | | - # it resets the threshold to its original value. |
547 | | - config.silero_vad.max_speech_duration = 5 # seconds |
548 | | - config.sample_rate = args.sample_rate |
549 | | - |
550 | | - window_size = config.silero_vad.window_size |
| 555 | + if args.silero_vad_model: |
| 556 | + config.silero_vad.model = args.silero_vad_model |
| 557 | + config.silero_vad.threshold = 0.2 |
| 558 | + config.silero_vad.min_silence_duration = 0.25 # seconds |
| 559 | + config.silero_vad.min_speech_duration = 0.25 # seconds |
| 560 | + |
| 561 | + # If the current segment is larger than this value, then it increases |
| 562 | + # the threshold to 0.9 internally. After detecting this segment, |
| 563 | + # it resets the threshold to its original value. |
| 564 | + config.silero_vad.max_speech_duration = 5 # seconds |
| 565 | + config.sample_rate = args.sample_rate |
| 566 | + |
| 567 | + window_size = config.silero_vad.window_size |
| 568 | + print("use silero-vad") |
| 569 | + else: |
| 570 | + config.ten_vad.model = args.ten_vad_model |
| 571 | + config.ten_vad.threshold = 0.2 |
| 572 | + config.ten_vad.min_silence_duration = 0.25 # seconds |
| 573 | + config.ten_vad.min_speech_duration = 0.25 # seconds |
| 574 | + |
| 575 | + # If the current segment is larger than this value, then it increases |
| 576 | + # the threshold to 0.9 internally. After detecting this segment, |
| 577 | + # it resets the threshold to its original value. |
| 578 | + config.ten_vad.max_speech_duration = 5 # seconds |
| 579 | + config.sample_rate = args.sample_rate |
| 580 | + |
| 581 | + window_size = config.ten_vad.window_size |
| 582 | + print("use ten-vad") |
551 | 583 |
|
552 | 584 | buffer = [] |
553 | 585 | vad = sherpa_onnx.VoiceActivityDetector(config, buffer_size_in_seconds=100) |
|
0 commit comments