@@ -58,7 +58,7 @@ struct AudioFFTConfig : public AudioInfo {
58
58
// / TX_MODE = FFT, RX_MODE = IFFT
59
59
RxTxMode rxtx_mode = TX_MODE;
60
60
// / caller
61
- void * ref = nullptr ;
61
+ void * ref = nullptr ;
62
62
};
63
63
64
64
// / And individual FFT Bin
@@ -213,8 +213,10 @@ class AudioFFTBase : public AudioStream {
213
213
bool begin () override {
214
214
bins = cfg.length / 2 ;
215
215
// define window functions
216
- if (cfg.window_function_fft ==nullptr ) cfg.window_function_fft = cfg.window_function ;
217
- if (cfg.window_function_ifft ==nullptr ) cfg.window_function_ifft = cfg.window_function ;
216
+ if (cfg.window_function_fft == nullptr )
217
+ cfg.window_function_fft = cfg.window_function ;
218
+ if (cfg.window_function_ifft == nullptr )
219
+ cfg.window_function_ifft = cfg.window_function ;
218
220
// define default stride value if not defined
219
221
if (cfg.stride == 0 ) cfg.stride = cfg.length ;
220
222
@@ -229,8 +231,8 @@ class AudioFFTBase : public AudioStream {
229
231
if (cfg.window_function_fft != nullptr ) {
230
232
cfg.window_function_fft ->begin (cfg.length );
231
233
}
232
- if (cfg.window_function_ifft != nullptr
233
- && cfg.window_function_ifft != cfg.window_function_fft ) {
234
+ if (cfg.window_function_ifft != nullptr &&
235
+ cfg.window_function_ifft != cfg.window_function_fft ) {
234
236
cfg.window_function_ifft ->begin (cfg.length );
235
237
}
236
238
@@ -247,7 +249,7 @@ class AudioFFTBase : public AudioStream {
247
249
is_valid_rxtx = true ;
248
250
}
249
251
250
- if (!is_valid_rxtx){
252
+ if (!is_valid_rxtx) {
251
253
LOGE (" Invalid rxtx_mode" );
252
254
return false ;
253
255
}
@@ -267,7 +269,9 @@ class AudioFFTBase : public AudioStream {
267
269
}
268
270
}
269
271
270
- operator bool () override { return p_driver != nullptr && p_driver->isValid (); }
272
+ operator bool () override {
273
+ return p_driver != nullptr && p_driver->isValid ();
274
+ }
271
275
272
276
// / Notify change of audio information
273
277
void setAudioInfo (AudioInfo info) override {
@@ -318,10 +322,11 @@ class AudioFFTBase : public AudioStream {
318
322
if (rfft_data.size () == 0 ) return 0 ;
319
323
320
324
// get data via callback if there is no more data
321
- if (cfg.rxtx_mode == RX_MODE && cfg.callback != nullptr && rfft_data.available () == 0 ) {
325
+ if (cfg.rxtx_mode == RX_MODE && cfg.callback != nullptr &&
326
+ rfft_data.available () == 0 ) {
322
327
cfg.callback (*this );
323
328
}
324
-
329
+
325
330
// execute rfft when we consumed all data
326
331
if (has_rfft_data && rfft_data.available () == 0 ) {
327
332
rfft ();
@@ -352,7 +357,6 @@ class AudioFFTBase : public AudioStream {
352
357
// / time before the fft
353
358
unsigned long resultTimeBegin () { return timestamp_begin; }
354
359
355
-
356
360
// / Determines the result values in the max magnitude bin
357
361
AudioFFTResult result () {
358
362
AudioFFTResult ret_value;
@@ -387,6 +391,161 @@ class AudioFFTBase : public AudioStream {
387
391
}
388
392
}
389
393
394
+ // / Convert the FFT result to MEL spectrum
395
+ float *toMEL (int n_bins, float min_freq = 0 .0f , float max_freq = 0 .0f ) {
396
+ // calculate mel bins
397
+ if (n_bins <= 0 ) n_bins = size ();
398
+ if (min_freq <= 0 .0f ) min_freq = frequency (0 );
399
+ if (max_freq <= 0 .0f ) max_freq = frequency (size () - 1 );
400
+ mel_bins.resize (n_bins);
401
+
402
+ // Convert min and max frequencies to MEL scale
403
+ float min_mel = 2595 .0f * log10 (1 .0f + (min_freq / 700 .0f ));
404
+ float max_mel = 2595 .0f * log10 (1 .0f + (max_freq / 700 .0f ));
405
+
406
+ // Create equally spaced points in the MEL scale
407
+ Vector<float > mel_points;
408
+ mel_points.resize (n_bins + 2 ); // +2 for the endpoints
409
+
410
+ float mel_step = (max_mel - min_mel) / (n_bins + 1 );
411
+ for (int i = 0 ; i < n_bins + 2 ; i++) {
412
+ mel_points[i] = min_mel + i * mel_step;
413
+ }
414
+
415
+ // Convert MEL points back to frequency
416
+ Vector<float > freq_points;
417
+ freq_points.resize (n_bins + 2 );
418
+ for (int i = 0 ; i < n_bins + 2 ; i++) {
419
+ freq_points[i] = 700 .0f * (pow (10 .0f , mel_points[i] / 2595 .0f ) - 1 .0f );
420
+ }
421
+
422
+ // Convert frequency points to FFT bin indices
423
+ Vector<int > bin_indices;
424
+ bin_indices.resize (n_bins + 2 );
425
+ for (int i = 0 ; i < n_bins + 2 ; i++) {
426
+ bin_indices[i] = round (freq_points[i] * cfg.length / cfg.sample_rate );
427
+ // Ensure bin index is within valid range
428
+ if (bin_indices[i] >= bins) bin_indices[i] = bins - 1 ;
429
+ if (bin_indices[i] < 0 ) bin_indices[i] = 0 ;
430
+ }
431
+
432
+ // Create and apply triangular filters
433
+ for (int i = 0 ; i < n_bins; i++) {
434
+ float mel_sum = 0 .0f ;
435
+
436
+ int start_bin = bin_indices[i];
437
+ int mid_bin = bin_indices[i + 1 ];
438
+ int end_bin = bin_indices[i + 2 ];
439
+
440
+ // Apply first half of triangle filter (ascending)
441
+ for (int j = start_bin; j < mid_bin; j++) {
442
+ if (j >= bins) break ;
443
+ float weight = (j - start_bin) / float (mid_bin - start_bin);
444
+ mel_sum += magnitude (j) * weight;
445
+ }
446
+
447
+ // Apply second half of triangle filter (descending)
448
+ for (int j = mid_bin; j < end_bin; j++) {
449
+ if (j >= bins) break ;
450
+ float weight = (end_bin - j) / float (end_bin - mid_bin);
451
+ mel_sum += magnitude (j) * weight;
452
+ }
453
+
454
+ mel_bins[i] = mel_sum;
455
+ }
456
+
457
+ return mel_bins.data ();
458
+ }
459
+
460
+ /* *
461
+ * @brief Convert MEL spectrum back to linear frequency spectrum
462
+ *
463
+ * @param values Pointer to MEL spectrum values
464
+ * @param n_bins Number of MEL bins
465
+ * @return bool Success status
466
+ */
467
+ bool fromMEL (float *values, int n_bins, float min_freq = 0 .0f ,
468
+ float max_freq = 0 .0f ) {
469
+ if (n_bins <= 0 || values == nullptr ) return false ;
470
+
471
+ // Use default frequency range if not specified
472
+ if (min_freq <= 0 .0f ) min_freq = frequency (0 );
473
+ if (max_freq <= 0 .0f ) max_freq = frequency (size () - 1 );
474
+
475
+ // Clear the current magnitude array
476
+ for (int i = 0 ; i < bins; i++) {
477
+ FFTBin bin;
478
+ bin.clear ();
479
+ setBin (i, bin);
480
+ }
481
+
482
+ // Convert min and max frequencies to MEL scale
483
+ float min_mel = 2595 .0f * log10 (1 .0f + (min_freq / 700 .0f ));
484
+ float max_mel = 2595 .0f * log10 (1 .0f + (max_freq / 700 .0f ));
485
+
486
+ // Create equally spaced points in the MEL scale
487
+ Vector<float > mel_points;
488
+ mel_points.resize (n_bins + 2 ); // +2 for the endpoints
489
+
490
+ float mel_step = (max_mel - min_mel) / (n_bins + 1 );
491
+ for (int i = 0 ; i < n_bins + 2 ; i++) {
492
+ mel_points[i] = min_mel + i * mel_step;
493
+ }
494
+
495
+ // Convert MEL points back to frequency
496
+ Vector<float > freq_points;
497
+ freq_points.resize (n_bins + 2 );
498
+ for (int i = 0 ; i < n_bins + 2 ; i++) {
499
+ freq_points[i] = 700 .0f * (pow (10 .0f , mel_points[i] / 2595 .0f ) - 1 .0f );
500
+ }
501
+
502
+ // Convert frequency points to FFT bin indices
503
+ Vector<int > bin_indices;
504
+ bin_indices.resize (n_bins + 2 );
505
+ for (int i = 0 ; i < n_bins + 2 ; i++) {
506
+ bin_indices[i] = round (freq_points[i] * cfg.length / cfg.sample_rate );
507
+ // Ensure bin index is within valid range
508
+ if (bin_indices[i] >= bins) bin_indices[i] = bins - 1 ;
509
+ if (bin_indices[i] < 0 ) bin_indices[i] = 0 ;
510
+ }
511
+
512
+ // Distribute MEL energy back to linear frequency bins
513
+ Vector<float > linear_magnitudes;
514
+ linear_magnitudes.resize (bins);
515
+
516
+ for (int i = 0 ; i < n_bins; i++) {
517
+ int start_bin = bin_indices[i];
518
+ int mid_bin = bin_indices[i + 1 ];
519
+ int end_bin = bin_indices[i + 2 ];
520
+
521
+ // Apply first half of triangle (ascending)
522
+ for (int j = start_bin; j < mid_bin; j++) {
523
+ if (j >= bins) break ;
524
+ float weight = (j - start_bin) / float (mid_bin - start_bin);
525
+ linear_magnitudes[j] += values[i] * weight;
526
+ }
527
+
528
+ // Apply second half of triangle (descending)
529
+ for (int j = mid_bin; j < end_bin; j++) {
530
+ if (j >= bins) break ;
531
+ float weight = (end_bin - j) / float (end_bin - mid_bin);
532
+ linear_magnitudes[j] += values[i] * weight;
533
+ }
534
+ }
535
+
536
+ // Set magnitude values and create simple phase (all zeros)
537
+ for (int i = 0 ; i < bins; i++) {
538
+ if (linear_magnitudes[i] > 0 ) {
539
+ FFTBin bin;
540
+ bin.real = linear_magnitudes[i];
541
+ bin.img = 0 .0f ;
542
+ setBin (i, bin);
543
+ }
544
+ }
545
+
546
+ return true ;
547
+ }
548
+
390
549
// / provides access to the FFTDriver which implements the basic FFT
391
550
// / functionality
392
551
FFTDriver *driver () { return p_driver; }
@@ -401,7 +560,7 @@ class AudioFFTBase : public AudioStream {
401
560
}
402
561
403
562
// / Determine the bin number from the frequency
404
- int frequencyToBin (int freq){
563
+ int frequencyToBin (int freq) {
405
564
int max_freq = cfg.sample_rate / 2 ;
406
565
return map (freq, 0 , max_freq, 0 , size ());
407
566
}
@@ -425,7 +584,7 @@ class AudioFFTBase : public AudioStream {
425
584
}
426
585
427
586
// / calculates the phase
428
- float phase (int bin){
587
+ float phase (int bin) {
429
588
FFTBin fft_bin;
430
589
getBin (bin, fft_bin);
431
590
return atan2 (fft_bin.img , fft_bin.real );
@@ -464,18 +623,16 @@ class AudioFFTBase : public AudioStream {
464
623
return rc_first_half && rc_2nd_half;
465
624
}
466
625
// / sets the value of a bin
467
- bool setBin (int pos, FFTBin &bin) {
468
- return setBin (pos, bin.real , bin.img );
469
- }
626
+ bool setBin (int pos, FFTBin &bin) { return setBin (pos, bin.real , bin.img ); }
470
627
// / gets the value of a bin
471
628
bool getBin (int pos, FFTBin &bin) { return p_driver->getBin (pos, bin); }
472
629
473
630
// / clears the fft data
474
- void clearBins (){
475
- FFTBin empty{0 ,0 };
476
- for (int j= 0 ; j< size (); j++){
631
+ void clearBins () {
632
+ FFTBin empty{0 , 0 };
633
+ for (int j = 0 ; j < size (); j++) {
477
634
setBin (j, empty);
478
- }
635
+ }
479
636
}
480
637
481
638
// / Provides the actual configuration
@@ -491,6 +648,7 @@ class AudioFFTBase : public AudioStream {
491
648
FFTInverseOverlapAdder rfft_add{0 };
492
649
Vector<float > l_magnitudes{0 };
493
650
Vector<float > step_data{0 };
651
+ Vector<float > mel_bins{0 };
494
652
SingleBuffer<uint8_t > stride_buffer{0 };
495
653
RingBuffer<uint8_t > rfft_data{0 };
496
654
bool has_rfft_data = false ;
@@ -502,15 +660,16 @@ class AudioFFTBase : public AudioStream {
502
660
T sample;
503
661
for (int j = 0 ; j < count; j += cfg.channels ) {
504
662
sample = dataT[j + cfg.channel_used ];
505
- if (writeStrideBuffer ((uint8_t *)&sample, sizeof (T))){
663
+ if (writeStrideBuffer ((uint8_t *)&sample, sizeof (T))) {
506
664
// process data if buffer is full
507
- T* samples = (T*) stride_buffer.data ();
665
+ T * samples = (T *) stride_buffer.data ();
508
666
int sample_count = stride_buffer.size () / sizeof (T);
509
667
assert (sample_count == cfg.length );
510
- for (int j= 0 ; j< sample_count; j++){
668
+ for (int j = 0 ; j < sample_count; j++) {
511
669
T out_sample = samples[j];
512
670
T windowed_sample = windowedSample (out_sample, j);
513
- float scaled_sample = 1 .0f / NumberConverter::maxValueT<T>() * windowed_sample;
671
+ float scaled_sample =
672
+ 1 .0f / NumberConverter::maxValueT<T>() * windowed_sample;
514
673
p_driver->setValue (j, scaled_sample);
515
674
}
516
675
@@ -520,8 +679,7 @@ class AudioFFTBase : public AudioStream {
520
679
stride_buffer.clearArray (cfg.stride * sizeof (T));
521
680
522
681
// validate available data in stride buffer
523
- if (cfg.stride == cfg.length ) assert (stride_buffer.available ()==0 );
524
-
682
+ if (cfg.stride == cfg.length ) assert (stride_buffer.available () == 0 );
525
683
}
526
684
}
527
685
}
0 commit comments