Skip to content

Commit 056c35a

Browse files
committed
AudioFFT: toMEL() and fromMEL()
1 parent 4453928 commit 056c35a

File tree

1 file changed

+183
-25
lines changed

1 file changed

+183
-25
lines changed

src/AudioTools/AudioLibs/AudioFFT.h

Lines changed: 183 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ struct AudioFFTConfig : public AudioInfo {
5858
/// TX_MODE = FFT, RX_MODE = IFFT
5959
RxTxMode rxtx_mode = TX_MODE;
6060
/// caller
61-
void* ref = nullptr;
61+
void *ref = nullptr;
6262
};
6363

6464
/// And individual FFT Bin
@@ -213,8 +213,10 @@ class AudioFFTBase : public AudioStream {
213213
bool begin() override {
214214
bins = cfg.length / 2;
215215
// define window functions
216-
if (cfg.window_function_fft==nullptr) cfg.window_function_fft = cfg.window_function;
217-
if (cfg.window_function_ifft==nullptr) cfg.window_function_ifft = cfg.window_function;
216+
if (cfg.window_function_fft == nullptr)
217+
cfg.window_function_fft = cfg.window_function;
218+
if (cfg.window_function_ifft == nullptr)
219+
cfg.window_function_ifft = cfg.window_function;
218220
// define default stride value if not defined
219221
if (cfg.stride == 0) cfg.stride = cfg.length;
220222

@@ -229,8 +231,8 @@ class AudioFFTBase : public AudioStream {
229231
if (cfg.window_function_fft != nullptr) {
230232
cfg.window_function_fft->begin(cfg.length);
231233
}
232-
if (cfg.window_function_ifft != nullptr
233-
&& cfg.window_function_ifft != cfg.window_function_fft) {
234+
if (cfg.window_function_ifft != nullptr &&
235+
cfg.window_function_ifft != cfg.window_function_fft) {
234236
cfg.window_function_ifft->begin(cfg.length);
235237
}
236238

@@ -247,7 +249,7 @@ class AudioFFTBase : public AudioStream {
247249
is_valid_rxtx = true;
248250
}
249251

250-
if (!is_valid_rxtx){
252+
if (!is_valid_rxtx) {
251253
LOGE("Invalid rxtx_mode");
252254
return false;
253255
}
@@ -267,7 +269,9 @@ class AudioFFTBase : public AudioStream {
267269
}
268270
}
269271

270-
operator bool() override { return p_driver != nullptr && p_driver->isValid(); }
272+
operator bool() override {
273+
return p_driver != nullptr && p_driver->isValid();
274+
}
271275

272276
/// Notify change of audio information
273277
void setAudioInfo(AudioInfo info) override {
@@ -318,10 +322,11 @@ class AudioFFTBase : public AudioStream {
318322
if (rfft_data.size() == 0) return 0;
319323

320324
// get data via callback if there is no more data
321-
if (cfg.rxtx_mode == RX_MODE && cfg.callback != nullptr && rfft_data.available() == 0) {
325+
if (cfg.rxtx_mode == RX_MODE && cfg.callback != nullptr &&
326+
rfft_data.available() == 0) {
322327
cfg.callback(*this);
323328
}
324-
329+
325330
// execute rfft when we consumed all data
326331
if (has_rfft_data && rfft_data.available() == 0) {
327332
rfft();
@@ -352,7 +357,6 @@ class AudioFFTBase : public AudioStream {
352357
/// time before the fft
353358
unsigned long resultTimeBegin() { return timestamp_begin; }
354359

355-
356360
/// Determines the result values in the max magnitude bin
357361
AudioFFTResult result() {
358362
AudioFFTResult ret_value;
@@ -387,6 +391,161 @@ class AudioFFTBase : public AudioStream {
387391
}
388392
}
389393

394+
/// Convert the FFT result to MEL spectrum
395+
float *toMEL(int n_bins, float min_freq = 0.0f, float max_freq = 0.0f) {
396+
// calculate mel bins
397+
if (n_bins <= 0) n_bins = size();
398+
if (min_freq <= 0.0f) min_freq = frequency(0);
399+
if (max_freq <= 0.0f) max_freq = frequency(size() - 1);
400+
mel_bins.resize(n_bins);
401+
402+
// Convert min and max frequencies to MEL scale
403+
float min_mel = 2595.0f * log10(1.0f + (min_freq / 700.0f));
404+
float max_mel = 2595.0f * log10(1.0f + (max_freq / 700.0f));
405+
406+
// Create equally spaced points in the MEL scale
407+
Vector<float> mel_points;
408+
mel_points.resize(n_bins + 2); // +2 for the endpoints
409+
410+
float mel_step = (max_mel - min_mel) / (n_bins + 1);
411+
for (int i = 0; i < n_bins + 2; i++) {
412+
mel_points[i] = min_mel + i * mel_step;
413+
}
414+
415+
// Convert MEL points back to frequency
416+
Vector<float> freq_points;
417+
freq_points.resize(n_bins + 2);
418+
for (int i = 0; i < n_bins + 2; i++) {
419+
freq_points[i] = 700.0f * (pow(10.0f, mel_points[i] / 2595.0f) - 1.0f);
420+
}
421+
422+
// Convert frequency points to FFT bin indices
423+
Vector<int> bin_indices;
424+
bin_indices.resize(n_bins + 2);
425+
for (int i = 0; i < n_bins + 2; i++) {
426+
bin_indices[i] = round(freq_points[i] * cfg.length / cfg.sample_rate);
427+
// Ensure bin index is within valid range
428+
if (bin_indices[i] >= bins) bin_indices[i] = bins - 1;
429+
if (bin_indices[i] < 0) bin_indices[i] = 0;
430+
}
431+
432+
// Create and apply triangular filters
433+
for (int i = 0; i < n_bins; i++) {
434+
float mel_sum = 0.0f;
435+
436+
int start_bin = bin_indices[i];
437+
int mid_bin = bin_indices[i + 1];
438+
int end_bin = bin_indices[i + 2];
439+
440+
// Apply first half of triangle filter (ascending)
441+
for (int j = start_bin; j < mid_bin; j++) {
442+
if (j >= bins) break;
443+
float weight = (j - start_bin) / float(mid_bin - start_bin);
444+
mel_sum += magnitude(j) * weight;
445+
}
446+
447+
// Apply second half of triangle filter (descending)
448+
for (int j = mid_bin; j < end_bin; j++) {
449+
if (j >= bins) break;
450+
float weight = (end_bin - j) / float(end_bin - mid_bin);
451+
mel_sum += magnitude(j) * weight;
452+
}
453+
454+
mel_bins[i] = mel_sum;
455+
}
456+
457+
return mel_bins.data();
458+
}
459+
460+
/**
461+
* @brief Convert MEL spectrum back to linear frequency spectrum
462+
*
463+
* @param values Pointer to MEL spectrum values
464+
* @param n_bins Number of MEL bins
465+
* @return bool Success status
466+
*/
467+
bool fromMEL(float *values, int n_bins, float min_freq = 0.0f,
468+
float max_freq = 0.0f) {
469+
if (n_bins <= 0 || values == nullptr) return false;
470+
471+
// Use default frequency range if not specified
472+
if (min_freq <= 0.0f) min_freq = frequency(0);
473+
if (max_freq <= 0.0f) max_freq = frequency(size() - 1);
474+
475+
// Clear the current magnitude array
476+
for (int i = 0; i < bins; i++) {
477+
FFTBin bin;
478+
bin.clear();
479+
setBin(i, bin);
480+
}
481+
482+
// Convert min and max frequencies to MEL scale
483+
float min_mel = 2595.0f * log10(1.0f + (min_freq / 700.0f));
484+
float max_mel = 2595.0f * log10(1.0f + (max_freq / 700.0f));
485+
486+
// Create equally spaced points in the MEL scale
487+
Vector<float> mel_points;
488+
mel_points.resize(n_bins + 2); // +2 for the endpoints
489+
490+
float mel_step = (max_mel - min_mel) / (n_bins + 1);
491+
for (int i = 0; i < n_bins + 2; i++) {
492+
mel_points[i] = min_mel + i * mel_step;
493+
}
494+
495+
// Convert MEL points back to frequency
496+
Vector<float> freq_points;
497+
freq_points.resize(n_bins + 2);
498+
for (int i = 0; i < n_bins + 2; i++) {
499+
freq_points[i] = 700.0f * (pow(10.0f, mel_points[i] / 2595.0f) - 1.0f);
500+
}
501+
502+
// Convert frequency points to FFT bin indices
503+
Vector<int> bin_indices;
504+
bin_indices.resize(n_bins + 2);
505+
for (int i = 0; i < n_bins + 2; i++) {
506+
bin_indices[i] = round(freq_points[i] * cfg.length / cfg.sample_rate);
507+
// Ensure bin index is within valid range
508+
if (bin_indices[i] >= bins) bin_indices[i] = bins - 1;
509+
if (bin_indices[i] < 0) bin_indices[i] = 0;
510+
}
511+
512+
// Distribute MEL energy back to linear frequency bins
513+
Vector<float> linear_magnitudes;
514+
linear_magnitudes.resize(bins);
515+
516+
for (int i = 0; i < n_bins; i++) {
517+
int start_bin = bin_indices[i];
518+
int mid_bin = bin_indices[i + 1];
519+
int end_bin = bin_indices[i + 2];
520+
521+
// Apply first half of triangle (ascending)
522+
for (int j = start_bin; j < mid_bin; j++) {
523+
if (j >= bins) break;
524+
float weight = (j - start_bin) / float(mid_bin - start_bin);
525+
linear_magnitudes[j] += values[i] * weight;
526+
}
527+
528+
// Apply second half of triangle (descending)
529+
for (int j = mid_bin; j < end_bin; j++) {
530+
if (j >= bins) break;
531+
float weight = (end_bin - j) / float(end_bin - mid_bin);
532+
linear_magnitudes[j] += values[i] * weight;
533+
}
534+
}
535+
536+
// Set magnitude values and create simple phase (all zeros)
537+
for (int i = 0; i < bins; i++) {
538+
if (linear_magnitudes[i] > 0) {
539+
FFTBin bin;
540+
bin.real = linear_magnitudes[i];
541+
bin.img = 0.0f;
542+
setBin(i, bin);
543+
}
544+
}
545+
546+
return true;
547+
}
548+
390549
/// provides access to the FFTDriver which implements the basic FFT
391550
/// functionality
392551
FFTDriver *driver() { return p_driver; }
@@ -401,7 +560,7 @@ class AudioFFTBase : public AudioStream {
401560
}
402561

403562
/// Determine the bin number from the frequency
404-
int frequencyToBin(int freq){
563+
int frequencyToBin(int freq) {
405564
int max_freq = cfg.sample_rate / 2;
406565
return map(freq, 0, max_freq, 0, size());
407566
}
@@ -425,7 +584,7 @@ class AudioFFTBase : public AudioStream {
425584
}
426585

427586
/// calculates the phase
428-
float phase(int bin){
587+
float phase(int bin) {
429588
FFTBin fft_bin;
430589
getBin(bin, fft_bin);
431590
return atan2(fft_bin.img, fft_bin.real);
@@ -464,18 +623,16 @@ class AudioFFTBase : public AudioStream {
464623
return rc_first_half && rc_2nd_half;
465624
}
466625
/// sets the value of a bin
467-
bool setBin(int pos, FFTBin &bin) {
468-
return setBin(pos, bin.real, bin.img);
469-
}
626+
bool setBin(int pos, FFTBin &bin) { return setBin(pos, bin.real, bin.img); }
470627
/// gets the value of a bin
471628
bool getBin(int pos, FFTBin &bin) { return p_driver->getBin(pos, bin); }
472629

473630
/// clears the fft data
474-
void clearBins(){
475-
FFTBin empty{0,0};
476-
for (int j=0; j< size(); j++){
631+
void clearBins() {
632+
FFTBin empty{0, 0};
633+
for (int j = 0; j < size(); j++) {
477634
setBin(j, empty);
478-
}
635+
}
479636
}
480637

481638
/// Provides the actual configuration
@@ -491,6 +648,7 @@ class AudioFFTBase : public AudioStream {
491648
FFTInverseOverlapAdder rfft_add{0};
492649
Vector<float> l_magnitudes{0};
493650
Vector<float> step_data{0};
651+
Vector<float> mel_bins{0};
494652
SingleBuffer<uint8_t> stride_buffer{0};
495653
RingBuffer<uint8_t> rfft_data{0};
496654
bool has_rfft_data = false;
@@ -502,15 +660,16 @@ class AudioFFTBase : public AudioStream {
502660
T sample;
503661
for (int j = 0; j < count; j += cfg.channels) {
504662
sample = dataT[j + cfg.channel_used];
505-
if (writeStrideBuffer((uint8_t *)&sample, sizeof(T))){
663+
if (writeStrideBuffer((uint8_t *)&sample, sizeof(T))) {
506664
// process data if buffer is full
507-
T* samples = (T*) stride_buffer.data();
665+
T *samples = (T *)stride_buffer.data();
508666
int sample_count = stride_buffer.size() / sizeof(T);
509667
assert(sample_count == cfg.length);
510-
for (int j=0; j< sample_count; j++){
668+
for (int j = 0; j < sample_count; j++) {
511669
T out_sample = samples[j];
512670
T windowed_sample = windowedSample(out_sample, j);
513-
float scaled_sample = 1.0f / NumberConverter::maxValueT<T>() * windowed_sample;
671+
float scaled_sample =
672+
1.0f / NumberConverter::maxValueT<T>() * windowed_sample;
514673
p_driver->setValue(j, scaled_sample);
515674
}
516675

@@ -520,8 +679,7 @@ class AudioFFTBase : public AudioStream {
520679
stride_buffer.clearArray(cfg.stride * sizeof(T));
521680

522681
// validate available data in stride buffer
523-
if (cfg.stride == cfg.length) assert(stride_buffer.available()==0);
524-
682+
if (cfg.stride == cfg.length) assert(stride_buffer.available() == 0);
525683
}
526684
}
527685
}

0 commit comments

Comments
 (0)