diff --git a/apps/common-app/src/examples/Piano/Piano.tsx b/apps/common-app/src/examples/Piano/Piano.tsx index e455a080..0313baca 100644 --- a/apps/common-app/src/examples/Piano/Piano.tsx +++ b/apps/common-app/src/examples/Piano/Piano.tsx @@ -30,6 +30,10 @@ const Piano: FC = () => { }); notesRef.current = newNotes as Record; + + return () => { + audioContextRef.current?.close(); + }; }, []); return ( diff --git a/apps/common-app/src/examples/Piano/PianoNote.tsx b/apps/common-app/src/examples/Piano/PianoNote.tsx index 4d795875..0e8b9352 100644 --- a/apps/common-app/src/examples/Piano/PianoNote.tsx +++ b/apps/common-app/src/examples/Piano/PianoNote.tsx @@ -49,9 +49,6 @@ class PianoNote { this.oscillator.stop(tNow + 0.1); - this.gain.disconnect(this.audioContext.destination); - this.oscillator.disconnect(this.gain); - this.oscillator = null; this.gain = null; } diff --git a/apps/common-app/src/examples/SharedUtils/soundEngines/HiHat.ts b/apps/common-app/src/examples/SharedUtils/soundEngines/HiHat.ts index fff4d2fb..34e79efe 100644 --- a/apps/common-app/src/examples/SharedUtils/soundEngines/HiHat.ts +++ b/apps/common-app/src/examples/SharedUtils/soundEngines/HiHat.ts @@ -41,7 +41,7 @@ class HiHat implements SoundEngine { gain.gain.exponentialRampToValueAtTime(this.volume * 0.33, time + 0.03); gain.gain.exponentialRampToValueAtTime(this.volume * 0.0001, time + 0.3); gain.gain.setValueAtTime(0, time + 0.3 + 0.001); - //number of inputs of filter is 1 on android- check it + oscillator.connect(bandpassFilter); bandpassFilter.connect(highpassFilter); highpassFilter.connect(gain); diff --git a/packages/react-native-audio-api/android/CMakeLists.txt b/packages/react-native-audio-api/android/CMakeLists.txt index dcfe8efd..ee853ec9 100644 --- a/packages/react-native-audio-api/android/CMakeLists.txt +++ b/packages/react-native-audio-api/android/CMakeLists.txt @@ -4,6 +4,22 @@ project(react-native-audio-api) set(CMAKE_VERBOSE_MAKEFILE ON) set(CMAKE_CXX_STANDARD 17) +# Detect the
operating system +if(APPLE) + set(HAVE_ACCELERATE TRUE) +endif() + +# Detect the processor and SIMD support. set() alone only creates a CMake +# variable, so each flag is also passed to the compiler as a -D definition; +# otherwise the #if defined(...) checks in utils/VectorMath.cpp never match. +if(CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") + set(HAVE_ARM_NEON_INTRINSICS TRUE) + add_definitions(-DHAVE_ARM_NEON_INTRINSICS) +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64") + set(HAVE_X86_SSE2 TRUE) + add_definitions(-DHAVE_X86_SSE2) +endif() + include("${REACT_NATIVE_DIR}/ReactAndroid/cmake-utils/folly-flags.cmake") add_compile_options(${folly_FLAGS}) diff --git a/packages/react-native-audio-api/android/src/main/cpp/AudioBuffer/AudioBuffer.cpp b/packages/react-native-audio-api/android/src/main/cpp/AudioBuffer/AudioBuffer.cpp index 2e53cfef..cab6607e 100644 --- a/packages/react-native-audio-api/android/src/main/cpp/AudioBuffer/AudioBuffer.cpp +++ b/packages/react-native-audio-api/android/src/main/cpp/AudioBuffer/AudioBuffer.cpp @@ -11,7 +11,7 @@ AudioBuffer::AudioBuffer(int numberOfChannels, int length, int sampleRate) throw std::invalid_argument("Invalid number of channels"); } - channels_ = new float*[numberOfChannels]; + channels_ = new float *[numberOfChannels]; for (int i = 0; i < numberOfChannels; i++) { channels_[i] = new float[length]; diff --git a/packages/react-native-audio-api/android/src/main/cpp/AudioBuffer/AudioBuffer.h b/packages/react-native-audio-api/android/src/main/cpp/AudioBuffer/AudioBuffer.h index 06d11505..ba2e5514 100644 --- a/packages/react-native-audio-api/android/src/main/cpp/AudioBuffer/AudioBuffer.h +++ b/packages/react-native-audio-api/android/src/main/cpp/AudioBuffer/AudioBuffer.h @@ -1,9 +1,9 @@ #pragma once +#include #include #include #include -#include namespace audioapi { diff --git a/packages/react-native-audio-api/android/src/main/cpp/AudioBufferSourceNode/AudioBufferSourceNode.cpp b/packages/react-native-audio-api/android/src/main/cpp/AudioBufferSourceNode/AudioBufferSourceNode.cpp index a4fb14b4..b29121d0 100644 --- a/packages/react-native-audio-api/android/src/main/cpp/AudioBufferSourceNode/AudioBufferSourceNode.cpp +++
b/packages/react-native-audio-api/android/src/main/cpp/AudioBufferSourceNode/AudioBufferSourceNode.cpp @@ -14,11 +14,11 @@ bool AudioBufferSourceNode::getLoop() const { } std::shared_ptr AudioBufferSourceNode::getBuffer() const { - if (!buffer_.has_value()) { - throw std::runtime_error("Buffer is not set"); - } + if (!buffer_.has_value()) { + throw std::runtime_error("Buffer is not set"); + } - return buffer_.value(); + return buffer_.value(); } void AudioBufferSourceNode::setLoop(bool loop) { diff --git a/packages/react-native-audio-api/android/src/main/cpp/AudioDestinationNode/AudioDestinationNode.cpp b/packages/react-native-audio-api/android/src/main/cpp/AudioDestinationNode/AudioDestinationNode.cpp index 136e5bbd..c1e3bc3a 100644 --- a/packages/react-native-audio-api/android/src/main/cpp/AudioDestinationNode/AudioDestinationNode.cpp +++ b/packages/react-native-audio-api/android/src/main/cpp/AudioDestinationNode/AudioDestinationNode.cpp @@ -25,9 +25,7 @@ bool AudioDestinationNode::processAudio(float *audioData, int32_t numFrames) { for (auto &node : inputNodes_) { if (node->processAudio(mixingBuffer.get(), numFrames)) { - for (int i = 0; i < numSamples; i++) { - audioData[i] += mixingBuffer[i]; - } + VectorMath::add(audioData, mixingBuffer.get(), audioData, numSamples); } } diff --git a/packages/react-native-audio-api/android/src/main/cpp/AudioDestinationNode/AudioDestinationNode.h b/packages/react-native-audio-api/android/src/main/cpp/AudioDestinationNode/AudioDestinationNode.h index b5025f00..3124e704 100644 --- a/packages/react-native-audio-api/android/src/main/cpp/AudioDestinationNode/AudioDestinationNode.h +++ b/packages/react-native-audio-api/android/src/main/cpp/AudioDestinationNode/AudioDestinationNode.h @@ -5,6 +5,7 @@ #include #include "AudioNode.h" +#include "VectorMath.h" namespace audioapi { diff --git a/packages/react-native-audio-api/android/src/main/cpp/StereoPannerNode/StereoPannerNode.cpp 
b/packages/react-native-audio-api/android/src/main/cpp/StereoPannerNode/StereoPannerNode.cpp index 472240f6..b02c2b3d 100644 --- a/packages/react-native-audio-api/android/src/main/cpp/StereoPannerNode/StereoPannerNode.cpp +++ b/packages/react-native-audio-api/android/src/main/cpp/StereoPannerNode/StereoPannerNode.cpp @@ -50,14 +50,9 @@ bool StereoPannerNode::processAudio(float *audioData, int32_t numFrames) { } void StereoPannerNode::normalize(float *audioData, int32_t numFrames) { - auto maxValue = 1.0f; - - for (int i = 0; i < numFrames * channelCount_; i++) { - maxValue = std::max(maxValue, std::abs(audioData[i])); - } - - for (int i = 0; i < numFrames * channelCount_; i++) { - audioData[i] /= maxValue; - } + auto maxValue = std::max( + 1.0f, VectorMath::maximumMagnitude(audioData, numFrames * channelCount_)); + VectorMath::multiplyByScalar( + audioData, 1.0f / maxValue, audioData, numFrames * channelCount_); } } // namespace audioapi diff --git a/packages/react-native-audio-api/android/src/main/cpp/StereoPannerNode/StereoPannerNode.h b/packages/react-native-audio-api/android/src/main/cpp/StereoPannerNode/StereoPannerNode.h index d9d42250..f3e8e6d3 100644 --- a/packages/react-native-audio-api/android/src/main/cpp/StereoPannerNode/StereoPannerNode.h +++ b/packages/react-native-audio-api/android/src/main/cpp/StereoPannerNode/StereoPannerNode.h @@ -5,6 +5,7 @@ #include "AudioNode.h" #include "AudioParam.h" +#include "VectorMath.h" namespace audioapi { diff --git a/packages/react-native-audio-api/android/src/main/cpp/utils/VectorMath.cpp b/packages/react-native-audio-api/android/src/main/cpp/utils/VectorMath.cpp new file mode 100644 index 00000000..f03b50c8 --- /dev/null +++ b/packages/react-native-audio-api/android/src/main/cpp/utils/VectorMath.cpp @@ -0,0 +1,609 @@ +/* + * Copyright (C) 2010, Google Inc. All rights reserved. + * Copyright (C) 2020, Apple Inc. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "VectorMath.h" + +#if defined(HAVE_ACCELERATE) +#include +#endif + +#if defined(HAVE_X86_SSE2) +#include +#endif + +#if defined(HAVE_ARM_NEON_INTRINSICS) +#include +#endif + +#include +#include + +namespace audioapi::VectorMath { + +#if defined(HAVE_ACCELERATE) + +void multiplyByScalar( + const float *inputVector, + float scalar, + float *outputVector, + size_t numberOfElementsToProcess) { + vDSP_vsmul( + inputVector, 1, &scalar, outputVector, 1, numberOfElementsToProcess); +} + +void addScalar( + const float *inputVector, + float scalar, + float *outputVector, + size_t numberOfElementsToProcess) { + vDSP_vsadd( + inputVector, 1, &scalar, outputVector, 1, numberOfElementsToProcess); +} + +void add( + const float *inputVector1, + const float *inputVector2, + float *outputVector, + size_t numberOfElementsToProcess) { + vDSP_vadd( + inputVector1, + 1, + inputVector2, + 1, + outputVector, + 1, + numberOfElementsToProcess); +} + +void substract( + const float *inputVector1, + const float *inputVector2, + float *outputVector, + size_t numberOfElementsToProcess) { + vDSP_vsub( + inputVector1, + 1, + inputVector2, + 1, + outputVector, + 1, + numberOfElementsToProcess); +} + +void multiply( + const float *inputVector1, + const float *inputVector2, + float *outputVector, + size_t numberOfElementsToProcess) { + vDSP_vmul( + inputVector1, + 1, + inputVector2, + 1, + outputVector, + 1, + numberOfElementsToProcess); +} + +float maximumMagnitude( + const float *inputVector, + size_t numberOfElementsToProcess) { + float maximumValue = 0; + vDSP_maxmgv(inputVector, 1, &maximumValue, numberOfElementsToProcess); + return maximumValue; +} + +#else + +#if defined(HAVE_X86_SSE2) +static inline bool is16ByteAligned(const float *vector) { + return !(reinterpret_cast(vector) & 0x0F); +} +#endif + +void multiplyByScalar( + const float *inputVector, + float scalar, + float *outputVector, + size_t numberOfElementsToProcess) { + size_t n = numberOfElementsToProcess; + +#if 
defined(HAVE_X86_SSE2) + + // If the inputVector address is not 16-byte aligned, the first several frames + // (at most three) should be processed separately. + while (!is16ByteAligned(inputVector) && n) { + *outputVector = scalar * *inputVector; + inputVector++; + outputVector++; + n--; + } + + // Now the inputVector address is aligned and start to apply SSE. + size_t group = n / 4; + __m128 mScale = _mm_set_ps1(scalar); + __m128 *pSource; + __m128 *pDest; + __m128 dest; + + if (!is16ByteAligned(outputVector)) { + while (group--) { + pSource = reinterpret_cast<__m128 *>(const_cast(inputVector)); + dest = _mm_mul_ps(*pSource, mScale); + _mm_storeu_ps(outputVector, dest); + + inputVector += 4; + outputVector += 4; + } + } else { + while (group--) { + pSource = reinterpret_cast<__m128 *>(const_cast(inputVector)); + pDest = reinterpret_cast<__m128 *>(outputVector); + *pDest = _mm_mul_ps(*pSource, mScale); + + inputVector += 4; + outputVector += 4; + } + } + + // Non-SSE handling for remaining frames which is less than 4. + n %= 4; +#elif defined(HAVE_ARM_NEON_INTRINSICS) + size_t tailFrames = n % 4; + const float *endP = outputVector + n - tailFrames; + + while (outputVector < endP) { + float32x4_t source = vld1q_f32(inputVector); + vst1q_f32(outputVector, vmulq_n_f32(source, scalar)); + + inputVector += 4; + outputVector += 4; + } + n = tailFrames; +#endif + while (n--) { + *outputVector = scalar * *inputVector; + ++inputVector; + ++outputVector; + } +} + +void addScalar( + const float *inputVector, + float scalar, + float *outputVector, + size_t numberOfElementsToProcess) { + size_t n = numberOfElementsToProcess; + +#if defined(HAVE_X86_SSE2) +// If the inputVector address is not 16-byte aligned, the first several frames +// (at most three) should be processed separately. 
+ while (!is16ByteAligned(inputVector) && n) { + *outputVector = *inputVector + scalar; + inputVector++; + outputVector++; + n--; + } + + // Now the inputVector address is aligned and start to apply SSE. + size_t group = n / 4; + __m128 mScalar = _mm_set_ps1(scalar); + __m128 *pSource; + __m128 *pDest; + __m128 dest; + + bool destAligned = is16ByteAligned(outputVector); + if (destAligned) { // all aligned + while (group--) { + pSource = reinterpret_cast<__m128 *>(const_cast(inputVector)); + pDest = reinterpret_cast<__m128 *>(outputVector); + *pDest = _mm_add_ps(*pSource, mScalar); + + inputVector += 4; + outputVector += 4; + } + } else { + while (group--) { + pSource = reinterpret_cast<__m128 *>(const_cast(inputVector)); + dest = _mm_add_ps(*pSource, mScalar); + _mm_storeu_ps(outputVector, dest); + + inputVector += 4; + outputVector += 4; + } + } + + // Non-SSE handling for remaining frames which is less than 4. + n %= 4; +#elif defined(HAVE_ARM_NEON_INTRINSICS) + size_t tailFrames = n % 4; + const float *endP = outputVector + n - tailFrames; + float32x4_t scalarVector = vdupq_n_f32(scalar); + + while (outputVector < endP) { + float32x4_t source = vld1q_f32(inputVector); + vst1q_f32(outputVector, vaddq_f32(source, scalarVector)); + + inputVector += 4; + outputVector += 4; + } + n = tailFrames; +#endif + while (n--) { + *outputVector = *inputVector + scalar; + ++inputVector; + ++outputVector; + } +} + +void add( + const float *inputVector1, + const float *inputVector2, + float *outputVector, + size_t numberOfElementsToProcess) { + size_t n = numberOfElementsToProcess; + +#if defined(HAVE_X86_SSE2) + // If the inputVector address is not 16-byte aligned, the first several frames + // (at most three) should be processed separately. + while (!is16ByteAligned(inputVector1) && n) { + *outputVector = *inputVector1 + *inputVector2; + inputVector1++; + inputVector2++; + outputVector++; + n--; + } + + // Now the inputVector1 address is aligned and start to apply SSE. 
+ size_t group = n / 4; + __m128 *pSource1; + __m128 *pSource2; + __m128 *pDest; + __m128 source2; + __m128 dest; + + bool source2Aligned = is16ByteAligned(inputVector2); + bool destAligned = is16ByteAligned(outputVector); + + if (source2Aligned && destAligned) { // all aligned + while (group--) { + pSource1 = reinterpret_cast<__m128 *>(const_cast(inputVector1)); + pSource2 = reinterpret_cast<__m128 *>(const_cast(inputVector2)); + pDest = reinterpret_cast<__m128 *>(outputVector); + *pDest = _mm_add_ps(*pSource1, *pSource2); + + inputVector1 += 4; + inputVector2 += 4; + outputVector += 4; + } + + } else if (source2Aligned && !destAligned) { // source2 aligned but dest not + // aligned + while (group--) { + pSource1 = reinterpret_cast<__m128 *>(const_cast(inputVector1)); + pSource2 = reinterpret_cast<__m128 *>(const_cast(inputVector2)); + dest = _mm_add_ps(*pSource1, *pSource2); + _mm_storeu_ps(outputVector, dest); + + inputVector1 += 4; + inputVector2 += 4; + outputVector += 4; + } + + } else if (!source2Aligned && destAligned) { // source2 not aligned but dest + // aligned + while (group--) { + pSource1 = reinterpret_cast<__m128 *>(const_cast(inputVector1)); + source2 = _mm_loadu_ps(inputVector2); + pDest = reinterpret_cast<__m128 *>(outputVector); + *pDest = _mm_add_ps(*pSource1, source2); + + inputVector1 += 4; + inputVector2 += 4; + outputVector += 4; + } + } else if (!source2Aligned && !destAligned) { // both source2 and dest not + // aligned + while (group--) { + pSource1 = reinterpret_cast<__m128 *>(const_cast(inputVector1)); + source2 = _mm_loadu_ps(inputVector2); + dest = _mm_add_ps(*pSource1, source2); + _mm_storeu_ps(outputVector, dest); + + inputVector1 += 4; + inputVector2 += 4; + outputVector += 4; + } + } + + // Non-SSE handling for remaining frames which is less than 4. 
+ n %= 4; +#elif defined(HAVE_ARM_NEON_INTRINSICS) + size_t tailFrames = n % 4; + const float *endP = outputVector + n - tailFrames; + + while (outputVector < endP) { + float32x4_t source1 = vld1q_f32(inputVector1); + float32x4_t source2 = vld1q_f32(inputVector2); + vst1q_f32(outputVector, vaddq_f32(source1, source2)); + + inputVector1 += 4; + inputVector2 += 4; + outputVector += 4; + } + n = tailFrames; +#endif + while (n--) { + *outputVector = *inputVector1 + *inputVector2; + ++inputVector1; + ++inputVector2; + ++outputVector; + } +} + +void substract( + const float *inputVector1, + const float *inputVector2, + float *outputVector, + size_t numberOfElementsToProcess) { + size_t n = numberOfElementsToProcess; + +#if defined(HAVE_X86_SSE2) + // If the inputVector address is not 16-byte aligned, the first several frames + // (at most three) should be processed separately. + while (!is16ByteAligned(inputVector1) && n) { + *outputVector = *inputVector1 - *inputVector2; + inputVector1++; + inputVector2++; + outputVector++; + n--; + } + + // Now the inputVector1 address is aligned and start to apply SSE. 
+ size_t group = n / 4; + __m128 *pSource1; + __m128 *pSource2; + __m128 *pDest; + __m128 source2; + __m128 dest; + + bool source2Aligned = is16ByteAligned(inputVector2); + bool destAligned = is16ByteAligned(outputVector); + + if (source2Aligned && destAligned) { // all aligned + while (group--) { + pSource1 = reinterpret_cast<__m128 *>(const_cast(inputVector1)); + pSource2 = reinterpret_cast<__m128 *>(const_cast(inputVector2)); + pDest = reinterpret_cast<__m128 *>(outputVector); + *pDest = _mm_sub_ps(*pSource1, *pSource2); + + inputVector1 += 4; + inputVector2 += 4; + outputVector += 4; + } + } else if (source2Aligned && !destAligned) { // source2 aligned but dest not + // aligned + while (group--) { + pSource1 = reinterpret_cast<__m128 *>(const_cast(inputVector1)); + pSource2 = reinterpret_cast<__m128 *>(const_cast(inputVector2)); + dest = _mm_sub_ps(*pSource1, *pSource2); + _mm_storeu_ps(outputVector, dest); + + inputVector1 += 4; + inputVector2 += 4; + outputVector += 4; + } + } else if (!source2Aligned && destAligned) { // source2 not aligned but dest + // aligned + while (group--) { + pSource1 = reinterpret_cast<__m128 *>(const_cast(inputVector1)); + source2 = _mm_loadu_ps(inputVector2); + pDest = reinterpret_cast<__m128 *>(outputVector); + *pDest = _mm_sub_ps(*pSource1, source2); + + inputVector1 += 4; + inputVector2 += 4; + outputVector += 4; + } + } else if (!source2Aligned && !destAligned) { // both source2 and dest not + // aligned + while (group--) { + pSource1 = reinterpret_cast<__m128 *>(const_cast(inputVector1)); + source2 = _mm_loadu_ps(inputVector2); + dest = _mm_sub_ps(*pSource1, source2); + _mm_storeu_ps(outputVector, dest); + + inputVector1 += 4; + inputVector2 += 4; + outputVector += 4; + } + } + + // Non-SSE handling for remaining frames which is less than 4. 
+ n %= 4; +#elif defined(HAVE_ARM_NEON_INTRINSICS) + size_t tailFrames = n % 4; + const float *endP = outputVector + n - tailFrames; + + while (outputVector < endP) { + float32x4_t source1 = vld1q_f32(inputVector1); + float32x4_t source2 = vld1q_f32(inputVector2); + vst1q_f32(outputVector, vsubq_f32(source1, source2)); + + inputVector1 += 4; + inputVector2 += 4; + outputVector += 4; + } + n = tailFrames; +#endif + while (n--) { + *outputVector = *inputVector1 - *inputVector2; + ++inputVector1; + ++inputVector2; + ++outputVector; + } +} + +void multiply( + const float *inputVector1, + const float *inputVector2, + float *outputVector, + size_t numberOfElementsToProcess) { + size_t n = numberOfElementsToProcess; + +#if defined(HAVE_X86_SSE2) +// If the inputVector1 address is not 16-byte aligned, the first several frames +// (at most three) should be processed separately. + while (!is16ByteAligned(inputVector1) && n) { + *outputVector = *inputVector1 * *inputVector2; + inputVector1++; + inputVector2++; + outputVector++; + n--; + } + + // Now the inputVector1 address aligned and start to apply SSE. + size_t tailFrames = n % 4; + const float *endP = outputVector + n - tailFrames; + __m128 pSource1; + __m128 pSource2; + __m128 dest; + + bool source2Aligned = is16ByteAligned(inputVector2); + bool destAligned = is16ByteAligned(outputVector); + +#define SSE2_MULT(loadInstr, storeInstr) \ + while (outputVector < endP) { \ + pSource1 = _mm_load_ps(inputVector1); \ + pSource2 = _mm_##loadInstr##_ps(inputVector2); \ + dest = _mm_mul_ps(pSource1, pSource2); \ + _mm_##storeInstr##_ps(outputVector, dest); \ + inputVector1 += 4; \ + inputVector2 += 4; \ + outputVector += 4; \ + } + + if (source2Aligned && destAligned) // Both aligned. + SSE2_MULT(load, store) + else if (source2Aligned && !destAligned) // Source2 is aligned but dest not. + SSE2_MULT(load, storeu) + else if (!source2Aligned && destAligned) // Dest is aligned but source2 not. 
+ SSE2_MULT(loadu, store) + else // Neither aligned. + SSE2_MULT(loadu, storeu) + + n = tailFrames; +#elif defined(HAVE_ARM_NEON_INTRINSICS) + size_t tailFrames = n % 4; + const float *endP = outputVector + n - tailFrames; + + while (outputVector < endP) { + float32x4_t source1 = vld1q_f32(inputVector1); + float32x4_t source2 = vld1q_f32(inputVector2); + vst1q_f32(outputVector, vmulq_f32(source1, source2)); + + inputVector1 += 4; + inputVector2 += 4; + outputVector += 4; + } + n = tailFrames; +#endif + while (n--) { + *outputVector = *inputVector1 * *inputVector2; + ++inputVector1; + ++inputVector2; + ++outputVector; + } +} + +float maximumMagnitude( + const float *inputVector, + size_t numberOfElementsToProcess) { + size_t n = numberOfElementsToProcess; + float max = 0; + +#if defined(HAVE_X86_SSE2) + // If the inputVector address is not 16-byte aligned, the first several frames + // (at most three) should be processed separately. + while (!is16ByteAligned(inputVector) && n) { + max = std::max(max, std::abs(*inputVector)); + inputVector++; + n--; + } + + // Now the inputVector is aligned, use SSE. + size_t tailFrames = n % 4; + const float *endP = inputVector + n - tailFrames; + __m128 source; + __m128 mMax = _mm_setzero_ps(); + int mask = 0x7FFFFFFF; + __m128 mMask = _mm_set1_ps(*reinterpret_cast(&mask)); + + while (inputVector < endP) { + source = _mm_load_ps(inputVector); + // Calculate the absolute value by anding source with mask, the sign bit is + // set to 0. + source = _mm_and_ps(source, mMask); + mMax = _mm_max_ps(mMax, source); + inputVector += 4; + } + + // Get max from the SSE results. 
+ const float *groupMaxP = reinterpret_cast(&mMax); + max = std::max(max, groupMaxP[0]); + max = std::max(max, groupMaxP[1]); + max = std::max(max, groupMaxP[2]); + max = std::max(max, groupMaxP[3]); + + n = tailFrames; +#elif defined(HAVE_ARM_NEON_INTRINSICS) + size_t tailFrames = n % 4; + const float *endP = inputVector + n - tailFrames; + + float32x4_t fourMax = vdupq_n_f32(0); + while (inputVector < endP) { + float32x4_t source = vld1q_f32(inputVector); + fourMax = vmaxq_f32(fourMax, vabsq_f32(source)); + inputVector += 4; + } + float32x2_t twoMax = vmax_f32(vget_low_f32(fourMax), vget_high_f32(fourMax)); + + float groupMax[2]; + vst1_f32(groupMax, twoMax); + max = std::max(groupMax[0], groupMax[1]); + + n = tailFrames; +#endif + + while (n--) { + max = std::max(max, std::abs(*inputVector)); + ++inputVector; + } + + return max; +} + +#endif +} // namespace audioapi::VectorMath diff --git a/packages/react-native-audio-api/android/src/main/cpp/utils/VectorMath.h b/packages/react-native-audio-api/android/src/main/cpp/utils/VectorMath.h new file mode 100644 index 00000000..a7507e0c --- /dev/null +++ b/packages/react-native-audio-api/android/src/main/cpp/utils/VectorMath.h @@ -0,0 +1,65 @@ +/* + * Copyright (C) 2010, Google Inc. All rights reserved. + * Copyright (C) 2020, Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. 
AND ITS CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +// Defines the interface for several vector math functions whose implementation +// will ideally be optimized. + +#include + +namespace audioapi::VectorMath { + +void multiplyByScalar( + const float *inputVector, + float scalar, + float *outputVector, + size_t numberOfElementsToProcess); +void addScalar( + const float *inputVector, + float scalar, + float *outputVector, + size_t numberOfElementsToProcess); +void add( + const float *inputVector1, + const float *inputVector2, + float *outputVector, + size_t numberOfElementsToProcess); +void substract( + const float *inputVector1, + const float *inputVector2, + float *outputVector, + size_t numberOfElementsToProcess); +void multiply( + const float *inputVector1, + const float *inputVector2, + float *outputVector, + size_t numberOfElementsToProcess); + +// Finds the maximum magnitude of a float vector. +float maximumMagnitude( + const float *inputVector, + size_t numberOfElementsToProcess); +} // namespace audioapi::VectorMath