From 46d089e7a2731f5706971c8978e93266cfc91b72 Mon Sep 17 00:00:00 2001 From: Stefano D'Angelo Date: Wed, 7 Feb 2024 12:37:40 +0100 Subject: [PATCH] added/moved denormal kill cpu flags --- templates/android/src/jni.cpp | 25 ++++++++++++++++++++ templates/cmd/src/main.c | 14 ++++++++++++ templates/ios/src/native.mm | 10 ++++++++ templates/lv2/src/lv2.c | 39 +++++++++++++++---------------- templates/vst3/src/vst3.c | 43 +++++++++++++++++++---------------- 5 files changed, 93 insertions(+), 38 deletions(-) diff --git a/templates/android/src/jni.cpp b/templates/android/src/jni.cpp index 6f099c6..2679eb0 100644 --- a/templates/android/src/jni.cpp +++ b/templates/android/src/jni.cpp @@ -30,6 +30,11 @@ # include #endif +#if defined(__i386__) || defined(__x86_64__) +#include <xmmintrin.h> +#include <pmmintrin.h> +#endif + static ma_device device; static plugin instance; static void * mem; @@ -71,6 +76,18 @@ uint8_t midiBuffer[MIDI_BUFFER_SIZE]; static void data_callback(ma_device* pDevice, void* pOutput, const void* pInput, ma_uint32 frameCount) { (void)pDevice; +#if defined(__aarch64__) + uint64_t fpcr; + __asm__ __volatile__ ("mrs %0, fpcr" : "=r"(fpcr)); + __asm__ __volatile__ ("msr fpcr, %0" :: "r"(fpcr | 0x1000000)); // enable FZ +#elif defined(__i386__) || defined(__x86_64__) + const unsigned int flush_zero_mode = _MM_GET_FLUSH_ZERO_MODE(); + const unsigned int denormals_zero_mode = _MM_GET_DENORMALS_ZERO_MODE(); + + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); +#endif + #if PARAMETERS_N + NUM_MIDI_INPUTS > 0 if (mutex.try_lock()) { # if PARAMETERS_N > 0 @@ -134,6 +151,14 @@ static void data_callback(ma_device* pDevice, void* pOutput, const void* pInput, i += n; } + +#if defined(__aarch64__) + __asm__ __volatile__ ("msr fpcr, %0" : : "r"(fpcr)); +#elif defined(__i386__) || defined(__x86_64__) + _MM_SET_FLUSH_ZERO_MODE(flush_zero_mode); + _MM_SET_DENORMALS_ZERO_MODE(denormals_zero_mode); +#endif + } extern "C" diff --git 
a/templates/cmd/src/main.c b/templates/cmd/src/main.c index 8516243..af958a2 100644 --- a/templates/cmd/src/main.c +++ b/templates/cmd/src/main.c @@ -16,6 +16,11 @@ # include #endif +#if defined(__i386__) || defined(__x86_64__) +#include <xmmintrin.h> +#include <pmmintrin.h> +#endif + plugin instance; void * mem; #if (NUM_NON_OPT_CHANNELS_IN > NUM_CHANNELS_IN) || (NUM_NON_OPT_CHANNELS_OUT > NUM_CHANNELS_OUT) @@ -261,6 +266,15 @@ int main(int argc, char * argv[]) { } #endif +#if defined(__aarch64__) + uint64_t fpcr; + __asm__ __volatile__ ("mrs %0, fpcr" : "=r"(fpcr)); + __asm__ __volatile__ ("msr fpcr, %0" :: "r"(fpcr | 0x1000000)); // enable FZ +#elif defined(__i386__) || defined(__x86_64__) + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); +#endif + #if PARAMETERS_N > 0 for (size_t i = 0; i < PARAMETERS_N; i++) { if (param_data[i].out) diff --git a/templates/ios/src/native.mm b/templates/ios/src/native.mm index b9646c4..94c859b 100644 --- a/templates/ios/src/native.mm +++ b/templates/ios/src/native.mm @@ -58,6 +58,12 @@ int midiBuffer_i = 0; static void data_callback(ma_device* pDevice, void* pOutput, const void* pInput, ma_uint32 frameCount) { (void)pDevice; +#if defined(__aarch64__) + uint64_t fpcr; + __asm__ __volatile__ ("mrs %0, fpcr" : "=r"(fpcr)); + __asm__ __volatile__ ("msr fpcr, %0" :: "r"(fpcr | 0x1000000)); // enable FZ +#endif + #if PARAMETERS_N + NUM_MIDI_INPUTS > 0 if (mutex.try_lock()) { # if PARAMETERS_N > 0 @@ -122,6 +128,10 @@ static void data_callback(ma_device* pDevice, void* pOutput, const void* pInput, #endif i += n; } + +#if defined(__aarch64__) + __asm__ __volatile__ ("msr fpcr, %0" : : "r"(fpcr)); +#endif } #if (NUM_MIDI_INPUTS > 0) diff --git a/templates/lv2/src/lv2.c b/templates/lv2/src/lv2.c index 73b024a..d31f8c2 100644 --- a/templates/lv2/src/lv2.c +++ b/templates/lv2/src/lv2.c @@ -167,6 +167,18 @@ static inline float clampf(float x, float m, float M) { static void run(LV2_Handle instance, uint32_t 
sample_count) { plugin_instance * i = (plugin_instance *)instance; +#if defined(__aarch64__) + uint64_t fpcr; + __asm__ __volatile__ ("mrs %0, fpcr" : "=r"(fpcr)); + __asm__ __volatile__ ("msr fpcr, %0" :: "r"(fpcr | 0x1000000)); // enable FZ +#elif defined(__i386__) || defined(__x86_64__) + const unsigned int flush_zero_mode = _MM_GET_FLUSH_ZERO_MODE(); + const unsigned int denormals_zero_mode = _MM_GET_DENORMALS_ZERO_MODE(); + + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); +#endif + #if DATA_PRODUCT_CONTROL_INPUTS_N > 0 for (uint32_t j = 0; j < DATA_PRODUCT_CONTROL_INPUTS_N; j++) { uint32_t k = param_data[j].index; @@ -190,18 +202,6 @@ static void run(LV2_Handle instance, uint32_t sample_count) { } #endif -#if defined(__aarch64__) - uint64_t fpcr; - __asm__ __volatile__ ("mrs %0, fpcr" : "=r"(fpcr)); - __asm__ __volatile__ ("msr fpcr, %0" :: "r"(fpcr | 0x1000000)); // enable FZ -#elif defined(__i386__) || defined(__x86_64__) - const unsigned int flush_zero_mode = _MM_GET_FLUSH_ZERO_MODE(); - const unsigned int denormals_zero_mode = _MM_GET_DENORMALS_ZERO_MODE(); - - _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); - _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); -#endif - // from https://lv2plug.in/book #if DATA_PRODUCT_MIDI_INPUTS_N > 0 for (size_t j = 0; j < DATA_PRODUCT_MIDI_INPUTS_N; j++) { @@ -219,13 +219,6 @@ static void run(LV2_Handle instance, uint32_t sample_count) { plugin_process(&i->p, i->x, i->y, sample_count); -#if defined(__aarch64__) - __asm__ __volatile__ ("msr fpcr, %0" : : "r"(fpcr)); -#elif defined(__i386__) || defined(__x86_64__) - _MM_SET_FLUSH_ZERO_MODE(flush_zero_mode); - _MM_SET_DENORMALS_ZERO_MODE(denormals_zero_mode); -#endif - #if DATA_PRODUCT_CONTROL_OUTPUTS_N > 0 for (uint32_t j = 0; j < DATA_PRODUCT_CONTROL_OUTPUTS_N; j++) { uint32_t k = param_out_index[j]; @@ -235,6 +228,14 @@ static void run(LV2_Handle instance, uint32_t sample_count) { #else (void)plugin_get_parameter; #endif + 
+#if defined(__aarch64__) + __asm__ __volatile__ ("msr fpcr, %0" : : "r"(fpcr)); +#elif defined(__i386__) || defined(__x86_64__) + _MM_SET_FLUSH_ZERO_MODE(flush_zero_mode); + _MM_SET_DENORMALS_ZERO_MODE(denormals_zero_mode); +#endif + } static void cleanup(LV2_Handle instance) { diff --git a/templates/vst3/src/vst3.c b/templates/vst3/src/vst3.c index 9367178..8e67915 100644 --- a/templates/vst3/src/vst3.c +++ b/templates/vst3/src/vst3.c @@ -9,6 +9,11 @@ #include "data.h" #include "plugin.h" +#if defined(__i386__) || defined(__x86_64__) +#include <xmmintrin.h> +#include <pmmintrin.h> +#endif + // COM in C doc: // https://github.com/rubberduck-vba/Rubberduck/wiki/COM-in-plain-C // https://devblogs.microsoft.com/oldnewthing/20040205-00/?p=40733 @@ -576,6 +581,18 @@ static void processParams(pluginInstance *p, struct Steinberg_Vst_ProcessData *d static Steinberg_tresult pluginProcess(void* thisInterface, struct Steinberg_Vst_ProcessData* data) { TRACE("plugin IAudioProcessor process\n"); +#if defined(__aarch64__) + uint64_t fpcr; + __asm__ __volatile__ ("mrs %0, fpcr" : "=r"(fpcr)); + __asm__ __volatile__ ("msr fpcr, %0" :: "r"(fpcr | 0x1000000)); // enable FZ +#elif defined(__i386__) || defined(__x86_64__) + const unsigned int flush_zero_mode = _MM_GET_FLUSH_ZERO_MODE(); + const unsigned int denormals_zero_mode = _MM_GET_DENORMALS_ZERO_MODE(); + + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); +#endif + pluginInstance *p = (pluginInstance *)((char *)thisInterface - offsetof(pluginInstance, vtblIAudioProcessor)); processParams(p, data, 1); @@ -631,27 +648,8 @@ static Steinberg_tresult pluginProcess(void* thisInterface, struct Steinberg_Vst float **outputs = NULL; #endif -#if defined(__aarch64__) - uint64_t fpcr; - __asm__ __volatile__ ("mrs %0, fpcr" : "=r"(fpcr)); - __asm__ __volatile__ ("msr fpcr, %0" :: "r"(fpcr | 0x1000000)); // enable FZ -#elif defined(__i386__) || defined(__x86_64__) - const unsigned int flush_zero_mode = 
_MM_GET_FLUSH_ZERO_MODE(); - const unsigned int denormals_zero_mode = _MM_GET_DENORMALS_ZERO_MODE(); - - _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); - _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); -#endif - plugin_process(&p->p, inputs, outputs, data->numSamples); -#if defined(__aarch64__) - __asm__ __volatile__ ("msr fpcr, %0" : : "r"(fpcr)); -#elif defined(__i386__) || defined(__x86_64__) - _MM_SET_FLUSH_ZERO_MODE(flush_zero_mode); - _MM_SET_DENORMALS_ZERO_MODE(denormals_zero_mode); -#endif - processParams(p, data, 0); #if DATA_PRODUCT_PARAMETERS_N > 0 @@ -675,6 +673,13 @@ static Steinberg_tresult pluginProcess(void* thisInterface, struct Steinberg_Vst // TBD: latency + IComponentHandler::restartComponent (kLatencyChanged), see https://steinbergmedia.github.io/vst3_dev_portal/pages/Technical+Documentation/Workflow+Diagrams/Get+Latency+Call+Sequence.html +#if defined(__aarch64__) + __asm__ __volatile__ ("msr fpcr, %0" : : "r"(fpcr)); +#elif defined(__i386__) || defined(__x86_64__) + _MM_SET_FLUSH_ZERO_MODE(flush_zero_mode); + _MM_SET_DENORMALS_ZERO_MODE(denormals_zero_mode); +#endif + return Steinberg_kResultOk; }