added/moved denormal kill cpu flags

This commit is contained in:
Stefano D'Angelo 2024-02-07 12:37:40 +01:00
parent 2f008b2488
commit 46d089e7a2
5 changed files with 93 additions and 38 deletions

View File

@ -30,6 +30,11 @@
# include <amidi/AMidi.h>
#endif
#if defined(__i386__) || defined(__x86_64__)
#include <xmmintrin.h>
#include <pmmintrin.h>
#endif
static ma_device device;
static plugin instance;
static void * mem;
@ -71,6 +76,18 @@ uint8_t midiBuffer[MIDI_BUFFER_SIZE];
static void data_callback(ma_device* pDevice, void* pOutput, const void* pInput, ma_uint32 frameCount) {
(void)pDevice;
#if defined(__aarch64__)
uint64_t fpcr;
__asm__ __volatile__ ("mrs %0, fpcr" : "=r"(fpcr));
__asm__ __volatile__ ("msr fpcr, %0" :: "r"(fpcr | 0x1000000)); // enable FZ
#elif defined(__i386__) || defined(__x86_64__)
const unsigned int flush_zero_mode = _MM_GET_FLUSH_ZERO_MODE();
const unsigned int denormals_zero_mode = _MM_GET_DENORMALS_ZERO_MODE();
_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
#endif
#if PARAMETERS_N + NUM_MIDI_INPUTS > 0
if (mutex.try_lock()) {
# if PARAMETERS_N > 0
@ -134,6 +151,14 @@ static void data_callback(ma_device* pDevice, void* pOutput, const void* pInput,
i += n;
}
#if defined(__aarch64__)
__asm__ __volatile__ ("msr fpcr, %0" : : "r"(fpcr));
#elif defined(__i386__) || defined(__x86_64__)
_MM_SET_FLUSH_ZERO_MODE(flush_zero_mode);
_MM_SET_DENORMALS_ZERO_MODE(denormals_zero_mode);
#endif
}
extern "C"

View File

@ -16,6 +16,11 @@
# include <midi-parser.h>
#endif
#if defined(__i386__) || defined(__x86_64__)
#include <xmmintrin.h>
#include <pmmintrin.h>
#endif
plugin instance;
void * mem;
#if (NUM_NON_OPT_CHANNELS_IN > NUM_CHANNELS_IN) || (NUM_NON_OPT_CHANNELS_OUT > NUM_CHANNELS_OUT)
@ -261,6 +266,15 @@ int main(int argc, char * argv[]) {
}
#endif
#if defined(__aarch64__)
uint64_t fpcr;
__asm__ __volatile__ ("mrs %0, fpcr" : "=r"(fpcr));
__asm__ __volatile__ ("msr fpcr, %0" :: "r"(fpcr | 0x1000000)); // enable FZ
#elif defined(__i386__) || defined(__x86_64__)
_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
#endif
#if PARAMETERS_N > 0
for (size_t i = 0; i < PARAMETERS_N; i++) {
if (param_data[i].out)

View File

@ -58,6 +58,12 @@ int midiBuffer_i = 0;
static void data_callback(ma_device* pDevice, void* pOutput, const void* pInput, ma_uint32 frameCount) {
(void)pDevice;
#if defined(__aarch64__)
uint64_t fpcr;
__asm__ __volatile__ ("mrs %0, fpcr" : "=r"(fpcr));
__asm__ __volatile__ ("msr fpcr, %0" :: "r"(fpcr | 0x1000000)); // enable FZ
#endif
#if PARAMETERS_N + NUM_MIDI_INPUTS > 0
if (mutex.try_lock()) {
# if PARAMETERS_N > 0
@ -122,6 +128,10 @@ static void data_callback(ma_device* pDevice, void* pOutput, const void* pInput,
#endif
i += n;
}
#if defined(__aarch64__)
__asm__ __volatile__ ("msr fpcr, %0" : : "r"(fpcr));
#endif
}
#if (NUM_MIDI_INPUTS > 0)

View File

@ -167,6 +167,18 @@ static inline float clampf(float x, float m, float M) {
static void run(LV2_Handle instance, uint32_t sample_count) {
plugin_instance * i = (plugin_instance *)instance;
#if defined(__aarch64__)
uint64_t fpcr;
__asm__ __volatile__ ("mrs %0, fpcr" : "=r"(fpcr));
__asm__ __volatile__ ("msr fpcr, %0" :: "r"(fpcr | 0x1000000)); // enable FZ
#elif defined(__i386__) || defined(__x86_64__)
const unsigned int flush_zero_mode = _MM_GET_FLUSH_ZERO_MODE();
const unsigned int denormals_zero_mode = _MM_GET_DENORMALS_ZERO_MODE();
_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
#endif
#if DATA_PRODUCT_CONTROL_INPUTS_N > 0
for (uint32_t j = 0; j < DATA_PRODUCT_CONTROL_INPUTS_N; j++) {
uint32_t k = param_data[j].index;
@ -190,18 +202,6 @@ static void run(LV2_Handle instance, uint32_t sample_count) {
}
#endif
#if defined(__aarch64__)
uint64_t fpcr;
__asm__ __volatile__ ("mrs %0, fpcr" : "=r"(fpcr));
__asm__ __volatile__ ("msr fpcr, %0" :: "r"(fpcr | 0x1000000)); // enable FZ
#elif defined(__i386__) || defined(__x86_64__)
const unsigned int flush_zero_mode = _MM_GET_FLUSH_ZERO_MODE();
const unsigned int denormals_zero_mode = _MM_GET_DENORMALS_ZERO_MODE();
_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
#endif
// from https://lv2plug.in/book
#if DATA_PRODUCT_MIDI_INPUTS_N > 0
for (size_t j = 0; j < DATA_PRODUCT_MIDI_INPUTS_N; j++) {
@ -219,13 +219,6 @@ static void run(LV2_Handle instance, uint32_t sample_count) {
plugin_process(&i->p, i->x, i->y, sample_count);
#if defined(__aarch64__)
__asm__ __volatile__ ("msr fpcr, %0" : : "r"(fpcr));
#elif defined(__i386__) || defined(__x86_64__)
_MM_SET_FLUSH_ZERO_MODE(flush_zero_mode);
_MM_SET_DENORMALS_ZERO_MODE(denormals_zero_mode);
#endif
#if DATA_PRODUCT_CONTROL_OUTPUTS_N > 0
for (uint32_t j = 0; j < DATA_PRODUCT_CONTROL_OUTPUTS_N; j++) {
uint32_t k = param_out_index[j];
@ -235,6 +228,14 @@ static void run(LV2_Handle instance, uint32_t sample_count) {
#else
(void)plugin_get_parameter;
#endif
#if defined(__aarch64__)
__asm__ __volatile__ ("msr fpcr, %0" : : "r"(fpcr));
#elif defined(__i386__) || defined(__x86_64__)
_MM_SET_FLUSH_ZERO_MODE(flush_zero_mode);
_MM_SET_DENORMALS_ZERO_MODE(denormals_zero_mode);
#endif
}
static void cleanup(LV2_Handle instance) {

View File

@ -9,6 +9,11 @@
#include "data.h"
#include "plugin.h"
#if defined(__i386__) || defined(__x86_64__)
#include <xmmintrin.h>
#include <pmmintrin.h>
#endif
// COM in C doc:
// https://github.com/rubberduck-vba/Rubberduck/wiki/COM-in-plain-C
// https://devblogs.microsoft.com/oldnewthing/20040205-00/?p=40733
@ -576,6 +581,18 @@ static void processParams(pluginInstance *p, struct Steinberg_Vst_ProcessData *d
static Steinberg_tresult pluginProcess(void* thisInterface, struct Steinberg_Vst_ProcessData* data) {
TRACE("plugin IAudioProcessor process\n");
#if defined(__aarch64__)
uint64_t fpcr;
__asm__ __volatile__ ("mrs %0, fpcr" : "=r"(fpcr));
__asm__ __volatile__ ("msr fpcr, %0" :: "r"(fpcr | 0x1000000)); // enable FZ
#elif defined(__i386__) || defined(__x86_64__)
const unsigned int flush_zero_mode = _MM_GET_FLUSH_ZERO_MODE();
const unsigned int denormals_zero_mode = _MM_GET_DENORMALS_ZERO_MODE();
_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
#endif
pluginInstance *p = (pluginInstance *)((char *)thisInterface - offsetof(pluginInstance, vtblIAudioProcessor));
processParams(p, data, 1);
@ -631,27 +648,8 @@ static Steinberg_tresult pluginProcess(void* thisInterface, struct Steinberg_Vst
float **outputs = NULL;
#endif
#if defined(__aarch64__)
uint64_t fpcr;
__asm__ __volatile__ ("mrs %0, fpcr" : "=r"(fpcr));
__asm__ __volatile__ ("msr fpcr, %0" :: "r"(fpcr | 0x1000000)); // enable FZ
#elif defined(__i386__) || defined(__x86_64__)
const unsigned int flush_zero_mode = _MM_GET_FLUSH_ZERO_MODE();
const unsigned int denormals_zero_mode = _MM_GET_DENORMALS_ZERO_MODE();
_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
#endif
plugin_process(&p->p, inputs, outputs, data->numSamples);
#if defined(__aarch64__)
__asm__ __volatile__ ("msr fpcr, %0" : : "r"(fpcr));
#elif defined(__i386__) || defined(__x86_64__)
_MM_SET_FLUSH_ZERO_MODE(flush_zero_mode);
_MM_SET_DENORMALS_ZERO_MODE(denormals_zero_mode);
#endif
processParams(p, data, 0);
#if DATA_PRODUCT_PARAMETERS_N > 0
@ -675,6 +673,13 @@ static Steinberg_tresult pluginProcess(void* thisInterface, struct Steinberg_Vst
// TBD: latency + IComponentHandler::restartComponent (kLatencyChanged), see https://steinbergmedia.github.io/vst3_dev_portal/pages/Technical+Documentation/Workflow+Diagrams/Get+Latency+Call+Sequence.html
#if defined(__aarch64__)
__asm__ __volatile__ ("msr fpcr, %0" : : "r"(fpcr));
#elif defined(__i386__) || defined(__x86_64__)
_MM_SET_FLUSH_ZERO_MODE(flush_zero_mode);
_MM_SET_DENORMALS_ZERO_MODE(denormals_zero_mode);
#endif
return Steinberg_kResultOk;
}