diff --git a/include/cglm/simd/arm.h b/include/cglm/simd/arm.h
index 64b2dad..405b9d5 100644
--- a/include/cglm/simd/arm.h
+++ b/include/cglm/simd/arm.h
@@ -79,5 +79,35 @@ glmm_norm_inf(float32x4_t a) {
   return glmm_hmax(glmm_abs(a));
 }
 
+/*
+ * Multiply-add: returns (a * b) + c for each lane, matching the operand
+ * order of the x86 glmm_fmadd. The NEON intrinsics take the accumulator
+ * as their FIRST argument, so c is passed first.
+ */
+static inline
+float32x4_t
+glmm_fmadd(float32x4_t a, float32x4_t b, float32x4_t c) {
+#if defined(__aarch64__)
+  return vfmaq_f32(c, a, b);
+#else
+  return vmlaq_f32(c, a, b);
+#endif
+}
+
+/*
+ * Negated multiply-add: returns c - (a * b) for each lane, matching the
+ * x86 glmm_fnmadd. The NEON intrinsics take the accumulator as their
+ * FIRST argument, so c is passed first.
+ */
+static inline
+float32x4_t
+glmm_fnmadd(float32x4_t a, float32x4_t b, float32x4_t c) {
+#if defined(__aarch64__)
+  return vfmsq_f32(c, a, b);
+#else
+  return vmlsq_f32(c, a, b);
+#endif
+}
+
 #endif
 #endif /* cglm_simd_arm_h */
diff --git a/include/cglm/simd/x86.h b/include/cglm/simd/x86.h
index bbeccb3..2a5716b 100644
--- a/include/cglm/simd/x86.h
+++ b/include/cglm/simd/x86.h
@@ -197,5 +197,35 @@ glmm_store3(float v[3], __m128 vx) {
   _mm_store_ss(&v[2], glmm_shuff1(vx, 2, 2, 2, 2));
 }
 
+/*
+ * Multiply-add: returns (a * b) + c for each lane. Uses a single FMA
+ * instruction when the compiler targets FMA (__FMA__ defined); otherwise
+ * falls back to a separate multiply and add.
+ */
+static inline
+__m128
+glmm_fmadd(__m128 a, __m128 b, __m128 c) {
+#ifdef __FMA__
+  return _mm_fmadd_ps(a, b, c);
+#else
+  return _mm_add_ps(c, _mm_mul_ps(a, b));
+#endif
+}
+
+/*
+ * Negated multiply-add: returns c - (a * b) for each lane. Uses a single
+ * FMA instruction when __FMA__ is defined; otherwise falls back to a
+ * separate multiply and subtract.
+ */
+static inline
+__m128
+glmm_fnmadd(__m128 a, __m128 b, __m128 c) {
+#ifdef __FMA__
+  return _mm_fnmadd_ps(a, b, c);
+#else
+  return _mm_sub_ps(c, _mm_mul_ps(a, b));
+#endif
+}
+
 #endif
 #endif /* cglm_simd_x86_h */