From 7df5aa2e26e5e33c12a6bef31635ab0132ef3e3a Mon Sep 17 00:00:00 2001 From: Recep Aslantas Date: Sun, 18 Apr 2021 13:51:09 +0300 Subject: [PATCH] optimize mat2 operations with fma --- include/cglm/simd/sse2/mat2.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/cglm/simd/sse2/mat2.h b/include/cglm/simd/sse2/mat2.h index b3b4d97..1f832b0 100644 --- a/include/cglm/simd/sse2/mat2.h +++ b/include/cglm/simd/sse2/mat2.h @@ -26,11 +26,11 @@ glm_mat2_mul_sse2(mat2 m1, mat2 m2, mat2 dest) { dest[1][0] = a * g + c * h; dest[1][1] = b * g + d * h; */ - x0 = _mm_mul_ps(_mm_movelh_ps(x1, x1), glmm_shuff1(x2, 2, 2, 0, 0)); - x1 = _mm_mul_ps(_mm_movehl_ps(x1, x1), glmm_shuff1(x2, 3, 3, 1, 1)); - x1 = _mm_add_ps(x0, x1); + x0 = glmm_fmadd(_mm_movelh_ps(x1, x1), glmm_shuff1(x2, 2, 2, 0, 0), + _mm_mul_ps(_mm_movehl_ps(x1, x1), + glmm_shuff1(x2, 3, 3, 1, 1))); - glmm_store(dest[0], x1); + glmm_store(dest[0], x0); } CGLM_INLINE