diff --git a/include/cglm/simd/x86.h b/include/cglm/simd/x86.h
index 657d9ba..b80f335 100644
--- a/include/cglm/simd/x86.h
+++ b/include/cglm/simd/x86.h
@@ -20,14 +20,6 @@
 
 #define glmm_128 __m128
 
-#ifdef __AVX__
-# define glmm_set1(x) _mm_broadcast_ss(&x)
-# define glmm_set1_ptr(x) _mm_broadcast_ss(x)
-#else
-# define glmm_set1(x) _mm_set1_ps(x)
-# define glmm_set1_ptr(x) _mm_set1_ps(*x)
-#endif
-
 #if defined(CGLM_USE_INT_DOMAIN) && defined(__SSE2__)
 # define glmm_shuff1(xmm, z, y, x, w) \
    _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xmm), \
@@ -39,10 +31,29 @@
 
 #define glmm_splat(x, lane) glmm_shuff1(x, lane, lane, lane, lane)
 
-#define glmm_splat_x(x) glmm_splat(x, 0)
-#define glmm_splat_y(x) glmm_splat(x, 1)
-#define glmm_splat_z(x) glmm_splat(x, 2)
-#define glmm_splat_w(x) glmm_splat(x, 3)
+#ifdef __AVX__
+# define glmm_set1(x) _mm_broadcast_ss(&(x))
+# define glmm_set1_ptr(x) _mm_broadcast_ss(x)
+
+/* _mm_broadcastss_ps (register-source vbroadcastss) needs AVX2; with
+   plain AVX splat lane 0 through _mm_permute_ps like the other lanes */
+# ifdef __AVX2__
+#  define glmm_splat_x(x) _mm_broadcastss_ps(x)
+# else
+#  define glmm_splat_x(x) _mm_permute_ps(x, _MM_SHUFFLE(0, 0, 0, 0))
+# endif
+# define glmm_splat_y(x) _mm_permute_ps(x, _MM_SHUFFLE(1, 1, 1, 1))
+# define glmm_splat_z(x) _mm_permute_ps(x, _MM_SHUFFLE(2, 2, 2, 2))
+# define glmm_splat_w(x) _mm_permute_ps(x, _MM_SHUFFLE(3, 3, 3, 3))
+#else
+# define glmm_set1(x) _mm_set1_ps(x)
+# define glmm_set1_ptr(x) _mm_set1_ps(*(x))
+
+# define glmm_splat_x(x) glmm_splat(x, 0)
+# define glmm_splat_y(x) glmm_splat(x, 1)
+# define glmm_splat_z(x) glmm_splat(x, 2)
+# define glmm_splat_w(x) glmm_splat(x, 3)
+#endif
 
 /* glmm_shuff1x() is DEPRECATED!, use glmm_splat() */
 #define glmm_shuff1x(xmm, x) glmm_shuff1(xmm, x, x, x, x)