simd128: enable in mat3

This commit is contained in:
myfreeer
2023-03-06 16:50:37 +08:00
parent a0dd85f3d1
commit fdef58bd1a
3 changed files with 24 additions and 3 deletions

View File

@@ -42,6 +42,10 @@
# include "simd/sse2/mat3.h" # include "simd/sse2/mat3.h"
#endif #endif
#ifdef CGLM_SIMD_WASM
# include "simd/wasm/mat3.h"
#endif
#define GLM_MAT3_IDENTITY_INIT {{1.0f, 0.0f, 0.0f}, \ #define GLM_MAT3_IDENTITY_INIT {{1.0f, 0.0f, 0.0f}, \
{0.0f, 1.0f, 0.0f}, \ {0.0f, 1.0f, 0.0f}, \
{0.0f, 0.0f, 1.0f}} {0.0f, 0.0f, 1.0f}}
@@ -150,6 +154,8 @@ void
glm_mat3_mul(mat3 m1, mat3 m2, mat3 dest) { glm_mat3_mul(mat3 m1, mat3 m2, mat3 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ ) #if defined( __SSE__ ) || defined( __SSE2__ )
glm_mat3_mul_sse2(m1, m2, dest); glm_mat3_mul_sse2(m1, m2, dest);
#elif defined(__wasm__) && defined(__wasm_simd128__)
glm_mat3_mul_wasm(m1, m2, dest);
#else #else
float a00 = m1[0][0], a01 = m1[0][1], a02 = m1[0][2], float a00 = m1[0][0], a01 = m1[0][1], a02 = m1[0][2],
a10 = m1[1][0], a11 = m1[1][1], a12 = m1[1][2], a10 = m1[1][0], a11 = m1[1][1], a12 = m1[1][2],

View File

@@ -92,7 +92,22 @@ _mm_sqrt_ss(glmm_128 __a)
static __inline__ glmm_128 __attribute__((__always_inline__, __nodebug__)) static __inline__ glmm_128 __attribute__((__always_inline__, __nodebug__))
_mm_rcp_ps(glmm_128 __a) _mm_rcp_ps(glmm_128 __a)
{ {
return (glmm_128)wasm_f32x4_div((v128_t)wasm_f32x4_splat(1.0f), (v128_t)__a); return (glmm_128)wasm_f32x4_div((glmm_128)wasm_f32x4_splat(1.0f), (glmm_128)__a);
}
/*
 * Store the vector __a to the memory starting at __p.
 *
 * The write goes through a local wrapper struct declared with the
 * packed and may_alias attributes: packed drops the vector's natural
 * alignment requirement for the access, and may_alias lets the
 * vector-typed store legally target memory declared as float.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_storeu_ps(float *__p, glmm_128 __a)
{
  struct __glmm_storeu_wrap {
    glmm_128 __val;
  } __attribute__((__packed__, __may_alias__));
  struct __glmm_storeu_wrap *__dst = (struct __glmm_storeu_wrap *)__p;

  __dst->__val = __a;
}
/*
 * Store a single 32-bit lane of __a to the location __p points at.
 * Carries the name of the SSE intrinsic _mm_store_ss; implemented here
 * with the WASM SIMD lane-store intrinsic.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_store_ss(float *__p, glmm_128 __a)
{
/* lane index 0: only the first 32-bit lane is written, the rest of __a is ignored */
wasm_v128_store32_lane((void*)__p, (glmm_128)__a, 0);
} }
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ #define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \

View File

@@ -27,13 +27,13 @@ glm_mat3_mul_wasm(mat3 m1, mat3 m2, mat3 dest) {
x1 = glmm_shuff1(r0, 3, 0, 0, 0); /* b10 b00 b00 b00 */ x1 = glmm_shuff1(r0, 3, 0, 0, 0); /* b10 b00 b00 b00 */
x2 = _mm_shuffle_ps(l0, l1, _MM_SHUFFLE(1, 0, 3, 3)); /* a12 a11 a10 a10 */ x2 = _mm_shuffle_ps(l0, l1, _MM_SHUFFLE(1, 0, 3, 3)); /* a12 a11 a10 a10 */
x3 = _mm_shuffle_ps(r0, r1, _MM_SHUFFLE(2, 0, 3, 1)); /* b20 b11 b10 b01 */ x3 = _mm_shuffle_ps(r0, r1, _MM_SHUFFLE(2, 0, 3, 1)); /* b20 b11 b10 b01 */
x0 = _mm_mul_ps(x8, x1); x0 = wasm_f32x4_mul(x8, x1);
x6 = glmm_shuff1(l0, 1, 0, 2, 1); /* a01 a00 a02 a01 */ x6 = glmm_shuff1(l0, 1, 0, 2, 1); /* a01 a00 a02 a01 */
x7 = glmm_shuff1(x3, 3, 3, 1, 1); /* b20 b20 b10 b10 */ x7 = glmm_shuff1(x3, 3, 3, 1, 1); /* b20 b20 b10 b10 */
l2 = wasm_v128_load32_zero(&m1[2][2]); l2 = wasm_v128_load32_zero(&m1[2][2]);
r2 = wasm_v128_load32_zero(&m2[2][2]); r2 = wasm_v128_load32_zero(&m2[2][2]);
x1 = _mm_mul_ps(x6, x7); x1 = wasm_f32x4_mul(x6, x7);
l2 = glmm_shuff1(l2, 0, 0, 1, 0); /* a22 a22 0.f a22 */ l2 = glmm_shuff1(l2, 0, 0, 1, 0); /* a22 a22 0.f a22 */
r2 = glmm_shuff1(r2, 0, 0, 1, 0); /* b22 b22 0.f b22 */ r2 = glmm_shuff1(r2, 0, 0, 1, 0); /* b22 b22 0.f b22 */