mirror of
https://github.com/recp/cglm.git
synced 2026-02-17 03:39:05 +00:00
simd128: handle both sse2 and simd128 enabled by Emscripten
https://github.com/recp/cglm/pull/286#issuecomment-1492985403
This commit is contained in:
@@ -53,12 +53,12 @@
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_mul(mat4 m1, mat4 m2, mat4 dest) {
|
glm_mul(mat4 m1, mat4 m2, mat4 dest) {
|
||||||
#ifdef __AVX__
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
|
glm_mul_wasm(m1, m2, dest);
|
||||||
|
#elif defined(__AVX__)
|
||||||
glm_mul_avx(m1, m2, dest);
|
glm_mul_avx(m1, m2, dest);
|
||||||
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
glm_mul_sse2(m1, m2, dest);
|
glm_mul_sse2(m1, m2, dest);
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glm_mul_wasm(m1, m2, dest);
|
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
glm_mul_neon(m1, m2, dest);
|
glm_mul_neon(m1, m2, dest);
|
||||||
#else
|
#else
|
||||||
@@ -113,10 +113,10 @@ glm_mul(mat4 m1, mat4 m2, mat4 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_mul_rot(mat4 m1, mat4 m2, mat4 dest) {
|
glm_mul_rot(mat4 m1, mat4 m2, mat4 dest) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glm_mul_rot_sse2(m1, m2, dest);
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glm_mul_rot_wasm(m1, m2, dest);
|
glm_mul_rot_wasm(m1, m2, dest);
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glm_mul_rot_sse2(m1, m2, dest);
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
glm_mul_rot_neon(m1, m2, dest);
|
glm_mul_rot_neon(m1, m2, dest);
|
||||||
#else
|
#else
|
||||||
@@ -164,10 +164,10 @@ glm_mul_rot(mat4 m1, mat4 m2, mat4 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_inv_tr(mat4 mat) {
|
glm_inv_tr(mat4 mat) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glm_inv_tr_sse2(mat);
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glm_inv_tr_wasm(mat);
|
glm_inv_tr_wasm(mat);
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glm_inv_tr_sse2(mat);
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
glm_inv_tr_neon(mat);
|
glm_inv_tr_neon(mat);
|
||||||
#else
|
#else
|
||||||
|
|||||||
@@ -136,10 +136,10 @@ glm_mat2_zero(mat2 mat) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_mat2_mul(mat2 m1, mat2 m2, mat2 dest) {
|
glm_mat2_mul(mat2 m1, mat2 m2, mat2 dest) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glm_mat2_mul_sse2(m1, m2, dest);
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glm_mat2_mul_wasm(m1, m2, dest);
|
glm_mat2_mul_wasm(m1, m2, dest);
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glm_mat2_mul_sse2(m1, m2, dest);
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
glm_mat2_mul_neon(m1, m2, dest);
|
glm_mat2_mul_neon(m1, m2, dest);
|
||||||
#else
|
#else
|
||||||
@@ -166,10 +166,10 @@ glm_mat2_mul(mat2 m1, mat2 m2, mat2 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_mat2_transpose_to(mat2 m, mat2 dest) {
|
glm_mat2_transpose_to(mat2 m, mat2 dest) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glm_mat2_transp_sse2(m, dest);
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glm_mat2_transp_wasm(m, dest);
|
glm_mat2_transp_wasm(m, dest);
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glm_mat2_transp_sse2(m, dest);
|
||||||
#else
|
#else
|
||||||
dest[0][0] = m[0][0];
|
dest[0][0] = m[0][0];
|
||||||
dest[0][1] = m[1][0];
|
dest[0][1] = m[1][0];
|
||||||
@@ -230,10 +230,11 @@ glm_mat2_trace(mat2 m) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_mat2_scale(mat2 m, float s) {
|
glm_mat2_scale(mat2 m, float s) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
|
glmm_store(m[0], wasm_f32x4_mul(wasm_v128_load(m[0]),
|
||||||
|
wasm_f32x4_splat(s)));
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
glmm_store(m[0], _mm_mul_ps(_mm_loadu_ps(m[0]), _mm_set1_ps(s)));
|
glmm_store(m[0], _mm_mul_ps(_mm_loadu_ps(m[0]), _mm_set1_ps(s)));
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glmm_store(m[0], wasm_f32x4_mul(wasm_v128_load(m[0]), wasm_f32x4_splat(s)));
|
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
vst1q_f32(m[0], vmulq_f32(vld1q_f32(m[0]), vdupq_n_f32(s)));
|
vst1q_f32(m[0], vmulq_f32(vld1q_f32(m[0]), vdupq_n_f32(s)));
|
||||||
#else
|
#else
|
||||||
|
|||||||
@@ -152,10 +152,10 @@ glm_mat3_zero(mat3 mat) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_mat3_mul(mat3 m1, mat3 m2, mat3 dest) {
|
glm_mat3_mul(mat3 m1, mat3 m2, mat3 dest) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glm_mat3_mul_sse2(m1, m2, dest);
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glm_mat3_mul_wasm(m1, m2, dest);
|
glm_mat3_mul_wasm(m1, m2, dest);
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glm_mat3_mul_sse2(m1, m2, dest);
|
||||||
#else
|
#else
|
||||||
float a00 = m1[0][0], a01 = m1[0][1], a02 = m1[0][2],
|
float a00 = m1[0][0], a01 = m1[0][1], a02 = m1[0][2],
|
||||||
a10 = m1[1][0], a11 = m1[1][1], a12 = m1[1][2],
|
a10 = m1[1][0], a11 = m1[1][1], a12 = m1[1][2],
|
||||||
|
|||||||
@@ -125,15 +125,15 @@ glm_mat4_ucopy(mat4 mat, mat4 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_mat4_copy(mat4 mat, mat4 dest) {
|
glm_mat4_copy(mat4 mat, mat4 dest) {
|
||||||
#ifdef __AVX__
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store256(dest[0], glmm_load256(mat[0]));
|
|
||||||
glmm_store256(dest[2], glmm_load256(mat[2]));
|
|
||||||
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
|
||||||
glmm_store(dest[0], glmm_load(mat[0]));
|
glmm_store(dest[0], glmm_load(mat[0]));
|
||||||
glmm_store(dest[1], glmm_load(mat[1]));
|
glmm_store(dest[1], glmm_load(mat[1]));
|
||||||
glmm_store(dest[2], glmm_load(mat[2]));
|
glmm_store(dest[2], glmm_load(mat[2]));
|
||||||
glmm_store(dest[3], glmm_load(mat[3]));
|
glmm_store(dest[3], glmm_load(mat[3]));
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
#elif defined(__AVX__)
|
||||||
|
glmm_store256(dest[0], glmm_load256(mat[0]));
|
||||||
|
glmm_store256(dest[2], glmm_load256(mat[2]));
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
glmm_store(dest[0], glmm_load(mat[0]));
|
glmm_store(dest[0], glmm_load(mat[0]));
|
||||||
glmm_store(dest[1], glmm_load(mat[1]));
|
glmm_store(dest[1], glmm_load(mat[1]));
|
||||||
glmm_store(dest[2], glmm_load(mat[2]));
|
glmm_store(dest[2], glmm_load(mat[2]));
|
||||||
@@ -196,7 +196,14 @@ glm_mat4_identity_array(mat4 * __restrict mat, size_t count) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_mat4_zero(mat4 mat) {
|
glm_mat4_zero(mat4 mat) {
|
||||||
#ifdef __AVX__
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
|
glmm_128 x0;
|
||||||
|
x0 = wasm_f32x4_const(0.f, 0.f, 0.f, 0.f);
|
||||||
|
glmm_store(mat[0], x0);
|
||||||
|
glmm_store(mat[1], x0);
|
||||||
|
glmm_store(mat[2], x0);
|
||||||
|
glmm_store(mat[3], x0);
|
||||||
|
#elif defined(__AVX__)
|
||||||
__m256 y0;
|
__m256 y0;
|
||||||
y0 = _mm256_setzero_ps();
|
y0 = _mm256_setzero_ps();
|
||||||
glmm_store256(mat[0], y0);
|
glmm_store256(mat[0], y0);
|
||||||
@@ -208,13 +215,6 @@ glm_mat4_zero(mat4 mat) {
|
|||||||
glmm_store(mat[1], x0);
|
glmm_store(mat[1], x0);
|
||||||
glmm_store(mat[2], x0);
|
glmm_store(mat[2], x0);
|
||||||
glmm_store(mat[3], x0);
|
glmm_store(mat[3], x0);
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glmm_128 x0;
|
|
||||||
x0 = wasm_f32x4_const(0.f, 0.f, 0.f, 0.f);
|
|
||||||
glmm_store(mat[0], x0);
|
|
||||||
glmm_store(mat[1], x0);
|
|
||||||
glmm_store(mat[2], x0);
|
|
||||||
glmm_store(mat[3], x0);
|
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
glmm_128 x0;
|
glmm_128 x0;
|
||||||
x0 = vdupq_n_f32(0.0f);
|
x0 = vdupq_n_f32(0.0f);
|
||||||
@@ -313,12 +313,12 @@ glm_mat4_ins3(mat3 mat, mat4 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_mat4_mul(mat4 m1, mat4 m2, mat4 dest) {
|
glm_mat4_mul(mat4 m1, mat4 m2, mat4 dest) {
|
||||||
#ifdef __AVX__
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
|
glm_mat4_mul_wasm(m1, m2, dest);
|
||||||
|
#elif defined(__AVX__)
|
||||||
glm_mat4_mul_avx(m1, m2, dest);
|
glm_mat4_mul_avx(m1, m2, dest);
|
||||||
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
glm_mat4_mul_sse2(m1, m2, dest);
|
glm_mat4_mul_sse2(m1, m2, dest);
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glm_mat4_mul_wasm(m1, m2, dest);
|
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
glm_mat4_mul_neon(m1, m2, dest);
|
glm_mat4_mul_neon(m1, m2, dest);
|
||||||
#else
|
#else
|
||||||
@@ -395,10 +395,10 @@ glm_mat4_mulN(mat4 * __restrict matrices[], uint32_t len, mat4 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_mat4_mulv(mat4 m, vec4 v, vec4 dest) {
|
glm_mat4_mulv(mat4 m, vec4 v, vec4 dest) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glm_mat4_mulv_sse2(m, v, dest);
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glm_mat4_mulv_wasm(m, v, dest);
|
glm_mat4_mulv_wasm(m, v, dest);
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glm_mat4_mulv_sse2(m, v, dest);
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
glm_mat4_mulv_neon(m, v, dest);
|
glm_mat4_mulv_neon(m, v, dest);
|
||||||
#else
|
#else
|
||||||
@@ -517,10 +517,10 @@ glm_mat4_mulv3(mat4 m, vec3 v, float last, vec3 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_mat4_transpose_to(mat4 m, mat4 dest) {
|
glm_mat4_transpose_to(mat4 m, mat4 dest) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glm_mat4_transp_sse2(m, dest);
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glm_mat4_transp_wasm(m, dest);
|
glm_mat4_transp_wasm(m, dest);
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glm_mat4_transp_sse2(m, dest);
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
glm_mat4_transp_neon(m, dest);
|
glm_mat4_transp_neon(m, dest);
|
||||||
#else
|
#else
|
||||||
@@ -543,10 +543,10 @@ glm_mat4_transpose_to(mat4 m, mat4 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_mat4_transpose(mat4 m) {
|
glm_mat4_transpose(mat4 m) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glm_mat4_transp_sse2(m, m);
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glm_mat4_transp_wasm(m, m);
|
glm_mat4_transp_wasm(m, m);
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glm_mat4_transp_sse2(m, m);
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
glm_mat4_transp_neon(m, m);
|
glm_mat4_transp_neon(m, m);
|
||||||
#else
|
#else
|
||||||
@@ -584,12 +584,12 @@ glm_mat4_scale_p(mat4 m, float s) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_mat4_scale(mat4 m, float s) {
|
glm_mat4_scale(mat4 m, float s) {
|
||||||
#ifdef __AVX__
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
|
glm_mat4_scale_wasm(m, s);
|
||||||
|
#elif defined(__AVX__)
|
||||||
glm_mat4_scale_avx(m, s);
|
glm_mat4_scale_avx(m, s);
|
||||||
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
glm_mat4_scale_sse2(m, s);
|
glm_mat4_scale_sse2(m, s);
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glm_mat4_scale_wasm(m, s);
|
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
glm_mat4_scale_neon(m, s);
|
glm_mat4_scale_neon(m, s);
|
||||||
#else
|
#else
|
||||||
@@ -607,10 +607,10 @@ glm_mat4_scale(mat4 m, float s) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
float
|
float
|
||||||
glm_mat4_det(mat4 mat) {
|
glm_mat4_det(mat4 mat) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
return glm_mat4_det_sse2(mat);
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
return glm_mat4_det_wasm(mat);
|
return glm_mat4_det_wasm(mat);
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
return glm_mat4_det_sse2(mat);
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
return glm_mat4_det_neon(mat);
|
return glm_mat4_det_neon(mat);
|
||||||
#else
|
#else
|
||||||
@@ -707,10 +707,10 @@ glm_mat4_inv(mat4 mat, mat4 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_mat4_inv_fast(mat4 mat, mat4 dest) {
|
glm_mat4_inv_fast(mat4 mat, mat4 dest) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glm_mat4_inv_fast_sse2(mat, dest);
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glm_mat4_inv_fast_wasm(mat, dest);
|
glm_mat4_inv_fast_wasm(mat, dest);
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glm_mat4_inv_fast_sse2(mat, dest);
|
||||||
#else
|
#else
|
||||||
glm_mat4_inv(mat, dest);
|
glm_mat4_inv(mat, dest);
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -242,21 +242,7 @@ glm_quat_norm(versor q) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_quat_normalize_to(versor q, versor dest) {
|
glm_quat_normalize_to(versor q, versor dest) {
|
||||||
#if defined( __SSE2__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
__m128 xdot, x0;
|
|
||||||
float dot;
|
|
||||||
|
|
||||||
x0 = glmm_load(q);
|
|
||||||
xdot = glmm_vdot(x0, x0);
|
|
||||||
dot = _mm_cvtss_f32(xdot);
|
|
||||||
|
|
||||||
if (dot <= 0.0f) {
|
|
||||||
glm_quat_identity(dest);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
glmm_store(dest, _mm_div_ps(x0, _mm_sqrt_ps(xdot)));
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glmm_128 xdot, x0;
|
glmm_128 xdot, x0;
|
||||||
float dot;
|
float dot;
|
||||||
|
|
||||||
@@ -271,6 +257,20 @@ glm_quat_normalize_to(versor q, versor dest) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
glmm_store(dest, wasm_f32x4_div(x0, wasm_f32x4_sqrt(xdot)));
|
glmm_store(dest, wasm_f32x4_div(x0, wasm_f32x4_sqrt(xdot)));
|
||||||
|
#elif defined( __SSE2__ ) || defined( __SSE2__ )
|
||||||
|
__m128 xdot, x0;
|
||||||
|
float dot;
|
||||||
|
|
||||||
|
x0 = glmm_load(q);
|
||||||
|
xdot = glmm_vdot(x0, x0);
|
||||||
|
dot = _mm_cvtss_f32(xdot);
|
||||||
|
|
||||||
|
if (dot <= 0.0f) {
|
||||||
|
glm_quat_identity(dest);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
glmm_store(dest, _mm_div_ps(x0, _mm_sqrt_ps(xdot)));
|
||||||
#else
|
#else
|
||||||
float dot;
|
float dot;
|
||||||
|
|
||||||
@@ -457,10 +457,10 @@ glm_quat_mul(versor p, versor q, versor dest) {
|
|||||||
+ (a1 d2 + b1 c2 − c1 b2 + d1 a2)k
|
+ (a1 d2 + b1 c2 − c1 b2 + d1 a2)k
|
||||||
a1 a2 − b1 b2 − c1 c2 − d1 d2
|
a1 a2 − b1 b2 − c1 c2 − d1 d2
|
||||||
*/
|
*/
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glm_quat_mul_sse2(p, q, dest);
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glm_quat_mul_wasm(p, q, dest);
|
glm_quat_mul_wasm(p, q, dest);
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glm_quat_mul_sse2(p, q, dest);
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
glm_quat_mul_neon(p, q, dest);
|
glm_quat_mul_neon(p, q, dest);
|
||||||
#else
|
#else
|
||||||
|
|||||||
@@ -113,7 +113,7 @@
|
|||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(CGLM_SIMD_x86)
|
#if defined(CGLM_SIMD_x86) && !defined(CGLM_SIMD_WASM)
|
||||||
# include "x86.h"
|
# include "x86.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|||||||
@@ -45,10 +45,10 @@
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_vec4_broadcast(float val, vec4 d) {
|
glm_vec4_broadcast(float val, vec4 d) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(d, _mm_set1_ps(val));
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glmm_store(d, wasm_f32x4_splat(val));
|
glmm_store(d, wasm_f32x4_splat(val));
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glmm_store(d, _mm_set1_ps(val));
|
||||||
#else
|
#else
|
||||||
d[0] = d[1] = d[2] = d[3] = val;
|
d[0] = d[1] = d[2] = d[3] = val;
|
||||||
#endif
|
#endif
|
||||||
@@ -63,10 +63,10 @@ glm_vec4_broadcast(float val, vec4 d) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_vec4_fill(vec4 v, float val) {
|
glm_vec4_fill(vec4 v, float val) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(v, _mm_set1_ps(val));
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glmm_store(v, wasm_f32x4_splat(val));
|
glmm_store(v, wasm_f32x4_splat(val));
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glmm_store(v, _mm_set1_ps(val));
|
||||||
#else
|
#else
|
||||||
v[0] = v[1] = v[2] = v[3] = val;
|
v[0] = v[1] = v[2] = v[3] = val;
|
||||||
#endif
|
#endif
|
||||||
@@ -251,9 +251,9 @@ glm_vec4_sign(vec4 v, vec4 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_vec4_abs(vec4 v, vec4 dest) {
|
glm_vec4_abs(vec4 v, vec4 dest) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(dest, glmm_abs(glmm_load(v)));
|
glmm_store(dest, glmm_abs(glmm_load(v)));
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
glmm_store(dest, glmm_abs(glmm_load(v)));
|
glmm_store(dest, glmm_abs(glmm_load(v)));
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
vst1q_f32(dest, vabsq_f32(vld1q_f32(v)));
|
vst1q_f32(dest, vabsq_f32(vld1q_f32(v)));
|
||||||
@@ -290,9 +290,9 @@ glm_vec4_fract(vec4 v, vec4 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
float
|
float
|
||||||
glm_vec4_hadd(vec4 v) {
|
glm_vec4_hadd(vec4 v) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
return glmm_hadd(glmm_load(v));
|
return glmm_hadd(glmm_load(v));
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
return glmm_hadd(glmm_load(v));
|
return glmm_hadd(glmm_load(v));
|
||||||
#else
|
#else
|
||||||
return v[0] + v[1] + v[2] + v[3];
|
return v[0] + v[1] + v[2] + v[3];
|
||||||
@@ -308,10 +308,10 @@ glm_vec4_hadd(vec4 v) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_vec4_sqrt(vec4 v, vec4 dest) {
|
glm_vec4_sqrt(vec4 v, vec4 dest) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(dest, _mm_sqrt_ps(glmm_load(v)));
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glmm_store(dest, wasm_f32x4_sqrt(glmm_load(v)));
|
glmm_store(dest, wasm_f32x4_sqrt(glmm_load(v)));
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glmm_store(dest, _mm_sqrt_ps(glmm_load(v)));
|
||||||
#else
|
#else
|
||||||
dest[0] = sqrtf(v[0]);
|
dest[0] = sqrtf(v[0]);
|
||||||
dest[1] = sqrtf(v[1]);
|
dest[1] = sqrtf(v[1]);
|
||||||
|
|||||||
@@ -137,9 +137,9 @@ glm_vec4_copy3(vec4 a, vec3 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_vec4_copy(vec4 v, vec4 dest) {
|
glm_vec4_copy(vec4 v, vec4 dest) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(dest, glmm_load(v));
|
glmm_store(dest, glmm_load(v));
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
glmm_store(dest, glmm_load(v));
|
glmm_store(dest, glmm_load(v));
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
vst1q_f32(dest, vld1q_f32(v));
|
vst1q_f32(dest, vld1q_f32(v));
|
||||||
@@ -181,10 +181,10 @@ glm_vec4_ucopy(vec4 v, vec4 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_vec4_zero(vec4 v) {
|
glm_vec4_zero(vec4 v) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(v, _mm_setzero_ps());
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glmm_store(v, wasm_f32x4_const_splat(0.f));
|
glmm_store(v, wasm_f32x4_const_splat(0.f));
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glmm_store(v, _mm_setzero_ps());
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
vst1q_f32(v, vdupq_n_f32(0.0f));
|
vst1q_f32(v, vdupq_n_f32(0.0f));
|
||||||
#else
|
#else
|
||||||
@@ -203,10 +203,10 @@ glm_vec4_zero(vec4 v) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_vec4_one(vec4 v) {
|
glm_vec4_one(vec4 v) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(v, _mm_set1_ps(1.0f));
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glmm_store(v, wasm_f32x4_const_splat(1.0f));
|
glmm_store(v, wasm_f32x4_const_splat(1.0f));
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glmm_store(v, _mm_set1_ps(1.0f));
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
vst1q_f32(v, vdupq_n_f32(1.0f));
|
vst1q_f32(v, vdupq_n_f32(1.0f));
|
||||||
#else
|
#else
|
||||||
@@ -331,10 +331,10 @@ glm_vec4_norm_inf(vec4 v) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_vec4_add(vec4 a, vec4 b, vec4 dest) {
|
glm_vec4_add(vec4 a, vec4 b, vec4 dest) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(dest, _mm_add_ps(glmm_load(a), glmm_load(b)));
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glmm_store(dest, wasm_f32x4_add(glmm_load(a), glmm_load(b)));
|
glmm_store(dest, wasm_f32x4_add(glmm_load(a), glmm_load(b)));
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glmm_store(dest, _mm_add_ps(glmm_load(a), glmm_load(b)));
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
vst1q_f32(dest, vaddq_f32(vld1q_f32(a), vld1q_f32(b)));
|
vst1q_f32(dest, vaddq_f32(vld1q_f32(a), vld1q_f32(b)));
|
||||||
#else
|
#else
|
||||||
@@ -355,10 +355,10 @@ glm_vec4_add(vec4 a, vec4 b, vec4 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_vec4_adds(vec4 v, float s, vec4 dest) {
|
glm_vec4_adds(vec4 v, float s, vec4 dest) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(dest, _mm_add_ps(glmm_load(v), _mm_set1_ps(s)));
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glmm_store(dest, wasm_f32x4_add(glmm_load(v), wasm_f32x4_splat(s)));
|
glmm_store(dest, wasm_f32x4_add(glmm_load(v), wasm_f32x4_splat(s)));
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glmm_store(dest, _mm_add_ps(glmm_load(v), _mm_set1_ps(s)));
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
vst1q_f32(dest, vaddq_f32(vld1q_f32(v), vdupq_n_f32(s)));
|
vst1q_f32(dest, vaddq_f32(vld1q_f32(v), vdupq_n_f32(s)));
|
||||||
#else
|
#else
|
||||||
@@ -379,10 +379,10 @@ glm_vec4_adds(vec4 v, float s, vec4 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_vec4_sub(vec4 a, vec4 b, vec4 dest) {
|
glm_vec4_sub(vec4 a, vec4 b, vec4 dest) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(dest, _mm_sub_ps(glmm_load(a), glmm_load(b)));
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glmm_store(dest, wasm_f32x4_sub(glmm_load(a), glmm_load(b)));
|
glmm_store(dest, wasm_f32x4_sub(glmm_load(a), glmm_load(b)));
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glmm_store(dest, _mm_sub_ps(glmm_load(a), glmm_load(b)));
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
vst1q_f32(dest, vsubq_f32(vld1q_f32(a), vld1q_f32(b)));
|
vst1q_f32(dest, vsubq_f32(vld1q_f32(a), vld1q_f32(b)));
|
||||||
#else
|
#else
|
||||||
@@ -403,10 +403,10 @@ glm_vec4_sub(vec4 a, vec4 b, vec4 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_vec4_subs(vec4 v, float s, vec4 dest) {
|
glm_vec4_subs(vec4 v, float s, vec4 dest) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(dest, _mm_sub_ps(glmm_load(v), _mm_set1_ps(s)));
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glmm_store(dest, wasm_f32x4_sub(glmm_load(v), wasm_f32x4_splat(s)));
|
glmm_store(dest, wasm_f32x4_sub(glmm_load(v), wasm_f32x4_splat(s)));
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glmm_store(dest, _mm_sub_ps(glmm_load(v), _mm_set1_ps(s)));
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
vst1q_f32(dest, vsubq_f32(vld1q_f32(v), vdupq_n_f32(s)));
|
vst1q_f32(dest, vsubq_f32(vld1q_f32(v), vdupq_n_f32(s)));
|
||||||
#else
|
#else
|
||||||
@@ -427,10 +427,10 @@ glm_vec4_subs(vec4 v, float s, vec4 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_vec4_mul(vec4 a, vec4 b, vec4 dest) {
|
glm_vec4_mul(vec4 a, vec4 b, vec4 dest) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(dest, _mm_mul_ps(glmm_load(a), glmm_load(b)));
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glmm_store(dest, wasm_f32x4_mul(glmm_load(a), glmm_load(b)));
|
glmm_store(dest, wasm_f32x4_mul(glmm_load(a), glmm_load(b)));
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glmm_store(dest, _mm_mul_ps(glmm_load(a), glmm_load(b)));
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
vst1q_f32(dest, vmulq_f32(vld1q_f32(a), vld1q_f32(b)));
|
vst1q_f32(dest, vmulq_f32(vld1q_f32(a), vld1q_f32(b)));
|
||||||
#else
|
#else
|
||||||
@@ -451,10 +451,10 @@ glm_vec4_mul(vec4 a, vec4 b, vec4 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_vec4_scale(vec4 v, float s, vec4 dest) {
|
glm_vec4_scale(vec4 v, float s, vec4 dest) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(dest, _mm_mul_ps(glmm_load(v), _mm_set1_ps(s)));
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glmm_store(dest, wasm_f32x4_mul(glmm_load(v), wasm_f32x4_splat(s)));
|
glmm_store(dest, wasm_f32x4_mul(glmm_load(v), wasm_f32x4_splat(s)));
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glmm_store(dest, _mm_mul_ps(glmm_load(v), _mm_set1_ps(s)));
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
vst1q_f32(dest, vmulq_f32(vld1q_f32(v), vdupq_n_f32(s)));
|
vst1q_f32(dest, vmulq_f32(vld1q_f32(v), vdupq_n_f32(s)));
|
||||||
#else
|
#else
|
||||||
@@ -516,10 +516,10 @@ glm_vec4_div(vec4 a, vec4 b, vec4 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_vec4_divs(vec4 v, float s, vec4 dest) {
|
glm_vec4_divs(vec4 v, float s, vec4 dest) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(dest, _mm_div_ps(glmm_load(v), _mm_set1_ps(s)));
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glmm_store(dest, wasm_f32x4_div(glmm_load(v), wasm_f32x4_splat(s)));
|
glmm_store(dest, wasm_f32x4_div(glmm_load(v), wasm_f32x4_splat(s)));
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glmm_store(dest, _mm_div_ps(glmm_load(v), _mm_set1_ps(s)));
|
||||||
#else
|
#else
|
||||||
glm_vec4_scale(v, 1.0f / s, dest);
|
glm_vec4_scale(v, 1.0f / s, dest);
|
||||||
#endif
|
#endif
|
||||||
@@ -537,14 +537,14 @@ glm_vec4_divs(vec4 v, float s, vec4 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_vec4_addadd(vec4 a, vec4 b, vec4 dest) {
|
glm_vec4_addadd(vec4 a, vec4 b, vec4 dest) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(dest, _mm_add_ps(glmm_load(dest),
|
|
||||||
_mm_add_ps(glmm_load(a),
|
|
||||||
glmm_load(b))));
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glmm_store(dest, wasm_f32x4_add(
|
glmm_store(dest, wasm_f32x4_add(
|
||||||
glmm_load(dest),
|
glmm_load(dest),
|
||||||
wasm_f32x4_add(glmm_load(a), glmm_load(b))));
|
wasm_f32x4_add(glmm_load(a), glmm_load(b))));
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glmm_store(dest, _mm_add_ps(glmm_load(dest),
|
||||||
|
_mm_add_ps(glmm_load(a),
|
||||||
|
glmm_load(b))));
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
vst1q_f32(dest, vaddq_f32(vld1q_f32(dest),
|
vst1q_f32(dest, vaddq_f32(vld1q_f32(dest),
|
||||||
vaddq_f32(vld1q_f32(a),
|
vaddq_f32(vld1q_f32(a),
|
||||||
@@ -569,14 +569,14 @@ glm_vec4_addadd(vec4 a, vec4 b, vec4 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_vec4_subadd(vec4 a, vec4 b, vec4 dest) {
|
glm_vec4_subadd(vec4 a, vec4 b, vec4 dest) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(dest, _mm_add_ps(glmm_load(dest),
|
|
||||||
_mm_sub_ps(glmm_load(a),
|
|
||||||
glmm_load(b))));
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glmm_store(dest, wasm_f32x4_add(
|
glmm_store(dest, wasm_f32x4_add(
|
||||||
glmm_load(dest),
|
glmm_load(dest),
|
||||||
wasm_f32x4_sub(glmm_load(a), glmm_load(b))));
|
wasm_f32x4_sub(glmm_load(a), glmm_load(b))));
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glmm_store(dest, _mm_add_ps(glmm_load(dest),
|
||||||
|
_mm_sub_ps(glmm_load(a),
|
||||||
|
glmm_load(b))));
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
vst1q_f32(dest, vaddq_f32(vld1q_f32(dest),
|
vst1q_f32(dest, vaddq_f32(vld1q_f32(dest),
|
||||||
vsubq_f32(vld1q_f32(a),
|
vsubq_f32(vld1q_f32(a),
|
||||||
@@ -645,14 +645,14 @@ glm_vec4_muladds(vec4 a, float s, vec4 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_vec4_maxadd(vec4 a, vec4 b, vec4 dest) {
|
glm_vec4_maxadd(vec4 a, vec4 b, vec4 dest) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(dest, _mm_add_ps(glmm_load(dest),
|
|
||||||
_mm_max_ps(glmm_load(a),
|
|
||||||
glmm_load(b))));
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glmm_store(dest, wasm_f32x4_add(
|
glmm_store(dest, wasm_f32x4_add(
|
||||||
glmm_load(dest),
|
glmm_load(dest),
|
||||||
wasm_f32x4_max(glmm_load(a), glmm_load(b))));
|
wasm_f32x4_max(glmm_load(a), glmm_load(b))));
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glmm_store(dest, _mm_add_ps(glmm_load(dest),
|
||||||
|
_mm_max_ps(glmm_load(a),
|
||||||
|
glmm_load(b))));
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
vst1q_f32(dest, vaddq_f32(vld1q_f32(dest),
|
vst1q_f32(dest, vaddq_f32(vld1q_f32(dest),
|
||||||
vmaxq_f32(vld1q_f32(a),
|
vmaxq_f32(vld1q_f32(a),
|
||||||
@@ -677,14 +677,14 @@ glm_vec4_maxadd(vec4 a, vec4 b, vec4 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_vec4_minadd(vec4 a, vec4 b, vec4 dest) {
|
glm_vec4_minadd(vec4 a, vec4 b, vec4 dest) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(dest, _mm_add_ps(glmm_load(dest),
|
|
||||||
_mm_min_ps(glmm_load(a),
|
|
||||||
glmm_load(b))));
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glmm_store(dest, wasm_f32x4_add(
|
glmm_store(dest, wasm_f32x4_add(
|
||||||
glmm_load(dest),
|
glmm_load(dest),
|
||||||
wasm_f32x4_min(glmm_load(a), glmm_load(b))));
|
wasm_f32x4_min(glmm_load(a), glmm_load(b))));
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glmm_store(dest, _mm_add_ps(glmm_load(dest),
|
||||||
|
_mm_min_ps(glmm_load(a),
|
||||||
|
glmm_load(b))));
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
vst1q_f32(dest, vaddq_f32(vld1q_f32(dest),
|
vst1q_f32(dest, vaddq_f32(vld1q_f32(dest),
|
||||||
vminq_f32(vld1q_f32(a),
|
vminq_f32(vld1q_f32(a),
|
||||||
@@ -706,11 +706,11 @@ glm_vec4_minadd(vec4 a, vec4 b, vec4 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_vec4_negate_to(vec4 v, vec4 dest) {
|
glm_vec4_negate_to(vec4 v, vec4 dest) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(dest, _mm_xor_ps(glmm_load(v), _mm_set1_ps(-0.0f)));
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glmm_store(dest, wasm_v128_xor(glmm_load(v),
|
glmm_store(dest, wasm_v128_xor(glmm_load(v),
|
||||||
wasm_f32x4_const_splat(-0.0f)));
|
wasm_f32x4_const_splat(-0.0f)));
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glmm_store(dest, _mm_xor_ps(glmm_load(v), _mm_set1_ps(-0.0f)));
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
vst1q_f32(dest, vnegq_f32(vld1q_f32(v)));
|
vst1q_f32(dest, vnegq_f32(vld1q_f32(v)));
|
||||||
#else
|
#else
|
||||||
@@ -741,21 +741,7 @@ glm_vec4_negate(vec4 v) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_vec4_normalize_to(vec4 v, vec4 dest) {
|
glm_vec4_normalize_to(vec4 v, vec4 dest) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
__m128 xdot, x0;
|
|
||||||
float dot;
|
|
||||||
|
|
||||||
x0 = glmm_load(v);
|
|
||||||
xdot = glmm_vdot(x0, x0);
|
|
||||||
dot = _mm_cvtss_f32(xdot);
|
|
||||||
|
|
||||||
if (dot == 0.0f) {
|
|
||||||
glmm_store(dest, _mm_setzero_ps());
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
glmm_store(dest, _mm_div_ps(x0, _mm_sqrt_ps(xdot)));
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glmm_128 xdot, x0;
|
glmm_128 xdot, x0;
|
||||||
float dot;
|
float dot;
|
||||||
|
|
||||||
@@ -770,6 +756,20 @@ glm_vec4_normalize_to(vec4 v, vec4 dest) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
glmm_store(dest, wasm_f32x4_div(x0, wasm_f32x4_sqrt(xdot)));
|
glmm_store(dest, wasm_f32x4_div(x0, wasm_f32x4_sqrt(xdot)));
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
__m128 xdot, x0;
|
||||||
|
float dot;
|
||||||
|
|
||||||
|
x0 = glmm_load(v);
|
||||||
|
xdot = glmm_vdot(x0, x0);
|
||||||
|
dot = _mm_cvtss_f32(xdot);
|
||||||
|
|
||||||
|
if (dot == 0.0f) {
|
||||||
|
glmm_store(dest, _mm_setzero_ps());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
glmm_store(dest, _mm_div_ps(x0, _mm_sqrt_ps(xdot)));
|
||||||
#else
|
#else
|
||||||
float norm;
|
float norm;
|
||||||
|
|
||||||
@@ -805,10 +805,10 @@ glm_vec4_normalize(vec4 v) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
float
|
float
|
||||||
glm_vec4_distance(vec4 a, vec4 b) {
|
glm_vec4_distance(vec4 a, vec4 b) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
return glmm_norm(_mm_sub_ps(glmm_load(a), glmm_load(b)));
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
return glmm_norm(wasm_f32x4_sub(glmm_load(a), glmm_load(b)));
|
return glmm_norm(wasm_f32x4_sub(glmm_load(a), glmm_load(b)));
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
return glmm_norm(_mm_sub_ps(glmm_load(a), glmm_load(b)));
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
return glmm_norm(vsubq_f32(glmm_load(a), glmm_load(b)));
|
return glmm_norm(vsubq_f32(glmm_load(a), glmm_load(b)));
|
||||||
#else
|
#else
|
||||||
@@ -829,10 +829,10 @@ glm_vec4_distance(vec4 a, vec4 b) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
float
|
float
|
||||||
glm_vec4_distance2(vec4 a, vec4 b) {
|
glm_vec4_distance2(vec4 a, vec4 b) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
return glmm_norm2(_mm_sub_ps(glmm_load(a), glmm_load(b)));
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
return glmm_norm2(wasm_f32x4_sub(glmm_load(a), glmm_load(b)));
|
return glmm_norm2(wasm_f32x4_sub(glmm_load(a), glmm_load(b)));
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
return glmm_norm2(_mm_sub_ps(glmm_load(a), glmm_load(b)));
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
return glmm_norm2(vsubq_f32(glmm_load(a), glmm_load(b)));
|
return glmm_norm2(vsubq_f32(glmm_load(a), glmm_load(b)));
|
||||||
#else
|
#else
|
||||||
@@ -853,10 +853,10 @@ glm_vec4_distance2(vec4 a, vec4 b) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_vec4_maxv(vec4 a, vec4 b, vec4 dest) {
|
glm_vec4_maxv(vec4 a, vec4 b, vec4 dest) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(dest, _mm_max_ps(glmm_load(a), glmm_load(b)));
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glmm_store(dest, wasm_f32x4_max(glmm_load(a), glmm_load(b)));
|
glmm_store(dest, wasm_f32x4_max(glmm_load(a), glmm_load(b)));
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glmm_store(dest, _mm_max_ps(glmm_load(a), glmm_load(b)));
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
vst1q_f32(dest, vmaxq_f32(vld1q_f32(a), vld1q_f32(b)));
|
vst1q_f32(dest, vmaxq_f32(vld1q_f32(a), vld1q_f32(b)));
|
||||||
#else
|
#else
|
||||||
@@ -877,10 +877,10 @@ glm_vec4_maxv(vec4 a, vec4 b, vec4 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_vec4_minv(vec4 a, vec4 b, vec4 dest) {
|
glm_vec4_minv(vec4 a, vec4 b, vec4 dest) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(dest, _mm_min_ps(glmm_load(a), glmm_load(b)));
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glmm_store(dest, wasm_f32x4_min(glmm_load(a), glmm_load(b)));
|
glmm_store(dest, wasm_f32x4_min(glmm_load(a), glmm_load(b)));
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glmm_store(dest, _mm_min_ps(glmm_load(a), glmm_load(b)));
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
vst1q_f32(dest, vminq_f32(vld1q_f32(a), vld1q_f32(b)));
|
vst1q_f32(dest, vminq_f32(vld1q_f32(a), vld1q_f32(b)));
|
||||||
#else
|
#else
|
||||||
@@ -901,13 +901,13 @@ glm_vec4_minv(vec4 a, vec4 b, vec4 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_vec4_clamp(vec4 v, float minVal, float maxVal) {
|
glm_vec4_clamp(vec4 v, float minVal, float maxVal) {
|
||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(v, _mm_min_ps(_mm_max_ps(glmm_load(v), _mm_set1_ps(minVal)),
|
|
||||||
_mm_set1_ps(maxVal)));
|
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
|
||||||
glmm_store(v, wasm_f32x4_min(
|
glmm_store(v, wasm_f32x4_min(
|
||||||
wasm_f32x4_max(glmm_load(v), wasm_f32x4_splat(minVal)),
|
wasm_f32x4_max(glmm_load(v), wasm_f32x4_splat(minVal)),
|
||||||
wasm_f32x4_splat(maxVal)));
|
wasm_f32x4_splat(maxVal)));
|
||||||
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
|
glmm_store(v, _mm_min_ps(_mm_max_ps(glmm_load(v), _mm_set1_ps(minVal)),
|
||||||
|
_mm_set1_ps(maxVal)));
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
vst1q_f32(v, vminq_f32(vmaxq_f32(vld1q_f32(v), vdupq_n_f32(minVal)),
|
vst1q_f32(v, vminq_f32(vmaxq_f32(vld1q_f32(v), vdupq_n_f32(minVal)),
|
||||||
vdupq_n_f32(maxVal)));
|
vdupq_n_f32(maxVal)));
|
||||||
|
|||||||
Reference in New Issue
Block a user