mirror of
https://github.com/recp/cglm.git
synced 2026-02-17 03:39:05 +00:00
simd128: inline some functions
This commit is contained in:
@@ -29,10 +29,6 @@
|
|||||||
#define glmm_splat_z(x) glmm_splat(x, 2)
|
#define glmm_splat_z(x) glmm_splat(x, 2)
|
||||||
#define glmm_splat_w(x) glmm_splat(x, 3)
|
#define glmm_splat_w(x) glmm_splat(x, 3)
|
||||||
|
|
||||||
#define glmm_shuff2(a, b, z0, y0, x0, w0, z1, y1, x1, w1) \
|
|
||||||
glmm_shuff1(_mm_shuffle_ps(a, b, _MM_SHUFFLE(z0, y0, x0, w0)), \
|
|
||||||
z1, y1, x1, w1)
|
|
||||||
|
|
||||||
#define _mm_cvtss_f32(v) wasm_f32x4_extract_lane(v, 0)
|
#define _mm_cvtss_f32(v) wasm_f32x4_extract_lane(v, 0)
|
||||||
|
|
||||||
static inline glmm_128 __attribute__((__always_inline__, __nodebug__))
|
static inline glmm_128 __attribute__((__always_inline__, __nodebug__))
|
||||||
@@ -59,24 +55,6 @@ _mm_movelh_ps(glmm_128 __a, glmm_128 __b)
|
|||||||
return wasm_i32x4_shuffle(__a, __b, 0, 1, 4, 5);
|
return wasm_i32x4_shuffle(__a, __b, 0, 1, 4, 5);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ glmm_128 __attribute__((__always_inline__, __nodebug__))
|
|
||||||
_mm_move_ss(glmm_128 __a, glmm_128 __b)
|
|
||||||
{
|
|
||||||
return (glmm_128)wasm_i32x4_shuffle(__a, __b, 4, 1, 2, 3);
|
|
||||||
}
|
|
||||||
|
|
||||||
static __inline__ glmm_128 __attribute__((__always_inline__, __nodebug__))
|
|
||||||
_mm_add_ps(glmm_128 __a, glmm_128 __b)
|
|
||||||
{
|
|
||||||
return (glmm_128)wasm_f32x4_add((glmm_128)__a, (glmm_128)__b);
|
|
||||||
}
|
|
||||||
|
|
||||||
static __inline__ glmm_128 __attribute__((__always_inline__, __nodebug__))
|
|
||||||
_mm_add_ss(glmm_128 __a, glmm_128 __b)
|
|
||||||
{
|
|
||||||
return _mm_move_ss(__a, _mm_add_ps(__a, __b));
|
|
||||||
}
|
|
||||||
|
|
||||||
static __inline__ glmm_128 __attribute__((__always_inline__, __nodebug__))
|
static __inline__ glmm_128 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_set_ps(float __z, float __y, float __x, float __w)
|
_mm_set_ps(float __z, float __y, float __x, float __w)
|
||||||
{
|
{
|
||||||
@@ -92,7 +70,7 @@ _mm_sqrt_ss(glmm_128 __a)
|
|||||||
static __inline__ glmm_128 __attribute__((__always_inline__, __nodebug__))
|
static __inline__ glmm_128 __attribute__((__always_inline__, __nodebug__))
|
||||||
_mm_rcp_ps(glmm_128 __a)
|
_mm_rcp_ps(glmm_128 __a)
|
||||||
{
|
{
|
||||||
return (glmm_128)wasm_f32x4_div((glmm_128)wasm_f32x4_splat(1.0f), (glmm_128)__a);
|
return (glmm_128)wasm_f32x4_div((glmm_128)wasm_f32x4_splat(1.0f), (glmm_128)__a);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline__ void __attribute__((__always_inline__, __nodebug__))
|
static __inline__ void __attribute__((__always_inline__, __nodebug__))
|
||||||
@@ -148,7 +126,7 @@ glmm_vhadds(glmm_128 v) {
|
|||||||
shuf = glmm_shuff1(v, 2, 3, 0, 1);
|
shuf = glmm_shuff1(v, 2, 3, 0, 1);
|
||||||
sums = wasm_f32x4_add(v, shuf);
|
sums = wasm_f32x4_add(v, shuf);
|
||||||
shuf = _mm_movehl_ps(shuf, sums);
|
shuf = _mm_movehl_ps(shuf, sums);
|
||||||
sums = _mm_add_ss(sums, shuf);
|
sums = wasm_i32x4_shuffle(sums, wasm_f32x4_add(sums, shuf), 4, 1, 2, 3);
|
||||||
return sums;
|
return sums;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -181,7 +159,7 @@ glmm_vhmax(glmm_128 v) {
|
|||||||
x0 = glmm_shuff1(v, 2, 3, 2, 3); /* [2, 3, 2, 3] */
|
x0 = glmm_shuff1(v, 2, 3, 2, 3); /* [2, 3, 2, 3] */
|
||||||
x1 = wasm_f32x4_pmax(x0, v); /* [0|2, 1|3, 2|2, 3|3] */
|
x1 = wasm_f32x4_pmax(x0, v); /* [0|2, 1|3, 2|2, 3|3] */
|
||||||
x2 = glmm_splat(x1, 1); /* [1|3, 1|3, 1|3, 1|3] */
|
x2 = glmm_splat(x1, 1); /* [1|3, 1|3, 1|3, 1|3] */
|
||||||
return _mm_move_ss(x1, wasm_f32x4_pmax(x1, x2));
|
return (glmm_128) wasm_i32x4_shuffle(x1, wasm_f32x4_pmax(x1, x2), 4, 1, 2, 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline
|
static inline
|
||||||
|
|||||||
@@ -329,7 +329,7 @@ glm_vec4_add(vec4 a, vec4 b, vec4 dest) {
|
|||||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
#if defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
glmm_store(dest, _mm_add_ps(glmm_load(a), glmm_load(b)));
|
glmm_store(dest, _mm_add_ps(glmm_load(a), glmm_load(b)));
|
||||||
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
#elif defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(dest, _mm_add_ps(glmm_load(a), glmm_load(b)));
|
glmm_store(dest, wasm_f32x4_add(glmm_load(a), glmm_load(b)));
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
vst1q_f32(dest, vaddq_f32(vld1q_f32(a), vld1q_f32(b)));
|
vst1q_f32(dest, vaddq_f32(vld1q_f32(a), vld1q_f32(b)));
|
||||||
#else
|
#else
|
||||||
|
|||||||
Reference in New Issue
Block a user