diff --git a/include/cglm/vec4.h b/include/cglm/vec4.h
index 0ef6bff..c50b4f9 100644
--- a/include/cglm/vec4.h
+++ b/include/cglm/vec4.h
@@ -146,7 +146,17 @@ glm_vec4_dot(vec4 a, vec4 b) {
 CGLM_INLINE
 float
 glm_vec4_norm2(vec4 v) {
-  return glm_vec4_dot(v, v);
+#if defined( __SSE__ ) || defined( __SSE2__ )
+  __m128 x0;
+  x0 = _mm_load_ps(v);     /* aligned load: assumes v is 16-byte aligned */
+  x0 = _mm_mul_ps(x0, x0); /* square each lane */
+  /* shuffle+add twice so that lane 0 is meant to hold the sum of all lanes */
+  x0 = _mm_add_ps(x0, _mm_shuffle1_ps(x0, 1, 0, 3, 2));
+  return _mm_cvtss_f32(_mm_add_ss(x0, _mm_shuffle1_ps(x0, 0, 1, 0, 1)));
+#else
+  /* scalar fallback: v is the only vector in scope in this function */
+  return v[0] * v[0] + v[1] * v[1] + v[2] * v[2] + v[3] * v[3];
+#endif
 }
 
 /*!
@@ -159,7 +169,14 @@ glm_vec4_norm2(vec4 v) {
 CGLM_INLINE
 float
 glm_vec4_norm(vec4 vec) {
+#if defined( __SSE__ ) || defined( __SSE2__ )
+  __m128 x0;
+  x0 = _mm_load_ps(vec);
+  /* square root of lane 0 of glm_simd_dot(x0, x0) */
+  return _mm_cvtss_f32(_mm_sqrt_ss(glm_simd_dot(x0, x0)));
+#else
   return sqrtf(glm_vec4_norm2(vec));
+#endif
 }
 
 /*!
diff --git a/test/src/test_common.c b/test/src/test_common.c
index a559087..60baac3 100644
--- a/test/src/test_common.c
+++ b/test/src/test_common.c
@@ -84,6 +84,12 @@ test_assert_mat4_eq2(mat4 m1, mat4 m2, float eps) {
   }
 }
 
+/* assert that two floats differ by at most 0.000009 */
+void
+test_assert_eqf(float a, float b) {
+  assert_true(fabsf(a - b) <= 0.000009); /* rounding errors */
+}
+
 void
 test_assert_vec3_eq(vec3 v1, vec3 v2) {
   assert_true(fabsf(v1[0] - v2[0]) <= 0.000009); /* rounding errors */
diff --git a/test/src/test_common.h b/test/src/test_common.h
index 50e84d6..db113a4 100644
--- a/test/src/test_common.h
+++ b/test/src/test_common.h
@@ -25,6 +25,9 @@
 void
 test_rand_mat4(mat4 dest);
 
+void
+test_assert_eqf(float a, float b);
+
 void
 test_assert_mat4_eq(mat4 m1, mat4 m2);
 
diff --git a/test/src/test_vec4.c b/test/src/test_vec4.c
index 994bab7..51986f1 100644
--- a/test/src/test_vec4.c
+++ b/test/src/test_vec4.c
@@ -28,6 +28,18 @@ test_vec4_normalize_to(vec4 vec, vec4 dest) {
   glm_vec4_scale(vec, 1.0f / norm, dest);
 }
 
+/* reference squared norm: test_vec4_dot of vec with itself */
+float
+test_vec4_norm2(vec4 vec) {
+  return test_vec4_dot(vec, vec);
+}
+
+/* reference norm: sqrtf of test_vec4_dot of vec with itself */
+float
+test_vec4_norm(vec4 vec) {
+  return sqrtf(test_vec4_dot(vec, vec));
+}
+
 void
 test_vec4(void **state) {
   vec4 v, v1, v2;
@@ -35,7 +47,7 @@ test_vec4(void **state) {
   float d1, d2;
 
 
-  for (i = 0; i < 100; i++) {
+  for (i = 0; i < 1000; i++) {
     /* 1. test SSE/SIMD dot product */
     test_rand_vec4(v);
     d1 = glm_vec4_dot(v, v);
@@ -51,5 +63,13 @@ test_vec4(void **state) {
     /* all must be same */
     test_assert_vec4_eq(v1, v2);
     test_assert_vec4_eq(v, v2);
+
+    /* 3. test SIMD norm */
+    test_rand_vec4(v);
+    test_assert_eqf(test_vec4_norm(v), glm_vec4_norm(v));
+
+    /* 4. test SIMD norm2 */
+    test_rand_vec4(v);
+    test_assert_eqf(test_vec4_norm2(v), glm_vec4_norm2(v));
   }
 }