simd, sse2: make alignment optional for store operations

This commit is contained in:
Recep Aslantas
2018-05-08 15:31:09 +03:00
parent 252bf925fc
commit 568001d26a
8 changed files with 143 additions and 144 deletions

View File

@@ -58,19 +58,19 @@ glm_translate_to(mat4 m, vec3 v, mat4 dest) {
mat4 t = GLM_MAT4_IDENTITY_INIT;
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(dest[3],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_load(t[0]),
_mm_set1_ps(v[0])),
_mm_mul_ps(glmm_load(t[1]),
_mm_set1_ps(v[1]))),
_mm_add_ps(_mm_mul_ps(glmm_load(t[2]),
_mm_set1_ps(v[2])),
glmm_load(t[3]))))
glmm_store(dest[3],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_load(t[0]),
_mm_set1_ps(v[0])),
_mm_mul_ps(glmm_load(t[1]),
_mm_set1_ps(v[1]))),
_mm_add_ps(_mm_mul_ps(glmm_load(t[2]),
_mm_set1_ps(v[2])),
glmm_load(t[3]))))
;
_mm_store_ps(dest[0], glmm_load(m[0]));
_mm_store_ps(dest[1], glmm_load(m[1]));
_mm_store_ps(dest[2], glmm_load(m[2]));
glmm_store(dest[0], glmm_load(m[0]));
glmm_store(dest[1], glmm_load(m[1]));
glmm_store(dest[2], glmm_load(m[2]));
#else
vec4 v1, v2, v3;
@@ -97,14 +97,14 @@ CGLM_INLINE
void
glm_translate(mat4 m, vec3 v) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(m[3],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_load(m[0]),
_mm_set1_ps(v[0])),
_mm_mul_ps(glmm_load(m[1]),
_mm_set1_ps(v[1]))),
_mm_add_ps(_mm_mul_ps(glmm_load(m[2]),
_mm_set1_ps(v[2])),
glmm_load(m[3]))))
glmm_store(m[3],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_load(m[0]),
_mm_set1_ps(v[0])),
_mm_mul_ps(glmm_load(m[1]),
_mm_set1_ps(v[1]))),
_mm_add_ps(_mm_mul_ps(glmm_load(m[2]),
_mm_set1_ps(v[2])),
glmm_load(m[3]))))
;
#else
vec4 v1, v2, v3;
@@ -129,10 +129,10 @@ CGLM_INLINE
void
glm_translate_x(mat4 m, float x) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(m[3],
_mm_add_ps(_mm_mul_ps(glmm_load(m[0]),
_mm_set1_ps(x)),
glmm_load(m[3])))
glmm_store(m[3],
_mm_add_ps(_mm_mul_ps(glmm_load(m[0]),
_mm_set1_ps(x)),
glmm_load(m[3])))
;
#else
vec4 v1;
@@ -151,10 +151,10 @@ CGLM_INLINE
void
glm_translate_y(mat4 m, float y) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(m[3],
_mm_add_ps(_mm_mul_ps(glmm_load(m[1]),
_mm_set1_ps(y)),
glmm_load(m[3])))
glmm_store(m[3],
_mm_add_ps(_mm_mul_ps(glmm_load(m[1]),
_mm_set1_ps(y)),
glmm_load(m[3])))
;
#else
vec4 v1;
@@ -173,10 +173,10 @@ CGLM_INLINE
void
glm_translate_z(mat4 m, float z) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(m[3],
_mm_add_ps(_mm_mul_ps(glmm_load(m[2]),
_mm_set1_ps(z)),
glmm_load(m[3])))
glmm_store(m[3],
_mm_add_ps(_mm_mul_ps(glmm_load(m[2]),
_mm_set1_ps(z)),
glmm_load(m[3])))
;
#else
vec4 v1;