improve mat4 mul

This commit is contained in:
Recep Aslantas
2016-09-20 03:13:48 +03:00
parent b42a599310
commit 8a6fe6948a
3 changed files with 77 additions and 72 deletions

View File

@@ -19,52 +19,45 @@
/*
 * Identity matrix expression: GLM_MAT4_IDENTITY_INIT cast to mat4.
 * NOTE(review): presumably GLM_MAT4_IDENTITY_INIT is a brace initializer,
 * which would make this a compound literal -- confirm at its definition.
 */
#define GLM_MAT4_IDENTITY (mat4)GLM_MAT4_IDENTITY_INIT
/*
 * glm_mat4_mul_impl(l, r, d): scalar 4x4 product over flat 16-element arrays.
 *
 * For i, j in 0..3 it stores
 *   d[i * 4 + j] = l[i * 4 + 0] * r[0 * 4 + j] + ... + l[i * 4 + 3] * r[3 * 4 + j]
 * with the stores issued in increasing index order, d[0] through d[15].
 *
 * l, r: indexable operands holding at least 16 elements each; they may refer
 *       to the same array.
 * d:    indexable destination with room for 16 elements. It must not overlap
 *       l or r: d[0] is stored before later elements re-read l[0] and r[0],
 *       so an overlapping destination would mix old and new values.
 *
 * Every argument is parenthesized so pointer expressions such as `buf + 16`
 * expand correctly. Each argument is still evaluated many times, so do not
 * pass expressions with side effects.
 */
#define glm_mat4_mul_impl(l, r, d) \
  do { \
    (d)[0] = (l)[0] * (r)[0] + (l)[1] * (r)[4] + (l)[2] * (r)[8] + (l)[3] * (r)[12]; \
    (d)[1] = (l)[0] * (r)[1] + (l)[1] * (r)[5] + (l)[2] * (r)[9] + (l)[3] * (r)[13]; \
    (d)[2] = (l)[0] * (r)[2] + (l)[1] * (r)[6] + (l)[2] * (r)[10] + (l)[3] * (r)[14]; \
    (d)[3] = (l)[0] * (r)[3] + (l)[1] * (r)[7] + (l)[2] * (r)[11] + (l)[3] * (r)[15]; \
    (d)[4] = (l)[4] * (r)[0] + (l)[5] * (r)[4] + (l)[6] * (r)[8] + (l)[7] * (r)[12]; \
    (d)[5] = (l)[4] * (r)[1] + (l)[5] * (r)[5] + (l)[6] * (r)[9] + (l)[7] * (r)[13]; \
    (d)[6] = (l)[4] * (r)[2] + (l)[5] * (r)[6] + (l)[6] * (r)[10] + (l)[7] * (r)[14]; \
    (d)[7] = (l)[4] * (r)[3] + (l)[5] * (r)[7] + (l)[6] * (r)[11] + (l)[7] * (r)[15]; \
    (d)[8] = (l)[8] * (r)[0] + (l)[9] * (r)[4] + (l)[10] * (r)[8] + (l)[11] * (r)[12]; \
    (d)[9] = (l)[8] * (r)[1] + (l)[9] * (r)[5] + (l)[10] * (r)[9] + (l)[11] * (r)[13]; \
    (d)[10] = (l)[8] * (r)[2] + (l)[9] * (r)[6] + (l)[10] * (r)[10] + (l)[11] * (r)[14]; \
    (d)[11] = (l)[8] * (r)[3] + (l)[9] * (r)[7] + (l)[10] * (r)[11] + (l)[11] * (r)[15]; \
    (d)[12] = (l)[12] * (r)[0] + (l)[13] * (r)[4] + (l)[14] * (r)[8] + (l)[15] * (r)[12]; \
    (d)[13] = (l)[12] * (r)[1] + (l)[13] * (r)[5] + (l)[14] * (r)[9] + (l)[15] * (r)[13]; \
    (d)[14] = (l)[12] * (r)[2] + (l)[13] * (r)[6] + (l)[14] * (r)[10] + (l)[15] * (r)[14]; \
    (d)[15] = (l)[12] * (r)[3] + (l)[13] * (r)[7] + (l)[14] * (r)[11] + (l)[15] * (r)[15]; \
  } while (0)
/*
 * glm_mat4_mul: 4x4 matrix product stored into the third argument.
 *
 * NOTE(review): this span looks like a rendered diff with the +/- markers
 * stripped. Two signatures are visible -- (m1, m2, dest) and (l, r, d) --
 * and the lines of both bodies are interleaved, so it is not compilable as
 * shown. Confirm against the repository before relying on it.
 *
 * What the visible lines show:
 *  - the (m1, m2, dest) form casts its arguments to float pointers and
 *    branches on m1 != m2; the else branch passes l as both operands;
 *  - under __SSE__ / __SSE2__ both CGLM_MAT_MUL_SSE_4x4f and
 *    glm_mat4_mul_sse2 appear on adjacent lines (presumably the old and the
 *    new call, respectively -- verify);
 *  - the two-index scalar fallback computes, for i, j in 0..3,
 *      d[i][j] = l[0][j] * r[i][0] + l[1][j] * r[i][1]
 *              + l[2][j] * r[i][2] + l[3][j] * r[i][3].
 *
 * The two-index fallback stores into d while l and r are still being read,
 * so a d that overlaps either input would read partially overwritten values.
 */
CGLM_INLINE
void
glm_mat4_mul(mat4 m1, mat4 m2, mat4 dest) {
float * __restrict d;
float * __restrict l;
d = (float *)dest;
l = (float *)m1;
if (m1 != m2) {
float * __restrict r;
r = (float *)m2;
glm_mat4_mul(mat4 l, mat4 r, mat4 d) {
#if defined( __SSE__ ) || defined( __SSE2__ )
CGLM_MAT_MUL_SSE_4x4f(l, r, d);
glm_mat4_mul_sse2(l, r, d);
#else
glm_mat4_mul_impl(l, r, d);
/* Two-index scalar fallback: second index 0 of d for first index 0..3. */
d[0][0] = l[0][0] * r[0][0] + l[1][0] * r[0][1] +
l[2][0] * r[0][2] + l[3][0] * r[0][3];
d[1][0] = l[0][0] * r[1][0] + l[1][0] * r[1][1] +
l[2][0] * r[1][2] + l[3][0] * r[1][3];
d[2][0] = l[0][0] * r[2][0] + l[1][0] * r[2][1] +
l[2][0] * r[2][2] + l[3][0] * r[2][3];
d[3][0] = l[0][0] * r[3][0] + l[1][0] * r[3][1] +
l[2][0] * r[3][2] + l[3][0] * r[3][3];
/* Second index 1. */
d[0][1] = l[0][1] * r[0][0] + l[1][1] * r[0][1] +
l[2][1] * r[0][2] + l[3][1] * r[0][3];
d[1][1] = l[0][1] * r[1][0] + l[1][1] * r[1][1] +
l[2][1] * r[1][2] + l[3][1] * r[1][3];
d[2][1] = l[0][1] * r[2][0] + l[1][1] * r[2][1] +
l[2][1] * r[2][2] + l[3][1] * r[2][3];
d[3][1] = l[0][1] * r[3][0] + l[1][1] * r[3][1] +
l[2][1] * r[3][2] + l[3][1] * r[3][3];
/* Second index 2. */
d[0][2] = l[0][2] * r[0][0] + l[1][2] * r[0][1] +
l[2][2] * r[0][2] + l[3][2] * r[0][3];
d[1][2] = l[0][2] * r[1][0] + l[1][2] * r[1][1] +
l[2][2] * r[1][2] + l[3][2] * r[1][3];
d[2][2] = l[0][2] * r[2][0] + l[1][2] * r[2][1] +
l[2][2] * r[2][2] + l[3][2] * r[2][3];
d[3][2] = l[0][2] * r[3][0] + l[1][2] * r[3][1] +
l[2][2] * r[3][2] + l[3][2] * r[3][3];
/* Second index 3. */
d[0][3] = l[0][3] * r[0][0] + l[1][3] * r[0][1] +
l[2][3] * r[0][2] + l[3][3] * r[0][3];
d[1][3] = l[0][3] * r[1][0] + l[1][3] * r[1][1] +
l[2][3] * r[1][2] + l[3][3] * r[1][3];
d[2][3] = l[0][3] * r[2][0] + l[1][3] * r[2][1] +
l[2][3] * r[2][2] + l[3][3] * r[2][3];
d[3][3] = l[0][3] * r[3][0] + l[1][3] * r[3][1] +
l[2][3] * r[3][2] + l[3][3] * r[3][3];
#endif
} else {
/* m1 == m2 case of the (m1, m2, dest) form: l is passed as both operands. */
#if defined( __SSE__ ) || defined( __SSE2__ )
CGLM_MAT_MUL_SSE_4x4f(l, l, d);
#else
glm_mat4_mul_impl(l, l, d);
#endif
}
}
CGLM_INLINE