Compare commits

...

7 Commits

Author SHA1 Message Date
Recep Aslantas
04eaf9c535 arm, neon: neon/fma support for glm_quat_mul() 2021-04-29 01:12:00 +03:00
Recep Aslantas
bd6641bd0a build: add missing files to build files 2021-04-28 22:45:03 +03:00
Recep Aslantas
4e4bff418d arm, neon: neon/fma support for glm_mat2_mul() 2021-04-28 22:06:46 +03:00
Recep Aslantas
55ebbdbe40 arm, neon: neon/fma support for glm_inv_tr() 2021-04-28 14:46:14 +03:00
Recep Aslantas
e4c35e32fc Merge pull request #190 from ylecuyer/patch-3
Minor typo in doc
2021-04-27 23:52:40 +03:00
Yoann Lecuyer
ec467fef1f Minor typo in doc
I stumbled upon while reading the doc
2021-04-27 22:09:13 +02:00
Recep Aslantas
1e8865233b Merge pull request #189 from recp/simd-2
ARM Neon Update
2021-04-25 15:20:24 +03:00
12 changed files with 192 additions and 3 deletions

View File

@@ -109,7 +109,10 @@ cglm_simd_avx_HEADERS = include/cglm/simd/avx/mat4.h \
include/cglm/simd/avx/affine.h
cglm_simd_neondir=$(includedir)/cglm/simd/neon
cglm_simd_neon_HEADERS = include/cglm/simd/neon/mat4.h
cglm_simd_neon_HEADERS = include/cglm/simd/neon/mat4.h \
include/cglm/simd/neon/mat2.h \
include/cglm/simd/neon/affine.h \
include/cglm/simd/neon/quat.h
cglm_structdir=$(includedir)/cglm/struct
cglm_struct_HEADERS = include/cglm/struct/mat4.h \

View File

@@ -2,7 +2,7 @@ How to send vector or matrix to OpenGL like API
==================================================
*cglm*'s vector and matrix types are arrays. So you can send them directly to a
function which accecpts pointer. But you may got warnings for matrix because it is
function which accepts a pointer. But you may get warnings for a matrix because it is
a two-dimensional array.
Passing / Uniforming Matrix to OpenGL:

View File

@@ -158,6 +158,8 @@ void
glm_inv_tr(mat4 mat) {
#if defined( __SSE__ ) || defined( __SSE2__ )
glm_inv_tr_sse2(mat);
#elif defined(CGLM_NEON_FP)
glm_inv_tr_neon(mat);
#else
CGLM_ALIGN_MAT mat3 r;
CGLM_ALIGN(8) vec3 t;

View File

@@ -40,6 +40,10 @@
# include "simd/sse2/mat2.h"
#endif
#ifdef CGLM_NEON_FP
# include "simd/neon/mat2.h"
#endif
#define GLM_MAT2_IDENTITY_INIT {{1.0f, 0.0f}, {0.0f, 1.0f}}
#define GLM_MAT2_ZERO_INIT {{0.0f, 0.0f}, {0.0f, 0.0f}}
@@ -130,6 +134,8 @@ void
glm_mat2_mul(mat2 m1, mat2 m2, mat2 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
glm_mat2_mul_sse2(m1, m2, dest);
#elif defined(CGLM_NEON_FP)
glm_mat2_mul_neon(m1, m2, dest);
#else
float a00 = m1[0][0], a01 = m1[0][1],
a10 = m1[1][0], a11 = m1[1][1],

View File

@@ -63,6 +63,10 @@
# include "simd/sse2/quat.h"
#endif
#ifdef CGLM_NEON_FP
# include "simd/neon/quat.h"
#endif
CGLM_INLINE
void
glm_mat4_mulv(mat4 m, vec4 v, vec4 dest);
@@ -412,6 +416,8 @@ glm_quat_mul(versor p, versor q, versor dest) {
*/
#if defined( __SSE__ ) || defined( __SSE2__ )
glm_quat_mul_sse2(p, q, dest);
#elif defined(CGLM_NEON_FP)
glm_quat_mul_neon(p, q, dest);
#else
dest[0] = p[3] * q[0] + p[0] * q[3] + p[1] * q[2] - p[2] * q[1];
dest[1] = p[3] * q[1] - p[0] * q[2] + p[1] * q[3] + p[2] * q[0];

View File

@@ -76,5 +76,41 @@ glm_mul_rot_neon(mat4 m1, mat4 m2, mat4 dest) {
glmm_store(dest[3], glmm_load(m1[3]));
}
/*!
 * @brief NEON version of glm_inv_tr(): in-place transform of a 4x4 matrix
 *        where the upper 3x3 part is transposed and the translation column
 *        t is replaced with -(R^T * t)
 *
 * The result is the true inverse only when the upper 3x3 part is
 * orthonormal (rotation without scale). The last row is rewritten as
 * (0, 0, 0, 1) regardless of its input value.
 *
 * @param[in,out] mat  matrix to transform in place
 */
CGLM_INLINE
void
glm_inv_tr_neon(mat4 mat) {
  float32x4x4_t rows;
  glmm_128      c0, c1, c2, tr;

  /* de-interleaving load: val[i] gathers element i of each column, so the
     matrix arrives already transposed */
  rows = vld4q_f32(mat[0]);
  c0   = rows.val[0];
  c1   = rows.val[1];
  c2   = rows.val[2];

  /* lane 3 of c0 / c1 / c2 holds x / y / z of the original translation:
     tr = c0 * t.x + (c1 * t.y + (c2 * t.z)) */
  tr = vmulq_f32(c2, glmm_splat_w(c2));
  tr = glmm_fmadd(c1, glmm_splat_w(c1), tr);
  tr = glmm_fmadd(c0, glmm_splat_w(c0), tr);

  /* negate every lane by flipping its sign bit */
  tr = glmm_xor(tr, glmm_set1(-0.f));

  glmm_store(mat[0], c0);
  glmm_store(mat[1], c1);
  glmm_store(mat[2], c2);
  glmm_store(mat[3], tr);

  /* the vector stores left translation terms in lane 3 of every column;
     put the last row back to (0, 0, 0, 1) */
  mat[0][3] = 0.0f;
  mat[1][3] = 0.0f;
  mat[2][3] = 0.0f;
  mat[3][3] = 1.0f;

  /* TODO: ? the last row could perhaps be written with vst1_lane_f32 from
     the upper half of rows.val[3] (the loaded last row) instead of scalar
     stores; not verified */
}
#endif
#endif /* cglm_affine_neon_h */

View File

@@ -0,0 +1,44 @@
/*
* Copyright (c), Recep Aslantas.
*
* MIT License (MIT), http://opensource.org/licenses/MIT
* Full license can be found in the LICENSE file
*/
#ifndef cglm_mat2_neon_h
#define cglm_mat2_neon_h
#if defined(__ARM_NEON_FP)
#include "../../common.h"
#include "../intrin.h"
/*!
 * @brief NEON version of 2x2 matrix multiplication
 *
 * With m1 stored as (a, b, c, d) and m2 stored as (e, f, g, h):
 *
 *   dest[0][0] = a * e + c * f
 *   dest[0][1] = b * e + d * f
 *   dest[1][0] = a * g + c * h
 *   dest[1][1] = b * g + d * h
 *
 * Both inputs are loaded before dest is written.
 *
 * @param[in]  m1    left matrix
 * @param[in]  m2    right matrix
 * @param[out] dest  result matrix
 */
CGLM_INLINE
void
glm_mat2_mul_neon(mat2 m1, mat2 m2, mat2 dest) {
  glmm_128      lhs, rhs, acc;
  float32x4x2_t rhs_dup;
  float32x2_t   lhs_lo, lhs_hi;

  lhs = glmm_load(m1[0]); /* lanes 0..3: a b c d */
  rhs = glmm_load(m2[0]); /* lanes 0..3: e f g h */

  lhs_lo = vget_low_f32(lhs);  /* a b */
  lhs_hi = vget_high_f32(lhs); /* c d */

  /* val[0] = e e g g, val[1] = f f h h */
  rhs_dup = vtrnq_f32(rhs, rhs);

  /* (a b a b) * (e e g g) + (c d c d) * (f f h h) */
  acc = vmulq_f32(vcombine_f32(lhs_hi, lhs_hi), rhs_dup.val[1]);
  acc = glmm_fmadd(vcombine_f32(lhs_lo, lhs_lo), rhs_dup.val[0], acc);

  glmm_store(dest[0], acc);
}
#endif
#endif /* cglm_mat2_neon_h */

View File

@@ -0,0 +1,56 @@
/*
* Copyright (c), Recep Aslantas.
*
* MIT License (MIT), http://opensource.org/licenses/MIT
* Full license can be found in the LICENSE file
*/
#ifndef cglm_quat_neon_h
#define cglm_quat_neon_h
#if defined(__ARM_NEON_FP)
#include "../../common.h"
#include "../intrin.h"
/*!
 * @brief NEON version of quaternion multiplication (dest = p * q)
 *
 * Writing p = a1 + b1 i + c1 j + d1 k and q = a2 + b2 i + c2 j + d2 k,
 * with the real part kept in lane 3 and i, j, k in lanes 0, 1, 2:
 *
 *   + (a1 b2 + b1 a2 + c1 d2 - d1 c2)i
 *   + (a1 c2 - b1 d2 + c1 a2 + d1 b2)j
 *   + (a1 d2 + b1 c2 - c1 b2 + d1 a2)k
 *      a1 a2 - b1 b2 - c1 c2 - d1 d2
 *
 * Both inputs are loaded before dest is written.
 *
 * @param[in]  p     left quaternion
 * @param[in]  q     right quaternion
 * @param[out] dest  result quaternion
 */
CGLM_INLINE
void
glm_quat_mul_neon(versor p, versor q, versor dest) {
  glmm_128    vp, vq, vq_swap, acc, px, py, pz, neg_yw, neg_zw;
  glmm_128    neg_xw = {-0.f, 0.f, 0.f, -0.f};       /* signs: - + + - */
  float32x2_t sign_hi, swap_lo, swap_hi;

  vp = glmm_load(p); /* lanes 0..3: b1 c1 d1 a1 */
  vq = glmm_load(q); /* lanes 0..3: b2 c2 d2 a2 */

  /* real part of p times the whole of q */
  acc = vmulq_f32(glmm_splat_w(vp), vq);

  px = glmm_splat_x(vp);
  py = glmm_splat_y(vp);
  pz = glmm_splat_z(vp);

  /* build the two remaining sign masks from the upper half (+ -) of the
     first one */
  sign_hi = vget_high_f32(neg_xw);
  neg_yw  = vcombine_f32(sign_hi, sign_hi);          /* signs: + - + - */
  neg_zw  = vzipq_f32(neg_yw, neg_yw).val[0];        /* signs: + + - - */

  /* q with the elements of each 64-bit pair swapped: c2 b2 a2 d2 */
  vq_swap = vrev64q_f32(vq);
  swap_hi = vget_high_f32(vq_swap);                  /* a2 d2 */
  swap_lo = vget_low_f32(vq_swap);                   /* c2 b2 */

  /* ( b1 -b1  b1 -b1) * (a2 d2 c2 b2) */
  acc = glmm_fmadd(glmm_xor(px, neg_yw), vcombine_f32(swap_hi, swap_lo), acc);

  /* ( c1  c1 -c1 -c1) * (d2 a2 b2 c2) */
  acc = glmm_fmadd(glmm_xor(py, neg_zw), vcombine_f32(vget_high_f32(vq),
                                                      vget_low_f32(vq)), acc);

  /* (-d1  d1  d1 -d1) * (c2 b2 a2 d2) */
  acc = glmm_fmadd(glmm_xor(pz, neg_xw), vcombine_f32(swap_lo, swap_hi), acc);

  glmm_store(dest, acc);
}
#endif
#endif /* cglm_quat_neon_h */

View File

@@ -41,6 +41,5 @@ glm_quat_mul_sse2(versor p, versor q, versor dest) {
glmm_store(dest, r);
}
#endif
#endif /* cglm_quat_simd_h */

View File

@@ -7,6 +7,25 @@
#include "test_common.h"
#ifndef glm_affine_mat_test_guard
#define glm_affine_mat_test_guard
/*
 * Scalar reference for inv_tr, built only from generic mat3 / mat4 / vec3
 * helpers. The inv_tr test compares the implementation under test against
 * this function.
 *
 * mat is modified in place.
 */
CGLM_INLINE
void
glm_inv_tr_raw(mat4 mat) {
  CGLM_ALIGN_MAT mat3 rot;
  CGLM_ALIGN(8)  vec3 trans;

  /* rotation part: pull the 3x3 block out through pick3t (presumably the
     transposing variant, going by its name) and write it back */
  glm_mat4_pick3t(mat, rot);
  glm_mat4_ins3(rot, mat);

  /* translation part: mat[3] = -(rot * mat[3]) */
  glm_mat3_mulv(rot, mat[3], trans);
  glm_vec3_negate(trans);
  glm_vec3_copy(trans, mat[3]);
}
#endif
TEST_IMPL(GLM_PREFIX, mul) {
mat4 m1 = GLM_MAT4_IDENTITY_INIT;
mat4 m2 = GLM_MAT4_IDENTITY_INIT;
@@ -81,6 +100,12 @@ TEST_IMPL(GLM_PREFIX, inv_tr) {
GLM(mat4_inv)(m1, m2);
GLM(inv_tr)(m2);
ASSERTIFY(test_assert_mat4_eq(m1, m2))
/* test with raw */
glm_mat4_copy(m1, m2);
glm_inv_tr_raw(m2);
GLM(inv_tr)(m1);
ASSERTIFY(test_assert_mat4_eq(m1, m2))
}
TEST_SUCCESS

View File

@@ -90,7 +90,10 @@
<ClInclude Include="..\include\cglm\simd\avx\affine.h" />
<ClInclude Include="..\include\cglm\simd\avx\mat4.h" />
<ClInclude Include="..\include\cglm\simd\intrin.h" />
<ClInclude Include="..\include\cglm\simd\neon\affine.h" />
<ClInclude Include="..\include\cglm\simd\neon\mat2.h" />
<ClInclude Include="..\include\cglm\simd\neon\mat4.h" />
<ClInclude Include="..\include\cglm\simd\neon\quat.h" />
<ClInclude Include="..\include\cglm\simd\sse2\affine.h" />
<ClInclude Include="..\include\cglm\simd\sse2\mat2.h" />
<ClInclude Include="..\include\cglm\simd\sse2\mat3.h" />

View File

@@ -370,5 +370,14 @@
<ClInclude Include="..\include\cglm\struct\affine2d.h">
<Filter>include\cglm\struct</Filter>
</ClInclude>
<ClInclude Include="..\include\cglm\simd\neon\affine.h">
<Filter>include\cglm\simd\neon</Filter>
</ClInclude>
<ClInclude Include="..\include\cglm\simd\neon\mat2.h">
<Filter>include\cglm\simd\neon</Filter>
</ClInclude>
<ClInclude Include="..\include\cglm\simd\neon\quat.h">
<Filter>include\cglm\simd\neon</Filter>
</ClInclude>
</ItemGroup>
</Project>