Compare commits

..

13 Commits
v0.5.0 ... simd

Author SHA1 Message Date
Recep Aslantas
01b93b0409 Merge branch 'master' into simd 2019-01-21 22:43:15 +03:00
Recep Aslantas
07e60bd098 cam: extend frustum's far distance helper (#71)
* this will help to implement zoom easily
2019-01-16 14:59:58 +03:00
Recep Aslantas
e3d3cd8ab8 now working on v0.5.2 2019-01-15 12:08:54 +03:00
Recep Aslantas
d17c99215d Update README.md 2018-12-26 09:57:52 +03:00
Recep Aslantas
dc6eb492c1 Merge pull request #70 from recp/vec3-mat3
remove builtin alignment from vec3 and mat3 types
2018-12-26 09:54:48 +03:00
Recep Aslantas
7219b02d23 remove alignment from vec3 and mat3 2018-12-25 10:08:36 +03:00
Recep Aslantas
21834b4ffb matrix: trace of matrix 2018-12-06 18:17:02 +03:00
Recep Aslantas
2ef9c23a6c vec: normalize cross product helper 2018-12-06 18:01:52 +03:00
Recep Aslantas
92605f845a test: fix comparing two float values in tests 2018-12-05 16:34:22 +03:00
Recep Aslantas
b23d65bef5 now working on v0.5.1 2018-12-05 16:32:13 +03:00
Recep Aslantas
9aebdc76b3 avx: implement scale matrix using AVX 2018-10-30 09:58:11 +03:00
Recep Aslantas
e9b51fc07a avx: implement mat4_inv for AVX1 2018-10-30 09:28:15 +03:00
Recep Aslantas
abfa355b84 avx: optimize (re-use) mat4_mul registers 2018-10-30 09:27:55 +03:00
26 changed files with 636 additions and 73 deletions

View File

@@ -25,6 +25,7 @@ you have the latest version
- **[api rename]** by starting v0.4.5, **glm_simd** functions are renamed to **glmm_** - **[api rename]** by starting v0.4.5, **glm_simd** functions are renamed to **glmm_**
- **[new option]** by starting v0.4.5, you can disable alignment requirement, check options in docs. - **[new option]** by starting v0.4.5, you can disable alignment requirement, check options in docs.
- **[major change]** by starting v0.5.0, vec3 functions use **glm_vec3_** namespace, it was **glm_vec_** until v0.5.0 - **[major change]** by starting v0.5.0, vec3 functions use **glm_vec3_** namespace, it was **glm_vec_** until v0.5.0
- **[major change]** by starting v0.5.1, built-in alignment is removed from **vec3** and **mat3** types
#### Note for C++ developers: #### Note for C++ developers:
If you don't aware about original GLM library yet, you may also want to look at: If you don't aware about original GLM library yet, you may also want to look at:

View File

@@ -7,7 +7,7 @@
#***************************************************************************** #*****************************************************************************
AC_PREREQ([2.69]) AC_PREREQ([2.69])
AC_INIT([cglm], [0.5.0], [info@recp.me]) AC_INIT([cglm], [0.5.2], [info@recp.me])
AM_INIT_AUTOMAKE([-Wall -Werror foreign subdir-objects]) AM_INIT_AUTOMAKE([-Wall -Werror foreign subdir-objects])
AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_MACRO_DIR([m4])

View File

@@ -36,6 +36,7 @@ Functions:
#. :c:func:`glm_ortho_default` #. :c:func:`glm_ortho_default`
#. :c:func:`glm_ortho_default_s` #. :c:func:`glm_ortho_default_s`
#. :c:func:`glm_perspective` #. :c:func:`glm_perspective`
#. :c:func:`glm_persp_move_far`
#. :c:func:`glm_perspective_default` #. :c:func:`glm_perspective_default`
#. :c:func:`glm_perspective_resize` #. :c:func:`glm_perspective_resize`
#. :c:func:`glm_lookat` #. :c:func:`glm_lookat`
@@ -145,6 +146,16 @@ Functions documentation
| *[in]* **farVal** far clipping planes | *[in]* **farVal** far clipping planes
| *[out]* **dest** result matrix | *[out]* **dest** result matrix
.. c:function:: void glm_persp_move_far(mat4 proj, float deltaFar)
| extend perspective projection matrix's far distance
| this function does not guarantee far >= near, be aware of that!
Parameters:
| *[in, out]* **proj** projection matrix to extend
| *[in]* **deltaFar** distance from existing far (negative to shrink)
.. c:function:: void glm_perspective_default(float aspect, mat4 dest) .. c:function:: void glm_perspective_default(float aspect, mat4 dest)
| set up perspective projection matrix with default near/far | set up perspective projection matrix with default near/far

View File

@@ -62,9 +62,9 @@ author = u'Recep Aslantas'
# built documents. # built documents.
# #
# The short X.Y version. # The short X.Y version.
version = u'0.5.0' version = u'0.5.2'
# The full version, including alpha/beta/rc tags. # The full version, including alpha/beta/rc tags.
release = u'0.5.0' release = u'0.5.2'
# The language for content autogenerated by Sphinx. Refer to documentation # The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages. # for a list of supported languages.

View File

@@ -29,6 +29,7 @@ Functions:
#. :c:func:`glm_mat3_scale` #. :c:func:`glm_mat3_scale`
#. :c:func:`glm_mat3_det` #. :c:func:`glm_mat3_det`
#. :c:func:`glm_mat3_inv` #. :c:func:`glm_mat3_inv`
#. :c:func:`glm_mat3_trace`
#. :c:func:`glm_mat3_swap_col` #. :c:func:`glm_mat3_swap_col`
#. :c:func:`glm_mat3_swap_row` #. :c:func:`glm_mat3_swap_row`
@@ -133,6 +134,16 @@ Functions documentation
| *[in]* **mat** matrix | *[in]* **mat** matrix
| *[out]* **dest** destination (inverse matrix) | *[out]* **dest** destination (inverse matrix)
.. c:function:: float glm_mat3_trace(mat3 m)
| sum of the elements on the main diagonal from upper left to the lower right
Parameters:
| *[in]* **m** matrix
Returns:
trace of matrix
.. c:function:: void glm_mat3_swap_col(mat3 mat, int col1, int col2) .. c:function:: void glm_mat3_swap_col(mat3 mat, int col1, int col2)
swap two matrix columns swap two matrix columns

View File

@@ -33,6 +33,8 @@ Functions:
#. :c:func:`glm_mat4_mulN` #. :c:func:`glm_mat4_mulN`
#. :c:func:`glm_mat4_mulv` #. :c:func:`glm_mat4_mulv`
#. :c:func:`glm_mat4_mulv3` #. :c:func:`glm_mat4_mulv3`
#. :c:func:`glm_mat4_trace`
#. :c:func:`glm_mat4_trace3`
#. :c:func:`glm_mat4_quat` #. :c:func:`glm_mat4_quat`
#. :c:func:`glm_mat4_transpose_to` #. :c:func:`glm_mat4_transpose_to`
#. :c:func:`glm_mat4_transpose` #. :c:func:`glm_mat4_transpose`
@@ -156,6 +158,27 @@ Functions documentation
| *[in]* **v** vec3 (right, column vector) | *[in]* **v** vec3 (right, column vector)
| *[out]* **dest** vec3 (result, column vector) | *[out]* **dest** vec3 (result, column vector)
.. c:function:: float glm_mat4_trace(mat4 m)
| sum of the elements on the main diagonal from upper left to the lower right
Parameters:
| *[in]* **m** matrix
Returns:
trace of matrix
.. c:function:: float glm_mat4_trace3(mat4 m)
| trace of matrix (rotation part)
| sum of the elements on the main diagonal from upper left to the lower right
Parameters:
| *[in]* **m** matrix
Returns:
trace of matrix
.. c:function:: void glm_mat4_quat(mat4 m, versor dest) .. c:function:: void glm_mat4_quat(mat4 m, versor dest)
convert mat4's rotation part to quaternion convert mat4's rotation part to quaternion

View File

@@ -39,7 +39,6 @@ Functions:
#. :c:func:`glm_vec3_zero` #. :c:func:`glm_vec3_zero`
#. :c:func:`glm_vec3_one` #. :c:func:`glm_vec3_one`
#. :c:func:`glm_vec3_dot` #. :c:func:`glm_vec3_dot`
#. :c:func:`glm_vec3_cross`
#. :c:func:`glm_vec3_norm2` #. :c:func:`glm_vec3_norm2`
#. :c:func:`glm_vec3_norm` #. :c:func:`glm_vec3_norm`
#. :c:func:`glm_vec3_add` #. :c:func:`glm_vec3_add`
@@ -65,6 +64,8 @@ Functions:
#. :c:func:`glm_vec3_negate_to` #. :c:func:`glm_vec3_negate_to`
#. :c:func:`glm_vec3_normalize` #. :c:func:`glm_vec3_normalize`
#. :c:func:`glm_vec3_normalize_to` #. :c:func:`glm_vec3_normalize_to`
#. :c:func:`glm_vec3_cross`
#. :c:func:`glm_vec3_crossn`
#. :c:func:`glm_vec3_distance2` #. :c:func:`glm_vec3_distance2`
#. :c:func:`glm_vec3_distance` #. :c:func:`glm_vec3_distance`
#. :c:func:`glm_vec3_angle` #. :c:func:`glm_vec3_angle`
@@ -125,12 +126,21 @@ Functions documentation
.. c:function:: void glm_vec3_cross(vec3 a, vec3 b, vec3 d) .. c:function:: void glm_vec3_cross(vec3 a, vec3 b, vec3 dest)
cross product cross product of two vector (RH)
Parameters: Parameters:
| *[in]* **a** source 1 | *[in]* **a** vector 1
| *[in]* **b** source 2 | *[in]* **b** vector 2
| *[out]* **d** destination | *[out]* **dest** destination
.. c:function:: void glm_vec3_crossn(vec3 a, vec3 b, vec3 dest)
cross product of two vectors (RH), with the result normalized
Parameters:
| *[in]* **a** vector 1
| *[in]* **b** vector 2
| *[out]* **dest** destination
.. c:function:: float glm_vec3_norm2(vec3 v) .. c:function:: float glm_vec3_norm2(vec3 v)

View File

@@ -61,6 +61,10 @@ glmc_perspective(float fovy,
float farVal, float farVal,
mat4 dest); mat4 dest);
CGLM_EXPORT
void
glmc_persp_move_far(mat4 proj, float deltaFar);
CGLM_EXPORT CGLM_EXPORT
void void
glmc_perspective_default(float aspect, mat4 dest); glmc_perspective_default(float aspect, mat4 dest);

View File

@@ -44,6 +44,10 @@ CGLM_EXPORT
void void
glmc_mat3_mulv(mat3 m, vec3 v, vec3 dest); glmc_mat3_mulv(mat3 m, vec3 v, vec3 dest);
CGLM_EXPORT
float
glmc_mat3_trace(mat3 m);
CGLM_EXPORT CGLM_EXPORT
void void
glmc_mat3_quat(mat3 m, versor dest); glmc_mat3_quat(mat3 m, versor dest);

View File

@@ -61,6 +61,14 @@ CGLM_EXPORT
void void
glmc_mat4_mulv3(mat4 m, vec3 v, float last, vec3 dest); glmc_mat4_mulv3(mat4 m, vec3 v, float last, vec3 dest);
CGLM_EXPORT
float
glmc_mat4_trace(mat4 m);
CGLM_EXPORT
float
glmc_mat4_trace3(mat4 m);
CGLM_EXPORT CGLM_EXPORT
void void
glmc_mat4_quat(mat4 m, versor dest); glmc_mat4_quat(mat4 m, versor dest);

View File

@@ -42,7 +42,11 @@ glmc_vec3_dot(vec3 a, vec3 b);
CGLM_EXPORT CGLM_EXPORT
void void
glmc_vec3_cross(vec3 a, vec3 b, vec3 d); glmc_vec3_cross(vec3 a, vec3 b, vec3 dest);
CGLM_EXPORT
void
glmc_vec3_crossn(vec3 a, vec3 b, vec3 dest);
CGLM_EXPORT CGLM_EXPORT
float float

View File

@@ -271,6 +271,30 @@ glm_perspective(float fovy,
dest[3][2] = 2.0f * nearVal * farVal * fn; dest[3][2] = 2.0f * nearVal * farVal * fn;
} }
/*!
 * @brief move the far clipping distance of a perspective projection matrix
 *
 * near and far are recovered from proj[2][2] and proj[3][2], deltaFar is
 * added to far, and those two elements are rebuilt; no other element of
 * proj is written.
 *
 * this function does not guarantee far >= near, be aware of that!
 *
 * @param[in, out] proj      projection matrix to extend
 * @param[in]      deltaFar  distance from existing far (negative to shrink)
 */
CGLM_INLINE
void
glm_persp_move_far(mat4 proj, float deltaFar) {
  const float m22 = proj[2][2];
  const float m32 = proj[3][2];

  /* undo m22 = (n + f) / (n - f) and m32 = 2nf / (n - f) */
  const float zNear = m32 / (m22 - 1.0f);
  const float zFar  = m32 / (m22 + 1.0f) + deltaFar;
  const float invNF = 1.0f / (zNear - zFar);

  proj[2][2] = (zNear + zFar) * invNF;
  proj[3][2] = 2.0f * zNear * zFar * invNF;
}
/*! /*!
* @brief set up perspective projection matrix with default near/far * @brief set up perspective projection matrix with default near/far
* and angle values * and angle values
@@ -323,9 +347,7 @@ glm_lookat(vec3 eye,
glm_vec3_sub(center, eye, f); glm_vec3_sub(center, eye, f);
glm_vec3_normalize(f); glm_vec3_normalize(f);
glm_vec3_cross(f, up, s); glm_vec3_crossn(f, up, s);
glm_vec3_normalize(s);
glm_vec3_cross(s, f, u); glm_vec3_cross(s, f, u);
dest[0][0] = s[0]; dest[0][0] = s[0];

View File

@@ -21,6 +21,7 @@
CGLM_INLINE void glm_mat3_transpose_to(mat3 m, mat3 dest); CGLM_INLINE void glm_mat3_transpose_to(mat3 m, mat3 dest);
CGLM_INLINE void glm_mat3_transpose(mat3 m); CGLM_INLINE void glm_mat3_transpose(mat3 m);
CGLM_INLINE void glm_mat3_mulv(mat3 m, vec3 v, vec3 dest); CGLM_INLINE void glm_mat3_mulv(mat3 m, vec3 v, vec3 dest);
CGLM_INLINE float glm_mat3_trace(mat3 m);
CGLM_INLINE void glm_mat3_scale(mat3 m, float s); CGLM_INLINE void glm_mat3_scale(mat3 m, float s);
CGLM_INLINE float glm_mat3_det(mat3 mat); CGLM_INLINE float glm_mat3_det(mat3 mat);
CGLM_INLINE void glm_mat3_inv(mat3 mat, mat3 dest); CGLM_INLINE void glm_mat3_inv(mat3 mat, mat3 dest);
@@ -207,6 +208,18 @@ glm_mat3_mulv(mat3 m, vec3 v, vec3 dest) {
dest[2] = m[0][2] * v[0] + m[1][2] * v[1] + m[2][2] * v[2]; dest[2] = m[0][2] * v[0] + m[1][2] * v[1] + m[2][2] * v[2];
} }
/*!
 * @brief trace of a 3x3 matrix
 *
 * adds up the main-diagonal elements m[0][0], m[1][1] and m[2][2]
 *
 * @param[in]  m matrix
 *
 * @return sum of the main-diagonal elements
 */
CGLM_INLINE
float
glm_mat3_trace(mat3 m) {
  float diag;

  diag  = m[0][0];
  diag += m[1][1];
  diag += m[2][2];

  return diag;
}
/*! /*!
* @brief convert mat3 to quaternion * @brief convert mat3 to quaternion

View File

@@ -29,6 +29,8 @@
CGLM_INLINE void glm_mat4_mulN(mat4 *matrices[], int len, mat4 dest); CGLM_INLINE void glm_mat4_mulN(mat4 *matrices[], int len, mat4 dest);
CGLM_INLINE void glm_mat4_mulv(mat4 m, vec4 v, vec4 dest); CGLM_INLINE void glm_mat4_mulv(mat4 m, vec4 v, vec4 dest);
CGLM_INLINE void glm_mat4_mulv3(mat4 m, vec3 v, vec3 dest); CGLM_INLINE void glm_mat4_mulv3(mat4 m, vec3 v, vec3 dest);
CGLM_INLINE float glm_mat4_trace(mat4 m);
CGLM_INLINE float glm_mat4_trace3(mat4 m);
CGLM_INLINE void glm_mat4_transpose_to(mat4 m, mat4 dest); CGLM_INLINE void glm_mat4_transpose_to(mat4 m, mat4 dest);
CGLM_INLINE void glm_mat4_transpose(mat4 m); CGLM_INLINE void glm_mat4_transpose(mat4 m);
CGLM_INLINE void glm_mat4_scale_p(mat4 m, float s); CGLM_INLINE void glm_mat4_scale_p(mat4 m, float s);
@@ -338,6 +340,32 @@ glm_mat4_mulv(mat4 m, vec4 v, vec4 dest) {
#endif #endif
} }
/*!
 * @brief trace of a 4x4 matrix
 *
 * adds up the main-diagonal elements m[0][0], m[1][1], m[2][2] and m[3][3]
 *
 * @param[in]  m matrix
 *
 * @return sum of the main-diagonal elements
 */
CGLM_INLINE
float
glm_mat4_trace(mat4 m) {
  float diag;

  diag  = m[0][0];
  diag += m[1][1];
  diag += m[2][2];
  diag += m[3][3];

  return diag;
}
/*!
 * @brief trace of the upper-left 3x3 (rotation) part of a 4x4 matrix
 *
 * adds up m[0][0], m[1][1] and m[2][2]; m[3][3] is not included
 *
 * @param[in]  m matrix
 *
 * @return sum of the first three main-diagonal elements
 */
CGLM_INLINE
float
glm_mat4_trace3(mat4 m) {
  float diag;

  diag  = m[0][0];
  diag += m[1][1];
  diag += m[2][2];

  return diag;
}
/*! /*!
* @brief convert mat4's rotation part to quaternion * @brief convert mat4's rotation part to quaternion
* *
@@ -476,7 +504,9 @@ glm_mat4_scale_p(mat4 m, float s) {
CGLM_INLINE CGLM_INLINE
void void
glm_mat4_scale(mat4 m, float s) { glm_mat4_scale(mat4 m, float s) {
#if defined( __SSE__ ) || defined( __SSE2__ ) #ifdef __AVX__
glm_mat4_scale_avx(m, s);
#elif defined( __SSE__ ) || defined( __SSE2__ )
glm_mat4_scale_sse2(m, s); glm_mat4_scale_sse2(m, s);
#else #else
glm_mat4_scale_p(m, s); glm_mat4_scale_p(m, s);
@@ -526,7 +556,9 @@ glm_mat4_det(mat4 mat) {
CGLM_INLINE CGLM_INLINE
void void
glm_mat4_inv(mat4 mat, mat4 dest) { glm_mat4_inv(mat4 mat, mat4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ ) #ifdef __AVX__
glm_mat4_inv_avx(mat, dest);
#elif defined( __SSE__ ) || defined( __SSE2__ )
glm_mat4_inv_sse2(mat, dest); glm_mat4_inv_sse2(mat, dest);
#else #else
float t[6]; float t[6];
@@ -587,7 +619,9 @@ glm_mat4_inv(mat4 mat, mat4 dest) {
CGLM_INLINE CGLM_INLINE
void void
glm_mat4_inv_fast(mat4 mat, mat4 dest) { glm_mat4_inv_fast(mat4 mat, mat4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ ) #ifdef __AVX__
glm_mat4_inv_fast_avx(mat, dest);
#elif defined( __SSE__ ) || defined( __SSE2__ )
glm_mat4_inv_fast_sse2(mat, dest); glm_mat4_inv_fast_sse2(mat, dest);
#else #else
glm_mat4_inv(mat, dest); glm_mat4_inv(mat, dest);

View File

@@ -14,31 +14,47 @@
#include <immintrin.h> #include <immintrin.h>
/*!
 * @brief multiply a mat4 by a scalar in place (AVX)
 *
 * the matrix is handled as two __m256 values, one loaded from m[0] and one
 * from m[2]; each is multiplied by the broadcast scalar and stored back to
 * the address it was loaded from.
 *
 * @param[in, out] m matrix
 * @param[in]      s scalar
 */
CGLM_INLINE
void
glm_mat4_scale_avx(mat4 m, float s) {
  const __m256 factor = _mm256_set1_ps(s);
  __m256       half;

  half = _mm256_mul_ps(factor, glmm_load256(m[0]));
  glmm_store256(m[0], half);

  half = _mm256_mul_ps(factor, glmm_load256(m[2]));
  glmm_store256(m[2], half);
}
CGLM_INLINE CGLM_INLINE
void void
glm_mat4_mul_avx(mat4 m1, mat4 m2, mat4 dest) { glm_mat4_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
/* D = R * L (Column-Major) */ /* D = R * L (Column-Major) */
__m256 y0, y1, y2, y3, y4, y5, y6, y7, y8, y9; __m256 y0, y1, y2, y3, y4, y5, y6, y7, y8, y9;
__m256i yi0, yi1, yi2, yi3;
y0 = glmm_load256(m2[0]); /* h g f e d c b a */ y0 = glmm_load256(m2[0]); /* h g f e d c b a */
y1 = glmm_load256(m2[2]); /* p o n m l k j i */ y1 = glmm_load256(m2[2]); /* p o n m l k j i */
y2 = glmm_load256(m1[0]); /* h g f e d c b a */ y2 = glmm_load256(m1[0]); /* h g f e d c b a */
y3 = glmm_load256(m1[2]); /* p o n m l k j i */ y3 = glmm_load256(m1[2]); /* p o n m l k j i */
/* 0x03: 0b00000011 */ /* 0x03: 0b00000011 */
y4 = _mm256_permute2f128_ps(y2, y2, 0x03); /* d c b a h g f e */ y4 = _mm256_permute2f128_ps(y2, y2, 0x03); /* d c b a h g f e */
y5 = _mm256_permute2f128_ps(y3, y3, 0x03); /* l k j i p o n m */ y5 = _mm256_permute2f128_ps(y3, y3, 0x03); /* l k j i p o n m */
yi0 = _mm256_set_epi32(1, 1, 1, 1, 0, 0, 0, 0);
yi1 = _mm256_set_epi32(3, 3, 3, 3, 2, 2, 2, 2);
yi2 = _mm256_set_epi32(0, 0, 0, 0, 1, 1, 1, 1);
yi3 = _mm256_set_epi32(2, 2, 2, 2, 3, 3, 3, 3);
/* f f f f a a a a */ /* f f f f a a a a */
/* h h h h c c c c */ /* h h h h c c c c */
/* e e e e b b b b */ /* e e e e b b b b */
/* g g g g d d d d */ /* g g g g d d d d */
y6 = _mm256_permutevar_ps(y0, _mm256_set_epi32(1, 1, 1, 1, 0, 0, 0, 0)); y6 = _mm256_permutevar_ps(y0, yi0);
y7 = _mm256_permutevar_ps(y0, _mm256_set_epi32(3, 3, 3, 3, 2, 2, 2, 2)); y7 = _mm256_permutevar_ps(y0, yi1);
y8 = _mm256_permutevar_ps(y0, _mm256_set_epi32(0, 0, 0, 0, 1, 1, 1, 1)); y8 = _mm256_permutevar_ps(y0, yi2);
y9 = _mm256_permutevar_ps(y0, _mm256_set_epi32(2, 2, 2, 2, 3, 3, 3, 3)); y9 = _mm256_permutevar_ps(y0, yi3);
glmm_store256(dest[0], glmm_store256(dest[0],
_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6), _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
@@ -50,10 +66,10 @@ glm_mat4_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
/* p p p p k k k k */ /* p p p p k k k k */
/* m m m m j j j j */ /* m m m m j j j j */
/* o o o o l l l l */ /* o o o o l l l l */
y6 = _mm256_permutevar_ps(y1, _mm256_set_epi32(1, 1, 1, 1, 0, 0, 0, 0)); y6 = _mm256_permutevar_ps(y1, yi0);
y7 = _mm256_permutevar_ps(y1, _mm256_set_epi32(3, 3, 3, 3, 2, 2, 2, 2)); y7 = _mm256_permutevar_ps(y1, yi1);
y8 = _mm256_permutevar_ps(y1, _mm256_set_epi32(0, 0, 0, 0, 1, 1, 1, 1)); y8 = _mm256_permutevar_ps(y1, yi2);
y9 = _mm256_permutevar_ps(y1, _mm256_set_epi32(2, 2, 2, 2, 3, 3, 3, 3)); y9 = _mm256_permutevar_ps(y1, yi3);
glmm_store256(dest[2], glmm_store256(dest[2],
_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6), _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
@@ -62,5 +78,365 @@ glm_mat4_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
_mm256_mul_ps(y5, y9)))); _mm256_mul_ps(y5, y9))));
} }
/*!
 * @brief inverse of mat4 (AVX)
 *
 * builds the 2x2 sub-determinants t0/t1/t2, combines them into the signed
 * cofactors (r1, r2), scales those by 1 / determinant and transposes the
 * result into dest. The reciprocal of the determinant is computed with a
 * full-precision division (_mm256_div_ps).
 *
 * lane comments list elements from the highest to the lowest position, with
 * a..p naming the 16 source floats in memory order.
 *
 * @param[in]  mat  matrix
 * @param[out] dest inverse matrix
 */
CGLM_INLINE
void
glm_mat4_inv_avx(mat4 mat, mat4 dest) {
  __m256  y0, y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12, y13;
  __m256  yt0, yt1, yt2;
  __m256  t0, t1, t2;
  __m256  r1, r2;
  __m256  flpsign;
  __m256i yi1, yi2, yi3;

  y0  = glmm_load256(mat[0]);                 /* h g f e d c b a */
  y1  = glmm_load256(mat[2]);                 /* p o n m l k j i */

  y2  = _mm256_permute2f128_ps(y1, y1, 0x00); /* l k j i l k j i */
  y3  = _mm256_permute2f128_ps(y1, y1, 0x11); /* p o n m p o n m */
  y4  = _mm256_permute2f128_ps(y0, y0, 0x03); /* d c b a h g f e */
  y13 = _mm256_permute2f128_ps(y4, y4, 0x00); /* h g f e h g f e */

  yi1     = _mm256_set_epi32(0, 0, 0, 0, 0, 1, 1, 2);
  yi2     = _mm256_set_epi32(1, 1, 1, 2, 3, 2, 3, 3);

  /* sign-bit mask: XOR with it negates every other cofactor */
  flpsign = _mm256_set_ps(0.f, -0.f, 0.f, -0.f, -0.f, 0.f, -0.f, 0.f);

  /* i i i i i j j k */
  /* n n n o p o p p */
  /* m m m m m n n o */
  /* j j j k l k l l */
  /* e e e e e f f g */
  /* f f f g h g h h */
  y5 = _mm256_permutevar_ps(y2, yi1);
  y6 = _mm256_permutevar_ps(y3, yi2);
  y7 = _mm256_permutevar_ps(y3, yi1);
  y8 = _mm256_permutevar_ps(y2, yi2);
  y2 = _mm256_permutevar_ps(y13, yi1);
  y3 = _mm256_permutevar_ps(y13, yi2);

  /* the index registers are reused from here on with new selectors */
  yi1 = _mm256_set_epi32(2, 1, 0, 0, 2, 1, 0, 0);
  yi2 = _mm256_set_epi32(2, 1, 1, 0, 2, 1, 1, 0);
  yi3 = _mm256_set_epi32(3, 3, 2, 0, 3, 3, 2, 0);

  /*
   t0[0] = k * p - o * l;    t1[0] = g * p - o * h;    t2[0] = g * l - k * h;
   t0[1] = j * p - n * l;    t1[1] = f * p - n * h;    t2[1] = f * l - j * h;
   t0[2] = j * o - n * k;    t1[2] = f * o - n * g;    t2[2] = f * k - j * g;
   t0[3] = i * p - m * l;    t1[3] = e * p - m * h;    t2[3] = e * l - i * h;
   t0[4] = i * o - m * k;    t1[4] = e * o - m * g;    t2[4] = e * k - i * g;
   t0[5] = i * n - m * j;    t1[5] = e * n - m * f;    t2[5] = e * j - i * f;
   */
  yt0 = _mm256_sub_ps(_mm256_mul_ps(y5, y6), _mm256_mul_ps(y7, y8));
  yt1 = _mm256_sub_ps(_mm256_mul_ps(y2, y6), _mm256_mul_ps(y7, y3));
  yt2 = _mm256_sub_ps(_mm256_mul_ps(y2, y8), _mm256_mul_ps(y5, y3));

  /* t3 t2 t1 t0 t3 t2 t1 t0 */
  /* t5 t5 t5 t4 t5 t5 t5 t4 */
  y9  = _mm256_permute2f128_ps(yt0, yt0, 0x00);
  y10 = _mm256_permute2f128_ps(yt0, yt0, 0x11);

  /* t2 t1 t0 t0 t2 t1 t0 t0 */
  t0  = _mm256_permutevar_ps(y9, yi1);

  /* t4 t3 t3 t1 t4 t3 t3 t1 */
  y11 = _mm256_shuffle_ps(y9, y10, 0x4D);
  y12 = _mm256_permutevar_ps(y11, yi2);
  t1  = _mm256_permute2f128_ps(y12, y9, 0x00);

  /* t5 t5 t4 t2 t5 t5 t4 t2 */
  y11 = _mm256_shuffle_ps(y9, y10, 0x4A);
  y12 = _mm256_permutevar_ps(y11, yi3);
  t2  = _mm256_permute2f128_ps(y12, y12, 0x00);

  /* a a a b e e e f */
  /* b b c c f f g g */
  /* c d d d g h h h */
  y9  = _mm256_permute_ps(y4, 0x01);
  y10 = _mm256_permute_ps(y4, 0x5A);
  y11 = _mm256_permute_ps(y4, 0xBF);

  /*
   dest[0][0] =  f * t[0] - g * t[1] + h * t[2];
   dest[1][0] =-(e * t[0] - g * t[3] + h * t[4]);
   dest[2][0] =  e * t[1] - f * t[3] + h * t[5];
   dest[3][0] =-(e * t[2] - f * t[4] + g * t[5]);

   dest[0][1] =-(b * t[0] - c * t[1] + d * t[2]);
   dest[1][1] =  a * t[0] - c * t[3] + d * t[4];
   dest[2][1] =-(a * t[1] - b * t[3] + d * t[5]);
   dest[3][1] =  a * t[2] - b * t[4] + c * t[5];
   */
  r1 = _mm256_xor_ps(_mm256_add_ps(_mm256_sub_ps(_mm256_mul_ps(y9,  t0),
                                                 _mm256_mul_ps(y10, t1)),
                                   _mm256_mul_ps(y11, t2)),
                     flpsign);

  /* d c b a d c b a */
  y2 = _mm256_permute2f128_ps(y0, y0, 0x0);

  /* a a a b a a a b */
  /* b b c c b b c c */
  /* c d d d c d d d */
  y3 = _mm256_permutevar_ps(y2, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 1));
  y4 = _mm256_permutevar_ps(y2, _mm256_set_epi32(1, 1, 2, 2, 1, 1, 2, 2));
  y5 = _mm256_permutevar_ps(y2, _mm256_set_epi32(2, 3, 3, 3, 2, 3, 3, 3));

  /* t2[3] t2[2] t2[1] t2[0] t1[3] t1[2] t1[1] t1[0] */
  /* t2[5] t2[5] t2[5] t2[4] t1[5] t1[5] t1[5] t1[4] */
  y6 = _mm256_permute2f128_ps(yt1, yt2, 0x20);
  y7 = _mm256_permute2f128_ps(yt1, yt2, 0x31);

  /* t2[2] t2[1] t2[0] t2[0] t1[2] t1[1] t1[0] t1[0] */
  t0  = _mm256_permutevar_ps(y6, yi1);

  /* t2[4] t2[3] t2[3] t2[1] t1[4] t1[3] t1[3] t1[1] */
  y11 = _mm256_shuffle_ps(y6, y7, 0x4D);
  t1  = _mm256_permutevar_ps(y11, yi2);

  /* t2[5] t2[5] t2[4] t2[2] t1[5] t1[5] t1[4] t1[2] */
  y11 = _mm256_shuffle_ps(y6, y7, 0x4A);
  t2  = _mm256_permutevar_ps(y11, yi3);

  /*
   dest[0][2] =  b * t1[0] - c * t1[1] + d * t1[2];
   dest[1][2] =-(a * t1[0] - c * t1[3] + d * t1[4]);
   dest[2][2] =  a * t1[1] - b * t1[3] + d * t1[5];
   dest[3][2] =-(a * t1[2] - b * t1[4] + c * t1[5]);

   dest[0][3] =-(b * t2[0] - c * t2[1] + d * t2[2]);
   dest[1][3] =  a * t2[0] - c * t2[3] + d * t2[4];
   dest[2][3] =-(a * t2[1] - b * t2[3] + d * t2[5]);
   dest[3][3] =  a * t2[2] - b * t2[4] + c * t2[5];
   */
  r2 = _mm256_xor_ps(_mm256_add_ps(_mm256_sub_ps(_mm256_mul_ps(y3, t0),
                                                 _mm256_mul_ps(y4, t1)),
                                   _mm256_mul_ps(y5, t2)),
                     flpsign);

  /* determinant: _mm256_dp_ps takes the dot product of the source elements
     and their cofactors inside each 128-bit lane and, with mask 0xff,
     writes it to all four positions of that lane */
  y4 = _mm256_dp_ps(y0, r1, 0xff);
  y5 = _mm256_div_ps(_mm256_set1_ps(1.0f), y4);
  r1 = _mm256_mul_ps(r1, y5);
  r2 = _mm256_mul_ps(r2, y5);

  /* transpose */

  /* d c b a h g f e */
  /* l k j i p o n m */
  y0 = _mm256_permute2f128_ps(r1, r1, 0x03);
  y1 = _mm256_permute2f128_ps(r2, r2, 0x03);

  /* b a f e f e b a */
  /* j i n m n m j i */
  /* i m a e m i e a */
  /* j n b f n j f b */
  /* n j f b m i e a */
  y2 = _mm256_shuffle_ps(r1, y0, 0x44);
  y3 = _mm256_shuffle_ps(r2, y1, 0x44);
  y4 = _mm256_shuffle_ps(y2, y3, 0x88);
  y5 = _mm256_shuffle_ps(y2, y3, 0xDD);
  y6 = _mm256_permute2f128_ps(y4, y5, 0x20);

  /* d c h g h g d c */
  /* l k p o p o l k */
  /* k o c g o k g c */
  /* l p d h p l h d */
  /* p l h d o k g c */
  y2 = _mm256_shuffle_ps(r1, y0, 0xEE);
  y3 = _mm256_shuffle_ps(r2, y1, 0xEE);
  y4 = _mm256_shuffle_ps(y2, y3, 0x88);
  y5 = _mm256_shuffle_ps(y2, y3, 0xDD);
  y7 = _mm256_permute2f128_ps(y4, y5, 0x20);

  glmm_store256(dest[0], y6);
  glmm_store256(dest[2], y7);
}
/*!
 * @brief inverse of mat4 using a reciprocal instruction (AVX)
 *
 * builds the 2x2 sub-determinants t0/t1/t2, combines them into the signed
 * cofactors (r1, r2), scales those by 1 / determinant and transposes the
 * result into dest. Unlike glm_mat4_inv_avx, the reciprocal of the
 * determinant comes from _mm256_rcp_ps instead of a division.
 *
 * lane comments list elements from the highest to the lowest position, with
 * a..p naming the 16 source floats in memory order.
 *
 * @param[in]  mat  matrix
 * @param[out] dest inverse matrix
 */
CGLM_INLINE
void
glm_mat4_inv_fast_avx(mat4 mat, mat4 dest) {
  __m256  y0, y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12, y13;
  __m256  yt0, yt1, yt2;
  __m256  t0, t1, t2;
  __m256  r1, r2;
  __m256  flpsign;
  __m256i yi1, yi2, yi3;

  y0  = glmm_load256(mat[0]);                 /* h g f e d c b a */
  y1  = glmm_load256(mat[2]);                 /* p o n m l k j i */

  y2  = _mm256_permute2f128_ps(y1, y1, 0x00); /* l k j i l k j i */
  y3  = _mm256_permute2f128_ps(y1, y1, 0x11); /* p o n m p o n m */
  y4  = _mm256_permute2f128_ps(y0, y0, 0x03); /* d c b a h g f e */
  y13 = _mm256_permute2f128_ps(y4, y4, 0x00); /* h g f e h g f e */

  yi1     = _mm256_set_epi32(0, 0, 0, 0, 0, 1, 1, 2);
  yi2     = _mm256_set_epi32(1, 1, 1, 2, 3, 2, 3, 3);

  /* sign-bit mask: XOR with it negates every other cofactor */
  flpsign = _mm256_set_ps(0.f, -0.f, 0.f, -0.f, -0.f, 0.f, -0.f, 0.f);

  /* i i i i i j j k */
  /* n n n o p o p p */
  /* m m m m m n n o */
  /* j j j k l k l l */
  /* e e e e e f f g */
  /* f f f g h g h h */
  y5 = _mm256_permutevar_ps(y2, yi1);
  y6 = _mm256_permutevar_ps(y3, yi2);
  y7 = _mm256_permutevar_ps(y3, yi1);
  y8 = _mm256_permutevar_ps(y2, yi2);
  y2 = _mm256_permutevar_ps(y13, yi1);
  y3 = _mm256_permutevar_ps(y13, yi2);

  /* the index registers are reused from here on with new selectors */
  yi1 = _mm256_set_epi32(2, 1, 0, 0, 2, 1, 0, 0);
  yi2 = _mm256_set_epi32(2, 1, 1, 0, 2, 1, 1, 0);
  yi3 = _mm256_set_epi32(3, 3, 2, 0, 3, 3, 2, 0);

  /*
   t0[0] = k * p - o * l;    t1[0] = g * p - o * h;    t2[0] = g * l - k * h;
   t0[1] = j * p - n * l;    t1[1] = f * p - n * h;    t2[1] = f * l - j * h;
   t0[2] = j * o - n * k;    t1[2] = f * o - n * g;    t2[2] = f * k - j * g;
   t0[3] = i * p - m * l;    t1[3] = e * p - m * h;    t2[3] = e * l - i * h;
   t0[4] = i * o - m * k;    t1[4] = e * o - m * g;    t2[4] = e * k - i * g;
   t0[5] = i * n - m * j;    t1[5] = e * n - m * f;    t2[5] = e * j - i * f;
   */
  yt0 = _mm256_sub_ps(_mm256_mul_ps(y5, y6), _mm256_mul_ps(y7, y8));
  yt1 = _mm256_sub_ps(_mm256_mul_ps(y2, y6), _mm256_mul_ps(y7, y3));
  yt2 = _mm256_sub_ps(_mm256_mul_ps(y2, y8), _mm256_mul_ps(y5, y3));

  /* t3 t2 t1 t0 t3 t2 t1 t0 */
  /* t5 t5 t5 t4 t5 t5 t5 t4 */
  y9  = _mm256_permute2f128_ps(yt0, yt0, 0x00);
  y10 = _mm256_permute2f128_ps(yt0, yt0, 0x11);

  /* t2 t1 t0 t0 t2 t1 t0 t0 */
  t0  = _mm256_permutevar_ps(y9, yi1);

  /* t4 t3 t3 t1 t4 t3 t3 t1 */
  y11 = _mm256_shuffle_ps(y9, y10, 0x4D);
  y12 = _mm256_permutevar_ps(y11, yi2);
  t1  = _mm256_permute2f128_ps(y12, y9, 0x00);

  /* t5 t5 t4 t2 t5 t5 t4 t2 */
  y11 = _mm256_shuffle_ps(y9, y10, 0x4A);
  y12 = _mm256_permutevar_ps(y11, yi3);
  t2  = _mm256_permute2f128_ps(y12, y12, 0x00);

  /* a a a b e e e f */
  /* b b c c f f g g */
  /* c d d d g h h h */
  y9  = _mm256_permute_ps(y4, 0x01);
  y10 = _mm256_permute_ps(y4, 0x5A);
  y11 = _mm256_permute_ps(y4, 0xBF);

  /*
   dest[0][0] =  f * t[0] - g * t[1] + h * t[2];
   dest[1][0] =-(e * t[0] - g * t[3] + h * t[4]);
   dest[2][0] =  e * t[1] - f * t[3] + h * t[5];
   dest[3][0] =-(e * t[2] - f * t[4] + g * t[5]);

   dest[0][1] =-(b * t[0] - c * t[1] + d * t[2]);
   dest[1][1] =  a * t[0] - c * t[3] + d * t[4];
   dest[2][1] =-(a * t[1] - b * t[3] + d * t[5]);
   dest[3][1] =  a * t[2] - b * t[4] + c * t[5];
   */
  r1 = _mm256_xor_ps(_mm256_add_ps(_mm256_sub_ps(_mm256_mul_ps(y9,  t0),
                                                 _mm256_mul_ps(y10, t1)),
                                   _mm256_mul_ps(y11, t2)),
                     flpsign);

  /* d c b a d c b a */
  y2 = _mm256_permute2f128_ps(y0, y0, 0x0);

  /* a a a b a a a b */
  /* b b c c b b c c */
  /* c d d d c d d d */
  y3 = _mm256_permutevar_ps(y2, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 1));
  y4 = _mm256_permutevar_ps(y2, _mm256_set_epi32(1, 1, 2, 2, 1, 1, 2, 2));
  y5 = _mm256_permutevar_ps(y2, _mm256_set_epi32(2, 3, 3, 3, 2, 3, 3, 3));

  /* t2[3] t2[2] t2[1] t2[0] t1[3] t1[2] t1[1] t1[0] */
  /* t2[5] t2[5] t2[5] t2[4] t1[5] t1[5] t1[5] t1[4] */
  y6 = _mm256_permute2f128_ps(yt1, yt2, 0x20);
  y7 = _mm256_permute2f128_ps(yt1, yt2, 0x31);

  /* t2[2] t2[1] t2[0] t2[0] t1[2] t1[1] t1[0] t1[0] */
  t0  = _mm256_permutevar_ps(y6, yi1);

  /* t2[4] t2[3] t2[3] t2[1] t1[4] t1[3] t1[3] t1[1] */
  y11 = _mm256_shuffle_ps(y6, y7, 0x4D);
  t1  = _mm256_permutevar_ps(y11, yi2);

  /* t2[5] t2[5] t2[4] t2[2] t1[5] t1[5] t1[4] t1[2] */
  y11 = _mm256_shuffle_ps(y6, y7, 0x4A);
  t2  = _mm256_permutevar_ps(y11, yi3);

  /*
   dest[0][2] =  b * t1[0] - c * t1[1] + d * t1[2];
   dest[1][2] =-(a * t1[0] - c * t1[3] + d * t1[4]);
   dest[2][2] =  a * t1[1] - b * t1[3] + d * t1[5];
   dest[3][2] =-(a * t1[2] - b * t1[4] + c * t1[5]);

   dest[0][3] =-(b * t2[0] - c * t2[1] + d * t2[2]);
   dest[1][3] =  a * t2[0] - c * t2[3] + d * t2[4];
   dest[2][3] =-(a * t2[1] - b * t2[3] + d * t2[5]);
   dest[3][3] =  a * t2[2] - b * t2[4] + c * t2[5];
   */
  r2 = _mm256_xor_ps(_mm256_add_ps(_mm256_sub_ps(_mm256_mul_ps(y3, t0),
                                                 _mm256_mul_ps(y4, t1)),
                                   _mm256_mul_ps(y5, t2)),
                     flpsign);

  /* determinant: _mm256_dp_ps takes the dot product of the source elements
     and their cofactors inside each 128-bit lane and, with mask 0xff,
     writes it to all four positions of that lane */
  y4 = _mm256_dp_ps(y0, r1, 0xff);
  y5 = _mm256_rcp_ps(y4);
  r1 = _mm256_mul_ps(r1, y5);
  r2 = _mm256_mul_ps(r2, y5);

  /* transpose */

  /* d c b a h g f e */
  /* l k j i p o n m */
  y0 = _mm256_permute2f128_ps(r1, r1, 0x03);
  y1 = _mm256_permute2f128_ps(r2, r2, 0x03);

  /* b a f e f e b a */
  /* j i n m n m j i */
  /* i m a e m i e a */
  /* j n b f n j f b */
  /* n j f b m i e a */
  y2 = _mm256_shuffle_ps(r1, y0, 0x44);
  y3 = _mm256_shuffle_ps(r2, y1, 0x44);
  y4 = _mm256_shuffle_ps(y2, y3, 0x88);
  y5 = _mm256_shuffle_ps(y2, y3, 0xDD);
  y6 = _mm256_permute2f128_ps(y4, y5, 0x20);

  /* d c h g h g d c */
  /* l k p o p o l k */
  /* k o c g o k g c */
  /* l p d h p l h d */
  /* p l h d o k g c */
  y2 = _mm256_shuffle_ps(r1, y0, 0xEE);
  y3 = _mm256_shuffle_ps(r2, y1, 0xEE);
  y4 = _mm256_shuffle_ps(y2, y3, 0x88);
  y5 = _mm256_shuffle_ps(y2, y3, 0xDD);
  y7 = _mm256_permute2f128_ps(y4, y5, 0x20);

  glmm_store256(dest[0], y6);
  glmm_store256(dest[2], y7);
}
#endif #endif
#endif /* cglm_mat_simd_avx_h */ #endif /* cglm_mat_simd_avx_h */

View File

@@ -10,12 +10,12 @@
#if defined(_MSC_VER) #if defined(_MSC_VER)
/* do not use alignment for older visual studio versions */ /* do not use alignment for older visual studio versions */
#if _MSC_VER < 1913 /* Visual Studio 2017 version 15.6 */ # if _MSC_VER < 1913 /* Visual Studio 2017 version 15.6 */
# define CGLM_ALL_UNALIGNED # define CGLM_ALL_UNALIGNED
# define CGLM_ALIGN(X) /* no alignment */ # define CGLM_ALIGN(X) /* no alignment */
#else # else
# define CGLM_ALIGN(X) __declspec(align(X)) # define CGLM_ALIGN(X) __declspec(align(X))
#endif # endif
#else #else
# define CGLM_ALIGN(X) __attribute((aligned(X))) # define CGLM_ALIGN(X) __attribute((aligned(X)))
#endif #endif
@@ -33,20 +33,18 @@
#endif #endif
typedef float vec2[2]; typedef float vec2[2];
typedef CGLM_ALIGN_IF(8) float vec3[3]; typedef float vec3[3];
typedef int ivec3[3]; typedef int ivec3[3];
typedef CGLM_ALIGN_IF(16) float vec4[4]; typedef CGLM_ALIGN_IF(16) float vec4[4];
typedef vec4 versor;
typedef vec3 mat3[3];
#ifdef __AVX__ #ifdef __AVX__
typedef CGLM_ALIGN_IF(32) vec3 mat3[3];
typedef CGLM_ALIGN_IF(32) vec4 mat4[4]; typedef CGLM_ALIGN_IF(32) vec4 mat4[4];
#else #else
typedef vec3 mat3[3];
typedef CGLM_ALIGN_IF(16) vec4 mat4[4]; typedef CGLM_ALIGN_IF(16) vec4 mat4[4];
#endif #endif
typedef vec4 versor;
#define GLM_E 2.71828182845904523536028747135266250 /* e */ #define GLM_E 2.71828182845904523536028747135266250 /* e */
#define GLM_LOG2E 1.44269504088896340735992468100189214 /* log2(e) */ #define GLM_LOG2E 1.44269504088896340735992468100189214 /* log2(e) */
#define GLM_LOG10E 0.434294481903251827651128918916605082 /* log10(e) */ #define GLM_LOG10E 0.434294481903251827651128918916605082 /* log10(e) */

View File

@@ -21,7 +21,6 @@
CGLM_INLINE void glm_vec3_zero(vec3 v); CGLM_INLINE void glm_vec3_zero(vec3 v);
CGLM_INLINE void glm_vec3_one(vec3 v); CGLM_INLINE void glm_vec3_one(vec3 v);
CGLM_INLINE float glm_vec3_dot(vec3 a, vec3 b); CGLM_INLINE float glm_vec3_dot(vec3 a, vec3 b);
CGLM_INLINE void glm_vec3_cross(vec3 a, vec3 b, vec3 d);
CGLM_INLINE float glm_vec3_norm2(vec3 v); CGLM_INLINE float glm_vec3_norm2(vec3 v);
CGLM_INLINE float glm_vec3_norm(vec3 v); CGLM_INLINE float glm_vec3_norm(vec3 v);
CGLM_INLINE void glm_vec3_add(vec3 a, vec3 b, vec3 dest); CGLM_INLINE void glm_vec3_add(vec3 a, vec3 b, vec3 dest);
@@ -47,6 +46,8 @@
CGLM_INLINE void glm_vec3_inv_to(vec3 v, vec3 dest); CGLM_INLINE void glm_vec3_inv_to(vec3 v, vec3 dest);
CGLM_INLINE void glm_vec3_normalize(vec3 v); CGLM_INLINE void glm_vec3_normalize(vec3 v);
CGLM_INLINE void glm_vec3_normalize_to(vec3 v, vec3 dest); CGLM_INLINE void glm_vec3_normalize_to(vec3 v, vec3 dest);
CGLM_INLINE void glm_vec3_cross(vec3 a, vec3 b, vec3 dest);
CGLM_INLINE void glm_vec3_crossn(vec3 a, vec3 b, vec3 dest);
CGLM_INLINE float glm_vec3_distance(vec3 a, vec3 b); CGLM_INLINE float glm_vec3_distance(vec3 a, vec3 b);
CGLM_INLINE float glm_vec3_angle(vec3 a, vec3 b); CGLM_INLINE float glm_vec3_angle(vec3 a, vec3 b);
CGLM_INLINE void glm_vec3_rotate(vec3 v, float angle, vec3 axis); CGLM_INLINE void glm_vec3_rotate(vec3 v, float angle, vec3 axis);
@@ -166,22 +167,6 @@ glm_vec3_dot(vec3 a, vec3 b) {
return a[0] * b[0] + a[1] * b[1] + a[2] * b[2]; return a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
} }
/*!
 * @brief vec3 cross product
 *
 * All three components are computed into temporaries before anything is
 * stored, so d may be the same array as a or b.
 *
 * @param[in]  a source 1
 * @param[in]  b source 2
 * @param[out] d destination, d = a x b
 */
CGLM_INLINE
void
glm_vec3_cross(vec3 a, vec3 b, vec3 d) {
  float x, y, z;

  /* (u2.v3 - u3.v2, u3.v1 - u1.v3, u1.v2 - u2.v1) */
  x = a[1] * b[2] - a[2] * b[1];
  y = a[2] * b[0] - a[0] * b[2];
  z = a[0] * b[1] - a[1] * b[0];

  /* store only after every input component has been read */
  d[0] = x;
  d[1] = y;
  d[2] = z;
}
/*! /*!
* @brief norm * norm (magnitude) of vec * @brief norm * norm (magnitude) of vec
* *
@@ -444,7 +429,7 @@ glm_vec3_maxadd(vec3 a, vec3 b, vec3 dest) {
* it applies += operator so dest must be initialized * it applies += operator so dest must be initialized
* *
* @param[in] a vector * @param[in] a vector
* @param[in] s scalar * @param[in] b vector
* @param[out] dest dest += min(a, b) * @param[out] dest dest += min(a, b)
*/ */
CGLM_INLINE CGLM_INLINE
@@ -521,6 +506,36 @@ glm_vec3_normalize_to(vec3 v, vec3 dest) {
glm_vec3_scale(v, 1.0f / norm, dest); glm_vec3_scale(v, 1.0f / norm, dest);
} }
/*!
 * @brief cross product of two vectors (RH)
 *
 * All three components are computed into temporaries before anything is
 * stored, so dest may be the same array as a or b.
 *
 * @param[in]  a    vector 1
 * @param[in]  b    vector 2
 * @param[out] dest destination, dest = a x b
 */
CGLM_INLINE
void
glm_vec3_cross(vec3 a, vec3 b, vec3 dest) {
  float x, y, z;

  /* (u2.v3 - u3.v2, u3.v1 - u1.v3, u1.v2 - u2.v1) */
  x = a[1] * b[2] - a[2] * b[1];
  y = a[2] * b[0] - a[0] * b[2];
  z = a[0] * b[1] - a[1] * b[0];

  /* store only after every input component has been read */
  dest[0] = x;
  dest[1] = y;
  dest[2] = z;
}
/*!
 * @brief normalized cross product of two vectors (RH)
 *
 * Stores a x b in dest via glm_vec3_cross, then normalizes dest in place
 * with glm_vec3_normalize. The result for inputs whose cross product has
 * zero length is whatever glm_vec3_normalize yields for a zero vector.
 *
 * @param[in]  a    vector 1
 * @param[in]  b    vector 2
 * @param[out] dest destination, normalized a x b
 */
CGLM_INLINE
void
glm_vec3_crossn(vec3 a, vec3 b, vec3 dest) {
  /* step 1: raw cross product */
  glm_vec3_cross(a, b, dest);

  /* step 2: normalize the stored result in place */
  glm_vec3_normalize(dest);
}
/*! /*!
* @brief angle between two vectors * @brief angle between two vectors
* *

View File

@@ -331,9 +331,9 @@ glm_vec4_subs(vec4 v, float s, vec4 dest) {
/*! /*!
* @brief multiply two vector (component-wise multiplication) * @brief multiply two vector (component-wise multiplication)
* *
* @param a vector1 * @param a vector1
* @param b vector2 * @param b vector2
* @param d dest = (a[0] * b[0], a[1] * b[1], a[2] * b[2], a[3] * b[3]) * @param dest dest = (a[0] * b[0], a[1] * b[1], a[2] * b[2], a[3] * b[3])
*/ */
CGLM_INLINE CGLM_INLINE
void void
@@ -426,7 +426,6 @@ glm_vec4_divs(vec4 v, float s, vec4 dest) {
#endif #endif
} }
/*! /*!
* @brief add two vectors and add result to sum * @brief add two vectors and add result to sum
* *
@@ -553,7 +552,7 @@ glm_vec4_maxadd(vec4 a, vec4 b, vec4 dest) {
* it applies += operator so dest must be initialized * it applies += operator so dest must be initialized
* *
* @param[in] a vector * @param[in] a vector
* @param[in] s scalar * @param[in] b vector
* @param[out] dest dest += min(a, b) * @param[out] dest dest += min(a, b)
*/ */
CGLM_INLINE CGLM_INLINE

View File

@@ -10,6 +10,6 @@
#define CGLM_VERSION_MAJOR 0 #define CGLM_VERSION_MAJOR 0
#define CGLM_VERSION_MINOR 5 #define CGLM_VERSION_MINOR 5
#define CGLM_VERSION_PATCH 0 #define CGLM_VERSION_PATCH 2
#endif /* cglm_version_h */ #endif /* cglm_version_h */

View File

@@ -88,6 +88,12 @@ glmc_perspective(float fovy,
dest); dest);
} }
/*!
 * @brief CGLM_EXPORT wrapper that forwards to glm_persp_move_far
 *
 * @param[in, out] proj     projection matrix handed to glm_persp_move_far
 *                          (presumably modified in place; confirm against
 *                          the inline implementation)
 * @param[in]      deltaFar value passed through unchanged as the far-plane
 *                          delta
 */
CGLM_EXPORT
void
glmc_persp_move_far(mat4 proj, float deltaFar) {
  /* no logic of its own: delegate to the inline version */
  glm_persp_move_far(proj, deltaFar);
}
CGLM_EXPORT CGLM_EXPORT
void void
glmc_perspective_default(float aspect, mat4 dest) { glmc_perspective_default(float aspect, mat4 dest) {

View File

@@ -50,6 +50,12 @@ glmc_mat3_mulv(mat3 m, vec3 v, vec3 dest) {
glm_mat3_mulv(m, v, dest); glm_mat3_mulv(m, v, dest);
} }
/*!
 * @brief CGLM_EXPORT wrapper that forwards to glm_mat3_trace
 *
 * @param[in] m matrix passed through to glm_mat3_trace
 *
 * @return the value returned by glm_mat3_trace(m)
 */
CGLM_EXPORT
float
glmc_mat3_trace(mat3 m) {
  /* no logic of its own: delegate to the inline version */
  return glm_mat3_trace(m);
}
CGLM_EXPORT CGLM_EXPORT
void void
glmc_mat3_quat(mat3 m, versor dest) { glmc_mat3_quat(mat3 m, versor dest) {

View File

@@ -74,6 +74,18 @@ glmc_mat4_mulv3(mat4 m, vec3 v, float last, vec3 dest) {
glm_mat4_mulv3(m, v, last, dest); glm_mat4_mulv3(m, v, last, dest);
} }
/*!
 * @brief CGLM_EXPORT wrapper that forwards to glm_mat4_trace
 *
 * @param[in] m matrix passed through to glm_mat4_trace
 *
 * @return the value returned by glm_mat4_trace(m)
 */
CGLM_EXPORT
float
glmc_mat4_trace(mat4 m) {
  /* no logic of its own: delegate to the inline version */
  return glm_mat4_trace(m);
}
/*!
 * @brief CGLM_EXPORT wrapper that forwards to glm_mat4_trace3
 *
 * @param[in] m matrix passed through to glm_mat4_trace3
 *
 * @return the value returned by glm_mat4_trace3(m)
 */
CGLM_EXPORT
float
glmc_mat4_trace3(mat4 m) {
  /* no logic of its own: delegate to the inline version */
  return glm_mat4_trace3(m);
}
CGLM_EXPORT CGLM_EXPORT
void void
glmc_mat4_quat(mat4 m, versor dest) { glmc_mat4_quat(mat4 m, versor dest) {

View File

@@ -40,8 +40,14 @@ glmc_vec3_dot(vec3 a, vec3 b) {
CGLM_EXPORT CGLM_EXPORT
void void
glmc_vec3_cross(vec3 a, vec3 b, vec3 d) { glmc_vec3_cross(vec3 a, vec3 b, vec3 dest) {
glm_vec3_cross(a, b, d); glm_vec3_cross(a, b, dest);
}
CGLM_EXPORT
void
glmc_vec3_crossn(vec3 a, vec3 b, vec3 dest) {
glm_vec3_crossn(a, b, dest);
} }
CGLM_EXPORT CGLM_EXPORT

View File

@@ -24,9 +24,9 @@ test_mat3(void **state) {
for (i = 0; i < m; i++) { for (i = 0; i < m; i++) {
for (j = 0; j < n; j++) { for (j = 0; j < n; j++) {
if (i == j) if (i == j)
assert_true(m3[i][j] == 1.0f); assert_true(glm_eq(m3[i][j], 1.0f));
else else
assert_true(m3[i][j] == 0.0f); assert_true(glm_eq(m3[i][j], 0.0f));
} }
} }

View File

@@ -24,9 +24,9 @@ test_mat4(void **state) {
for (i = 0; i < m; i++) { for (i = 0; i < m; i++) {
for (j = 0; j < n; j++) { for (j = 0; j < n; j++) {
if (i == j) if (i == j)
assert_true(m3[i][j] == 1.0f); assert_true(glm_eq(m3[i][j], 1.0f));
else else
assert_true(m3[i][j] == 0.0f); assert_true(glm_eq(m3[i][j], 0.0f));
} }
} }

View File

@@ -25,7 +25,7 @@ test_quat(void **state) {
/* 0. test identity quat */ /* 0. test identity quat */
glm_quat_identity(q4); glm_quat_identity(q4);
assert_true(glm_quat_real(q4) == cosf(glm_rad(0.0f) * 0.5f)); assert_true(glm_eq(glm_quat_real(q4), cosf(glm_rad(0.0f) * 0.5f)));
glm_quat_mat4(q4, rot1); glm_quat_mat4(q4, rot1);
test_assert_mat4_eq2(rot1, GLM_MAT4_IDENTITY, 0.000009); test_assert_mat4_eq2(rot1, GLM_MAT4_IDENTITY, 0.000009);
@@ -118,7 +118,7 @@ test_quat(void **state) {
/* 9. test imag, real */ /* 9. test imag, real */
/* 9.1 real */ /* 9.1 real */
assert_true(glm_quat_real(q4) == cosf(glm_rad(-90.0f) * 0.5f)); assert_true(glm_eq(glm_quat_real(q4), cosf(glm_rad(-90.0f) * 0.5f)));
/* 9.1 imag */ /* 9.1 imag */
glm_quat_imag(q4, imag); glm_quat_imag(q4, imag);