mirror of
https://github.com/recp/cglm.git
synced 2026-02-17 03:39:05 +00:00
Compare commits
16 Commits
vec-update
...
simd
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
01b93b0409 | ||
|
|
07e60bd098 | ||
|
|
e3d3cd8ab8 | ||
|
|
d17c99215d | ||
|
|
dc6eb492c1 | ||
|
|
7219b02d23 | ||
|
|
21834b4ffb | ||
|
|
2ef9c23a6c | ||
|
|
92605f845a | ||
|
|
b23d65bef5 | ||
|
|
2f632c6311 | ||
|
|
b47d148b81 | ||
|
|
a5bc2f710e | ||
|
|
9aebdc76b3 | ||
|
|
e9b51fc07a | ||
|
|
abfa355b84 |
68
README.md
68
README.md
@@ -24,6 +24,8 @@ you have the latest version
|
||||
- **[major change]** starting with v0.4.0, quaternions are stored as [x, y, z, w]; they were stored as [w, x, y, z] in v0.3.5 and earlier versions
|
||||
- **[api rename]** starting with v0.4.5, **glm_simd** functions are renamed to **glmm_**
|
||||
- **[new option]** starting with v0.4.5, you can disable the alignment requirement; check the options in the docs.
|
||||
- **[major change]** starting with v0.5.0, vec3 functions use the **glm_vec3_** namespace; it was **glm_vec_** until v0.5.0
|
||||
- **[major change]** starting with v0.5.1, built-in alignment is removed from **vec3** and **mat3** types
|
||||
|
||||
#### Note for C++ developers:
|
||||
If you aren't aware of the original GLM library yet, you may also want to look at:
|
||||
@@ -81,6 +83,7 @@ Currently *cglm* uses default clip space configuration (-1, 1) for camera functi
|
||||
- frustum (extract view frustum planes, corners...)
|
||||
- bounding box (AABB in Frustum (culling), crop, merge...)
|
||||
- project, unproject
|
||||
- and other...
|
||||
|
||||
<hr />
|
||||
|
||||
@@ -122,39 +125,6 @@ glm_mul(T, R, modelMat);
|
||||
glm_inv_tr(modelMat);
|
||||
```
|
||||
|
||||
## Contributors
|
||||
|
||||
This project exists thanks to all the people who contribute. [[Contribute](CONTRIBUTING.md)].
|
||||
<a href="graphs/contributors"><img src="https://opencollective.com/cglm/contributors.svg?width=890&button=false" /></a>
|
||||
|
||||
|
||||
## Backers
|
||||
|
||||
Thank you to all our backers! 🙏 [[Become a backer](https://opencollective.com/cglm#backer)]
|
||||
|
||||
<a href="https://opencollective.com/cglm#backers" target="_blank"><img src="https://opencollective.com/cglm/backers.svg?width=890"></a>
|
||||
|
||||
|
||||
## Sponsors
|
||||
|
||||
Support this project by becoming a sponsor. Your logo will show up here with a link to your website. [[Become a sponsor](https://opencollective.com/cglm#sponsor)]
|
||||
|
||||
<a href="https://opencollective.com/cglm/sponsor/0/website" target="_blank"><img src="https://opencollective.com/cglm/sponsor/0/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/cglm/sponsor/1/website" target="_blank"><img src="https://opencollective.com/cglm/sponsor/1/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/cglm/sponsor/2/website" target="_blank"><img src="https://opencollective.com/cglm/sponsor/2/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/cglm/sponsor/3/website" target="_blank"><img src="https://opencollective.com/cglm/sponsor/3/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/cglm/sponsor/4/website" target="_blank"><img src="https://opencollective.com/cglm/sponsor/4/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/cglm/sponsor/5/website" target="_blank"><img src="https://opencollective.com/cglm/sponsor/5/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/cglm/sponsor/6/website" target="_blank"><img src="https://opencollective.com/cglm/sponsor/6/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/cglm/sponsor/7/website" target="_blank"><img src="https://opencollective.com/cglm/sponsor/7/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/cglm/sponsor/8/website" target="_blank"><img src="https://opencollective.com/cglm/sponsor/8/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/cglm/sponsor/9/website" target="_blank"><img src="https://opencollective.com/cglm/sponsor/9/avatar.svg"></a>
|
||||
|
||||
|
||||
|
||||
## License
|
||||
MIT. check the LICENSE file
|
||||
|
||||
## Build
|
||||
|
||||
### Unix (Autotools)
|
||||
@@ -284,3 +254,35 @@ You can pass same way to another APIs e.g. Vulkan, DX...
|
||||
- [ ] Unaligned operations (e.g. `glm_umat4_mul`)
|
||||
- [x] Extra documentation
|
||||
- [ ] ARM Neon Arch (In Progress)
|
||||
|
||||
|
||||
## Contributors
|
||||
|
||||
This project exists thanks to all the people who contribute. [[Contribute](CONTRIBUTING.md)].
|
||||
<a href="graphs/contributors"><img src="https://opencollective.com/cglm/contributors.svg?width=890&button=false" /></a>
|
||||
|
||||
|
||||
## Backers
|
||||
|
||||
Thank you to all our backers! 🙏 [[Become a backer](https://opencollective.com/cglm#backer)]
|
||||
|
||||
<a href="https://opencollective.com/cglm#backers" target="_blank"><img src="https://opencollective.com/cglm/backers.svg?width=890"></a>
|
||||
|
||||
|
||||
## Sponsors
|
||||
|
||||
Support this project by becoming a sponsor. Your logo will show up here with a link to your website. [[Become a sponsor](https://opencollective.com/cglm#sponsor)]
|
||||
|
||||
<a href="https://opencollective.com/cglm/sponsor/0/website" target="_blank"><img src="https://opencollective.com/cglm/sponsor/0/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/cglm/sponsor/1/website" target="_blank"><img src="https://opencollective.com/cglm/sponsor/1/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/cglm/sponsor/2/website" target="_blank"><img src="https://opencollective.com/cglm/sponsor/2/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/cglm/sponsor/3/website" target="_blank"><img src="https://opencollective.com/cglm/sponsor/3/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/cglm/sponsor/4/website" target="_blank"><img src="https://opencollective.com/cglm/sponsor/4/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/cglm/sponsor/5/website" target="_blank"><img src="https://opencollective.com/cglm/sponsor/5/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/cglm/sponsor/6/website" target="_blank"><img src="https://opencollective.com/cglm/sponsor/6/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/cglm/sponsor/7/website" target="_blank"><img src="https://opencollective.com/cglm/sponsor/7/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/cglm/sponsor/8/website" target="_blank"><img src="https://opencollective.com/cglm/sponsor/8/avatar.svg"></a>
|
||||
<a href="https://opencollective.com/cglm/sponsor/9/website" target="_blank"><img src="https://opencollective.com/cglm/sponsor/9/avatar.svg"></a>
|
||||
|
||||
## License
|
||||
MIT. check the LICENSE file
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
#*****************************************************************************
|
||||
|
||||
AC_PREREQ([2.69])
|
||||
AC_INIT([cglm], [0.5.0], [info@recp.me])
|
||||
AC_INIT([cglm], [0.5.2], [info@recp.me])
|
||||
AM_INIT_AUTOMAKE([-Wall -Werror foreign subdir-objects])
|
||||
|
||||
AC_CONFIG_MACRO_DIR([m4])
|
||||
|
||||
@@ -36,6 +36,7 @@ Functions:
|
||||
#. :c:func:`glm_ortho_default`
|
||||
#. :c:func:`glm_ortho_default_s`
|
||||
#. :c:func:`glm_perspective`
|
||||
#. :c:func:`glm_persp_move_far`
|
||||
#. :c:func:`glm_perspective_default`
|
||||
#. :c:func:`glm_perspective_resize`
|
||||
#. :c:func:`glm_lookat`
|
||||
@@ -145,6 +146,16 @@ Functions documentation
|
||||
| *[in]* **farVal** far clipping planes
|
||||
| *[out]* **dest** result matrix
|
||||
|
||||
.. c:function:: void glm_persp_move_far(mat4 proj, float deltaFar)
|
||||
|
||||
| extend perspective projection matrix's far distance
|
||||
|
||||
| this function does not guarantee far >= near, be aware of that!
|
||||
|
||||
Parameters:
|
||||
| *[in, out]* **proj** projection matrix to extend
|
||||
| *[in]* **deltaFar** distance from existing far (negative to shrink)
|
||||
|
||||
.. c:function:: void glm_perspective_default(float aspect, mat4 dest)
|
||||
|
||||
| set up perspective projection matrix with default near/far
|
||||
|
||||
@@ -62,9 +62,9 @@ author = u'Recep Aslantas'
|
||||
# built documents.
|
||||
#
|
||||
# The short X.Y version.
|
||||
version = u'0.5.0'
|
||||
version = u'0.5.2'
|
||||
# The full version, including alpha/beta/rc tags.
|
||||
release = u'0.5.0'
|
||||
release = u'0.5.2'
|
||||
|
||||
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||
# for a list of supported languages.
|
||||
|
||||
@@ -29,6 +29,7 @@ Functions:
|
||||
#. :c:func:`glm_mat3_scale`
|
||||
#. :c:func:`glm_mat3_det`
|
||||
#. :c:func:`glm_mat3_inv`
|
||||
#. :c:func:`glm_mat3_trace`
|
||||
#. :c:func:`glm_mat3_swap_col`
|
||||
#. :c:func:`glm_mat3_swap_row`
|
||||
|
||||
@@ -133,6 +134,16 @@ Functions documentation
|
||||
| *[in]* **mat** matrix
|
||||
| *[out]* **dest** destination (inverse matrix)
|
||||
|
||||
.. c:function:: float glm_mat3_trace(mat3 m)
|
||||
|
||||
| sum of the elements on the main diagonal from upper left to the lower right
|
||||
|
||||
Parameters:
|
||||
| *[in]* **m** matrix
|
||||
|
||||
Returns:
|
||||
trace of matrix
|
||||
|
||||
.. c:function:: void glm_mat3_swap_col(mat3 mat, int col1, int col2)
|
||||
|
||||
swap two matrix columns
|
||||
|
||||
@@ -33,6 +33,8 @@ Functions:
|
||||
#. :c:func:`glm_mat4_mulN`
|
||||
#. :c:func:`glm_mat4_mulv`
|
||||
#. :c:func:`glm_mat4_mulv3`
|
||||
#. :c:func:`glm_mat4_trace`
|
||||
#. :c:func:`glm_mat4_trace3`
|
||||
#. :c:func:`glm_mat4_quat`
|
||||
#. :c:func:`glm_mat4_transpose_to`
|
||||
#. :c:func:`glm_mat4_transpose`
|
||||
@@ -156,6 +158,27 @@ Functions documentation
|
||||
| *[in]* **v** vec3 (right, column vector)
|
||||
| *[out]* **dest** vec3 (result, column vector)
|
||||
|
||||
.. c:function:: float glm_mat4_trace(mat4 m)
|
||||
|
||||
| sum of the elements on the main diagonal from upper left to the lower right
|
||||
|
||||
Parameters:
|
||||
| *[in]* **m** matrix
|
||||
|
||||
Returns:
|
||||
trace of matrix
|
||||
|
||||
.. c:function:: float glm_mat4_trace3(mat4 m)
|
||||
|
||||
| trace of matrix (rotation part)
|
||||
| sum of the elements on the main diagonal from upper left to the lower right
|
||||
|
||||
Parameters:
|
||||
| *[in]* **m** matrix
|
||||
|
||||
Returns:
|
||||
trace of matrix
|
||||
|
||||
.. c:function:: void glm_mat4_quat(mat4 m, versor dest)
|
||||
|
||||
convert mat4's rotation part to quaternion
|
||||
|
||||
@@ -39,7 +39,6 @@ Functions:
|
||||
#. :c:func:`glm_vec3_zero`
|
||||
#. :c:func:`glm_vec3_one`
|
||||
#. :c:func:`glm_vec3_dot`
|
||||
#. :c:func:`glm_vec3_cross`
|
||||
#. :c:func:`glm_vec3_norm2`
|
||||
#. :c:func:`glm_vec3_norm`
|
||||
#. :c:func:`glm_vec3_add`
|
||||
@@ -65,6 +64,8 @@ Functions:
|
||||
#. :c:func:`glm_vec3_negate_to`
|
||||
#. :c:func:`glm_vec3_normalize`
|
||||
#. :c:func:`glm_vec3_normalize_to`
|
||||
#. :c:func:`glm_vec3_cross`
|
||||
#. :c:func:`glm_vec3_crossn`
|
||||
#. :c:func:`glm_vec3_distance2`
|
||||
#. :c:func:`glm_vec3_distance`
|
||||
#. :c:func:`glm_vec3_angle`
|
||||
@@ -125,12 +126,21 @@ Functions documentation
|
||||
|
||||
.. c:function:: void glm_vec3_cross(vec3 a, vec3 b, vec3 d)
|
||||
|
||||
cross product
|
||||
cross product of two vectors (RH)
|
||||
|
||||
Parameters:
|
||||
| *[in]* **a** source 1
|
||||
| *[in]* **b** source 2
|
||||
| *[out]* **d** destination
|
||||
| *[in]* **a** vector 1
|
||||
| *[in]* **b** vector 2
|
||||
| *[out]* **dest** destination
|
||||
|
||||
.. c:function:: void glm_vec3_crossn(vec3 a, vec3 b, vec3 dest)
|
||||
|
||||
cross product of two vectors (RH), with the result normalized
|
||||
|
||||
Parameters:
|
||||
| *[in]* **a** vector 1
|
||||
| *[in]* **b** vector 2
|
||||
| *[out]* **dest** destination
|
||||
|
||||
.. c:function:: float glm_vec3_norm2(vec3 v)
|
||||
|
||||
|
||||
@@ -61,6 +61,10 @@ glmc_perspective(float fovy,
|
||||
float farVal,
|
||||
mat4 dest);
|
||||
|
||||
CGLM_EXPORT
|
||||
void
|
||||
glmc_persp_move_far(mat4 proj, float deltaFar);
|
||||
|
||||
CGLM_EXPORT
|
||||
void
|
||||
glmc_perspective_default(float aspect, mat4 dest);
|
||||
|
||||
@@ -44,6 +44,10 @@ CGLM_EXPORT
|
||||
void
|
||||
glmc_mat3_mulv(mat3 m, vec3 v, vec3 dest);
|
||||
|
||||
CGLM_EXPORT
|
||||
float
|
||||
glmc_mat3_trace(mat3 m);
|
||||
|
||||
CGLM_EXPORT
|
||||
void
|
||||
glmc_mat3_quat(mat3 m, versor dest);
|
||||
|
||||
@@ -61,6 +61,14 @@ CGLM_EXPORT
|
||||
void
|
||||
glmc_mat4_mulv3(mat4 m, vec3 v, float last, vec3 dest);
|
||||
|
||||
CGLM_EXPORT
|
||||
float
|
||||
glmc_mat4_trace(mat4 m);
|
||||
|
||||
CGLM_EXPORT
|
||||
float
|
||||
glmc_mat4_trace3(mat4 m);
|
||||
|
||||
CGLM_EXPORT
|
||||
void
|
||||
glmc_mat4_quat(mat4 m, versor dest);
|
||||
|
||||
@@ -42,7 +42,11 @@ glmc_vec3_dot(vec3 a, vec3 b);
|
||||
|
||||
CGLM_EXPORT
|
||||
void
|
||||
glmc_vec3_cross(vec3 a, vec3 b, vec3 d);
|
||||
glmc_vec3_cross(vec3 a, vec3 b, vec3 dest);
|
||||
|
||||
CGLM_EXPORT
|
||||
void
|
||||
glmc_vec3_crossn(vec3 a, vec3 b, vec3 dest);
|
||||
|
||||
CGLM_EXPORT
|
||||
float
|
||||
|
||||
@@ -271,6 +271,30 @@ glm_perspective(float fovy,
|
||||
dest[3][2] = 2.0f * nearVal * farVal * fn;
|
||||
}
|
||||
|
||||
/*!
|
||||
* @brief extend perspective projection matrix's far distance
|
||||
*
|
||||
* this function does not guarantee far >= near, be aware of that!
|
||||
*
|
||||
* @param[in, out] proj projection matrix to extend
|
||||
* @param[in] deltaFar distance from existing far (negative to shrink)
|
||||
*/
|
||||
CGLM_INLINE
|
||||
void
|
||||
glm_persp_move_far(mat4 proj, float deltaFar) {
|
||||
float fn, farVal, nearVal, p22, p32;
|
||||
|
||||
p22 = proj[2][2];
|
||||
p32 = proj[3][2];
|
||||
|
||||
nearVal = p32 / (p22 - 1.0f);
|
||||
farVal = p32 / (p22 + 1.0f) + deltaFar;
|
||||
fn = 1.0f / (nearVal - farVal);
|
||||
|
||||
proj[2][2] = (nearVal + farVal) * fn;
|
||||
proj[3][2] = 2.0f * nearVal * farVal * fn;
|
||||
}
|
||||
|
||||
/*!
|
||||
* @brief set up perspective projection matrix with default near/far
|
||||
* and angle values
|
||||
@@ -323,9 +347,7 @@ glm_lookat(vec3 eye,
|
||||
glm_vec3_sub(center, eye, f);
|
||||
glm_vec3_normalize(f);
|
||||
|
||||
glm_vec3_cross(f, up, s);
|
||||
glm_vec3_normalize(s);
|
||||
|
||||
glm_vec3_crossn(f, up, s);
|
||||
glm_vec3_cross(s, f, u);
|
||||
|
||||
dest[0][0] = s[0];
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
CGLM_INLINE void glm_mat3_transpose_to(mat3 m, mat3 dest);
|
||||
CGLM_INLINE void glm_mat3_transpose(mat3 m);
|
||||
CGLM_INLINE void glm_mat3_mulv(mat3 m, vec3 v, vec3 dest);
|
||||
CGLM_INLINE float glm_mat3_trace(mat3 m);
|
||||
CGLM_INLINE void glm_mat3_scale(mat3 m, float s);
|
||||
CGLM_INLINE float glm_mat3_det(mat3 mat);
|
||||
CGLM_INLINE void glm_mat3_inv(mat3 mat, mat3 dest);
|
||||
@@ -207,6 +208,18 @@ glm_mat3_mulv(mat3 m, vec3 v, vec3 dest) {
|
||||
dest[2] = m[0][2] * v[0] + m[1][2] * v[1] + m[2][2] * v[2];
|
||||
}
|
||||
|
||||
/*!
|
||||
* @brief trace of matrix
|
||||
*
|
||||
* sum of the elements on the main diagonal from upper left to the lower right
|
||||
*
|
||||
* @param[in] m matrix
|
||||
*/
|
||||
CGLM_INLINE
|
||||
float
|
||||
glm_mat3_trace(mat3 m) {
|
||||
return m[0][0] + m[1][1] + m[2][2];
|
||||
}
|
||||
|
||||
/*!
|
||||
* @brief convert mat3 to quaternion
|
||||
|
||||
@@ -29,6 +29,8 @@
|
||||
CGLM_INLINE void glm_mat4_mulN(mat4 *matrices[], int len, mat4 dest);
|
||||
CGLM_INLINE void glm_mat4_mulv(mat4 m, vec4 v, vec4 dest);
|
||||
CGLM_INLINE void glm_mat4_mulv3(mat4 m, vec3 v, vec3 dest);
|
||||
CGLM_INLINE float glm_mat4_trace(mat4 m);
|
||||
CGLM_INLINE float glm_mat4_trace3(mat4 m);
|
||||
CGLM_INLINE void glm_mat4_transpose_to(mat4 m, mat4 dest);
|
||||
CGLM_INLINE void glm_mat4_transpose(mat4 m);
|
||||
CGLM_INLINE void glm_mat4_scale_p(mat4 m, float s);
|
||||
@@ -338,6 +340,32 @@ glm_mat4_mulv(mat4 m, vec4 v, vec4 dest) {
|
||||
#endif
|
||||
}
|
||||
|
||||
/*!
|
||||
* @brief trace of matrix
|
||||
*
|
||||
* sum of the elements on the main diagonal from upper left to the lower right
|
||||
*
|
||||
* @param[in] m matrix
|
||||
*/
|
||||
CGLM_INLINE
|
||||
float
|
||||
glm_mat4_trace(mat4 m) {
|
||||
return m[0][0] + m[1][1] + m[2][2] + m[3][3];
|
||||
}
|
||||
|
||||
/*!
|
||||
* @brief trace of matrix (rotation part)
|
||||
*
|
||||
* sum of the elements on the main diagonal from upper left to the lower right
|
||||
*
|
||||
* @param[in] m matrix
|
||||
*/
|
||||
CGLM_INLINE
|
||||
float
|
||||
glm_mat4_trace3(mat4 m) {
|
||||
return m[0][0] + m[1][1] + m[2][2];
|
||||
}
|
||||
|
||||
/*!
|
||||
* @brief convert mat4's rotation part to quaternion
|
||||
*
|
||||
@@ -476,7 +504,9 @@ glm_mat4_scale_p(mat4 m, float s) {
|
||||
CGLM_INLINE
|
||||
void
|
||||
glm_mat4_scale(mat4 m, float s) {
|
||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
||||
#ifdef __AVX__
|
||||
glm_mat4_scale_avx(m, s);
|
||||
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||
glm_mat4_scale_sse2(m, s);
|
||||
#else
|
||||
glm_mat4_scale_p(m, s);
|
||||
@@ -526,7 +556,9 @@ glm_mat4_det(mat4 mat) {
|
||||
CGLM_INLINE
|
||||
void
|
||||
glm_mat4_inv(mat4 mat, mat4 dest) {
|
||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
||||
#ifdef __AVX__
|
||||
glm_mat4_inv_avx(mat, dest);
|
||||
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||
glm_mat4_inv_sse2(mat, dest);
|
||||
#else
|
||||
float t[6];
|
||||
@@ -587,7 +619,9 @@ glm_mat4_inv(mat4 mat, mat4 dest) {
|
||||
CGLM_INLINE
|
||||
void
|
||||
glm_mat4_inv_fast(mat4 mat, mat4 dest) {
|
||||
#if defined( __SSE__ ) || defined( __SSE2__ )
|
||||
#ifdef __AVX__
|
||||
glm_mat4_inv_fast_avx(mat, dest);
|
||||
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||
glm_mat4_inv_fast_sse2(mat, dest);
|
||||
#else
|
||||
glm_mat4_inv(mat, dest);
|
||||
|
||||
@@ -14,12 +14,23 @@
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
CGLM_INLINE
|
||||
void
|
||||
glm_mat4_scale_avx(mat4 m, float s) {
|
||||
__m256 y0;
|
||||
y0 = _mm256_set1_ps(s);
|
||||
|
||||
glmm_store256(m[0], _mm256_mul_ps(y0, glmm_load256(m[0])));
|
||||
glmm_store256(m[2], _mm256_mul_ps(y0, glmm_load256(m[2])));
|
||||
}
|
||||
|
||||
CGLM_INLINE
|
||||
void
|
||||
glm_mat4_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
|
||||
/* D = R * L (Column-Major) */
|
||||
|
||||
__m256 y0, y1, y2, y3, y4, y5, y6, y7, y8, y9;
|
||||
__m256i yi0, yi1, yi2, yi3;
|
||||
|
||||
y0 = glmm_load256(m2[0]); /* h g f e d c b a */
|
||||
y1 = glmm_load256(m2[2]); /* p o n m l k j i */
|
||||
@@ -31,14 +42,19 @@ glm_mat4_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
|
||||
y4 = _mm256_permute2f128_ps(y2, y2, 0x03); /* d c b a h g f e */
|
||||
y5 = _mm256_permute2f128_ps(y3, y3, 0x03); /* l k j i p o n m */
|
||||
|
||||
yi0 = _mm256_set_epi32(1, 1, 1, 1, 0, 0, 0, 0);
|
||||
yi1 = _mm256_set_epi32(3, 3, 3, 3, 2, 2, 2, 2);
|
||||
yi2 = _mm256_set_epi32(0, 0, 0, 0, 1, 1, 1, 1);
|
||||
yi3 = _mm256_set_epi32(2, 2, 2, 2, 3, 3, 3, 3);
|
||||
|
||||
/* f f f f a a a a */
|
||||
/* h h h h c c c c */
|
||||
/* e e e e b b b b */
|
||||
/* g g g g d d d d */
|
||||
y6 = _mm256_permutevar_ps(y0, _mm256_set_epi32(1, 1, 1, 1, 0, 0, 0, 0));
|
||||
y7 = _mm256_permutevar_ps(y0, _mm256_set_epi32(3, 3, 3, 3, 2, 2, 2, 2));
|
||||
y8 = _mm256_permutevar_ps(y0, _mm256_set_epi32(0, 0, 0, 0, 1, 1, 1, 1));
|
||||
y9 = _mm256_permutevar_ps(y0, _mm256_set_epi32(2, 2, 2, 2, 3, 3, 3, 3));
|
||||
y6 = _mm256_permutevar_ps(y0, yi0);
|
||||
y7 = _mm256_permutevar_ps(y0, yi1);
|
||||
y8 = _mm256_permutevar_ps(y0, yi2);
|
||||
y9 = _mm256_permutevar_ps(y0, yi3);
|
||||
|
||||
glmm_store256(dest[0],
|
||||
_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
|
||||
@@ -50,10 +66,10 @@ glm_mat4_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
|
||||
/* p p p p k k k k */
|
||||
/* m m m m j j j j */
|
||||
/* o o o o l l l l */
|
||||
y6 = _mm256_permutevar_ps(y1, _mm256_set_epi32(1, 1, 1, 1, 0, 0, 0, 0));
|
||||
y7 = _mm256_permutevar_ps(y1, _mm256_set_epi32(3, 3, 3, 3, 2, 2, 2, 2));
|
||||
y8 = _mm256_permutevar_ps(y1, _mm256_set_epi32(0, 0, 0, 0, 1, 1, 1, 1));
|
||||
y9 = _mm256_permutevar_ps(y1, _mm256_set_epi32(2, 2, 2, 2, 3, 3, 3, 3));
|
||||
y6 = _mm256_permutevar_ps(y1, yi0);
|
||||
y7 = _mm256_permutevar_ps(y1, yi1);
|
||||
y8 = _mm256_permutevar_ps(y1, yi2);
|
||||
y9 = _mm256_permutevar_ps(y1, yi3);
|
||||
|
||||
glmm_store256(dest[2],
|
||||
_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
|
||||
@@ -62,5 +78,365 @@ glm_mat4_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
|
||||
_mm256_mul_ps(y5, y9))));
|
||||
}
|
||||
|
||||
CGLM_INLINE
|
||||
void
|
||||
glm_mat4_inv_avx(mat4 mat, mat4 dest) {
|
||||
__m256 y0, y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12, y13;
|
||||
__m256 yt0, yt1, yt2;
|
||||
__m256 t0, t1, t2;
|
||||
__m256 r1, r2;
|
||||
__m256 flpsign;
|
||||
__m256i yi1, yi2, yi3;
|
||||
|
||||
y0 = glmm_load256(mat[0]); /* h g f e d c b a */
|
||||
y1 = glmm_load256(mat[2]); /* p o n m l k j i */
|
||||
|
||||
y2 = _mm256_permute2f128_ps(y1, y1, 0x00); /* l k j i l k j i */
|
||||
y3 = _mm256_permute2f128_ps(y1, y1, 0x11); /* p o n m p o n m */
|
||||
y4 = _mm256_permute2f128_ps(y0, y0, 0x03); /* d c b a h g f e */
|
||||
y13 = _mm256_permute2f128_ps(y4, y4, 0x00); /* h g f e h g f e */
|
||||
|
||||
yi1 = _mm256_set_epi32(0, 0, 0, 0, 0, 1, 1, 2);
|
||||
yi2 = _mm256_set_epi32(1, 1, 1, 2, 3, 2, 3, 3);
|
||||
flpsign = _mm256_set_ps(0.f, -0.f, 0.f, -0.f, -0.f, 0.f, -0.f, 0.f);
|
||||
|
||||
/* i i i i i j j k */
|
||||
/* n n n o p o p p */
|
||||
/* m m m m m n n o */
|
||||
/* j j j k l k l l */
|
||||
/* e e e e e f f g */
|
||||
/* f f f g h g h h */
|
||||
y5 = _mm256_permutevar_ps(y2, yi1);
|
||||
y6 = _mm256_permutevar_ps(y3, yi2);
|
||||
y7 = _mm256_permutevar_ps(y3, yi1);
|
||||
y8 = _mm256_permutevar_ps(y2, yi2);
|
||||
y2 = _mm256_permutevar_ps(y13, yi1);
|
||||
y3 = _mm256_permutevar_ps(y13, yi2);
|
||||
|
||||
yi1 = _mm256_set_epi32(2, 1, 0, 0, 2, 1, 0, 0);
|
||||
yi2 = _mm256_set_epi32(2, 1, 1, 0, 2, 1, 1, 0);
|
||||
yi3 = _mm256_set_epi32(3, 3, 2, 0, 3, 3, 2, 0);
|
||||
|
||||
/*
|
||||
t0[0] = k * p - o * l; t1[0] = g * p - o * h; t2[0] = g * l - k * h;
|
||||
t0[1] = j * p - n * l; t1[1] = f * p - n * h; t2[1] = f * l - j * h;
|
||||
t0[2] = j * o - n * k; t1[2] = f * o - n * g; t2[2] = f * k - j * g;
|
||||
t0[3] = i * p - m * l; t1[3] = e * p - m * h; t2[3] = e * l - i * h;
|
||||
t0[4] = i * o - m * k; t1[4] = e * o - m * g; t2[4] = e * k - i * g;
|
||||
t0[5] = i * n - m * j; t1[5] = e * n - m * f; t2[5] = e * j - i * f;
|
||||
*/
|
||||
yt0 = _mm256_sub_ps(_mm256_mul_ps(y5, y6), _mm256_mul_ps(y7, y8));
|
||||
yt1 = _mm256_sub_ps(_mm256_mul_ps(y2, y6), _mm256_mul_ps(y7, y3));
|
||||
yt2 = _mm256_sub_ps(_mm256_mul_ps(y2, y8), _mm256_mul_ps(y5, y3));
|
||||
|
||||
/* t3 t2 t1 t0 t3 t2 t1 t0 */
|
||||
/* t5 t5 t5 t4 t5 t5 t5 t4 */
|
||||
y9 = _mm256_permute2f128_ps(yt0, yt0, 0x00);
|
||||
y10 = _mm256_permute2f128_ps(yt0, yt0, 0x11);
|
||||
//
|
||||
/* t2 t1 t0 t0 t2 t1 t0 t0 */
|
||||
t0 = _mm256_permutevar_ps(y9, yi1);
|
||||
|
||||
/* t4 t3 t3 t1 t4 t3 t3 t1 */
|
||||
y11 = _mm256_shuffle_ps(y9, y10, 0x4D);
|
||||
y12 = _mm256_permutevar_ps(y11, yi2);
|
||||
t1 = _mm256_permute2f128_ps(y12, y9, 0x00);
|
||||
|
||||
/* t5 t5 t4 t2 t5 t5 t4 t2 */
|
||||
y11 = _mm256_shuffle_ps(y9, y10, 0x4A);
|
||||
y12 = _mm256_permutevar_ps(y11, yi3);
|
||||
t2 = _mm256_permute2f128_ps(y12, y12, 0x00);
|
||||
|
||||
/* a a a b e e e f */
|
||||
/* b b c c f f g g */
|
||||
/* c d d d g h h h */
|
||||
y9 = _mm256_permute_ps(y4, 0x01);
|
||||
y10 = _mm256_permute_ps(y4, 0x5A);
|
||||
y11 = _mm256_permute_ps(y4, 0xBF);
|
||||
|
||||
/*
|
||||
dest[0][0] = f * t[0] - g * t[1] + h * t[2];
|
||||
dest[1][0] =-(e * t[0] - g * t[3] + h * t[4]);
|
||||
dest[2][0] = e * t[1] - f * t[3] + h * t[5];
|
||||
dest[3][0] =-(e * t[2] - f * t[4] + g * t[5]);
|
||||
|
||||
dest[0][1] =-(b * t[0] - c * t[1] + d * t[2]);
|
||||
dest[1][1] = a * t[0] - c * t[3] + d * t[4];
|
||||
dest[2][1] =-(a * t[1] - b * t[3] + d * t[5]);
|
||||
dest[3][1] = a * t[2] - b * t[4] + c * t[5];
|
||||
*/
|
||||
r1 = _mm256_xor_ps(_mm256_add_ps(_mm256_sub_ps(_mm256_mul_ps(y9, t0),
|
||||
_mm256_mul_ps(y10, t1)),
|
||||
_mm256_mul_ps(y11, t2)),
|
||||
flpsign);
|
||||
|
||||
/* d c b a d c b a */
|
||||
y2 = _mm256_permute2f128_ps(y0, y0, 0x0);
|
||||
|
||||
/* a a a b a a a b */
|
||||
/* b b c c b b c c */
|
||||
/* c d d d c d d d */
|
||||
y3 = _mm256_permutevar_ps(y2, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 1));
|
||||
y4 = _mm256_permutevar_ps(y2, _mm256_set_epi32(1, 1, 2, 2, 1, 1, 2, 2));
|
||||
y5 = _mm256_permutevar_ps(y2, _mm256_set_epi32(2, 3, 3, 3, 2, 3, 3, 3));
|
||||
|
||||
/* t2[3] t2[2] t2[1] t2[0] t1[3] t1[2] t1[1] t1[0] */
|
||||
/* t2[5] t2[5] t2[5] t2[4] t1[5] t1[5] t1[5] t1[4] */
|
||||
y6 = _mm256_permute2f128_ps(yt1, yt2, 0x20);
|
||||
y7 = _mm256_permute2f128_ps(yt1, yt2, 0x31);
|
||||
|
||||
/* t2[2] t2[1] t2[0] t2[0] t1[2] t1[1] t1[0] t1[0] */
|
||||
t0 = _mm256_permutevar_ps(y6, yi1);
|
||||
|
||||
/* t1[4] t1[3] t1[3] t1[1] t1[4] t1[3] t1[3] t1[1] */
|
||||
|
||||
/* t1[4] t1[3] t1[3] t1[1] t1[4] t1[3] t1[3] t1[1] */
|
||||
y11 = _mm256_shuffle_ps(y6, y7, 0x4D);
|
||||
t1 = _mm256_permutevar_ps(y11, yi2);
|
||||
|
||||
|
||||
/* t2[5] t2[5] t2[4] t2[2] t1[5] t1[5] t1[4] t1[2] */
|
||||
y11 = _mm256_shuffle_ps(y6, y7, 0x4A);
|
||||
t2 = _mm256_permutevar_ps(y11, yi3);
|
||||
|
||||
/*
|
||||
dest[0][2] = b * t1[0] - c * t1[1] + d * t1[2];
|
||||
dest[1][2] =-(a * t1[0] - c * t1[3] + d * t1[4]);
|
||||
dest[2][2] = a * t1[1] - b * t1[3] + d * t1[5];
|
||||
dest[3][2] =-(a * t1[2] - b * t1[4] + c * t1[5]);
|
||||
|
||||
dest[0][3] =-(b * t2[0] - c * t2[1] + d * t2[2]);
|
||||
dest[1][3] = a * t2[0] - c * t2[3] + d * t2[4];
|
||||
dest[2][3] =-(a * t2[1] - b * t2[3] + d * t2[5]);
|
||||
dest[3][3] = a * t2[2] - b * t2[4] + c * t2[5];
|
||||
*/
|
||||
r2 = _mm256_xor_ps(_mm256_add_ps(_mm256_sub_ps(_mm256_mul_ps(y3, t0),
|
||||
_mm256_mul_ps(y4, t1)),
|
||||
_mm256_mul_ps(y5, t2)),
|
||||
flpsign);
|
||||
|
||||
/* determinant */
|
||||
|
||||
y4 = _mm256_mul_ps(y0, r1);
|
||||
y4 = _mm256_permute2f128_ps(y4, y4, 0x30);
|
||||
y4 = _mm256_dp_ps(y0, r1, 0xff);
|
||||
|
||||
y5 = _mm256_div_ps(_mm256_set1_ps(1.0f), y4);
|
||||
r1 = _mm256_mul_ps(r1, y5);
|
||||
r2 = _mm256_mul_ps(r2, y5);
|
||||
|
||||
/* transpose */
|
||||
|
||||
/* d c b a h g f e */
|
||||
/* l k j i p o n m */
|
||||
y0 = _mm256_permute2f128_ps(r1, r1, 0x03);
|
||||
y1 = _mm256_permute2f128_ps(r2, r2, 0x03);
|
||||
|
||||
/* b a f e f e b a */
|
||||
/* j i n m n m j i */
|
||||
/* i m a e m i e a */
|
||||
/* j n b f n j f b */
|
||||
/* n j f b m i e a */
|
||||
y2 = _mm256_shuffle_ps(r1, y0, 0x44);
|
||||
y3 = _mm256_shuffle_ps(r2, y1, 0x44);
|
||||
y4 = _mm256_shuffle_ps(y2, y3, 0x88);
|
||||
y5 = _mm256_shuffle_ps(y2, y3, 0xDD);
|
||||
y6 = _mm256_permute2f128_ps(y4, y5, 0x20);
|
||||
|
||||
/* d c h g h g d c */
|
||||
/* l k p o p o l k */
|
||||
/* k o c g o k g c */
|
||||
/* l p d h p l h d */
|
||||
/* p l h d o k g c */
|
||||
y2 = _mm256_shuffle_ps(r1, y0, 0xEE);
|
||||
y3 = _mm256_shuffle_ps(r2, y1, 0xEE);
|
||||
y4 = _mm256_shuffle_ps(y2, y3, 0x88);
|
||||
y5 = _mm256_shuffle_ps(y2, y3, 0xDD);
|
||||
y7 = _mm256_permute2f128_ps(y4, y5, 0x20);
|
||||
|
||||
glmm_store256(dest[0], y6);
|
||||
glmm_store256(dest[2], y7);
|
||||
}
|
||||
|
||||
CGLM_INLINE
|
||||
void
|
||||
glm_mat4_inv_fast_avx(mat4 mat, mat4 dest) {
|
||||
__m256 y0, y1, y2, y3, y4, y5, y6, y7, y8, y9, y10, y11, y12, y13;
|
||||
__m256 yt0, yt1, yt2;
|
||||
__m256 t0, t1, t2;
|
||||
__m256 r1, r2;
|
||||
__m256 flpsign;
|
||||
__m256i yi1, yi2, yi3;
|
||||
|
||||
y0 = glmm_load256(mat[0]); /* h g f e d c b a */
|
||||
y1 = glmm_load256(mat[2]); /* p o n m l k j i */
|
||||
|
||||
y2 = _mm256_permute2f128_ps(y1, y1, 0x00); /* l k j i l k j i */
|
||||
y3 = _mm256_permute2f128_ps(y1, y1, 0x11); /* p o n m p o n m */
|
||||
y4 = _mm256_permute2f128_ps(y0, y0, 0x03); /* d c b a h g f e */
|
||||
y13 = _mm256_permute2f128_ps(y4, y4, 0x00); /* h g f e h g f e */
|
||||
|
||||
yi1 = _mm256_set_epi32(0, 0, 0, 0, 0, 1, 1, 2);
|
||||
yi2 = _mm256_set_epi32(1, 1, 1, 2, 3, 2, 3, 3);
|
||||
flpsign = _mm256_set_ps(0.f, -0.f, 0.f, -0.f, -0.f, 0.f, -0.f, 0.f);
|
||||
|
||||
/* i i i i i j j k */
|
||||
/* n n n o p o p p */
|
||||
/* m m m m m n n o */
|
||||
/* j j j k l k l l */
|
||||
/* e e e e e f f g */
|
||||
/* f f f g h g h h */
|
||||
y5 = _mm256_permutevar_ps(y2, yi1);
|
||||
y6 = _mm256_permutevar_ps(y3, yi2);
|
||||
y7 = _mm256_permutevar_ps(y3, yi1);
|
||||
y8 = _mm256_permutevar_ps(y2, yi2);
|
||||
y2 = _mm256_permutevar_ps(y13, yi1);
|
||||
y3 = _mm256_permutevar_ps(y13, yi2);
|
||||
|
||||
yi1 = _mm256_set_epi32(2, 1, 0, 0, 2, 1, 0, 0);
|
||||
yi2 = _mm256_set_epi32(2, 1, 1, 0, 2, 1, 1, 0);
|
||||
yi3 = _mm256_set_epi32(3, 3, 2, 0, 3, 3, 2, 0);
|
||||
|
||||
/*
|
||||
t0[0] = k * p - o * l; t1[0] = g * p - o * h; t2[0] = g * l - k * h;
|
||||
t0[1] = j * p - n * l; t1[1] = f * p - n * h; t2[1] = f * l - j * h;
|
||||
t0[2] = j * o - n * k; t1[2] = f * o - n * g; t2[2] = f * k - j * g;
|
||||
t0[3] = i * p - m * l; t1[3] = e * p - m * h; t2[3] = e * l - i * h;
|
||||
t0[4] = i * o - m * k; t1[4] = e * o - m * g; t2[4] = e * k - i * g;
|
||||
t0[5] = i * n - m * j; t1[5] = e * n - m * f; t2[5] = e * j - i * f;
|
||||
*/
|
||||
yt0 = _mm256_sub_ps(_mm256_mul_ps(y5, y6), _mm256_mul_ps(y7, y8));
|
||||
yt1 = _mm256_sub_ps(_mm256_mul_ps(y2, y6), _mm256_mul_ps(y7, y3));
|
||||
yt2 = _mm256_sub_ps(_mm256_mul_ps(y2, y8), _mm256_mul_ps(y5, y3));
|
||||
|
||||
/* t3 t2 t1 t0 t3 t2 t1 t0 */
|
||||
/* t5 t5 t5 t4 t5 t5 t5 t4 */
|
||||
y9 = _mm256_permute2f128_ps(yt0, yt0, 0x00);
|
||||
y10 = _mm256_permute2f128_ps(yt0, yt0, 0x11);
|
||||
|
||||
/* t2 t1 t0 t0 t2 t1 t0 t0 */
|
||||
t0 = _mm256_permutevar_ps(y9, yi1);
|
||||
|
||||
/* t4 t3 t3 t1 t4 t3 t3 t1 */
|
||||
y11 = _mm256_shuffle_ps(y9, y10, 0x4D);
|
||||
y12 = _mm256_permutevar_ps(y11, yi2);
|
||||
t1 = _mm256_permute2f128_ps(y12, y9, 0x00);
|
||||
|
||||
/* t5 t5 t4 t2 t5 t5 t4 t2 */
|
||||
y11 = _mm256_shuffle_ps(y9, y10, 0x4A);
|
||||
y12 = _mm256_permutevar_ps(y11, yi3);
|
||||
t2 = _mm256_permute2f128_ps(y12, y12, 0x00);
|
||||
|
||||
/* a a a b e e e f */
|
||||
/* b b c c f f g g */
|
||||
/* c d d d g h h h */
|
||||
y9 = _mm256_permute_ps(y4, 0x01);
|
||||
y10 = _mm256_permute_ps(y4, 0x5A);
|
||||
y11 = _mm256_permute_ps(y4, 0xBF);
|
||||
|
||||
/*
|
||||
dest[0][0] = f * t[0] - g * t[1] + h * t[2];
|
||||
dest[1][0] =-(e * t[0] - g * t[3] + h * t[4]);
|
||||
dest[2][0] = e * t[1] - f * t[3] + h * t[5];
|
||||
dest[3][0] =-(e * t[2] - f * t[4] + g * t[5]);
|
||||
|
||||
dest[0][1] =-(b * t[0] - c * t[1] + d * t[2]);
|
||||
dest[1][1] = a * t[0] - c * t[3] + d * t[4];
|
||||
dest[2][1] =-(a * t[1] - b * t[3] + d * t[5]);
|
||||
dest[3][1] = a * t[2] - b * t[4] + c * t[5];
|
||||
*/
|
||||
r1 = _mm256_xor_ps(_mm256_add_ps(_mm256_sub_ps(_mm256_mul_ps(y9, t0),
|
||||
_mm256_mul_ps(y10, t1)),
|
||||
_mm256_mul_ps(y11, t2)),
|
||||
flpsign);
|
||||
|
||||
/* d c b a d c b a */
|
||||
y2 = _mm256_permute2f128_ps(y0, y0, 0x0);
|
||||
|
||||
/* a a a b a a a b */
|
||||
/* b b c c b b c c */
|
||||
/* c d d d c d d d */
|
||||
y3 = _mm256_permutevar_ps(y2, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 1));
|
||||
y4 = _mm256_permutevar_ps(y2, _mm256_set_epi32(1, 1, 2, 2, 1, 1, 2, 2));
|
||||
y5 = _mm256_permutevar_ps(y2, _mm256_set_epi32(2, 3, 3, 3, 2, 3, 3, 3));
|
||||
|
||||
/* t2[3] t2[2] t2[1] t2[0] t1[3] t1[2] t1[1] t1[0] */
|
||||
/* t2[5] t2[5] t2[5] t2[4] t1[5] t1[5] t1[5] t1[4] */
|
||||
y6 = _mm256_permute2f128_ps(yt1, yt2, 0x20);
|
||||
y7 = _mm256_permute2f128_ps(yt1, yt2, 0x31);
|
||||
|
||||
/* t2[2] t2[1] t2[0] t2[0] t1[2] t1[1] t1[0] t1[0] */
|
||||
t0 = _mm256_permutevar_ps(y6, yi1);
|
||||
|
||||
/* t1[4] t1[3] t1[3] t1[1] t1[4] t1[3] t1[3] t1[1] */
|
||||
|
||||
/* t1[4] t1[3] t1[3] t1[1] t1[4] t1[3] t1[3] t1[1] */
|
||||
y11 = _mm256_shuffle_ps(y6, y7, 0x4D);
|
||||
t1 = _mm256_permutevar_ps(y11, yi2);
|
||||
|
||||
|
||||
/* t2[5] t2[5] t2[4] t2[2] t1[5] t1[5] t1[4] t1[2] */
|
||||
y11 = _mm256_shuffle_ps(y6, y7, 0x4A);
|
||||
t2 = _mm256_permutevar_ps(y11, yi3);
|
||||
|
||||
/*
|
||||
dest[0][2] = b * t1[0] - c * t1[1] + d * t1[2];
|
||||
dest[1][2] =-(a * t1[0] - c * t1[3] + d * t1[4]);
|
||||
dest[2][2] = a * t1[1] - b * t1[3] + d * t1[5];
|
||||
dest[3][2] =-(a * t1[2] - b * t1[4] + c * t1[5]);
|
||||
|
||||
dest[0][3] =-(b * t2[0] - c * t2[1] + d * t2[2]);
|
||||
dest[1][3] = a * t2[0] - c * t2[3] + d * t2[4];
|
||||
dest[2][3] =-(a * t2[1] - b * t2[3] + d * t2[5]);
|
||||
dest[3][3] = a * t2[2] - b * t2[4] + c * t2[5];
|
||||
*/
|
||||
r2 = _mm256_xor_ps(_mm256_add_ps(_mm256_sub_ps(_mm256_mul_ps(y3, t0),
|
||||
_mm256_mul_ps(y4, t1)),
|
||||
_mm256_mul_ps(y5, t2)),
|
||||
flpsign);
|
||||
|
||||
/* determinant */
|
||||
|
||||
y4 = _mm256_mul_ps(y0, r1);
|
||||
y4 = _mm256_permute2f128_ps(y4, y4, 0x30);
|
||||
y4 = _mm256_dp_ps(y0, r1, 0xff);
|
||||
|
||||
y5 = _mm256_rcp_ps(y4);
|
||||
r1 = _mm256_mul_ps(r1, y5);
|
||||
r2 = _mm256_mul_ps(r2, y5);
|
||||
|
||||
/* transpose */
|
||||
|
||||
/* d c b a h g f e */
|
||||
/* l k j i p o n m */
|
||||
y0 = _mm256_permute2f128_ps(r1, r1, 0x03);
|
||||
y1 = _mm256_permute2f128_ps(r2, r2, 0x03);
|
||||
|
||||
/* b a f e f e b a */
|
||||
/* j i n m n m j i */
|
||||
/* i m a e m i e a */
|
||||
/* j n b f n j f b */
|
||||
/* n j f b m i e a */
|
||||
y2 = _mm256_shuffle_ps(r1, y0, 0x44);
|
||||
y3 = _mm256_shuffle_ps(r2, y1, 0x44);
|
||||
y4 = _mm256_shuffle_ps(y2, y3, 0x88);
|
||||
y5 = _mm256_shuffle_ps(y2, y3, 0xDD);
|
||||
y6 = _mm256_permute2f128_ps(y4, y5, 0x20);
|
||||
|
||||
/* d c h g h g d c */
|
||||
/* l k p o p o l k */
|
||||
/* k o c g o k g c */
|
||||
/* l p d h p l h d */
|
||||
/* p l h d o k g c */
|
||||
y2 = _mm256_shuffle_ps(r1, y0, 0xEE);
|
||||
y3 = _mm256_shuffle_ps(r2, y1, 0xEE);
|
||||
y4 = _mm256_shuffle_ps(y2, y3, 0x88);
|
||||
y5 = _mm256_shuffle_ps(y2, y3, 0xDD);
|
||||
y7 = _mm256_permute2f128_ps(y4, y5, 0x20);
|
||||
|
||||
glmm_store256(dest[0], y6);
|
||||
glmm_store256(dest[2], y7);
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif /* cglm_mat_simd_avx_h */
|
||||
|
||||
@@ -33,20 +33,18 @@
|
||||
#endif
|
||||
|
||||
typedef float vec2[2];
|
||||
typedef CGLM_ALIGN_IF(8) float vec3[3];
|
||||
typedef float vec3[3];
|
||||
typedef int ivec3[3];
|
||||
typedef CGLM_ALIGN_IF(16) float vec4[4];
|
||||
typedef vec4 versor;
|
||||
typedef vec3 mat3[3];
|
||||
|
||||
#ifdef __AVX__
|
||||
typedef CGLM_ALIGN_IF(32) vec3 mat3[3];
|
||||
typedef CGLM_ALIGN_IF(32) vec4 mat4[4];
|
||||
#else
|
||||
typedef vec3 mat3[3];
|
||||
typedef CGLM_ALIGN_IF(16) vec4 mat4[4];
|
||||
#endif
|
||||
|
||||
typedef vec4 versor;
|
||||
|
||||
#define GLM_E 2.71828182845904523536028747135266250 /* e */
|
||||
#define GLM_LOG2E 1.44269504088896340735992468100189214 /* log2(e) */
|
||||
#define GLM_LOG10E 0.434294481903251827651128918916605082 /* log10(e) */
|
||||
|
||||
@@ -21,7 +21,6 @@
|
||||
CGLM_INLINE void glm_vec3_zero(vec3 v);
|
||||
CGLM_INLINE void glm_vec3_one(vec3 v);
|
||||
CGLM_INLINE float glm_vec3_dot(vec3 a, vec3 b);
|
||||
CGLM_INLINE void glm_vec3_cross(vec3 a, vec3 b, vec3 d);
|
||||
CGLM_INLINE float glm_vec3_norm2(vec3 v);
|
||||
CGLM_INLINE float glm_vec3_norm(vec3 v);
|
||||
CGLM_INLINE void glm_vec3_add(vec3 a, vec3 b, vec3 dest);
|
||||
@@ -47,6 +46,8 @@
|
||||
CGLM_INLINE void glm_vec3_inv_to(vec3 v, vec3 dest);
|
||||
CGLM_INLINE void glm_vec3_normalize(vec3 v);
|
||||
CGLM_INLINE void glm_vec3_normalize_to(vec3 v, vec3 dest);
|
||||
CGLM_INLINE void glm_vec3_cross(vec3 a, vec3 b, vec3 d);
|
||||
CGLM_INLINE void glm_vec3_crossn(vec3 a, vec3 b, vec3 dest);
|
||||
CGLM_INLINE float glm_vec3_distance(vec3 a, vec3 b);
|
||||
CGLM_INLINE float glm_vec3_angle(vec3 a, vec3 b);
|
||||
CGLM_INLINE void glm_vec3_rotate(vec3 v, float angle, vec3 axis);
|
||||
@@ -166,22 +167,6 @@ glm_vec3_dot(vec3 a, vec3 b) {
|
||||
return a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
|
||||
}
|
||||
|
||||
/*!
|
||||
* @brief vec3 cross product
|
||||
*
|
||||
* @param[in] a source 1
|
||||
* @param[in] b source 2
|
||||
* @param[out] d destination
|
||||
*/
|
||||
CGLM_INLINE
|
||||
void
|
||||
glm_vec3_cross(vec3 a, vec3 b, vec3 d) {
|
||||
/* (u2.v3 - u3.v2, u3.v1 - u1.v3, u1.v2 - u2.v1) */
|
||||
d[0] = a[1] * b[2] - a[2] * b[1];
|
||||
d[1] = a[2] * b[0] - a[0] * b[2];
|
||||
d[2] = a[0] * b[1] - a[1] * b[0];
|
||||
}
|
||||
|
||||
/*!
|
||||
* @brief norm * norm (magnitude) of vec
|
||||
*
|
||||
@@ -444,7 +429,7 @@ glm_vec3_maxadd(vec3 a, vec3 b, vec3 dest) {
|
||||
* it applies += operator so dest must be initialized
|
||||
*
|
||||
* @param[in] a vector
|
||||
* @param[in] s scalar
|
||||
* @param[in] b vector
|
||||
* @param[out] dest dest += min(a, b)
|
||||
*/
|
||||
CGLM_INLINE
|
||||
@@ -521,6 +506,36 @@ glm_vec3_normalize_to(vec3 v, vec3 dest) {
|
||||
glm_vec3_scale(v, 1.0f / norm, dest);
|
||||
}
|
||||
|
||||
/*!
|
||||
* @brief cross product of two vector (RH)
|
||||
*
|
||||
* @param[in] a vector 1
|
||||
* @param[in] b vector 2
|
||||
* @param[out] dest destination
|
||||
*/
|
||||
CGLM_INLINE
|
||||
void
|
||||
glm_vec3_cross(vec3 a, vec3 b, vec3 dest) {
|
||||
/* (u2.v3 - u3.v2, u3.v1 - u1.v3, u1.v2 - u2.v1) */
|
||||
dest[0] = a[1] * b[2] - a[2] * b[1];
|
||||
dest[1] = a[2] * b[0] - a[0] * b[2];
|
||||
dest[2] = a[0] * b[1] - a[1] * b[0];
|
||||
}
|
||||
|
||||
/*!
|
||||
* @brief cross product of two vector (RH) and normalize the result
|
||||
*
|
||||
* @param[in] a vector 1
|
||||
* @param[in] b vector 2
|
||||
* @param[out] dest destination
|
||||
*/
|
||||
CGLM_INLINE
|
||||
void
|
||||
glm_vec3_crossn(vec3 a, vec3 b, vec3 dest) {
|
||||
glm_vec3_cross(a, b, dest);
|
||||
glm_vec3_normalize(dest);
|
||||
}
|
||||
|
||||
/*!
|
||||
* @brief angle between two vectors
|
||||
*
|
||||
|
||||
@@ -333,7 +333,7 @@ glm_vec4_subs(vec4 v, float s, vec4 dest) {
|
||||
*
|
||||
* @param a vector1
|
||||
* @param b vector2
|
||||
* @param d dest = (a[0] * b[0], a[1] * b[1], a[2] * b[2], a[3] * b[3])
|
||||
* @param dest dest = (a[0] * b[0], a[1] * b[1], a[2] * b[2], a[3] * b[3])
|
||||
*/
|
||||
CGLM_INLINE
|
||||
void
|
||||
@@ -426,7 +426,6 @@ glm_vec4_divs(vec4 v, float s, vec4 dest) {
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/*!
|
||||
* @brief add two vectors and add result to sum
|
||||
*
|
||||
@@ -553,7 +552,7 @@ glm_vec4_maxadd(vec4 a, vec4 b, vec4 dest) {
|
||||
* it applies += operator so dest must be initialized
|
||||
*
|
||||
* @param[in] a vector
|
||||
* @param[in] s scalar
|
||||
* @param[in] b vector
|
||||
* @param[out] dest dest += min(a, b)
|
||||
*/
|
||||
CGLM_INLINE
|
||||
|
||||
@@ -10,6 +10,6 @@
|
||||
|
||||
#define CGLM_VERSION_MAJOR 0
|
||||
#define CGLM_VERSION_MINOR 5
|
||||
#define CGLM_VERSION_PATCH 0
|
||||
#define CGLM_VERSION_PATCH 2
|
||||
|
||||
#endif /* cglm_version_h */
|
||||
|
||||
@@ -88,6 +88,12 @@ glmc_perspective(float fovy,
|
||||
dest);
|
||||
}
|
||||
|
||||
CGLM_EXPORT
|
||||
void
|
||||
glmc_persp_move_far(mat4 proj, float deltaFar) {
|
||||
glm_persp_move_far(proj, deltaFar);
|
||||
}
|
||||
|
||||
CGLM_EXPORT
|
||||
void
|
||||
glmc_perspective_default(float aspect, mat4 dest) {
|
||||
|
||||
@@ -50,6 +50,12 @@ glmc_mat3_mulv(mat3 m, vec3 v, vec3 dest) {
|
||||
glm_mat3_mulv(m, v, dest);
|
||||
}
|
||||
|
||||
CGLM_EXPORT
|
||||
float
|
||||
glmc_mat3_trace(mat3 m) {
|
||||
return glm_mat3_trace(m);
|
||||
}
|
||||
|
||||
CGLM_EXPORT
|
||||
void
|
||||
glmc_mat3_quat(mat3 m, versor dest) {
|
||||
|
||||
12
src/mat4.c
12
src/mat4.c
@@ -74,6 +74,18 @@ glmc_mat4_mulv3(mat4 m, vec3 v, float last, vec3 dest) {
|
||||
glm_mat4_mulv3(m, v, last, dest);
|
||||
}
|
||||
|
||||
CGLM_EXPORT
|
||||
float
|
||||
glmc_mat4_trace(mat4 m) {
|
||||
return glm_mat4_trace(m);
|
||||
}
|
||||
|
||||
CGLM_EXPORT
|
||||
float
|
||||
glmc_mat4_trace3(mat4 m) {
|
||||
return glm_mat4_trace3(m);
|
||||
}
|
||||
|
||||
CGLM_EXPORT
|
||||
void
|
||||
glmc_mat4_quat(mat4 m, versor dest) {
|
||||
|
||||
10
src/vec3.c
10
src/vec3.c
@@ -40,8 +40,14 @@ glmc_vec3_dot(vec3 a, vec3 b) {
|
||||
|
||||
CGLM_EXPORT
|
||||
void
|
||||
glmc_vec3_cross(vec3 a, vec3 b, vec3 d) {
|
||||
glm_vec3_cross(a, b, d);
|
||||
glmc_vec3_cross(vec3 a, vec3 b, vec3 dest) {
|
||||
glm_vec3_cross(a, b, dest);
|
||||
}
|
||||
|
||||
CGLM_EXPORT
|
||||
void
|
||||
glmc_vec3_crossn(vec3 a, vec3 b, vec3 dest) {
|
||||
glm_vec3_crossn(a, b, dest);
|
||||
}
|
||||
|
||||
CGLM_EXPORT
|
||||
|
||||
@@ -24,9 +24,9 @@ test_mat3(void **state) {
|
||||
for (i = 0; i < m; i++) {
|
||||
for (j = 0; j < n; j++) {
|
||||
if (i == j)
|
||||
assert_true(m3[i][j] == 1.0f);
|
||||
assert_true(glm_eq(m3[i][j], 1.0f));
|
||||
else
|
||||
assert_true(m3[i][j] == 0.0f);
|
||||
assert_true(glm_eq(m3[i][j], 0.0f));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -24,9 +24,9 @@ test_mat4(void **state) {
|
||||
for (i = 0; i < m; i++) {
|
||||
for (j = 0; j < n; j++) {
|
||||
if (i == j)
|
||||
assert_true(m3[i][j] == 1.0f);
|
||||
assert_true(glm_eq(m3[i][j], 1.0f));
|
||||
else
|
||||
assert_true(m3[i][j] == 0.0f);
|
||||
assert_true(glm_eq(m3[i][j], 0.0f));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ test_quat(void **state) {
|
||||
|
||||
/* 0. test identity quat */
|
||||
glm_quat_identity(q4);
|
||||
assert_true(glm_quat_real(q4) == cosf(glm_rad(0.0f) * 0.5f));
|
||||
assert_true(glm_eq(glm_quat_real(q4), cosf(glm_rad(0.0f) * 0.5f)));
|
||||
glm_quat_mat4(q4, rot1);
|
||||
test_assert_mat4_eq2(rot1, GLM_MAT4_IDENTITY, 0.000009);
|
||||
|
||||
@@ -118,7 +118,7 @@ test_quat(void **state) {
|
||||
/* 9. test imag, real */
|
||||
|
||||
/* 9.1 real */
|
||||
assert_true(glm_quat_real(q4) == cosf(glm_rad(-90.0f) * 0.5f));
|
||||
assert_true(glm_eq(glm_quat_real(q4), cosf(glm_rad(-90.0f) * 0.5f)));
|
||||
|
||||
/* 9.2 imag */
|
||||
glm_quat_imag(q4, imag);
|
||||
|
||||
Reference in New Issue
Block a user