Compare commits

..

47 Commits

Author SHA1 Message Date
Recep Aslantas
077e304fc5 Merge pull request #42 from recp/optimizations
simd: optional shuffle configuration to save move instructions
2018-05-10 16:47:00 +03:00
Recep Aslantas
599524dacf docs: add new option to docs 2018-05-10 16:42:13 +03:00
Recep Aslantas
da5ad69863 simd: rename _mm_ extensions to glmm_ 2018-05-10 14:27:53 +03:00
Recep Aslantas
9fc2ead8ef Merge branch 'master' into optimizations 2018-05-10 13:59:10 +03:00
Recep Aslantas
48d33c16cb Merge pull request #53 from recp/simd
simd: Make alignment OPTIONAL
2018-05-10 13:57:31 +03:00
Recep Aslantas
464bd917d0 update readme 2018-05-10 12:21:33 +03:00
Recep Aslantas
c6d07bb6eb surround PI with parentheses + code style + update docs 2018-05-10 12:18:54 +03:00
Recep Aslantas
94b286f1f9 docs: add new alignment option to docs 2018-05-09 16:43:42 +03:00
Recep Aslantas
f774925e8a win, simd: make sure that CGLM_ALL_UNALIGNED is defined for older visual studios 2018-05-09 15:30:54 +03:00
Recep Aslantas
0e49e95161 win: update visual studio version for align requirement 2018-05-08 18:29:02 +03:00
Recep Aslantas
b277357800 update gitignore 2018-05-08 18:28:31 +03:00
Recep Aslantas
835cec2ccb drop alignment requirement if CGLM_ALL_UNALIGNED defined
* bring alignment back for visual studio 2017
2018-05-08 16:26:33 +03:00
Recep Aslantas
5dbbd0826d simd: replace glm_simd_ with glmm_
* now glmm_ is used as global simd namespace
2018-05-08 15:55:36 +03:00
Recep Aslantas
56f0bb0928 simd, avx: make alignment optional for load/store operations 2018-05-08 15:35:17 +03:00
Recep Aslantas
568001d26a simd, sse2: make alignment optional for store operations 2018-05-08 15:31:09 +03:00
Recep Aslantas
252bf925fc simd, sse2: make alignment optional for load operations 2018-05-08 15:25:23 +03:00
Recep Aslantas
0f339c5c03 fix header dependencies 2018-05-07 21:12:29 +03:00
Recep Aslantas
a9d56f2dae docs: fix typos 2018-05-04 00:50:56 +03:00
Recep Aslantas
dd60496ffc Merge pull request #49 from Yatima1460/master
replace _WIN32 with _MSC_VER
2018-04-30 19:08:59 +03:00
Federico Santamorena
7c0e9e99c6 _WIN32 to _MSC_VER 2018-04-30 17:17:06 +02:00
Federico Santamorena
064209c917 replaced _WIN32 with _MSC_VER 2018-04-30 17:13:16 +02:00
Recep Aslantas
94d6036c38 suppress warnings for Mingw 2018-04-30 11:09:42 +03:00
Recep Aslantas
6c01eff056 now working on v0.4.5 2018-04-30 10:59:40 +03:00
Recep Aslantas
ada69a7c43 fix cocoapods validation errors 2018-04-22 10:14:17 +03:00
Recep Aslantas
cef97fca3e add cocoapods spec 2018-04-22 01:03:17 +03:00
Recep Aslantas
498a33fac5 fix public header's includes 2018-04-21 22:36:25 +03:00
Recep Aslantas
3c7a729729 build: remove making symbolic link for libtoolize 2018-04-20 15:19:06 +03:00
Recep Aslantas
a6a37995e9 build: update automake sources 2018-04-18 23:02:15 +03:00
Recep Aslantas
6202179c23 update version 2018-04-18 22:30:20 +03:00
Recep Aslantas
22b699174c build: improve calling libtoolize 2018-04-18 21:47:53 +03:00
Recep Aslantas
016c0a71a6 Merge pull request #46 from recp/affine
affine transform update
2018-04-18 15:25:40 +03:00
Recep Aslantas
e28cf1d3f6 remove unused variable 2018-04-18 15:23:07 +03:00
Recep Aslantas
63966ee5c0 quat: use the new "glm_mul_rot" for quaternion
* this should be faster than mat4_mul
2018-04-18 15:16:24 +03:00
Recep Aslantas
a723ecdb7e add troubleshooting to docs 2018-04-18 15:11:06 +03:00
Recep Aslantas
065f93ab3c update docs, drop scale1 2018-04-18 14:30:44 +03:00
Recep Aslantas
4dbcd28fdb use mul_rot for rotations to make them faster 2018-04-18 14:12:56 +03:00
Recep Aslantas
be0e3fc9f2 new matrix multiplication helper for rotation matrices 2018-04-18 14:05:09 +03:00
Recep Aslantas
d648f5772d affine: drop rotate_ndc functions 2018-04-18 10:57:35 +03:00
Recep Aslantas
f163fcd043 simd: load vec3 helpers for sse/sse2 2018-04-18 00:00:47 +03:00
Recep Aslantas
27ab6a7dd0 update docs, add clarifications for affine transforms 2018-04-17 15:42:24 +03:00
Recep Aslantas
33e951fe2e implement rotate_at for quat and provide make version 2018-04-17 12:17:04 +03:00
Recep Aslantas
c63c6c90ac implement rotate_at 2018-04-17 11:12:18 +03:00
Recep Aslantas
a2792178db add missing call funcs for affine transforms 2018-04-17 11:07:57 +03:00
Recep Aslantas
cefd5fb53d test: add some tests for affine transforms 2018-04-17 10:33:52 +03:00
Recep Aslantas
821c79572f test: add some tests for mat3 2018-04-15 20:47:38 +03:00
Recep Aslantas
f0a27d0ce2 now working on v0.4.2 2018-04-15 20:46:46 +03:00
Recep Aslantas
cfd3600107 simd: optional shuffle configuration to save move instructions 2018-04-04 22:42:21 +03:00
46 changed files with 1265 additions and 529 deletions

8
.gitignore vendored
View File

@@ -61,3 +61,11 @@ docs/build/*
win/cglm_test_*
* copy.*
*.o
*.obj
*codeanalysis.*.xml
*codeanalysis.xml
*.lib
*.tlog
win/x64
win/x85
win/Debug

View File

@@ -22,6 +22,8 @@ Complete documentation: http://cglm.readthedocs.io
- **[bugfix]** euler angles were implemented in reverse (extrinsic) order; this has been fixed and they are now intrinsic. Make sure that
you have the latest version
- **[major change]** starting with v0.4.0, quaternions are stored as [x, y, z, w]; it was [w, x, y, z] in v0.3.5 and earlier versions
- **[api rename]** starting with v0.4.5, **glm_simd** functions are renamed to **glmm_**
- **[new option]** starting with v0.4.5, you can disable the alignment requirement; check the options in the docs.
#### Note for C++ developers:
If you aren't aware of the original GLM library yet, you may also want to look at:

View File

@@ -8,17 +8,14 @@
cd $(dirname "$0")
if [ "$(uname)" = "Darwin" ]; then
libtoolBin=$(which glibtoolize)
libtoolBinDir=$(dirname "${libtoolBin}")
if [ ! -f "${libtoolBinDir}/libtoolize" ]; then
ln -s $libtoolBin "${libtoolBinDir}/libtoolize"
fi
fi
autoheader
libtoolize
if [ "$(uname)" = "Darwin" ]; then
glibtoolize
else
libtoolize
fi
aclocal -I m4
autoconf
automake --add-missing --copy

View File

@@ -9,19 +9,8 @@
# check if deps are pulled
git submodule update --init --recursive
# fix glibtoolize
cd $(dirname "$0")
if [ "$(uname)" = "Darwin" ]; then
libtoolBin=$(which glibtoolize)
libtoolBinDir=$(dirname "${libtoolBin}")
if [ ! -f "${libtoolBinDir}/libtoolize" ]; then
ln -s $libtoolBin "${libtoolBinDir}/libtoolize"
fi
fi
# general deps: gcc make autoconf automake libtool cmake
# test - cmocka

28
cglm.podspec Normal file
View File

@@ -0,0 +1,28 @@
Pod::Spec.new do |s|
# Description
s.name = "cglm"
s.version = "0.4.4"
s.summary = "📽 Optimized OpenGL/Graphics Math (glm) for C"
s.description = <<-DESC
cglm is a math library for graphics programming in C. It is similar to the original glm library but written for C instead of C++ (you can use it in C++ too). See the documentation or README for all features.
DESC
s.documentation_url = "http://cglm.readthedocs.io"
# Home
s.homepage = "https://github.com/recp/cglm"
s.license = { :type => "MIT", :file => "LICENSE" }
s.author = { "Recep Aslantas" => "recp@acm.org" }
# Sources
s.source = { :git => "https://github.com/recp/cglm.git", :tag => "v#{s.version}" }
s.source_files = "src", "include/cglm/**/*.h"
s.public_header_files = "include", "include/cglm/**/*.h"
s.exclude_files = "src/win/*", "src/dllmain.c", "src/**/*.h"
s.preserve_paths = "include", "src"
s.header_mappings_dir = "include"
# Linking
s.library = "m"
end

View File

@@ -7,7 +7,7 @@
#*****************************************************************************
AC_PREREQ([2.69])
AC_INIT([cglm], [0.4.1], [info@recp.me])
AC_INIT([cglm], [0.4.5], [info@recp.me])
AM_INIT_AUTOMAKE([-Wall -Werror foreign subdir-objects])
AC_CONFIG_MACRO_DIR([m4])

View File

@@ -33,6 +33,7 @@ Table of contents (click func go):
Functions:
1. :c:func:`glm_mul`
#. :c:func:`glm_mul_rot`
#. :c:func:`glm_inv_tr`
Functions documentation
@@ -59,6 +60,27 @@ Functions documentation
| *[in]* **m2** affine matrix 2
| *[out]* **dest** result matrix
.. c:function:: void glm_mul_rot(mat4 m1, mat4 m2, mat4 dest)
| this is similar to glm_mat4_mul but specialized for rotation matrices
The right matrix's format should be (the left one is free):
.. code-block:: text
R R R 0
R R R 0
R R R 0
0 0 0 1
This skips some multiplications, so it should be faster than mat4_mul.
If you are not sure about the matrix format then DON'T use this! Use mat4_mul instead.
Parameters:
| *[in]* **m1** affine matrix 1
| *[in]* **m2** affine matrix 2
| *[out]* **dest** result matrix
.. c:function:: void glm_inv_tr(mat4 mat)
| inverse orthonormal rotation + translation matrix (rigid-body)

View File

@@ -5,6 +5,8 @@ affine transforms
Header: cglm/affine.h
Initialize Transform Matrices
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Functions with the **_make** prefix expect that you don't have a matrix yet; they create
a matrix for you, so you don't need to pass an identity matrix.
@@ -15,6 +17,107 @@ before sending to transfrom functions.
There are also functions to decompose a transform matrix. These functions can't
decompose a matrix after it has been projected.
Rotation Center
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Rotation functions use the origin as the rotation center (pivot/anchor point);
since scale factors are stored in the rotation matrix, the same may also be true for scaling.
cglm provides some functions for rotating around a given point, e.g.
**glm_rotate_at**, **glm_quat_rotate_at**; a minimal sketch follows below. Use them or see the next section for the algorithm ("Rotate or Scale around specific Point (Pivot Point / Anchor Point)").
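The pivot, angle and axis values here are hypothetical:
.. code-block:: c

   mat4 m     = GLM_MAT4_IDENTITY_INIT;
   vec3 pivot = {0.5f, 0.5f, 0.0f};
   vec3 axis  = {0.0f, 0.0f, 1.0f};

   /* rotate ~45 degrees around Z, using pivot as the rotation center */
   glm_rotate_at(m, pivot, 0.785f, axis);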
Rotate or Scale around specific Point (Anchor Point)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If you want to rotate a model around an arbitrary point, follow these steps:
1. Move the model from the pivot point to the origin: **translate(-pivot.x, -pivot.y, -pivot.z)**
2. Apply the rotation (or scaling)
3. Move the model back from the origin to the pivot (reverse of step 1): **translate(pivot.x, pivot.y, pivot.z)**
**glm_rotate_at**, **glm_quat_rotate_at** and their helper functions work this way.
The implementation would be:
.. code-block:: c
:linenos:
glm_translate(m, pivot);
glm_rotate(m, angle, axis);
glm_translate(m, pivotInv); /* pivotInv = -pivot */
Transforms Order
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
It is important to understand this part, especially if you call transform
functions multiple times.
`glm_translate`, `glm_rotate`, `glm_scale` and `glm_quat_rotate` and their
helper functions work like this (cglm may also provide the reverse order as an alternative in the future):
.. code-block:: c
:linenos:
TransformMatrix = TransformMatrix * TranslateMatrix; // glm_translate()
TransformMatrix = TransformMatrix * RotateMatrix; // glm_rotate(), glm_quat_rotate()
TransformMatrix = TransformMatrix * ScaleMatrix; // glm_scale()
As you can see, the new matrix is multiplied on the right. For instance, what happens if you call `glm_translate` twice?
.. code-block:: c
:linenos:
glm_translate(transform, translate1); /* transform = transform * translate1 */
glm_translate(transform, translate2); /* transform = transform * translate2 */
glm_rotate(transform, angle, axis); /* transform = transform * rotation */
Now let's walk through this:
1. You call translate with `translate1` and expect it to be the first transform
because you called it first, right?
The result is **`transform = transform * translate1`**
2. Then you call translate with `translate2` and expect it to be the second transform?
The result is **`transform = transform * translate2`**. Now let's expand transform;
it was `transform * translate1` before the second call.
Now it is **`transform = transform * translate1 * translate2`**. Do you see what happened?
3. After the last call the transform is:
**`transform = transform * translate1 * translate2 * rotation`**
The order is: **rotation is applied first**, then **translate2**, then **translate1**
It is all about matrix multiplication order. It is similar to the MVP matrix:
`MVP = Projection * View * Model`; the model is applied first, then the view, then the projection.
**Confused?**
In the end, the last function called is applied first in shaders.
Alternatively, you can create the transform matrices individually and combine them manually,
but don't forget that `glm_translate`, `glm_rotate`, `glm_scale`... are optimized and should be faster (with smaller assembly output) than manual multiplication
.. code-block:: c
:linenos:
mat4 transform1, transform2, transform3, finalTransform;
glm_translate_make(transform1, translate1);
glm_translate_make(transform2, translate2);
glm_rotate_make(transform3, angle, axis);
/* first apply transform1, then transform2, then transform3 */
glm_mat4_mulN((mat4 *[]){&transform3, &transform2, &transform1}, 3, finalTransform);
/* if you don't want to use mulN, same as above */
glm_mat4_mul(transform3, transform2, finalTransform);
glm_mat4_mul(finalTransform, transform1, finalTransform);
Now transform1 is applied first, then transform2, then transform3.
Table of contents (click to go):
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -29,15 +132,14 @@ Functions:
#. :c:func:`glm_scale_to`
#. :c:func:`glm_scale_make`
#. :c:func:`glm_scale`
#. :c:func:`glm_scale1`
#. :c:func:`glm_scale_uni`
#. :c:func:`glm_rotate_x`
#. :c:func:`glm_rotate_y`
#. :c:func:`glm_rotate_z`
#. :c:func:`glm_rotate_ndc_make`
#. :c:func:`glm_rotate_make`
#. :c:func:`glm_rotate_ndc`
#. :c:func:`glm_rotate`
#. :c:func:`glm_rotate_at`
#. :c:func:`glm_rotate_atm`
#. :c:func:`glm_decompose_scalev`
#. :c:func:`glm_uniscaled`
#. :c:func:`glm_decompose_rs`
@@ -122,10 +224,6 @@ Functions documentation
| *[in, out]* **m** affine transform
| *[in]* **v** scale vector [x, y, z]
.. c:function:: void glm_scale1(mat4 m, float s)
DEPRECATED! Use glm_scale_uni
.. c:function:: void glm_scale_uni(mat4 m, float s)
applies uniform scale to existing transform matrix v = [s, s, s]
@@ -165,16 +263,6 @@ Functions documentation
| *[in]* **angle** angle (radians)
| *[out]* **dest** rotated matrix
.. c:function:: void glm_rotate_ndc_make(mat4 m, float angle, vec3 axis_ndc)
creates a NEW rotation matrix by angle and axis
this name may change in the future. The axis must be normalized
Parameters:
| *[out]* **m** affine transform
| *[in]* **angle** angle (radians)
| *[in]* **axis_ndc** normalized axis
.. c:function:: void glm_rotate_make(mat4 m, float angle, vec3 axis)
creates a NEW rotation matrix by angle and axis,
@@ -185,16 +273,6 @@ Functions documentation
| *[in]* **angle** angle (radians)
| *[in]* **axis** axis
.. c:function:: void glm_rotate_ndc(mat4 m, float angle, vec3 axis_ndc)
rotates an existing transform matrix around the given axis by angle
this name may change in the future; the axis must be normalized.
Parameters:
| *[out]* **m** affine transform
| *[in]* **angle** angle (radians)
| *[in]* **axis_ndc** normalized axis
.. c:function:: void glm_rotate(mat4 m, float angle, vec3 axis)
rotate existing transform matrix around given axis by angle
@@ -204,6 +282,29 @@ Functions documentation
| *[in]* **angle** angle (radians)
| *[in]* **axis** axis
.. c:function:: void glm_rotate_at(mat4 m, vec3 pivot, float angle, vec3 axis)
rotate existing transform around given axis by angle at given pivot point (rotation center)
Parameters:
| *[in, out]* **m** affine transform
| *[in]* **pivot** pivot, anchor point, rotation center
| *[in]* **angle** angle (radians)
| *[in]* **axis** axis
.. c:function:: void glm_rotate_atm(mat4 m, vec3 pivot, float angle, vec3 axis)
| creates a NEW rotation matrix by angle and axis at the given point
| this creates a rotation matrix; it assumes you don't have a matrix already
| this should be faster than glm_rotate_at because it saves one glm_translate call.
Parameters:
| *[out]* **m** affine transform
| *[in]* **pivot** pivot, anchor point, rotation center
| *[in]* **angle** angle (radians)
| *[in]* **axis** axis
.. c:function:: void glm_decompose_scalev(mat4 m, vec3 s)
decompose scale vector

View File

@@ -62,9 +62,9 @@ author = u'Recep Aslantas'
# built documents.
#
# The short X.Y version.
version = u'0.4.1'
version = u'0.4.5'
# The full version, including alpha/beta/rc tags.
release = u'0.4.1'
release = u'0.4.5'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.

View File

@@ -21,17 +21,24 @@ Types:
As you can see, the types don't store extra information, in favor of space.
You can send these values, e.g. a matrix, to OpenGL directly without casting or calling a function like *value_ptr*
Aligment is Required:
Alignment is Required:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
**vec4** and **mat4** requires 16 byte aligment because vec4 and mat4 operations are
**vec4** and **mat4** require 16 byte alignment because vec4 and mat4 operations are
vectorized by SIMD instructions (SSE/AVX).
**UPDATE:**
Starting with v0.4.5 cglm provides an option to disable the alignment requirement; alignment is enabled by default
| Check the :doc:`opt` page for more details
Alignment is also disabled by default for older MSVC versions. Alignment is now only required in Visual Studio 2017 version 15.6+ when the CGLM_ALL_UNALIGNED macro is not defined.
Allocations:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*cglm* doesn't allocate any memory on the heap, so it doesn't provide any allocator.
You must allocate memory yourself. You should also allocate memory for out parameters if you pass a pointer to a memory location.
When allocating memory don't forget that **vec4** and **mat4** requires aligment.
When allocating memory don't forget that **vec4** and **mat4** require alignment.
**NOTE:** Unaligned vec4 and unaligned mat4 operations will be supported in the future; check the todo list.
This matters because you may want to multiply a cglm matrix with an external matrix.
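For instance, a minimal sketch of heap allocation; C11 **aligned_alloc** is used here as an assumption, and any allocator that returns 16 byte aligned memory works:
.. code-block:: c

   #include <stdlib.h> /* aligned_alloc, free */

   mat4 *m = aligned_alloc(16, sizeof(mat4)); /* 16 byte aligned */
   glm_mat4_identity(*m);
   /* ... use *m with cglm functions ... */
   free(m);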

View File

@@ -40,6 +40,8 @@ Also currently only **float** type is supported for most operations.
getting_started
opengl
api
opt
troubleshooting
Indices and tables
==================

42
docs/source/opt.rst Normal file
View File

@@ -0,0 +1,42 @@
.. default-domain:: C
Options
===============================================================================
A few options are provided via macros.
Alignment Option
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
By default, cglm requires types to be aligned. Alignment requirements:
vec3: 8 byte
vec4: 16 byte
mat4: 16 byte
versor: 16 byte
Starting with **v0.4.5** cglm provides an option to disable the alignment requirement.
To enable this option, define the **CGLM_ALL_UNALIGNED** macro before all cglm headers.
You can define it in Xcode, Visual Studio (or other IDEs), or you may prefer
to define it in your build system. If you use pre-compiled versions then you
have to compile cglm with the **CGLM_ALL_UNALIGNED** macro.
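For example, a minimal sketch (assuming the umbrella header is included):
.. code-block:: c

   /* must be defined before ANY cglm header, in every translation unit */
   #define CGLM_ALL_UNALIGNED
   #include <cglm/cglm.h>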
**VERY VERY IMPORTANT:** If you use cglm in multiple projects and
those projects depend on each other, then
| *ALWAYS* or *NEVER* use the **CGLM_ALL_UNALIGNED** macro in linked projects,
unless you know what you are doing. A cglm header included
via 'project A' may force types to be aligned while another cglm header
included via 'project B' may not require alignment. In this case
cglm functions will read from and write to **INVALID MEMORY LOCATIONs**.
ALWAYS USE THE SAME CONFIGURATION / OPTION for **cglm** if you have multiple projects.
For instance, if you set CGLM_ALL_UNALIGNED in one project then set it in the other projects too.
SSE and SSE2 Shuffle Option
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
**_mm_shuffle_ps** generates the **shufps** instruction even if both registers are the same.
You can force it to generate the **pshufd** instruction instead by defining the
**CGLM_USE_INT_DOMAIN** macro. It is not defined by default.
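For example, a minimal sketch:
.. code-block:: c

   /* ask cglm for integer-domain shuffles (pshufd); define before cglm headers */
   #define CGLM_USE_INT_DOMAIN
   #include <cglm/cglm.h>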

View File

@@ -56,6 +56,9 @@ Functions:
#. :c:func:`glm_quat_for`
#. :c:func:`glm_quat_forp`
#. :c:func:`glm_quat_rotatev`
#. :c:func:`glm_quat_rotate`
#. :c:func:`glm_quat_rotate_at`
#. :c:func:`glm_quat_rotate_atm`
Functions documentation
~~~~~~~~~~~~~~~~~~~~~~~
@@ -354,3 +357,24 @@ Functions documentation
| *[in]* **m** existing transform matrix to rotate
| *[in]* **q** quaternion
| *[out]* **dest** rotated matrix/transform
.. c:function:: void glm_quat_rotate_at(mat4 m, versor q, vec3 pivot)
| rotate existing transform matrix using quaternion at pivot point
Parameters:
| *[in, out]* **m** existing transform matrix to rotate
| *[in]* **q** quaternion
| *[in]* **pivot** pivot
.. c:function:: void glm_quat_rotate_atm(mat4 m, versor q, vec3 pivot)
| creates a NEW transform matrix using quaternion at pivot point
| this creates a rotation matrix; it assumes you don't have a matrix already
| this should be faster than glm_quat_rotate_at because it saves one glm_translate call.
Parameters:
| *[out]* **m** result transform matrix
| *[in]* **q** quaternion
| *[in]* **pivot** pivot
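A minimal usage sketch of glm_quat_rotate_at (the quaternion and pivot values are hypothetical):
.. code-block:: c

   mat4   m = GLM_MAT4_IDENTITY_INIT;
   versor q;
   vec3   pivot = {0.5f, 0.5f, 0.0f};

   glm_quat(q, 0.785f, 0.0f, 0.0f, 1.0f); /* angle (radians), axis x, y, z */
   glm_quat_rotate_at(m, q, pivot);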

View File

@@ -0,0 +1,79 @@
.. default-domain:: C
Troubleshooting
================================================================================
Sometimes you may get crashes or wrong results.
Check the topics below.
Memory Allocation:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Again, **cglm** doesn't allocate any memory on the heap.
cglm functions work like memcpy: they copy data from src,
do the calculations, then copy the result to dest.
You are responsible for allocating the **src** and **dest** parameters.
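For example, a minimal sketch where all parameters are caller-allocated stack variables:
.. code-block:: c

   mat4 m1 = GLM_MAT4_IDENTITY_INIT;
   mat4 m2 = GLM_MAT4_IDENTITY_INIT;
   mat4 dest;                  /* out parameter, allocated by the caller */

   glm_mat4_mul(m1, m2, dest); /* result is copied into dest */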
Alignment:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
**vec4** and **mat4** types require 16 byte alignment.
These types are marked with an align attribute to let the compiler know about this
requirement.
But since MSVC (Windows) throws the error:
**"formal parameter with requested alignment of 16 won't be aligned"**
the alignment attribute has been commented out for MSVC:
.. code-block:: c
#if defined(_MSC_VER)
# define CGLM_ALIGN(X) /* __declspec(align(X)) */
#else
# define CGLM_ALIGN(X) __attribute((aligned(X)))
#endif
So MSVC may not know about the alignment requirement when creating variables.
The interesting thing is that, if I remember correctly, Visual Studio 2017
doesn't throw the above error, so we may uncomment that line for Visual Studio 2017;
you may also do it yourself.
**This MSVC issue is still in the TODOs.**
**UPDATE:** Starting with v0.4.5 cglm provides an option to disable the alignment requirement.
Alignment is also disabled by default for older MSVC versions. Alignment is now only required in Visual Studio 2017 version 15.6+ when the CGLM_ALL_UNALIGNED macro is not defined.
Crashes, Invalid Memory Access:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
You are probably trying to write to an invalid memory location.
You may have used the wrong function for what you want to do.
For instance, you may have called **glm_vec4_** functions on a **vec3** data type.
They will try to write 16 bytes, but since **vec3** is only 12 bytes this may cause a
memory access error or make the app exit without saying anything.
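For example, a hedged sketch of this mistake:
.. code-block:: c

   vec3 v3 = {1.0f, 2.0f, 3.0f};
   vec4 v4;

   /* WRONG: vec4 functions read/write 4 floats but v3 holds only 3 */
   /* glm_vec4_copy(v3, v4); */

   glm_vec4(v3, 1.0f, v4); /* OK: promote vec3 to vec4 explicitly */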
Wrong Results:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Again, you may have used the wrong function.
For instance, if you use **glm_normalize()** or **glm_vec_normalize()** on a **vec4**,
it will assume that the passed parameter is a **vec3** and will normalize it as a **vec3**.
Since you need the **vec4** to be normalized in your case, you will get wrong results.
Accessing a vec4 with vec3 functions is valid; you will not get any error, exception or crash.
You only get wrong results if you don't know what you are doing!
So be careful: when your IDE (Xcode, Visual Studio ...) autocompletes function names, READ THEM :)
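For example, a minimal sketch:
.. code-block:: c

   vec4 v = {1.0f, 2.0f, 3.0f, 4.0f};

   /* WRONG here: glm_vec_normalize() treats v as vec3, normalizing x, y, z only */
   /* glm_vec_normalize(v); */

   glm_vec4_normalize(v); /* correct: normalizes all four components */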
**Also, the implementation itself may be wrong; please let us know by creating an issue on GitHub.**
Other Issues?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
**Please let us know by creating an issue on GitHub.**

View File

@@ -16,6 +16,7 @@
#include "common.h"
#include "mat4.h"
#include "mat3.h"
#ifdef CGLM_SSE_FP
# include "simd/sse2/affine.h"
@@ -81,6 +82,59 @@ glm_mul(mat4 m1, mat4 m2, mat4 dest) {
#endif
}
/*!
* @brief this is similar to glm_mat4_mul but specialized to affine transform
*
* Right Matrix format should be:
* R R R 0
* R R R 0
* R R R 0
* 0 0 0 1
*
* this reduces some multiplications. It should be faster than mat4_mul.
* if you are not sure about matrix format then DON'T use this! use mat4_mul
*
* @param[in] m1 affine matrix 1
* @param[in] m2 affine matrix 2
* @param[out] dest result matrix
*/
CGLM_INLINE
void
glm_mul_rot(mat4 m1, mat4 m2, mat4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
glm_mul_rot_sse2(m1, m2, dest);
#else
float a00 = m1[0][0], a01 = m1[0][1], a02 = m1[0][2], a03 = m1[0][3],
a10 = m1[1][0], a11 = m1[1][1], a12 = m1[1][2], a13 = m1[1][3],
a20 = m1[2][0], a21 = m1[2][1], a22 = m1[2][2], a23 = m1[2][3],
a30 = m1[3][0], a31 = m1[3][1], a32 = m1[3][2], a33 = m1[3][3],
b00 = m2[0][0], b01 = m2[0][1], b02 = m2[0][2],
b10 = m2[1][0], b11 = m2[1][1], b12 = m2[1][2],
b20 = m2[2][0], b21 = m2[2][1], b22 = m2[2][2];
dest[0][0] = a00 * b00 + a10 * b01 + a20 * b02;
dest[0][1] = a01 * b00 + a11 * b01 + a21 * b02;
dest[0][2] = a02 * b00 + a12 * b01 + a22 * b02;
dest[0][3] = a03 * b00 + a13 * b01 + a23 * b02;
dest[1][0] = a00 * b10 + a10 * b11 + a20 * b12;
dest[1][1] = a01 * b10 + a11 * b11 + a21 * b12;
dest[1][2] = a02 * b10 + a12 * b11 + a22 * b12;
dest[1][3] = a03 * b10 + a13 * b11 + a23 * b12;
dest[2][0] = a00 * b20 + a10 * b21 + a20 * b22;
dest[2][1] = a01 * b20 + a11 * b21 + a21 * b22;
dest[2][2] = a02 * b20 + a12 * b21 + a22 * b22;
dest[2][3] = a03 * b20 + a13 * b21 + a23 * b22;
dest[3][0] = a30;
dest[3][1] = a31;
dest[3][2] = a32;
dest[3][3] = a33;
#endif
}
/*!
* @brief inverse orthonormal rotation + translation matrix (rigid-body)
*

View File

@@ -16,15 +16,14 @@
CGLM_INLINE void glm_scale_to(mat4 m, vec3 v, mat4 dest);
CGLM_INLINE void glm_scale_make(mat4 m, vec3 v);
CGLM_INLINE void glm_scale(mat4 m, vec3 v);
CGLM_INLINE void glm_scale1(mat4 m, float s);
CGLM_INLINE void glm_scale_uni(mat4 m, float s);
CGLM_INLINE void glm_rotate_x(mat4 m, float angle, mat4 dest);
CGLM_INLINE void glm_rotate_y(mat4 m, float angle, mat4 dest);
CGLM_INLINE void glm_rotate_z(mat4 m, float angle, mat4 dest);
CGLM_INLINE void glm_rotate_ndc_make(mat4 m, float angle, vec3 axis_ndc);
CGLM_INLINE void glm_rotate_make(mat4 m, float angle, vec3 axis);
CGLM_INLINE void glm_rotate_ndc(mat4 m, float angle, vec3 axis);
CGLM_INLINE void glm_rotate(mat4 m, float angle, vec3 axis);
CGLM_INLINE void glm_rotate_at(mat4 m, vec3 pivot, float angle, vec3 axis);
CGLM_INLINE void glm_rotate_atm(mat4 m, vec3 pivot, float angle, vec3 axis);
CGLM_INLINE void glm_decompose_scalev(mat4 m, vec3 s);
CGLM_INLINE bool glm_uniscaled(mat4 m);
CGLM_INLINE void glm_decompose_rs(mat4 m, mat4 r, vec3 s);
@@ -35,9 +34,15 @@
#define cglm_affine_h
#include "common.h"
#include "vec4.h"
#include "affine-mat.h"
#include "util.h"
#include "vec3.h"
#include "vec4.h"
#include "mat4.h"
#include "affine-mat.h"
CGLM_INLINE
void
glm_mat4_mul(mat4 m1, mat4 m2, mat4 dest);
/*!
* @brief translate existing transform matrix by v vector
@@ -53,19 +58,19 @@ glm_translate_to(mat4 m, vec3 v, mat4 dest) {
mat4 t = GLM_MAT4_IDENTITY_INIT;
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(dest[3],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_load_ps(t[0]),
_mm_set1_ps(v[0])),
_mm_mul_ps(_mm_load_ps(t[1]),
_mm_set1_ps(v[1]))),
_mm_add_ps(_mm_mul_ps(_mm_load_ps(t[2]),
_mm_set1_ps(v[2])),
_mm_load_ps(t[3]))))
glmm_store(dest[3],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_load(t[0]),
_mm_set1_ps(v[0])),
_mm_mul_ps(glmm_load(t[1]),
_mm_set1_ps(v[1]))),
_mm_add_ps(_mm_mul_ps(glmm_load(t[2]),
_mm_set1_ps(v[2])),
glmm_load(t[3]))))
;
_mm_store_ps(dest[0], _mm_load_ps(m[0]));
_mm_store_ps(dest[1], _mm_load_ps(m[1]));
_mm_store_ps(dest[2], _mm_load_ps(m[2]));
glmm_store(dest[0], glmm_load(m[0]));
glmm_store(dest[1], glmm_load(m[1]));
glmm_store(dest[2], glmm_load(m[2]));
#else
vec4 v1, v2, v3;
@@ -92,14 +97,14 @@ CGLM_INLINE
void
glm_translate(mat4 m, vec3 v) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(m[3],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_load_ps(m[0]),
_mm_set1_ps(v[0])),
_mm_mul_ps(_mm_load_ps(m[1]),
_mm_set1_ps(v[1]))),
_mm_add_ps(_mm_mul_ps(_mm_load_ps(m[2]),
_mm_set1_ps(v[2])),
_mm_load_ps(m[3]))))
glmm_store(m[3],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_load(m[0]),
_mm_set1_ps(v[0])),
_mm_mul_ps(glmm_load(m[1]),
_mm_set1_ps(v[1]))),
_mm_add_ps(_mm_mul_ps(glmm_load(m[2]),
_mm_set1_ps(v[2])),
glmm_load(m[3]))))
;
#else
vec4 v1, v2, v3;
@@ -124,10 +129,10 @@ CGLM_INLINE
void
glm_translate_x(mat4 m, float x) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(m[3],
_mm_add_ps(_mm_mul_ps(_mm_load_ps(m[0]),
_mm_set1_ps(x)),
_mm_load_ps(m[3])))
glmm_store(m[3],
_mm_add_ps(_mm_mul_ps(glmm_load(m[0]),
_mm_set1_ps(x)),
glmm_load(m[3])))
;
#else
vec4 v1;
@@ -146,10 +151,10 @@ CGLM_INLINE
void
glm_translate_y(mat4 m, float y) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(m[3],
_mm_add_ps(_mm_mul_ps(_mm_load_ps(m[1]),
_mm_set1_ps(y)),
_mm_load_ps(m[3])))
glmm_store(m[3],
_mm_add_ps(_mm_mul_ps(glmm_load(m[1]),
_mm_set1_ps(y)),
glmm_load(m[3])))
;
#else
vec4 v1;
@@ -168,10 +173,10 @@ CGLM_INLINE
void
glm_translate_z(mat4 m, float z) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(m[3],
_mm_add_ps(_mm_mul_ps(_mm_load_ps(m[2]),
_mm_set1_ps(z)),
_mm_load_ps(m[3])))
glmm_store(m[3],
_mm_add_ps(_mm_mul_ps(glmm_load(m[2]),
_mm_set1_ps(z)),
glmm_load(m[3])))
;
#else
vec4 v1;
@@ -237,16 +242,6 @@ glm_scale(mat4 m, vec3 v) {
glm_scale_to(m, v, m);
}
/*!
* @brief DEPRECATED! Use glm_scale_uni
*/
CGLM_INLINE
void
glm_scale1(mat4 m, float s) {
vec3 v = { s, s, s };
glm_scale_to(m, v, m);
}
/*!
* @brief applies uniform scale to existing transform matrix v = [s, s, s]
* and stores result in same matrix
@@ -272,19 +267,18 @@ glm_scale_uni(mat4 m, float s) {
CGLM_INLINE
void
glm_rotate_x(mat4 m, float angle, mat4 dest) {
float cosVal;
float sinVal;
mat4 t = GLM_MAT4_IDENTITY_INIT;
float c, s;
cosVal = cosf(angle);
sinVal = sinf(angle);
c = cosf(angle);
s = sinf(angle);
t[1][1] = cosVal;
t[1][2] = sinVal;
t[2][1] = -sinVal;
t[2][2] = cosVal;
t[1][1] = c;
t[1][2] = s;
t[2][1] = -s;
t[2][2] = c;
glm_mat4_mul(m, t, dest);
glm_mul_rot(m, t, dest);
}
/*!
@@ -298,19 +292,18 @@ glm_rotate_x(mat4 m, float angle, mat4 dest) {
CGLM_INLINE
void
glm_rotate_y(mat4 m, float angle, mat4 dest) {
float cosVal;
float sinVal;
mat4 t = GLM_MAT4_IDENTITY_INIT;
float c, s;
cosVal = cosf(angle);
sinVal = sinf(angle);
c = cosf(angle);
s = sinf(angle);
t[0][0] = cosVal;
t[0][2] = -sinVal;
t[2][0] = sinVal;
t[2][2] = cosVal;
t[0][0] = c;
t[0][2] = -s;
t[2][0] = s;
t[2][2] = c;
glm_mat4_mul(m, t, dest);
glm_mul_rot(m, t, dest);
}
/*!
@@ -324,61 +317,18 @@ glm_rotate_y(mat4 m, float angle, mat4 dest) {
CGLM_INLINE
void
glm_rotate_z(mat4 m, float angle, mat4 dest) {
float cosVal;
float sinVal;
mat4 t = GLM_MAT4_IDENTITY_INIT;
cosVal = cosf(angle);
sinVal = sinf(angle);
t[0][0] = cosVal;
t[0][1] = sinVal;
t[1][0] = -sinVal;
t[1][1] = cosVal;
glm_mat4_mul(m, t, dest);
}
/*!
* @brief creates NEW rotation matrix by angle and axis
*
* this name may change in the future. axis must be normalized
*
* @param[out] m affine transfrom
* @param[in] angle angle (radians)
* @param[in] axis_ndc normalized axis
*/
CGLM_INLINE
void
glm_rotate_ndc_make(mat4 m, float angle, vec3 axis_ndc) {
/* https://www.opengl.org/sdk/docs/man2/xhtml/glRotate.xml */
vec3 v, vs;
float c;
float c, s;
c = cosf(angle);
s = sinf(angle);
glm_vec_scale(axis_ndc, 1.0f - c, v);
glm_vec_scale(axis_ndc, sinf(angle), vs);
t[0][0] = c;
t[0][1] = s;
t[1][0] = -s;
t[1][1] = c;
glm_vec_scale(axis_ndc, v[0], m[0]);
glm_vec_scale(axis_ndc, v[1], m[1]);
glm_vec_scale(axis_ndc, v[2], m[2]);
m[0][0] += c;
m[0][1] += vs[2];
m[0][2] -= vs[1];
m[1][0] -= vs[2];
m[1][1] += c;
m[1][2] += vs[0];
m[2][0] += vs[1];
m[2][1] -= vs[0];
m[2][2] += c;
m[0][3] = m[1][3] = m[2][3] = m[3][0] = m[3][1] = m[3][2] = 0.0f;
m[3][3] = 1.0f;
glm_mul_rot(m, t, dest);
}
/*!
@@ -393,53 +343,29 @@ glm_rotate_ndc_make(mat4 m, float angle, vec3 axis_ndc) {
CGLM_INLINE
void
glm_rotate_make(mat4 m, float angle, vec3 axis) {
vec3 axis_ndc;
vec3 axisn, v, vs;
float c;
glm_vec_normalize_to(axis, axis_ndc);
glm_rotate_ndc_make(m, angle, axis_ndc);
c = cosf(angle);
glm_vec_normalize_to(axis, axisn);
glm_vec_scale(axisn, 1.0f - c, v);
glm_vec_scale(axisn, sinf(angle), vs);
glm_vec_scale(axisn, v[0], m[0]);
glm_vec_scale(axisn, v[1], m[1]);
glm_vec_scale(axisn, v[2], m[2]);
m[0][0] += c; m[1][0] -= vs[2]; m[2][0] += vs[1];
m[0][1] += vs[2]; m[1][1] += c; m[2][1] -= vs[0];
m[0][2] -= vs[1]; m[1][2] += vs[0]; m[2][2] += c;
m[0][3] = m[1][3] = m[2][3] = m[3][0] = m[3][1] = m[3][2] = 0.0f;
m[3][3] = 1.0f;
}
/*!
* @brief rotate existing transform matrix around Z axis by angle and axis
*
* this name may change in the future, axis must be normalized.
*
* @param[in, out] m affine transfrom
* @param[in] angle angle (radians)
* @param[in] axis_ndc normalized axis
*/
CGLM_INLINE
void
glm_rotate_ndc(mat4 m, float angle, vec3 axis_ndc) {
mat4 rot, tmp;
glm_rotate_ndc_make(rot, angle, axis_ndc);
glm_vec4_scale(m[0], rot[0][0], tmp[1]);
glm_vec4_scale(m[1], rot[0][1], tmp[0]);
glm_vec4_add(tmp[1], tmp[0], tmp[1]);
glm_vec4_scale(m[2], rot[0][2], tmp[0]);
glm_vec4_add(tmp[1], tmp[0], tmp[1]);
glm_vec4_scale(m[0], rot[1][0], tmp[2]);
glm_vec4_scale(m[1], rot[1][1], tmp[0]);
glm_vec4_add(tmp[2], tmp[0], tmp[2]);
glm_vec4_scale(m[2], rot[1][2], tmp[0]);
glm_vec4_add(tmp[2], tmp[0], tmp[2]);
glm_vec4_scale(m[0], rot[2][0], tmp[3]);
glm_vec4_scale(m[1], rot[2][1], tmp[0]);
glm_vec4_add(tmp[3], tmp[0], tmp[3]);
glm_vec4_scale(m[2], rot[2][2], tmp[0]);
glm_vec4_add(tmp[3], tmp[0], tmp[3]);
glm_vec4_copy(tmp[1], m[0]);
glm_vec4_copy(tmp[2], m[1]);
glm_vec4_copy(tmp[3], m[2]);
}
/*!
* @brief rotate existing transform matrix around Z axis by angle and axis
* @brief rotate existing transform matrix around given axis by angle
*
* @param[in, out] m affine transform
* @param[in] angle angle (radians)
@@ -448,10 +374,56 @@ glm_rotate_ndc(mat4 m, float angle, vec3 axis_ndc) {
CGLM_INLINE
void
glm_rotate(mat4 m, float angle, vec3 axis) {
vec3 axis_ndc;
mat4 rot;
glm_rotate_make(rot, angle, axis);
glm_mul_rot(m, rot, m);
}
glm_vec_normalize_to(axis, axis_ndc);
glm_rotate_ndc(m, angle, axis_ndc);
/*!
* @brief rotate existing transform
* around given axis by angle at given pivot point (rotation center)
*
* @param[in, out] m affine transform
* @param[in] pivot rotation center
* @param[in] angle angle (radians)
* @param[in] axis axis
*/
CGLM_INLINE
void
glm_rotate_at(mat4 m, vec3 pivot, float angle, vec3 axis) {
vec3 pivotInv;
glm_vec_inv_to(pivot, pivotInv);
glm_translate(m, pivot);
glm_rotate(m, angle, axis);
glm_translate(m, pivotInv);
}
/*!
* @brief creates NEW rotation matrix by angle and axis at given point
*
* this creates rotation matrix, it assumes you don't have a matrix
*
* this should work faster than glm_rotate_at because it reduces
* one glm_translate.
*
* @param[out] m affine transform
* @param[in] pivot rotation center
* @param[in] angle angle (radians)
* @param[in] axis axis
*/
CGLM_INLINE
void
glm_rotate_atm(mat4 m, vec3 pivot, float angle, vec3 axis) {
vec3 pivotInv;
glm_vec_inv_to(pivot, pivotInv);
glm_mat4_identity(m);
glm_vec_copy(pivot, m[3]);
glm_rotate(m, angle, axis);
glm_translate(m, pivotInv);
}
/*!
@@ -469,7 +441,7 @@ glm_decompose_scalev(mat4 m, vec3 s) {
}
/*!
* @brief returns true if matrix is uniform scaled. This is helpful for
* @brief returns true if matrix is uniform scaled. This is helpful for
* creating normal matrix.
*
* @param[in] m m

View File

@@ -11,6 +11,7 @@
#include "common.h"
#include "vec3.h"
#include "vec4.h"
#include "util.h"
/*!
* @brief apply transform to Axis-Aligned Bounding Box

View File

@@ -13,6 +13,10 @@ extern "C" {
#include "../cglm.h"
CGLM_EXPORT
void
glmc_translate_make(mat4 m, vec3 v);
CGLM_EXPORT
void
glmc_translate_to(mat4 m, vec3 v, mat4 dest);
@@ -33,6 +37,10 @@ CGLM_EXPORT
void
glmc_translate_z(mat4 m, float to);
CGLM_EXPORT
void
glmc_scale_make(mat4 m, vec3 v);
CGLM_EXPORT
void
glmc_scale_to(mat4 m, vec3 v, mat4 dest);
@@ -43,7 +51,7 @@ glmc_scale(mat4 m, vec3 v);
CGLM_EXPORT
void
glmc_scale1(mat4 m, float s);
glmc_scale_uni(mat4 m, float s);
CGLM_EXPORT
void
@@ -57,26 +65,30 @@ CGLM_EXPORT
void
glmc_rotate_z(mat4 m, float rad, mat4 dest);
CGLM_EXPORT
void
glmc_rotate_ndc_make(mat4 m, float angle, vec3 axis_ndc);
CGLM_EXPORT
void
glmc_rotate_make(mat4 m, float angle, vec3 axis);
CGLM_EXPORT
void
glmc_rotate_ndc(mat4 m, float angle, vec3 axis_ndc);
CGLM_EXPORT
void
glmc_rotate(mat4 m, float angle, vec3 axis);
CGLM_EXPORT
void
glmc_rotate_at(mat4 m, vec3 pivot, float angle, vec3 axis);
CGLM_EXPORT
void
glmc_rotate_atm(mat4 m, vec3 pivot, float angle, vec3 axis);
CGLM_EXPORT
void
glmc_decompose_scalev(mat4 m, vec3 s);
CGLM_EXPORT
bool
glmc_uniscaled(mat4 m);
CGLM_EXPORT
void
glmc_decompose_rs(mat4 m, mat4 r, vec3 s);

View File

@@ -137,6 +137,14 @@ CGLM_EXPORT
void
glmc_quat_rotate(mat4 m, versor q, mat4 dest);
CGLM_EXPORT
void
glmc_quat_rotate_at(mat4 model, versor q, vec3 pivot);
CGLM_EXPORT
void
glmc_quat_rotate_atm(mat4 m, versor q, vec3 pivot);
#ifdef __cplusplus
}
#endif

View File

@@ -14,7 +14,7 @@
#include <math.h>
#include <float.h>
#if defined(_WIN32)
#if defined(_MSC_VER)
# ifdef CGLM_DLL
# define CGLM_EXPORT __declspec(dllexport)
# else

View File

@@ -10,6 +10,9 @@
#include "common.h"
#include "plane.h"
#include "vec3.h"
#include "vec4.h"
#include "mat4.h"
#define GLM_LBN 0 /* left bottom near */
#define GLM_LTN 1 /* left top near */

View File

@@ -31,6 +31,7 @@
#define cglm_mat3_h
#include "common.h"
#include "vec3.h"
#ifdef CGLM_SSE_FP
# include "simd/sse2/mat3.h"

View File

@@ -45,7 +45,8 @@
#define cglm_mat_h
#include "common.h"
#include "quat.h"
#include "vec4.h"
#include "vec3.h"
#ifdef CGLM_SSE_FP
# include "simd/sse2/mat4.h"
@@ -109,13 +110,13 @@ CGLM_INLINE
void
glm_mat4_copy(mat4 mat, mat4 dest) {
#ifdef __AVX__
_mm256_store_ps(dest[0], _mm256_load_ps(mat[0]));
_mm256_store_ps(dest[2], _mm256_load_ps(mat[2]));
glmm_store256(dest[0], glmm_load256(mat[0]));
glmm_store256(dest[2], glmm_load256(mat[2]));
#elif defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(dest[0], _mm_load_ps(mat[0]));
_mm_store_ps(dest[1], _mm_load_ps(mat[1]));
_mm_store_ps(dest[2], _mm_load_ps(mat[2]));
_mm_store_ps(dest[3], _mm_load_ps(mat[3]));
glmm_store(dest[0], glmm_load(mat[0]));
glmm_store(dest[1], glmm_load(mat[1]));
glmm_store(dest[2], glmm_load(mat[2]));
glmm_store(dest[3], glmm_load(mat[3]));
#else
glm_mat4_ucopy(mat, dest);
#endif

View File

@@ -9,9 +9,7 @@
#define cglm_plane_h
#include "common.h"
#include "mat4.h"
#include "vec4.h"
#include "vec3.h"
/*
Plane equation: Ax + By + Cz + D = 0;

View File

@@ -8,9 +8,9 @@
#ifndef cglm_project_h
#define cglm_project_h
#include "mat4.h"
#include "vec3.h"
#include "vec4.h"
#include "mat4.h"
/*!
* @brief maps the specified viewport coordinates into specified space [1]

View File

@@ -52,21 +52,31 @@
#define cglm_quat_h
#include "common.h"
#include "vec3.h"
#include "vec4.h"
#include "mat4.h"
#include "mat3.h"
#include "affine-mat.h"
#ifdef CGLM_SSE_FP
# include "simd/sse2/quat.h"
#endif
CGLM_INLINE
void
glm_mat4_identity(mat4 mat);
CGLM_INLINE
void
glm_mat4_mulv(mat4 m, vec4 v, vec4 dest);
CGLM_INLINE
void
glm_mat4_mul(mat4 m1, mat4 m2, mat4 dest);
glm_mul_rot(mat4 m1, mat4 m2, mat4 dest);
CGLM_INLINE
void
glm_translate(mat4 m, vec3 v);
/*
* IMPORTANT:
@@ -188,8 +198,8 @@ glm_quat_normalize_to(versor q, versor dest) {
__m128 xdot, x0;
float dot;
x0 = _mm_load_ps(q);
xdot = glm_simd_dot(x0, x0);
x0 = glmm_load(q);
xdot = glmm_dot(x0, x0);
dot = _mm_cvtss_f32(xdot);
if (dot <= 0.0f) {
@@ -197,7 +207,7 @@ glm_quat_normalize_to(versor q, versor dest) {
return;
}
_mm_store_ps(dest, _mm_div_ps(x0, _mm_sqrt_ps(xdot)));
glmm_store(dest, _mm_div_ps(x0, _mm_sqrt_ps(xdot)));
#else
float dot;
@@ -737,7 +747,51 @@ void
glm_quat_rotate(mat4 m, versor q, mat4 dest) {
mat4 rot;
glm_quat_mat4(q, rot);
glm_mat4_mul(m, rot, dest);
glm_mul_rot(m, rot, dest);
}
/*!
* @brief rotate existing transform matrix using quaternion at pivot point
*
* @param[in, out] m existing transform matrix
* @param[in] q quaternion
* @param[in] pivot pivot
*/
CGLM_INLINE
void
glm_quat_rotate_at(mat4 m, versor q, vec3 pivot) {
vec3 pivotInv;
glm_vec_inv_to(pivot, pivotInv);
glm_translate(m, pivot);
glm_quat_rotate(m, q, m);
glm_translate(m, pivotInv);
}
/*!
* @brief rotate NEW transform matrix using quaternion at pivot point
*
* this creates rotation matrix, it assumes you don't have a matrix
*
* this should work faster than glm_quat_rotate_at because it reduces
* one glm_translate.
*
* @param[out] m existing transform matrix
* @param[in] q quaternion
* @param[in] pivot pivot
*/
CGLM_INLINE
void
glm_quat_rotate_atm(mat4 m, versor q, vec3 pivot) {
vec3 pivotInv;
glm_vec_inv_to(pivot, pivotInv);
glm_mat4_identity(m);
glm_vec_copy(pivot, m[3]);
glm_quat_rotate(m, q, m);
glm_translate(m, pivotInv);
}
#endif /* cglm_quat_h */

View File

@@ -21,11 +21,11 @@ glm_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
__m256 y0, y1, y2, y3, y4, y5, y6, y7, y8, y9;
y0 = _mm256_load_ps(m2[0]); /* h g f e d c b a */
y1 = _mm256_load_ps(m2[2]); /* p o n m l k j i */
y0 = glmm_load256(m2[0]); /* h g f e d c b a */
y1 = glmm_load256(m2[2]); /* p o n m l k j i */
y2 = _mm256_load_ps(m1[0]); /* h g f e d c b a */
y3 = _mm256_load_ps(m1[2]); /* p o n m l k j i */
y2 = glmm_load256(m1[0]); /* h g f e d c b a */
y3 = glmm_load256(m1[2]); /* p o n m l k j i */
y4 = _mm256_permute2f128_ps(y2, y2, 0b00000011); /* d c b a h g f e */
y5 = _mm256_permute2f128_ps(y3, y3, 0b00000000); /* l k j i l k j i */
@@ -37,10 +37,10 @@ glm_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
y6 = _mm256_permutevar_ps(y0, _mm256_set_epi32(1, 1, 1, 1, 0, 0, 0, 0));
y8 = _mm256_permutevar_ps(y0, _mm256_set_epi32(0, 0, 0, 0, 1, 1, 1, 1));
_mm256_store_ps(dest[0],
_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
_mm256_mul_ps(y4, y8)),
_mm256_mul_ps(y5, y7)));
glmm_store256(dest[0],
_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
_mm256_mul_ps(y4, y8)),
_mm256_mul_ps(y5, y7)));
/* n n n n i i i i */
@@ -52,11 +52,11 @@ glm_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
y8 = _mm256_permutevar_ps(y1, _mm256_set_epi32(0, 0, 0, 0, 1, 1, 1, 1));
y9 = _mm256_permutevar_ps(y1, _mm256_set_epi32(2, 2, 2, 2, 3, 3, 3, 3));
_mm256_store_ps(dest[2],
_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
_mm256_mul_ps(y3, y7)),
_mm256_add_ps(_mm256_mul_ps(y4, y8),
_mm256_mul_ps(y5, y9))));
glmm_store256(dest[2],
_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
_mm256_mul_ps(y3, y7)),
_mm256_add_ps(_mm256_mul_ps(y4, y8),
_mm256_mul_ps(y5, y9))));
}
#endif

View File

@@ -21,11 +21,11 @@ glm_mat4_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
__m256 y0, y1, y2, y3, y4, y5, y6, y7, y8, y9;
y0 = _mm256_load_ps(m2[0]); /* h g f e d c b a */
y1 = _mm256_load_ps(m2[2]); /* p o n m l k j i */
y0 = glmm_load256(m2[0]); /* h g f e d c b a */
y1 = glmm_load256(m2[2]); /* p o n m l k j i */
y2 = _mm256_load_ps(m1[0]); /* h g f e d c b a */
y3 = _mm256_load_ps(m1[2]); /* p o n m l k j i */
y2 = glmm_load256(m1[0]); /* h g f e d c b a */
y3 = glmm_load256(m1[2]); /* p o n m l k j i */
y4 = _mm256_permute2f128_ps(y2, y2, 0b00000011); /* d c b a h g f e */
y5 = _mm256_permute2f128_ps(y3, y3, 0b00000011); /* l k j i p o n m */
@@ -39,11 +39,11 @@ glm_mat4_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
y8 = _mm256_permutevar_ps(y0, _mm256_set_epi32(0, 0, 0, 0, 1, 1, 1, 1));
y9 = _mm256_permutevar_ps(y0, _mm256_set_epi32(2, 2, 2, 2, 3, 3, 3, 3));
_mm256_store_ps(dest[0],
_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
_mm256_mul_ps(y3, y7)),
_mm256_add_ps(_mm256_mul_ps(y4, y8),
_mm256_mul_ps(y5, y9))));
glmm_store256(dest[0],
_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
_mm256_mul_ps(y3, y7)),
_mm256_add_ps(_mm256_mul_ps(y4, y8),
_mm256_mul_ps(y5, y9))));
/* n n n n i i i i */
/* p p p p k k k k */
@@ -54,11 +54,11 @@ glm_mat4_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
y8 = _mm256_permutevar_ps(y1, _mm256_set_epi32(0, 0, 0, 0, 1, 1, 1, 1));
y9 = _mm256_permutevar_ps(y1, _mm256_set_epi32(2, 2, 2, 2, 3, 3, 3, 3));
_mm256_store_ps(dest[2],
_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
_mm256_mul_ps(y3, y7)),
_mm256_add_ps(_mm256_mul_ps(y4, y8),
_mm256_mul_ps(y5, y9))));
glmm_store256(dest[2],
_mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
_mm256_mul_ps(y3, y7)),
_mm256_add_ps(_mm256_mul_ps(y4, y8),
_mm256_mul_ps(y5, y9))));
}
#endif

View File

@@ -8,11 +8,19 @@
#ifndef cglm_intrin_h
#define cglm_intrin_h
#if defined( _WIN32 )
#if defined( _MSC_VER )
# if (defined(_M_AMD64) || defined(_M_X64)) || _M_IX86_FP == 2
# define __SSE2__
# ifndef __SSE2__
# define __SSE2__
# endif
# elif _M_IX86_FP == 1
# define __SSE__
# ifndef __SSE__
# define __SSE__
# endif
# endif
/* do not use alignment for older visual studio versions */
# if _MSC_VER < 1913 /* Visual Studio 2017 version 15.6 */
# define CGLM_ALL_UNALIGNED
# endif
#endif
@@ -20,32 +28,63 @@
# include <xmmintrin.h>
# include <emmintrin.h>
/* float */
# define _mm_shuffle1_ps(a, z, y, x, w) \
_mm_shuffle_ps(a, a, _MM_SHUFFLE(z, y, x, w))
/* OPTIONAL: may save some move instructions, but the effect on latency is unclear */
#ifdef CGLM_USE_INT_DOMAIN
# define glmm_shuff1(xmm, z, y, x, w) \
_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xmm), \
_MM_SHUFFLE(z, y, x, w)))
#else
# define glmm_shuff1(xmm, z, y, x, w) \
_mm_shuffle_ps(xmm, xmm, _MM_SHUFFLE(z, y, x, w))
#endif
# define _mm_shuffle1_ps1(a, x) \
_mm_shuffle_ps(a, a, _MM_SHUFFLE(x, x, x, x))
#define glmm_shuff1x(xmm, x) glmm_shuff1(xmm, x, x, x, x)
#define glmm_shuff2(a, b, z0, y0, x0, w0, z1, y1, x1, w1) \
glmm_shuff1(_mm_shuffle_ps(a, b, _MM_SHUFFLE(z0, y0, x0, w0)), \
z1, y1, x1, w1)
# define _mm_shuffle2_ps(a, b, z0, y0, x0, w0, z1, y1, x1, w1) \
_mm_shuffle1_ps(_mm_shuffle_ps(a, b, _MM_SHUFFLE(z0, y0, x0, w0)), \
z1, y1, x1, w1)
CGLM_INLINE
static inline
__m128
glm_simd_dot(__m128 a, __m128 b) {
glmm_dot(__m128 a, __m128 b) {
__m128 x0;
x0 = _mm_mul_ps(a, b);
x0 = _mm_add_ps(x0, _mm_shuffle1_ps(x0, 1, 0, 3, 2));
return _mm_add_ps(x0, _mm_shuffle1_ps(x0, 0, 1, 0, 1));
x0 = _mm_add_ps(x0, glmm_shuff1(x0, 1, 0, 3, 2));
return _mm_add_ps(x0, glmm_shuff1(x0, 0, 1, 0, 1));
}
CGLM_INLINE
static inline
__m128
glm_simd_norm(__m128 a) {
return _mm_sqrt_ps(glm_simd_dot(a, a));
glmm_norm(__m128 a) {
return _mm_sqrt_ps(glmm_dot(a, a));
}
static inline
__m128
glmm_load3(float v[3]) {
__m128i xy;
__m128 z;
xy = _mm_loadl_epi64((const __m128i *)v);
z = _mm_load_ss(&v[2]);
return _mm_movelh_ps(_mm_castsi128_ps(xy), z);
}
static inline
void
glmm_store3(__m128 vx, float v[3]) {
_mm_storel_pi((__m64 *)&v[0], vx);
_mm_store_ss(&v[2], glmm_shuff1(vx, 2, 2, 2, 2));
}
#ifdef CGLM_ALL_UNALIGNED
# define glmm_load(p) _mm_loadu_ps(p)
# define glmm_store(p, a) _mm_storeu_ps(p, a)
#else
# define glmm_load(p) _mm_load_ps(p)
# define glmm_store(p, a) _mm_store_ps(p, a)
#endif
#endif
/* x86, x64 */
@@ -55,6 +94,15 @@ glm_simd_norm(__m128 a) {
#ifdef __AVX__
# define CGLM_AVX_FP 1
#ifdef CGLM_ALL_UNALIGNED
# define glmm_load256(p) _mm256_loadu_ps(p)
# define glmm_store256(p, a) _mm256_storeu_ps(p, a)
#else
# define glmm_load256(p) _mm256_load_ps(p)
# define glmm_store256(p, a) _mm256_store_ps(p, a)
#endif
#endif
/* ARM Neon */

View File

@@ -18,35 +18,67 @@ glm_mul_sse2(mat4 m1, mat4 m2, mat4 dest) {
/* D = R * L (Column-Major) */
__m128 l0, l1, l2, l3, r;
l0 = _mm_load_ps(m1[0]);
l1 = _mm_load_ps(m1[1]);
l2 = _mm_load_ps(m1[2]);
l3 = _mm_load_ps(m1[3]);
l0 = glmm_load(m1[0]);
l1 = glmm_load(m1[1]);
l2 = glmm_load(m1[2]);
l3 = glmm_load(m1[3]);
r = _mm_load_ps(m2[0]);
_mm_store_ps(dest[0],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2)));
r = glmm_load(m2[0]);
glmm_store(dest[0],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 0), l0),
_mm_mul_ps(glmm_shuff1x(r, 1), l1)),
_mm_mul_ps(glmm_shuff1x(r, 2), l2)));
r = _mm_load_ps(m2[1]);
_mm_store_ps(dest[1],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2)));
r = glmm_load(m2[1]);
glmm_store(dest[1],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 0), l0),
_mm_mul_ps(glmm_shuff1x(r, 1), l1)),
_mm_mul_ps(glmm_shuff1x(r, 2), l2)));
r = _mm_load_ps(m2[2]);
_mm_store_ps(dest[2],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2)));
r = glmm_load(m2[2]);
glmm_store(dest[2],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 0), l0),
_mm_mul_ps(glmm_shuff1x(r, 1), l1)),
_mm_mul_ps(glmm_shuff1x(r, 2), l2)));
r = _mm_load_ps(m2[3]);
_mm_store_ps(dest[3],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2),
_mm_mul_ps(_mm_shuffle1_ps1(r, 3), l3))));
r = glmm_load(m2[3]);
glmm_store(dest[3],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 0), l0),
_mm_mul_ps(glmm_shuff1x(r, 1), l1)),
_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 2), l2),
_mm_mul_ps(glmm_shuff1x(r, 3), l3))));
}
CGLM_INLINE
void
glm_mul_rot_sse2(mat4 m1, mat4 m2, mat4 dest) {
/* D = R * L (Column-Major) */
__m128 l0, l1, l2, l3, r;
l0 = glmm_load(m1[0]);
l1 = glmm_load(m1[1]);
l2 = glmm_load(m1[2]);
l3 = glmm_load(m1[3]);
r = glmm_load(m2[0]);
glmm_store(dest[0],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 0), l0),
_mm_mul_ps(glmm_shuff1x(r, 1), l1)),
_mm_mul_ps(glmm_shuff1x(r, 2), l2)));
r = glmm_load(m2[1]);
glmm_store(dest[1],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 0), l0),
_mm_mul_ps(glmm_shuff1x(r, 1), l1)),
_mm_mul_ps(glmm_shuff1x(r, 2), l2)));
r = glmm_load(m2[2]);
glmm_store(dest[2],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 0), l0),
_mm_mul_ps(glmm_shuff1x(r, 1), l1)),
_mm_mul_ps(glmm_shuff1x(r, 2), l2)));
glmm_store(dest[3], l3);
}
CGLM_INLINE
@@ -54,25 +86,25 @@ void
glm_inv_tr_sse2(mat4 mat) {
__m128 r0, r1, r2, r3, x0, x1;
r0 = _mm_load_ps(mat[0]);
r1 = _mm_load_ps(mat[1]);
r2 = _mm_load_ps(mat[2]);
r3 = _mm_load_ps(mat[3]);
x1 = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f);
r0 = glmm_load(mat[0]);
r1 = glmm_load(mat[1]);
r2 = glmm_load(mat[2]);
r3 = glmm_load(mat[3]);
x1 = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f);
_MM_TRANSPOSE4_PS(r0, r1, r2, x1);
x0 = _mm_add_ps(_mm_mul_ps(r0, _mm_shuffle1_ps(r3, 0, 0, 0, 0)),
_mm_mul_ps(r1, _mm_shuffle1_ps(r3, 1, 1, 1, 1)));
x0 = _mm_add_ps(x0, _mm_mul_ps(r2, _mm_shuffle1_ps(r3, 2, 2, 2, 2)));
x0 = _mm_add_ps(_mm_mul_ps(r0, glmm_shuff1(r3, 0, 0, 0, 0)),
_mm_mul_ps(r1, glmm_shuff1(r3, 1, 1, 1, 1)));
x0 = _mm_add_ps(x0, _mm_mul_ps(r2, glmm_shuff1(r3, 2, 2, 2, 2)));
x0 = _mm_xor_ps(x0, _mm_set1_ps(-0.f));
x0 = _mm_add_ps(x0, x1);
_mm_store_ps(mat[0], r0);
_mm_store_ps(mat[1], r1);
_mm_store_ps(mat[2], r2);
_mm_store_ps(mat[3], x0);
glmm_store(mat[0], r0);
glmm_store(mat[1], r1);
glmm_store(mat[2], r2);
glmm_store(mat[3], x0);
}
#endif

View File

@@ -27,27 +27,25 @@ glm_mat3_mul_sse2(mat3 m1, mat3 m2, mat3 dest) {
r1 = _mm_loadu_ps(&m2[1][1]);
r2 = _mm_set1_ps(m2[2][2]);
x1 = _mm_shuffle2_ps(l0, l1, 1, 0, 3, 3, 0, 3, 2, 0);
x2 = _mm_shuffle2_ps(l1, l2, 0, 0, 3, 2, 0, 2, 1, 0);
x1 = glmm_shuff2(l0, l1, 1, 0, 3, 3, 0, 3, 2, 0);
x2 = glmm_shuff2(l1, l2, 0, 0, 3, 2, 0, 2, 1, 0);
x0 = _mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps(l0, 0, 2, 1, 0),
_mm_shuffle1_ps(r0, 3, 0, 0, 0)),
_mm_mul_ps(x1,
_mm_shuffle2_ps(r0, r1, 0, 0, 1, 1, 2, 0, 0, 0)));
x0 = _mm_add_ps(_mm_mul_ps(glmm_shuff1(l0, 0, 2, 1, 0),
glmm_shuff1(r0, 3, 0, 0, 0)),
_mm_mul_ps(x1, glmm_shuff2(r0, r1, 0, 0, 1, 1, 2, 0, 0, 0)));
x0 = _mm_add_ps(x0,
_mm_mul_ps(x2,
_mm_shuffle2_ps(r0, r1, 1, 1, 2, 2, 2, 0, 0, 0)));
_mm_mul_ps(x2, glmm_shuff2(r0, r1, 1, 1, 2, 2, 2, 0, 0, 0)));
_mm_storeu_ps(dest[0], x0);
x0 = _mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps(l0, 1, 0, 2, 1),
x0 = _mm_add_ps(_mm_mul_ps(glmm_shuff1(l0, 1, 0, 2, 1),
_mm_shuffle_ps(r0, r1, _MM_SHUFFLE(2, 2, 3, 3))),
_mm_mul_ps(_mm_shuffle1_ps(x1, 1, 0, 2, 1),
_mm_shuffle1_ps(r1, 3, 3, 0, 0)));
_mm_mul_ps(glmm_shuff1(x1, 1, 0, 2, 1),
glmm_shuff1(r1, 3, 3, 0, 0)));
x0 = _mm_add_ps(x0,
_mm_mul_ps(_mm_shuffle1_ps(x2, 1, 0, 2, 1),
_mm_mul_ps(glmm_shuff1(x2, 1, 0, 2, 1),
_mm_shuffle_ps(r1, r2, _MM_SHUFFLE(0, 0, 1, 1))));
_mm_storeu_ps(&dest[1][1], x0);

View File

@@ -20,10 +20,10 @@ glm_mat4_scale_sse2(mat4 m, float s){
__m128 x0;
x0 = _mm_set1_ps(s);
_mm_store_ps(m[0], _mm_mul_ps(_mm_load_ps(m[0]), x0));
_mm_store_ps(m[1], _mm_mul_ps(_mm_load_ps(m[1]), x0));
_mm_store_ps(m[2], _mm_mul_ps(_mm_load_ps(m[2]), x0));
_mm_store_ps(m[3], _mm_mul_ps(_mm_load_ps(m[3]), x0));
glmm_store(m[0], _mm_mul_ps(glmm_load(m[0]), x0));
glmm_store(m[1], _mm_mul_ps(glmm_load(m[1]), x0));
glmm_store(m[2], _mm_mul_ps(glmm_load(m[2]), x0));
glmm_store(m[3], _mm_mul_ps(glmm_load(m[3]), x0));
}
CGLM_INLINE
@@ -31,17 +31,17 @@ void
glm_mat4_transp_sse2(mat4 m, mat4 dest){
__m128 r0, r1, r2, r3;
r0 = _mm_load_ps(m[0]);
r1 = _mm_load_ps(m[1]);
r2 = _mm_load_ps(m[2]);
r3 = _mm_load_ps(m[3]);
r0 = glmm_load(m[0]);
r1 = glmm_load(m[1]);
r2 = glmm_load(m[2]);
r3 = glmm_load(m[3]);
_MM_TRANSPOSE4_PS(r0, r1, r2, r3);
_mm_store_ps(dest[0], r0);
_mm_store_ps(dest[1], r1);
_mm_store_ps(dest[2], r2);
_mm_store_ps(dest[3], r3);
glmm_store(dest[0], r0);
glmm_store(dest[1], r1);
glmm_store(dest[2], r2);
glmm_store(dest[3], r3);
}
CGLM_INLINE
@@ -51,36 +51,36 @@ glm_mat4_mul_sse2(mat4 m1, mat4 m2, mat4 dest) {
__m128 l0, l1, l2, l3, r;
l0 = _mm_load_ps(m1[0]);
l1 = _mm_load_ps(m1[1]);
l2 = _mm_load_ps(m1[2]);
l3 = _mm_load_ps(m1[3]);
l0 = glmm_load(m1[0]);
l1 = glmm_load(m1[1]);
l2 = glmm_load(m1[2]);
l3 = glmm_load(m1[3]);
r = _mm_load_ps(m2[0]);
_mm_store_ps(dest[0],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2),
_mm_mul_ps(_mm_shuffle1_ps1(r, 3), l3))));
r = _mm_load_ps(m2[1]);
_mm_store_ps(dest[1],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2),
_mm_mul_ps(_mm_shuffle1_ps1(r, 3), l3))));
r = _mm_load_ps(m2[2]);
_mm_store_ps(dest[2],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2),
_mm_mul_ps(_mm_shuffle1_ps1(r, 3), l3))));
r = glmm_load(m2[0]);
glmm_store(dest[0],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 0), l0),
_mm_mul_ps(glmm_shuff1x(r, 1), l1)),
_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 2), l2),
_mm_mul_ps(glmm_shuff1x(r, 3), l3))));
r = glmm_load(m2[1]);
glmm_store(dest[1],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 0), l0),
_mm_mul_ps(glmm_shuff1x(r, 1), l1)),
_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 2), l2),
_mm_mul_ps(glmm_shuff1x(r, 3), l3))));
r = glmm_load(m2[2]);
glmm_store(dest[2],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 0), l0),
_mm_mul_ps(glmm_shuff1x(r, 1), l1)),
_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 2), l2),
_mm_mul_ps(glmm_shuff1x(r, 3), l3))));
r = _mm_load_ps(m2[3]);
_mm_store_ps(dest[3],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
_mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2),
_mm_mul_ps(_mm_shuffle1_ps1(r, 3), l3))));
r = glmm_load(m2[3]);
glmm_store(dest[3],
_mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 0), l0),
_mm_mul_ps(glmm_shuff1x(r, 1), l1)),
_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 2), l2),
_mm_mul_ps(glmm_shuff1x(r, 3), l3))));
}
CGLM_INLINE
@@ -88,18 +88,14 @@ void
glm_mat4_mulv_sse2(mat4 m, vec4 v, vec4 dest) {
__m128 x0, x1, x2;
x0 = _mm_load_ps(v);
x1 = _mm_add_ps(_mm_mul_ps(_mm_load_ps(m[0]),
_mm_shuffle1_ps1(x0, 0)),
_mm_mul_ps(_mm_load_ps(m[1]),
_mm_shuffle1_ps1(x0, 1)));
x0 = glmm_load(v);
x1 = _mm_add_ps(_mm_mul_ps(glmm_load(m[0]), glmm_shuff1x(x0, 0)),
_mm_mul_ps(glmm_load(m[1]), glmm_shuff1x(x0, 1)));
x2 = _mm_add_ps(_mm_mul_ps(_mm_load_ps(m[2]),
_mm_shuffle1_ps1(x0, 2)),
_mm_mul_ps(_mm_load_ps(m[3]),
_mm_shuffle1_ps1(x0, 3)));
x2 = _mm_add_ps(_mm_mul_ps(glmm_load(m[2]), glmm_shuff1x(x0, 2)),
_mm_mul_ps(glmm_load(m[3]), glmm_shuff1x(x0, 3)));
_mm_store_ps(dest, _mm_add_ps(x1, x2));
glmm_store(dest, _mm_add_ps(x1, x2));
}
CGLM_INLINE
@@ -108,10 +104,10 @@ glm_mat4_det_sse2(mat4 mat) {
__m128 r0, r1, r2, r3, x0, x1, x2;
/* 127 <- 0, [square] det(A) = det(At) */
r0 = _mm_load_ps(mat[0]); /* d c b a */
r1 = _mm_load_ps(mat[1]); /* h g f e */
r2 = _mm_load_ps(mat[2]); /* l k j i */
r3 = _mm_load_ps(mat[3]); /* p o n m */
r0 = glmm_load(mat[0]); /* d c b a */
r1 = glmm_load(mat[1]); /* h g f e */
r2 = glmm_load(mat[2]); /* l k j i */
r3 = glmm_load(mat[3]); /* p o n m */
/*
t[1] = j * p - n * l;
@@ -119,20 +115,20 @@ glm_mat4_det_sse2(mat4 mat) {
t[3] = i * p - m * l;
t[4] = i * o - m * k;
*/
x0 = _mm_sub_ps(_mm_mul_ps(_mm_shuffle1_ps(r2, 0, 0, 1, 1),
_mm_shuffle1_ps(r3, 2, 3, 2, 3)),
_mm_mul_ps(_mm_shuffle1_ps(r3, 0, 0, 1, 1),
_mm_shuffle1_ps(r2, 2, 3, 2, 3)));
x0 = _mm_sub_ps(_mm_mul_ps(glmm_shuff1(r2, 0, 0, 1, 1),
glmm_shuff1(r3, 2, 3, 2, 3)),
_mm_mul_ps(glmm_shuff1(r3, 0, 0, 1, 1),
glmm_shuff1(r2, 2, 3, 2, 3)));
/*
t[0] = k * p - o * l;
t[0] = k * p - o * l;
t[5] = i * n - m * j;
t[5] = i * n - m * j;
*/
x1 = _mm_sub_ps(_mm_mul_ps(_mm_shuffle1_ps(r2, 0, 0, 2, 2),
_mm_shuffle1_ps(r3, 1, 1, 3, 3)),
_mm_mul_ps(_mm_shuffle1_ps(r3, 0, 0, 2, 2),
_mm_shuffle1_ps(r2, 1, 1, 3, 3)));
x1 = _mm_sub_ps(_mm_mul_ps(glmm_shuff1(r2, 0, 0, 2, 2),
glmm_shuff1(r3, 1, 1, 3, 3)),
_mm_mul_ps(glmm_shuff1(r3, 0, 0, 2, 2),
glmm_shuff1(r2, 1, 1, 3, 3)));
/*
a * (f * t[0] - g * t[1] + h * t[2])
@@ -140,19 +136,19 @@ glm_mat4_det_sse2(mat4 mat) {
+ c * (e * t[1] - f * t[3] + h * t[5])
- d * (e * t[2] - f * t[4] + g * t[5])
*/
x2 = _mm_sub_ps(_mm_mul_ps(_mm_shuffle1_ps(r1, 0, 0, 0, 1),
x2 = _mm_sub_ps(_mm_mul_ps(glmm_shuff1(r1, 0, 0, 0, 1),
_mm_shuffle_ps(x1, x0, _MM_SHUFFLE(1, 0, 0, 0))),
_mm_mul_ps(_mm_shuffle1_ps(r1, 1, 1, 2, 2),
_mm_shuffle1_ps(x0, 3, 2, 2, 0)));
_mm_mul_ps(glmm_shuff1(r1, 1, 1, 2, 2),
glmm_shuff1(x0, 3, 2, 2, 0)));
x2 = _mm_add_ps(x2,
_mm_mul_ps(_mm_shuffle1_ps(r1, 2, 3, 3, 3),
_mm_mul_ps(glmm_shuff1(r1, 2, 3, 3, 3),
_mm_shuffle_ps(x0, x1, _MM_SHUFFLE(2, 2, 3, 1))));
x2 = _mm_xor_ps(x2, _mm_set_ps(-0.f, 0.f, -0.f, 0.f));
x0 = _mm_mul_ps(r0, x2);
x0 = _mm_add_ps(x0, _mm_shuffle1_ps(x0, 0, 1, 2, 3));
x0 = _mm_add_ps(x0, _mm_shuffle1_ps(x0, 1, 3, 3, 1));
x0 = _mm_add_ps(x0, glmm_shuff1(x0, 0, 1, 2, 3));
x0 = _mm_add_ps(x0, glmm_shuff1(x0, 1, 3, 3, 1));
return _mm_cvtss_f32(x0);
}
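The _mm_xor_ps with _mm_set_ps(-0.f, 0.f, -0.f, 0.f) implements the alternating cofactor signs: XORing a float with -0.0f flips only its sign bit. A minimal standalone illustration, assuming 32-bit IEEE floats (hypothetical helper):

#include <string.h>

static float
flip_sign(float x) {                    /* x ^ -0.0f, bitwise       */
  unsigned int u;
  memcpy(&u, &x, sizeof u);
  u ^= 0x80000000u;                     /* sign bit only            */
  memcpy(&x, &u, sizeof x);
  return x;                             /* flip_sign(3.5f) == -3.5f */
}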
@@ -166,14 +162,14 @@ glm_mat4_inv_fast_sse2(mat4 mat, mat4 dest) {
x0, x1, x2, x3, x4, x5, x6, x7;
/* 127 <- 0 */
r0 = _mm_load_ps(mat[0]); /* d c b a */
r1 = _mm_load_ps(mat[1]); /* h g f e */
r2 = _mm_load_ps(mat[2]); /* l k j i */
r3 = _mm_load_ps(mat[3]); /* p o n m */
r0 = glmm_load(mat[0]); /* d c b a */
r1 = glmm_load(mat[1]); /* h g f e */
r2 = glmm_load(mat[2]); /* l k j i */
r3 = glmm_load(mat[3]); /* p o n m */
x0 = _mm_shuffle_ps(r2, r3, _MM_SHUFFLE(3, 2, 3, 2)); /* p o l k */
x1 = _mm_shuffle1_ps(x0, 1, 3, 3, 3); /* l p p p */
x2 = _mm_shuffle1_ps(x0, 0, 2, 2, 2); /* k o o o */
x1 = glmm_shuff1(x0, 1, 3, 3, 3); /* l p p p */
x2 = glmm_shuff1(x0, 0, 2, 2, 2); /* k o o o */
x0 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(3, 3, 3, 3)); /* h h l l */
x3 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(2, 2, 2, 2)); /* g g k k */
@@ -184,7 +180,7 @@ glm_mat4_inv_fast_sse2(mat4 mat, mat4 dest) {
t0 = _mm_sub_ps(_mm_mul_ps(x3, x1), _mm_mul_ps(x2, x0));
x4 = _mm_shuffle_ps(r2, r3, _MM_SHUFFLE(2, 1, 2, 1)); /* o n k j */
x4 = _mm_shuffle1_ps(x4, 0, 2, 2, 2); /* j n n n */
x4 = glmm_shuff1(x4, 0, 2, 2, 2); /* j n n n */
x5 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(1, 1, 1, 1)); /* f f j j */
/* t1[1] = j * p - n * l;
@@ -200,7 +196,7 @@ glm_mat4_inv_fast_sse2(mat4 mat, mat4 dest) {
t2 = _mm_sub_ps(_mm_mul_ps(x5, x2), _mm_mul_ps(x4, x3));
x6 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(0, 0, 0, 0)); /* e e i i */
x7 = _mm_shuffle2_ps(r3, r2, 0, 0, 0, 0, 2, 0, 0, 0); /* i m m m */
x7 = glmm_shuff2(r3, r2, 0, 0, 0, 0, 2, 0, 0, 0); /* i m m m */
/* t1[3] = i * p - m * l;
t1[3] = i * p - m * l;
@@ -220,10 +216,10 @@ glm_mat4_inv_fast_sse2(mat4 mat, mat4 dest) {
t3[5] = e * j - i * f; */
t5 = _mm_sub_ps(_mm_mul_ps(x6, x4), _mm_mul_ps(x7, x5));
x0 = _mm_shuffle2_ps(r1, r0, 0, 0, 0, 0, 2, 2, 2, 0); /* a a a e */
x1 = _mm_shuffle2_ps(r1, r0, 1, 1, 1, 1, 2, 2, 2, 0); /* b b b f */
x2 = _mm_shuffle2_ps(r1, r0, 2, 2, 2, 2, 2, 2, 2, 0); /* c c c g */
x3 = _mm_shuffle2_ps(r1, r0, 3, 3, 3, 3, 2, 2, 2, 0); /* d d d h */
x0 = glmm_shuff2(r1, r0, 0, 0, 0, 0, 2, 2, 2, 0); /* a a a e */
x1 = glmm_shuff2(r1, r0, 1, 1, 1, 1, 2, 2, 2, 0); /* b b b f */
x2 = glmm_shuff2(r1, r0, 2, 2, 2, 2, 2, 2, 2, 0); /* c c c g */
x3 = glmm_shuff2(r1, r0, 3, 3, 3, 3, 2, 2, 2, 0); /* d d d h */
/*
dest[0][0] = f * t1[0] - g * t1[1] + h * t1[2];
@@ -271,14 +267,14 @@ glm_mat4_inv_fast_sse2(mat4 mat, mat4 dest) {
x0 = _mm_shuffle_ps(x0, x1, _MM_SHUFFLE(2, 0, 2, 0));
x0 = _mm_mul_ps(x0, r0);
x0 = _mm_add_ps(x0, _mm_shuffle1_ps(x0, 0, 1, 2, 3));
x0 = _mm_add_ps(x0, _mm_shuffle1_ps(x0, 1, 0, 0, 1));
x0 = _mm_add_ps(x0, glmm_shuff1(x0, 0, 1, 2, 3));
x0 = _mm_add_ps(x0, glmm_shuff1(x0, 1, 0, 0, 1));
x0 = _mm_rcp_ps(x0);
_mm_store_ps(dest[0], _mm_mul_ps(v0, x0));
_mm_store_ps(dest[1], _mm_mul_ps(v1, x0));
_mm_store_ps(dest[2], _mm_mul_ps(v2, x0));
_mm_store_ps(dest[3], _mm_mul_ps(v3, x0));
glmm_store(dest[0], _mm_mul_ps(v0, x0));
glmm_store(dest[1], _mm_mul_ps(v1, x0));
glmm_store(dest[2], _mm_mul_ps(v2, x0));
glmm_store(dest[3], _mm_mul_ps(v3, x0));
}
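The "fast" suffix comes from _mm_rcp_ps, an approximate reciprocal with roughly 12 bits of precision; glm_mat4_inv_sse2 below pays for an exact _mm_div_ps instead. When more accuracy is wanted from the estimate, a single Newton-Raphson step roughly doubles the correct bits (not used above; sketch only, hypothetical name):

#include <xmmintrin.h>

static __m128
rcp_nr_sketch(__m128 x) {
  __m128 r;
  r = _mm_rcp_ps(x);                          /* ~12-bit 1/x estimate */
  /* r' = r * (2 - x * r) */
  return _mm_mul_ps(r, _mm_sub_ps(_mm_set1_ps(2.0f), _mm_mul_ps(x, r)));
}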
CGLM_INLINE
@@ -290,14 +286,14 @@ glm_mat4_inv_sse2(mat4 mat, mat4 dest) {
x0, x1, x2, x3, x4, x5, x6, x7;
/* 127 <- 0 */
r0 = _mm_load_ps(mat[0]); /* d c b a */
r1 = _mm_load_ps(mat[1]); /* h g f e */
r2 = _mm_load_ps(mat[2]); /* l k j i */
r3 = _mm_load_ps(mat[3]); /* p o n m */
r0 = glmm_load(mat[0]); /* d c b a */
r1 = glmm_load(mat[1]); /* h g f e */
r2 = glmm_load(mat[2]); /* l k j i */
r3 = glmm_load(mat[3]); /* p o n m */
x0 = _mm_shuffle_ps(r2, r3, _MM_SHUFFLE(3, 2, 3, 2)); /* p o l k */
x1 = _mm_shuffle1_ps(x0, 1, 3, 3, 3); /* l p p p */
x2 = _mm_shuffle1_ps(x0, 0, 2, 2, 2); /* k o o o */
x1 = glmm_shuff1(x0, 1, 3, 3, 3); /* l p p p */
x2 = glmm_shuff1(x0, 0, 2, 2, 2); /* k o o o */
x0 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(3, 3, 3, 3)); /* h h l l */
x3 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(2, 2, 2, 2)); /* g g k k */
@@ -308,7 +304,7 @@ glm_mat4_inv_sse2(mat4 mat, mat4 dest) {
t0 = _mm_sub_ps(_mm_mul_ps(x3, x1), _mm_mul_ps(x2, x0));
x4 = _mm_shuffle_ps(r2, r3, _MM_SHUFFLE(2, 1, 2, 1)); /* o n k j */
x4 = _mm_shuffle1_ps(x4, 0, 2, 2, 2); /* j n n n */
x4 = glmm_shuff1(x4, 0, 2, 2, 2); /* j n n n */
x5 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(1, 1, 1, 1)); /* f f j j */
/* t1[1] = j * p - n * l;
@@ -324,7 +320,7 @@ glm_mat4_inv_sse2(mat4 mat, mat4 dest) {
t2 = _mm_sub_ps(_mm_mul_ps(x5, x2), _mm_mul_ps(x4, x3));
x6 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(0, 0, 0, 0)); /* e e i i */
x7 = _mm_shuffle2_ps(r3, r2, 0, 0, 0, 0, 2, 0, 0, 0); /* i m m m */
x7 = glmm_shuff2(r3, r2, 0, 0, 0, 0, 2, 0, 0, 0); /* i m m m */
/* t1[3] = i * p - m * l;
t1[3] = i * p - m * l;
@@ -344,10 +340,10 @@ glm_mat4_inv_sse2(mat4 mat, mat4 dest) {
t3[5] = e * j - i * f; */
t5 = _mm_sub_ps(_mm_mul_ps(x6, x4), _mm_mul_ps(x7, x5));
x0 = _mm_shuffle2_ps(r1, r0, 0, 0, 0, 0, 2, 2, 2, 0); /* a a a e */
x1 = _mm_shuffle2_ps(r1, r0, 1, 1, 1, 1, 2, 2, 2, 0); /* b b b f */
x2 = _mm_shuffle2_ps(r1, r0, 2, 2, 2, 2, 2, 2, 2, 0); /* c c c g */
x3 = _mm_shuffle2_ps(r1, r0, 3, 3, 3, 3, 2, 2, 2, 0); /* d d d h */
x0 = glmm_shuff2(r1, r0, 0, 0, 0, 0, 2, 2, 2, 0); /* a a a e */
x1 = glmm_shuff2(r1, r0, 1, 1, 1, 1, 2, 2, 2, 0); /* b b b f */
x2 = glmm_shuff2(r1, r0, 2, 2, 2, 2, 2, 2, 2, 0); /* c c c g */
x3 = glmm_shuff2(r1, r0, 3, 3, 3, 3, 2, 2, 2, 0); /* d d d h */
/*
dest[0][0] = f * t1[0] - g * t1[1] + h * t1[2];
@@ -395,14 +391,14 @@ glm_mat4_inv_sse2(mat4 mat, mat4 dest) {
x0 = _mm_shuffle_ps(x0, x1, _MM_SHUFFLE(2, 0, 2, 0));
x0 = _mm_mul_ps(x0, r0);
x0 = _mm_add_ps(x0, _mm_shuffle1_ps(x0, 0, 1, 2, 3));
x0 = _mm_add_ps(x0, _mm_shuffle1_ps(x0, 1, 0, 0, 1));
x0 = _mm_add_ps(x0, glmm_shuff1(x0, 0, 1, 2, 3));
x0 = _mm_add_ps(x0, glmm_shuff1(x0, 1, 0, 0, 1));
x0 = _mm_div_ps(_mm_set1_ps(1.0f), x0);
_mm_store_ps(dest[0], _mm_mul_ps(v0, x0));
_mm_store_ps(dest[1], _mm_mul_ps(v1, x0));
_mm_store_ps(dest[2], _mm_mul_ps(v2, x0));
_mm_store_ps(dest[3], _mm_mul_ps(v3, x0));
glmm_store(dest[0], _mm_mul_ps(v0, x0));
glmm_store(dest[1], _mm_mul_ps(v1, x0));
glmm_store(dest[2], _mm_mul_ps(v2, x0));
glmm_store(dest[3], _mm_mul_ps(v3, x0));
}
#endif
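The glmm_shuff1/glmm_shuff1x names used throughout replace the earlier _mm_shuffle1_ps/_mm_shuffle1_ps1 helpers. Judging from the call sites, the macros presumably reduce to single-register shuffles along these lines (a reconstruction from usage, not a quote of the header):

#include <xmmintrin.h>

#define glmm_shuff1(xmm, z, y, x, w)                                   \
  _mm_shuffle_ps(xmm, xmm, _MM_SHUFFLE(z, y, x, w))

#define glmm_shuff1x(xmm, x)  glmm_shuff1(xmm, x, x, x, x) /* broadcast */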

View File

@@ -24,21 +24,21 @@ glm_quat_mul_sse2(versor p, versor q, versor dest) {
__m128 xp, xq, x0, r;
xp = _mm_load_ps(p); /* 3 2 1 0 */
xq = _mm_load_ps(q);
xp = glmm_load(p); /* 3 2 1 0 */
xq = glmm_load(q);
r = _mm_mul_ps(_mm_shuffle1_ps1(xp, 3), xq);
r = _mm_mul_ps(glmm_shuff1x(xp, 3), xq);
x0 = _mm_xor_ps(_mm_shuffle1_ps1(xp, 0), _mm_set_ps(-0.f, 0.f, -0.f, 0.f));
r = _mm_add_ps(r, _mm_mul_ps(x0, _mm_shuffle1_ps(xq, 0, 1, 2, 3)));
x0 = _mm_xor_ps(glmm_shuff1x(xp, 0), _mm_set_ps(-0.f, 0.f, -0.f, 0.f));
r = _mm_add_ps(r, _mm_mul_ps(x0, glmm_shuff1(xq, 0, 1, 2, 3)));
x0 = _mm_xor_ps(_mm_shuffle1_ps1(xp, 1), _mm_set_ps(-0.f, -0.f, 0.f, 0.f));
r = _mm_add_ps(r, _mm_mul_ps(x0, _mm_shuffle1_ps(xq, 1, 0, 3, 2)));
x0 = _mm_xor_ps(glmm_shuff1x(xp, 1), _mm_set_ps(-0.f, -0.f, 0.f, 0.f));
r = _mm_add_ps(r, _mm_mul_ps(x0, glmm_shuff1(xq, 1, 0, 3, 2)));
x0 = _mm_xor_ps(_mm_shuffle1_ps1(xp, 2), _mm_set_ps(-0.f, 0.f, 0.f, -0.f));
r = _mm_add_ps(r, _mm_mul_ps(x0, _mm_shuffle1_ps(xq, 2, 3, 0, 1)));
x0 = _mm_xor_ps(glmm_shuff1x(xp, 2), _mm_set_ps(-0.f, 0.f, 0.f, -0.f));
r = _mm_add_ps(r, _mm_mul_ps(x0, glmm_shuff1(xq, 2, 3, 0, 1)));
_mm_store_ps(dest, r);
glmm_store(dest, r);
}
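For reference, the Hamilton product that the shuffles and sign masks above compute, written out per lane (x, y, z, w lane order as in cglm's versor; sketch only):

static void
quat_mul_ref(float p[4], float q[4], float dest[4]) {
  dest[0] = p[3] * q[0] + p[0] * q[3] + p[1] * q[2] - p[2] * q[1]; /* x */
  dest[1] = p[3] * q[1] - p[0] * q[2] + p[1] * q[3] + p[2] * q[0]; /* y */
  dest[2] = p[3] * q[2] + p[0] * q[1] - p[1] * q[0] + p[2] * q[3]; /* z */
  dest[3] = p[3] * q[3] - p[0] * q[0] - p[1] * q[1] - p[2] * q[2]; /* w */
}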

View File

@@ -9,23 +9,35 @@
#define cglm_types_h
#if defined(_MSC_VER)
# define CGLM_ALIGN(X) /* __declspec(align(X)) */
/* do not use alignment for older visual studio versions */
#if _MSC_VER < 1913 /* Visual Studio 2017 version 15.6 */
# define CGLM_ALL_UNALIGNED
# define CGLM_ALIGN(X) /* no alignment */
#else
# define CGLM_ALIGN(X) __declspec(align(X))
#endif
#else
# define CGLM_ALIGN(X) __attribute((aligned(X)))
#endif
typedef float vec2[2];
typedef float vec3[3];
typedef int ivec3[3];
typedef CGLM_ALIGN(16) float vec4[4];
#ifndef CGLM_ALL_UNALIGNED
# define CGLM_ALIGN_IF(X) CGLM_ALIGN(X)
#else
# define CGLM_ALIGN_IF(X) /* no alignment */
#endif
typedef vec3 mat3[3];
typedef vec4 mat4[4];
typedef float vec2[2];
typedef CGLM_ALIGN_IF(8) float vec3[3];
typedef int ivec3[3];
typedef CGLM_ALIGN_IF(16) float vec4[4];
typedef vec4 versor;
typedef vec3 mat3[3];
typedef CGLM_ALIGN_IF(16) vec4 mat4[4];
#define CGLM_PI (float)M_PI
#define CGLM_PI_2 (float)M_PI_2
#define CGLM_PI_4 (float)M_PI_4
typedef vec4 versor;
#define CGLM_PI ((float)M_PI)
#define CGLM_PI_2 ((float)M_PI_2)
#define CGLM_PI_4 ((float)M_PI_4)
#endif /* cglm_types_h */
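CGLM_ALIGN_IF and CGLM_ALL_UNALIGNED exist so the SIMD wrappers can drop the 16-byte alignment requirement. A minimal sketch of how glmm_load/glmm_store are presumably switched (an assumption consistent with these changes, not the verbatim header):

#include <xmmintrin.h>

#ifdef CGLM_ALL_UNALIGNED
#  define glmm_load(p)      _mm_loadu_ps(p)     /* no alignment needed */
#  define glmm_store(p, a)  _mm_storeu_ps(p, a)
#else
#  define glmm_load(p)      _mm_load_ps(p)      /* p 16-byte aligned   */
#  define glmm_store(p, a)  _mm_store_ps(p, a)
#endif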

View File

@@ -42,7 +42,7 @@ CGLM_INLINE
void
glm_vec4_mulv(vec4 a, vec4 b, vec4 d) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(d, _mm_mul_ps(_mm_load_ps(a), _mm_load_ps(b)));
glmm_store(d, _mm_mul_ps(glmm_load(a), glmm_load(b)));
#else
d[0] = a[0] * b[0];
d[1] = a[1] * b[1];
@@ -61,7 +61,7 @@ CGLM_INLINE
void
glm_vec4_broadcast(float val, vec4 d) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(d, _mm_set1_ps(val));
glmm_store(d, _mm_set1_ps(val));
#else
d[0] = d[1] = d[2] = d[3] = val;
#endif
@@ -223,14 +223,14 @@ glm_vec4_sign(vec4 v, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
__m128 x0, x1, x2, x3, x4;
x0 = _mm_load_ps(v);
x0 = glmm_load(v);
x1 = _mm_set_ps(0.0f, 0.0f, 1.0f, -1.0f);
x2 = _mm_shuffle1_ps1(x1, 2);
x2 = glmm_shuff1x(x1, 2);
x3 = _mm_and_ps(_mm_cmpgt_ps(x0, x2), _mm_shuffle1_ps1(x1, 1));
x4 = _mm_and_ps(_mm_cmplt_ps(x0, x2), _mm_shuffle1_ps1(x1, 0));
x3 = _mm_and_ps(_mm_cmpgt_ps(x0, x2), glmm_shuff1x(x1, 1));
x4 = _mm_and_ps(_mm_cmplt_ps(x0, x2), glmm_shuff1x(x1, 0));
_mm_store_ps(dest, _mm_or_ps(x3, x4));
glmm_store(dest, _mm_or_ps(x3, x4));
#else
dest[0] = glm_signf(v[0]);
dest[1] = glm_signf(v[1]);
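The compare/and/or sequence above is a branchless sign: _mm_cmpgt_ps and _mm_cmplt_ps produce all-ones masks where true, so ANDing them with 1.0f and -1.0f and ORing the results yields +1, -1, or 0. Scalar equivalent (sketch):

static float
sign_ref(float x) {
  return (float)(x > 0.0f) - (float)(x < 0.0f); /* +1, -1, or 0 */
}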
@@ -249,7 +249,7 @@ CGLM_INLINE
void
glm_vec4_sqrt(vec4 v, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(dest, _mm_sqrt_ps(_mm_load_ps(v)));
glmm_store(dest, _mm_sqrt_ps(glmm_load(v)));
#else
dest[0] = sqrtf(v[0]);
dest[1] = sqrtf(v[1]);

View File

@@ -111,7 +111,7 @@ CGLM_INLINE
void
glm_vec4_copy(vec4 v, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(dest, _mm_load_ps(v));
glmm_store(dest, glmm_load(v));
#else
dest[0] = v[0];
dest[1] = v[1];
@@ -129,7 +129,7 @@ CGLM_INLINE
void
glm_vec4_zero(vec4 v) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(v, _mm_setzero_ps());
glmm_store(v, _mm_setzero_ps());
#else
v[0] = 0.0f;
v[1] = 0.0f;
@@ -147,7 +147,7 @@ CGLM_INLINE
void
glm_vec4_one(vec4 v) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(v, _mm_set1_ps(1.0f));
glmm_store(v, _mm_set1_ps(1.0f));
#else
v[0] = 1.0f;
v[1] = 1.0f;
@@ -169,9 +169,9 @@ float
glm_vec4_dot(vec4 a, vec4 b) {
#if defined( __SSE__ ) || defined( __SSE2__ )
__m128 x0;
x0 = _mm_mul_ps(_mm_load_ps(a), _mm_load_ps(b));
x0 = _mm_add_ps(x0, _mm_shuffle1_ps(x0, 1, 0, 3, 2));
return _mm_cvtss_f32(_mm_add_ss(x0, _mm_shuffle1_ps(x0, 0, 1, 0, 1)));
x0 = _mm_mul_ps(glmm_load(a), glmm_load(b));
x0 = _mm_add_ps(x0, glmm_shuff1(x0, 1, 0, 3, 2));
return _mm_cvtss_f32(_mm_add_ss(x0, glmm_shuff1(x0, 0, 1, 0, 1)));
#else
return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
#endif
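The two shuffle/add steps form a horizontal reduction: the first add pairs lanes (0,2) and (1,3), the second brings the other pair sum into lane 0 so _mm_add_ss can finish. Scalar equivalent (sketch):

static float
vec4_dot_ref(float a[4], float b[4]) {
  float p0 = a[0] * b[0], p1 = a[1] * b[1];
  float p2 = a[2] * b[2], p3 = a[3] * b[3];
  return (p0 + p2) + (p1 + p3);  /* same association as the SIMD path */
}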
@@ -193,10 +193,10 @@ float
glm_vec4_norm2(vec4 v) {
#if defined( __SSE__ ) || defined( __SSE2__ )
__m128 x0;
x0 = _mm_load_ps(v);
x0 = glmm_load(v);
x0 = _mm_mul_ps(x0, x0);
x0 = _mm_add_ps(x0, _mm_shuffle1_ps(x0, 1, 0, 3, 2));
return _mm_cvtss_f32(_mm_add_ss(x0, _mm_shuffle1_ps(x0, 0, 1, 0, 1)));
x0 = _mm_add_ps(x0, glmm_shuff1(x0, 1, 0, 3, 2));
return _mm_cvtss_f32(_mm_add_ss(x0, glmm_shuff1(x0, 0, 1, 0, 1)));
#else
return v[0] * v[0] + v[1] * v[1] + v[2] * v[2] + v[3] * v[3];
#endif
@@ -214,8 +214,8 @@ float
glm_vec4_norm(vec4 vec) {
#if defined( __SSE__ ) || defined( __SSE2__ )
__m128 x0;
x0 = _mm_load_ps(vec);
return _mm_cvtss_f32(_mm_sqrt_ss(glm_simd_dot(x0, x0)));
x0 = glmm_load(vec);
return _mm_cvtss_f32(_mm_sqrt_ss(glmm_dot(x0, x0)));
#else
return sqrtf(glm_vec4_norm2(vec));
#endif
@@ -232,7 +232,7 @@ CGLM_INLINE
void
glm_vec4_add(vec4 a, vec4 b, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(dest, _mm_add_ps(_mm_load_ps(a), _mm_load_ps(b)));
glmm_store(dest, _mm_add_ps(glmm_load(a), glmm_load(b)));
#else
dest[0] = a[0] + b[0];
dest[1] = a[1] + b[1];
@@ -252,7 +252,7 @@ CGLM_INLINE
void
glm_vec4_adds(vec4 v, float s, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(dest, _mm_add_ps(_mm_load_ps(v), _mm_set1_ps(s)));
glmm_store(dest, _mm_add_ps(glmm_load(v), _mm_set1_ps(s)));
#else
dest[0] = v[0] + s;
dest[1] = v[1] + s;
@@ -272,7 +272,7 @@ CGLM_INLINE
void
glm_vec4_sub(vec4 a, vec4 b, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(dest, _mm_sub_ps(_mm_load_ps(a), _mm_load_ps(b)));
glmm_store(dest, _mm_sub_ps(glmm_load(a), glmm_load(b)));
#else
dest[0] = a[0] - b[0];
dest[1] = a[1] - b[1];
@@ -292,7 +292,7 @@ CGLM_INLINE
void
glm_vec4_subs(vec4 v, float s, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(dest, _mm_sub_ps(_mm_load_ps(v), _mm_set1_ps(s)));
glmm_store(dest, _mm_sub_ps(glmm_load(v), _mm_set1_ps(s)));
#else
dest[0] = v[0] - s;
dest[1] = v[1] - s;
@@ -312,7 +312,7 @@ CGLM_INLINE
void
glm_vec4_mul(vec4 a, vec4 b, vec4 d) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(d, _mm_mul_ps(_mm_load_ps(a), _mm_load_ps(b)));
glmm_store(d, _mm_mul_ps(glmm_load(a), glmm_load(b)));
#else
d[0] = a[0] * b[0];
d[1] = a[1] * b[1];
@@ -332,7 +332,7 @@ CGLM_INLINE
void
glm_vec4_scale(vec4 v, float s, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(dest, _mm_mul_ps(_mm_load_ps(v), _mm_set1_ps(s)));
glmm_store(dest, _mm_mul_ps(glmm_load(v), _mm_set1_ps(s)));
#else
dest[0] = v[0] * s;
dest[1] = v[1] * s;
@@ -373,7 +373,7 @@ CGLM_INLINE
void
glm_vec4_div(vec4 a, vec4 b, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(dest, _mm_div_ps(_mm_load_ps(a), _mm_load_ps(b)));
glmm_store(dest, _mm_div_ps(glmm_load(a), glmm_load(b)));
#else
dest[0] = a[0] / b[0];
dest[1] = a[1] / b[1];
@@ -393,7 +393,7 @@ CGLM_INLINE
void
glm_vec4_divs(vec4 v, float s, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(dest, _mm_div_ps(_mm_load_ps(v), _mm_set1_ps(s)));
glmm_store(dest, _mm_div_ps(glmm_load(v), _mm_set1_ps(s)));
#else
glm_vec4_scale(v, 1.0f / s, dest);
#endif
@@ -413,9 +413,9 @@ CGLM_INLINE
void
glm_vec4_addadd(vec4 a, vec4 b, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(dest, _mm_add_ps(_mm_load_ps(dest),
_mm_add_ps(_mm_load_ps(a),
_mm_load_ps(b))));
glmm_store(dest, _mm_add_ps(glmm_load(dest),
_mm_add_ps(glmm_load(a),
glmm_load(b))));
#else
dest[0] += a[0] + b[0];
dest[1] += a[1] + b[1];
@@ -437,9 +437,9 @@ CGLM_INLINE
void
glm_vec4_subadd(vec4 a, vec4 b, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(dest, _mm_add_ps(_mm_load_ps(dest),
_mm_sub_ps(_mm_load_ps(a),
_mm_load_ps(b))));
glmm_store(dest, _mm_add_ps(glmm_load(dest),
_mm_sub_ps(glmm_load(a),
glmm_load(b))));
#else
dest[0] += a[0] - b[0];
dest[1] += a[1] - b[1];
@@ -461,9 +461,9 @@ CGLM_INLINE
void
glm_vec4_muladd(vec4 a, vec4 b, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(dest, _mm_add_ps(_mm_load_ps(dest),
_mm_mul_ps(_mm_load_ps(a),
_mm_load_ps(b))));
glmm_store(dest, _mm_add_ps(glmm_load(dest),
_mm_mul_ps(glmm_load(a),
glmm_load(b))));
#else
dest[0] += a[0] * b[0];
dest[1] += a[1] * b[1];
@@ -485,9 +485,9 @@ CGLM_INLINE
void
glm_vec4_muladds(vec4 a, float s, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(dest, _mm_add_ps(_mm_load_ps(dest),
_mm_mul_ps(_mm_load_ps(a),
_mm_set1_ps(s))));
glmm_store(dest, _mm_add_ps(glmm_load(dest),
_mm_mul_ps(glmm_load(a),
_mm_set1_ps(s))));
#else
dest[0] += a[0] * s;
dest[1] += a[1] * s;
@@ -505,7 +505,7 @@ CGLM_INLINE
void
glm_vec4_flipsign(vec4 v) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(v, _mm_xor_ps(_mm_load_ps(v), _mm_set1_ps(-0.0f)));
glmm_store(v, _mm_xor_ps(glmm_load(v), _mm_set1_ps(-0.0f)));
#else
v[0] = -v[0];
v[1] = -v[1];
@@ -524,8 +524,7 @@ CGLM_INLINE
void
glm_vec4_flipsign_to(vec4 v, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(dest, _mm_xor_ps(_mm_load_ps(v),
_mm_set1_ps(-0.0f)));
glmm_store(dest, _mm_xor_ps(glmm_load(v), _mm_set1_ps(-0.0f)));
#else
dest[0] = -v[0];
dest[1] = -v[1];
@@ -571,16 +570,16 @@ glm_vec4_normalize_to(vec4 vec, vec4 dest) {
__m128 xdot, x0;
float dot;
x0 = _mm_load_ps(vec);
xdot = glm_simd_dot(x0, x0);
x0 = glmm_load(vec);
xdot = glmm_dot(x0, x0);
dot = _mm_cvtss_f32(xdot);
if (dot == 0.0f) {
_mm_store_ps(dest, _mm_setzero_ps());
glmm_store(dest, _mm_setzero_ps());
return;
}
_mm_store_ps(dest, _mm_div_ps(x0, _mm_sqrt_ps(xdot)));
glmm_store(dest, _mm_div_ps(x0, _mm_sqrt_ps(xdot)));
#else
float norm;
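The zero check guards the division: normalizing a zero-length vector would otherwise divide by sqrt(0), so dest is zeroed instead. Scalar outline (sketch, hypothetical name):

#include <math.h>

static void
vec4_normalize_ref(float v[4], float dest[4]) {
  float d, s;
  d = v[0] * v[0] + v[1] * v[1] + v[2] * v[2] + v[3] * v[3];
  if (d == 0.0f) {
    dest[0] = dest[1] = dest[2] = dest[3] = 0.0f;
    return;
  }
  s = 1.0f / sqrtf(d);
  dest[0] = v[0] * s; dest[1] = v[1] * s;
  dest[2] = v[2] * s; dest[3] = v[3] * s;
}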
@@ -633,7 +632,7 @@ CGLM_INLINE
void
glm_vec4_maxv(vec4 v1, vec4 v2, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(dest, _mm_max_ps(_mm_load_ps(v1), _mm_load_ps(v2)));
glmm_store(dest, _mm_max_ps(glmm_load(v1), glmm_load(v2)));
#else
dest[0] = glm_max(v1[0], v2[0]);
dest[1] = glm_max(v1[1], v2[1]);
@@ -653,7 +652,7 @@ CGLM_INLINE
void
glm_vec4_minv(vec4 v1, vec4 v2, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(dest, _mm_min_ps(_mm_load_ps(v1), _mm_load_ps(v2)));
glmm_store(dest, _mm_min_ps(glmm_load(v1), glmm_load(v2)));
#else
dest[0] = glm_min(v1[0], v2[0]);
dest[1] = glm_min(v1[1], v2[1]);
@@ -673,8 +672,8 @@ CGLM_INLINE
void
glm_vec4_clamp(vec4 v, float minVal, float maxVal) {
#if defined( __SSE__ ) || defined( __SSE2__ )
_mm_store_ps(v, _mm_min_ps(_mm_max_ps(_mm_load_ps(v), _mm_set1_ps(minVal)),
_mm_set1_ps(maxVal)));
glmm_store(v, _mm_min_ps(_mm_max_ps(glmm_load(v), _mm_set1_ps(minVal)),
_mm_set1_ps(maxVal)));
#else
v[0] = glm_clamp(v[0], minVal, maxVal);
v[1] = glm_clamp(v[1], minVal, maxVal);

View File

@@ -10,6 +10,6 @@
#define CGLM_VERSION_MAJOR 0
#define CGLM_VERSION_MINOR 4
#define CGLM_VERSION_PATCH 1
#define CGLM_VERSION_PATCH 5
#endif /* cglm_version_h */

View File

@@ -54,7 +54,8 @@ cglm_HEADERS = include/cglm/version.h \
include/cglm/plane.h \
include/cglm/frustum.h \
include/cglm/box.h \
include/cglm/color.h
include/cglm/color.h \
include/cglm/project.h
cglm_calldir=$(includedir)/cglm/call
cglm_call_HEADERS = include/cglm/call/mat4.h \
@@ -68,7 +69,8 @@ cglm_call_HEADERS = include/cglm/call/mat4.h \
include/cglm/call/euler.h \
include/cglm/call/plane.h \
include/cglm/call/frustum.h \
include/cglm/call/box.h
include/cglm/call/box.h \
include/cglm/call/project.h
cglm_simddir=$(includedir)/cglm/simd
cglm_simd_HEADERS = include/cglm/simd/intrin.h
@@ -111,7 +113,9 @@ test_tests_SOURCES=\
test/src/test_euler.c \
test/src/test_quat.c \
test/src/test_vec4.c \
test/src/test_vec3.c
test/src/test_vec3.c \
test/src/test_mat3.c \
test/src/test_affine.c
all-local:
sh ./post-build.sh

View File

@@ -8,6 +8,12 @@
#include "../include/cglm/cglm.h"
#include "../include/cglm/call.h"
CGLM_EXPORT
void
glmc_translate_make(mat4 m, vec3 v) {
glm_translate_make(m, v);
}
CGLM_EXPORT
void
glmc_translate_to(mat4 m, vec3 v, mat4 dest) {
@@ -38,6 +44,12 @@ glmc_translate_z(mat4 m, float to) {
glm_translate_z(m, to);
}
CGLM_EXPORT
void
glmc_scale_make(mat4 m, vec3 v) {
glm_scale_make(m, v);
}
CGLM_EXPORT
void
glmc_scale_to(mat4 m, vec3 v, mat4 dest) {
@@ -52,8 +64,8 @@ glmc_scale(mat4 m, vec3 v) {
CGLM_EXPORT
void
glmc_scale1(mat4 m, float s) {
glm_scale1(m, s);
glmc_scale_uni(mat4 m, float s) {
glm_scale_uni(m, s);
}
CGLM_EXPORT
@@ -74,36 +86,42 @@ glmc_rotate_z(mat4 m, float rad, mat4 dest) {
glm_rotate_z(m, rad, dest);
}
CGLM_EXPORT
void
glmc_rotate_ndc_make(mat4 m, float angle, vec3 axis_ndc) {
glm_rotate_ndc_make(m, angle, axis_ndc);
}
CGLM_EXPORT
void
glmc_rotate_make(mat4 m, float angle, vec3 axis) {
glm_rotate_make(m, angle, axis);
}
CGLM_EXPORT
void
glmc_rotate_ndc(mat4 m, float angle, vec3 axis_ndc) {
glm_rotate_ndc(m, angle, axis_ndc);
}
CGLM_EXPORT
void
glmc_rotate(mat4 m, float angle, vec3 axis) {
glm_rotate(m, angle, axis);
}
CGLM_EXPORT
void
glmc_rotate_at(mat4 m, vec3 pivot, float angle, vec3 axis) {
glm_rotate_at(m, pivot, angle, axis);
}
CGLM_EXPORT
void
glmc_rotate_atm(mat4 m, vec3 pivot, float angle, vec3 axis) {
glm_rotate_atm(m, pivot, angle, axis);
}
CGLM_EXPORT
void
glmc_decompose_scalev(mat4 m, vec3 s) {
glm_decompose_scalev(m, s);
}
CGLM_EXPORT
bool
glmc_uniscaled(mat4 m) {
return glm_uniscaled(m);
}
CGLM_EXPORT
void
glmc_decompose_rs(mat4 m, mat4 r, vec3 s) {

View File

@@ -194,3 +194,15 @@ void
glmc_quat_rotate(mat4 m, versor q, mat4 dest) {
glm_quat_rotate(m, q, dest);
}
CGLM_EXPORT
void
glmc_quat_rotate_at(mat4 model, versor q, vec3 pivot) {
glm_quat_rotate_at(model, q, pivot);
}
CGLM_EXPORT
void
glmc_quat_rotate_atm(mat4 m, versor q, vec3 pivot) {
glm_quat_rotate_atm(m, q, pivot);
}
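The glmc_ functions are thin exported wrappers over the inline glm_ API, for callers that link against the library rather than inline the headers. A usage sketch (the calling function is hypothetical):

#include <cglm/cglm.h>
#include <cglm/call.h>

static void
spin_model(mat4 model, versor q, vec3 pivot) {
  glmc_quat_rotate_at(model, q, pivot);  /* library call, not inline */
}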

test/src/test_affine.c Normal file
View File

@@ -0,0 +1,113 @@
/*
* Copyright (c), Recep Aslantas.
*
* MIT License (MIT), http://opensource.org/licenses/MIT
* Full license can be found in the LICENSE file
*/
#include "test_common.h"
void
test_affine(void **state) {
mat4 t1, t2, t3, t4, t5;
/* test translate is postmultiplied */
glmc_rotate_make(t1, M_PI_4, GLM_YUP);
glm_translate_make(t2, (vec3){34, 57, 36});
glmc_mat4_mul(t1, t2, t3); /* R * T */
glm_translate(t1, (vec3){34, 57, 36});
test_assert_mat4_eq(t1, t3);
/* test rotate is postmultiplied */
glmc_rotate_make(t1, M_PI_4, GLM_YUP);
glm_translate_make(t2, (vec3){34, 57, 36});
glmc_mat4_mul(t2, t1, t3); /* T * R */
glm_rotate(t2, M_PI_4, GLM_YUP);
test_assert_mat4_eq(t2, t3);
/* test scale is postmultiplied */
glmc_rotate_make(t1, M_PI_4, GLM_YUP);
glm_translate_make(t2, (vec3){34, 57, 36});
glm_scale_make(t4, (vec3){3, 5, 6});
glmc_mat4_mul(t2, t1, t3); /* T * R */
glmc_mat4_mul(t3, t4, t5); /* T * R * S */
glm_scale(t3, (vec3){3, 5, 6});
test_assert_mat4_eq(t3, t5);
/* test translate_x */
glmc_rotate_make(t1, M_PI_4, GLM_YUP);
glm_translate_make(t2, (vec3){34, 0, 0});
glmc_mat4_mul(t1, t2, t3); /* R * T */
glm_translate_x(t1, 34);
test_assert_mat4_eq(t1, t3);
/* test translate_y */
glmc_rotate_make(t1, M_PI_4, GLM_YUP);
glm_translate_make(t2, (vec3){0, 57, 0});
glmc_mat4_mul(t1, t2, t3); /* R * T */
glm_translate_y(t1, 57);
test_assert_mat4_eq(t1, t3);
/* test translate_z */
glmc_rotate_make(t1, M_PI_4, GLM_YUP);
glm_translate_make(t2, (vec3){0, 0, 36});
glmc_mat4_mul(t1, t2, t3); /* R * T */
glm_translate_z(t1, 36);
test_assert_mat4_eq(t1, t3);
/* test rotate_x */
glmc_rotate_make(t1, M_PI_4, (vec3){1, 0, 0});
glm_translate_make(t2, (vec3){34, 57, 36});
glmc_mat4_mul(t2, t1, t3); /* T * R */
glm_rotate_x(t2, M_PI_4, t2);
test_assert_mat4_eq(t2, t3);
/* test rotate_y */
glmc_rotate_make(t1, M_PI_4, (vec3){0, 1, 0});
glm_translate_make(t2, (vec3){34, 57, 36});
glmc_mat4_mul(t2, t1, t3); /* T * R */
glm_rotate_y(t2, M_PI_4, t2);
test_assert_mat4_eq(t2, t3);
/* test rotate_z */
glmc_rotate_make(t1, M_PI_4, (vec3){0, 0, 1});
glm_translate_make(t2, (vec3){34, 57, 36});
glmc_mat4_mul(t2, t1, t3); /* T * R */
glm_rotate_z(t2, M_PI_4, t2);
test_assert_mat4_eq(t2, t3);
/* test rotate */
glmc_rotate_make(t1, M_PI_4, (vec3){0, 0, 1});
glm_translate_make(t2, (vec3){34, 57, 36});
glmc_mat4_mul(t2, t1, t3); /* T * R */
glmc_rotate(t2, M_PI_4, (vec3){0, 0, 1});
test_assert_mat4_eq(t3, t2);
/* test scale_uni */
glmc_rotate_make(t1, M_PI_4, GLM_YUP);
glm_translate_make(t2, (vec3){34, 57, 36});
glm_scale_make(t4, (vec3){3, 3, 3});
glmc_mat4_mul(t2, t1, t3); /* T * R */
glmc_mat4_mul(t3, t4, t5); /* T * R * S */
glm_scale_uni(t3, 3);
test_assert_mat4_eq(t3, t5);
}

View File

@@ -27,6 +27,17 @@ test_rand_mat4(mat4 dest) {
/* glm_scale(dest, (vec3){drand48(), drand48(), drand48()}); */
}
void
test_rand_mat3(mat3 dest) {
mat4 m4;
srand((unsigned int)time(NULL));
/* random rotation around random axis with random angle */
glm_rotate_make(m4, drand48(), (vec3){drand48(), drand48(), drand48()});
glm_mat4_pick3(m4, dest);
}
void
test_rand_vec3(vec3 dest) {
srand((unsigned int)time(NULL));
@@ -84,6 +95,18 @@ test_assert_mat4_eq2(mat4 m1, mat4 m2, float eps) {
}
}
void
test_assert_mat3_eq(mat3 m1, mat3 m2) {
int i, j;
for (i = 0; i < 3; i++) {
for (j = 0; j < 3; j++)
assert_true(fabsf(m1[i][j] - m2[i][j]) <= 0.0000009);
}
}
void
test_assert_eqf(float a, float b) {
assert_true(fabsf(a - b) <= 0.000009); /* rounding errors */

View File

@@ -25,6 +25,9 @@
void
test_rand_mat4(mat4 dest);
void
test_rand_mat3(mat3 dest);
void
test_assert_eqf(float a, float b);
@@ -34,6 +37,9 @@ test_assert_mat4_eq(mat4 m1, mat4 m2);
void
test_assert_mat4_eq2(mat4 m1, mat4 m2, float eps);
void
test_assert_mat3_eq(mat3 m1, mat3 m2);
void
test_assert_vec3_eq(vec3 v1, vec3 v2);

View File

@@ -12,6 +12,9 @@ main(int argc, const char * argv[]) {
/* mat4 */
cmocka_unit_test(test_mat4),
/* mat3 */
cmocka_unit_test(test_mat3),
/* camera */
cmocka_unit_test(test_camera_lookat),
cmocka_unit_test(test_camera_decomp),
@@ -32,7 +35,10 @@ main(int argc, const char * argv[]) {
cmocka_unit_test(test_vec4),
/* vec3 */
cmocka_unit_test(test_vec3)
cmocka_unit_test(test_vec3),
/* affine */
cmocka_unit_test(test_affine)
};
return cmocka_run_group_tests(tests, NULL, NULL);

test/src/test_mat3.c Normal file
View File

@@ -0,0 +1,58 @@
/*
* Copyright (c), Recep Aslantas.
*
* MIT License (MIT), http://opensource.org/licenses/MIT
* Full license can be found in the LICENSE file
*/
#include "test_common.h"
#define m 3
#define n 3
void
test_mat3(void **state) {
mat3 m1 = GLM_MAT3_IDENTITY_INIT;
mat3 m2 = GLM_MAT3_IDENTITY_INIT;
mat3 m3;
mat3 m4 = GLM_MAT3_ZERO_INIT;
mat3 m5;
int i, j, k;
/* test identity matrix multiplication */
glmc_mat3_mul(m1, m2, m3);
for (i = 0; i < m; i++) {
for (j = 0; j < n; j++) {
if (i == j)
assert_true(m3[i][j] == 1.0f);
else
assert_true(m3[i][j] == 0.0f);
}
}
/* test random matrices */
test_rand_mat3(m1);
test_rand_mat3(m2);
glmc_mat3_mul(m1, m2, m3);
for (i = 0; i < m; i++) {
for (j = 0; j < n; j++) {
for (k = 0; k < m; k++)
/* column-major */
m4[i][j] += m1[k][j] * m2[i][k];
}
}
test_assert_mat3_eq(m3, m4);
for (i = 0; i < 100000; i++) {
test_rand_mat3(m3);
/* test inverse precise */
glmc_mat3_inv(m3, m4);
glmc_mat3_inv(m4, m5);
test_assert_mat3_eq(m3, m5);
}
}

View File

@@ -9,6 +9,9 @@
/* mat4 */
void test_mat4(void **state);
/* mat3 */
void test_mat3(void **state);
/* camera */
void
test_camera_lookat(void **state);
@@ -34,4 +37,7 @@ test_vec4(void **state);
void
test_vec3(void **state);
void
test_affine(void **state);
#endif /* test_tests_h */