Compare commits

...

39 Commits

Author SHA1 Message Date
Recep Aslantas
059bdfdd4b update docs 2018-05-27 11:54:05 +03:00
Recep Aslantas
ef0653640f update cocoapod version tag 2018-05-27 11:53:48 +03:00
Recep Aslantas
e5d61b3433 update mat4_mulv3 api to include translation 2018-05-27 11:46:27 +03:00
Recep Aslantas
73c073cf32 add missing call functions 2018-05-27 11:44:06 +03:00
Recep Aslantas
1362bef50f fix glm_translate_to 2018-05-23 23:13:41 +03:00
Recep Aslantas
7d783eeace align local variables on stack 2018-05-23 23:04:06 +03:00
Recep Aslantas
e12e79b1a5 improve scale_make 2018-05-23 22:11:44 +03:00
Recep Aslantas
6cd3d52dc5 improve translate_make 2018-05-23 22:08:12 +03:00
Recep Aslantas
fb2cac9816 aabb: center of AABB helper
* it is just wrapper of vec_center but it saves to access min and max values of AABB
2018-05-22 17:45:37 +03:00
Recep Aslantas
4e63325f55 aabb: add missing call versions 2018-05-22 17:44:36 +03:00
Recep Aslantas
96c3e604ff now working on v0.4.6 2018-05-22 17:43:46 +03:00
Recep Aslantas
077e304fc5 Merge pull request #42 from recp/optimizations
simd: optional shuffle configuration to save move instructions
2018-05-10 16:47:00 +03:00
Recep Aslantas
599524dacf docs: add new option to docs 2018-05-10 16:42:13 +03:00
Recep Aslantas
da5ad69863 simd: rename _mm_ extensions to glmm_ 2018-05-10 14:27:53 +03:00
Recep Aslantas
9fc2ead8ef Merge branch 'master' into optimizations 2018-05-10 13:59:10 +03:00
Recep Aslantas
48d33c16cb Merge pull request #53 from recp/simd
simd: Make alignment OPTIONAL
2018-05-10 13:57:31 +03:00
Recep Aslantas
464bd917d0 update readme 2018-05-10 12:21:33 +03:00
Recep Aslantas
c6d07bb6eb surround PI with parentheses + code style + update docs 2018-05-10 12:18:54 +03:00
Recep Aslantas
94b286f1f9 docs: add new alignment option to docs 2018-05-09 16:43:42 +03:00
Recep Aslantas
f774925e8a win, simd: make sure that CGLM_ALL_UNALIGNED is defined for older visual studios 2018-05-09 15:30:54 +03:00
Recep Aslantas
0e49e95161 win: update visual studio version for align requirement 2018-05-08 18:29:02 +03:00
Recep Aslantas
b277357800 update gitignore 2018-05-08 18:28:31 +03:00
Recep Aslantas
835cec2ccb drop alignment requirement if CGLM_ALL_UNALIGNED defined
* bring alignment back for visual studio 2017
2018-05-08 16:26:33 +03:00
Recep Aslantas
5dbbd0826d simd: replace glm_simd_ with glmm_
* now glmm_ is used as global simd namescape
2018-05-08 15:55:36 +03:00
Recep Aslantas
56f0bb0928 simd, avx: make alignment optional for load/store operations 2018-05-08 15:35:17 +03:00
Recep Aslantas
568001d26a simd, sse2: make alignment optional for store operations 2018-05-08 15:31:09 +03:00
Recep Aslantas
252bf925fc simd, sse2: make alignment optional for load operations 2018-05-08 15:25:23 +03:00
Recep Aslantas
0f339c5c03 fix header dependencies 2018-05-07 21:12:29 +03:00
Recep Aslantas
a9d56f2dae docs: fix typos 2018-05-04 00:50:56 +03:00
Recep Aslantas
dd60496ffc Merge pull request #49 from Yatima1460/master
replace _WIN32 with _MSC_VER
2018-04-30 19:08:59 +03:00
Federico Santamorena
7c0e9e99c6 _WIN32 to _MSC_VER 2018-04-30 17:17:06 +02:00
Federico Santamorena
064209c917 replaced _WIN32 with _MSC_VER 2018-04-30 17:13:16 +02:00
Recep Aslantas
94d6036c38 suppress warnings for Mingw 2018-04-30 11:09:42 +03:00
Recep Aslantas
6c01eff056 now working on v0.4.5 2018-04-30 10:59:40 +03:00
Recep Aslantas
ada69a7c43 fix cocoapods validation errors 2018-04-22 10:14:17 +03:00
Recep Aslantas
cef97fca3e add cocoapods spec 2018-04-22 01:03:17 +03:00
Recep Aslantas
498a33fac5 fix public header's includes 2018-04-21 22:36:25 +03:00
Recep Aslantas
3c7a729729 build: remove making symbolic link for libtoolize 2018-04-20 15:19:06 +03:00
Recep Aslantas
cfd3600107 simd: optional shuffle configuration to save move instructions 2018-04-04 22:42:21 +03:00
48 changed files with 912 additions and 453 deletions

.gitignore (vendored)

@@ -61,3 +61,11 @@ docs/build/*
 win/cglm_test_*
 * copy.*
 *.o
+*.obj
+*codeanalysis.*.xml
+*codeanalysis.xml
+*.lib
+*.tlog
+win/x64
+win/x85
+win/Debug

README.md

@@ -22,6 +22,8 @@ Complete documentation: http://cglm.readthedocs.io
 - **[bugfix]** euler angles was implemented in reverse order (extrinsic) it was fixed, now they are intrinsic. Make sure that
   you have the latest version
 - **[major change]** by starting v0.4.0, quaternions are stored as [x, y, z, w], it was [w, x, y, z] in v0.3.5 and earlier versions
+- **[api rename]** by starting v0.4.5, **glm_simd** functions are renamed to **glmm_**
+- **[new option]** by starting v0.4.5, you can disable alignment requirement, check options in docs.
 
 #### Note for C++ developers:
 If you don't aware about original GLM library yet, you may also want to look at:


@@ -9,19 +9,8 @@
 # check if deps are pulled
 git submodule update --init --recursive
 
-# fix glibtoolize
 cd $(dirname "$0")
-if [ "$(uname)" = "Darwin" ]; then
-  libtoolBin=$(which glibtoolize)
-  libtoolBinDir=$(dirname "${libtoolBin}")
-
-  if [ ! -f "${libtoolBinDir}/libtoolize" ]; then
-    ln -s $libtoolBin "${libtoolBinDir}/libtoolize"
-  fi
-fi
-
 # general deps: gcc make autoconf automake libtool cmake
 # test - cmocka

cglm.podspec (new file)

@@ -0,0 +1,28 @@
+Pod::Spec.new do |s|
+
+  # Description
+  s.name         = "cglm"
+  s.version      = "0.4.6"
+  s.summary      = "📽 Optimized OpenGL/Graphics Math (glm) for C"
+  s.description  = <<-DESC
+cglm is math library for graphics programming for C. It is similar to original glm but it is written for C instead of C++ (you can use here too). See the documentation or README for all features.
+DESC
+  s.documentation_url = "http://cglm.readthedocs.io"
+
+  # Home
+  s.homepage     = "https://github.com/recp/cglm"
+  s.license      = { :type => "MIT", :file => "LICENSE" }
+  s.author       = { "Recep Aslantas" => "recp@acm.org" }
+
+  # Sources
+  s.source       = { :git => "https://github.com/recp/cglm.git", :tag => "v#{s.version}" }
+  s.source_files = "src", "include/cglm/**/*.h"
+  s.public_header_files = "include", "include/cglm/**/*.h"
+  s.exclude_files = "src/win/*", "src/dllmain.c", "src/**/*.h"
+  s.preserve_paths = "include", "src"
+  s.header_mappings_dir = "include"
+
+  # Linking
+  s.library = "m"
+end

configure.ac

@@ -7,7 +7,7 @@
 #*****************************************************************************
 
 AC_PREREQ([2.69])
-AC_INIT([cglm], [0.4.3], [info@recp.me])
+AC_INIT([cglm], [0.4.6], [info@recp.me])
 
 AM_INIT_AUTOMAKE([-Wall -Werror foreign subdir-objects])
 AC_CONFIG_MACRO_DIR([m4])


@@ -28,6 +28,7 @@ Functions:
 #. :c:func:`glm_aabb_isvalid`
 #. :c:func:`glm_aabb_size`
 #. :c:func:`glm_aabb_radius`
+#. :c:func:`glm_aabb_center`
 
 Functions documentation
 ~~~~~~~~~~~~~~~~~~~~~~~
@@ -131,3 +132,11 @@ Functions documentation
     Parameters:
       | *[in]*  **box**  bounding box
 
+.. c:function:: void glm_aabb_center(vec3 box[2], vec3 dest)
+
+    computes center point of AABB
+
+    Parameters:
+      | *[in]*  **box**   bounding box
+      | *[out]* **dest**  center of bounding box

docs/source/conf.py

@@ -62,9 +62,9 @@ author = u'Recep Aslantas'
 # built documents.
 #
 # The short X.Y version.
-version = u'0.4.3'
+version = u'0.4.6'
 # The full version, including alpha/beta/rc tags.
-release = u'0.4.3'
+release = u'0.4.6'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.


@@ -21,17 +21,24 @@ Types:
 As you can see types don't store extra informations in favor of space.
 You can send these values e.g. matrix to OpenGL directly without casting or calling a function like *value_ptr*
 
-Aligment is Required:
+Alignment is Required:
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-**vec4** and **mat4** requires 16 byte aligment because vec4 and mat4 operations are
+**vec4** and **mat4** requires 16 byte alignment because vec4 and mat4 operations are
 vectorized by SIMD instructions (SSE/AVX).
 
+**UPDATE:**
+  By starting v0.4.5 cglm provides an option to disable alignment requirement, it is enabled as default
+
+  | Check :doc:`opt` page for more details
+
+Also alignment is disabled for older msvc verisons as default. Now alignment is only required in Visual Studio 2017 version 15.6+ if CGLM_ALL_UNALIGNED macro is not defined.
+
 Allocations:
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 *cglm* doesn't alloc any memory on heap. So it doesn't provide any allocator.
 You must allocate memory yourself. You should alloc memory for out parameters too if you pass pointer of memory location.
-When allocating memory don't forget that **vec4** and **mat4** requires aligment.
+When allocating memory don't forget that **vec4** and **mat4** requires alignment.
 
 **NOTE:** Unaligned vec4 and unaligned mat4 operations will be supported in the future. Check todo list.
 Because you may want to multiply a CGLM matrix with external matrix.
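For reference (not part of the diff): a minimal sketch of heap-allocating a mat4 under the default, aligned configuration. It uses C11 aligned_alloc; on MSVC the analogue would be _aligned_malloc/_aligned_free.

```c
#include <cglm/cglm.h>
#include <stdlib.h>

int main(void) {
  /* plain malloc does not guarantee the 16-byte alignment that vec4/mat4
     need when SIMD is enabled, so request it explicitly (C11) */
  mat4 *m = aligned_alloc(16, sizeof(mat4));
  if (!m)
    return 1;

  glm_mat4_identity(*m);
  glm_scale_uni(*m, 2.0f);

  free(m);
  return 0;
}
```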

docs/source/index.rst

@@ -40,6 +40,7 @@ Also currently only **float** type is supported for most operations.
    getting_started
    opengl
    api
+   opt
    troubleshooting
 
 Indices and tables

docs/source/mat3.rst

@@ -24,6 +24,7 @@ Functions:
 #. :c:func:`glm_mat3_transpose_to`
 #. :c:func:`glm_mat3_transpose`
 #. :c:func:`glm_mat3_mulv`
+#. :c:func:`glm_mat3_quat`
 #. :c:func:`glm_mat3_scale`
 #. :c:func:`glm_mat3_det`
 #. :c:func:`glm_mat3_inv`
@@ -89,6 +90,14 @@ Functions documentation
   | *[in]*  **v**     vec3 (right, column vector)
   | *[out]* **dest**  destination (result, column vector)
 
+.. c:function:: void glm_mat3_quat(mat3 m, versor dest)
+
+    convert mat3 to quaternion
+
+    Parameters:
+      | *[in]*  **m**     rotation matrix
+      | *[out]* **dest**  destination quaternion
+
 .. c:function:: void glm_mat3_scale(mat3 m, float s)
 
     multiply matrix with scalar
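A quick usage sketch (not from the repo) for the newly documented glm_mat3_quat; it assumes glm_mat4_pick3 from the existing mat4 API to grab the 3x3 rotation part:

```c
#include <cglm/cglm.h>

int main(void) {
  mat4   rot4;
  mat3   rot3;
  versor q;
  vec3   axis = {0.0f, 0.0f, 1.0f};

  glm_rotate_make(rot4, glm_rad(45.0f), axis); /* 45 degrees about Z */
  glm_mat4_pick3(rot4, rot3);                  /* copy upper-left 3x3 */
  glm_mat3_quat(rot3, q);                      /* back to a quaternion */
  return 0;
}
```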

docs/source/mat4.rst

@@ -32,6 +32,7 @@ Functions:
 #. :c:func:`glm_mat4_mulN`
 #. :c:func:`glm_mat4_mulv`
 #. :c:func:`glm_mat4_mulv3`
+#. :c:func:`glm_mat4_quat`
 #. :c:func:`glm_mat4_transpose_to`
 #. :c:func:`glm_mat4_transpose`
 #. :c:func:`glm_mat4_scale_p`
@@ -146,6 +147,14 @@ Functions documentation
   | *[in]*  **v**     vec3 (right, column vector)
   | *[out]* **dest**  vec3 (result, column vector)
 
+.. c:function:: void glm_mat4_quat(mat4 m, versor dest)
+
+    convert mat4's rotation part to quaternion
+
+    Parameters:
+      | *[in]*  **m**     affine matrix
+      | *[out]* **dest**  destination quaternion
+
 .. c:function:: void glm_mat4_transpose_to(mat4 m, mat4 dest)
 
     transpose mat4 and store in dest
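And a small sketch (not from the repo) of extracting orientation from an affine transform with glm_mat4_quat:

```c
#include <cglm/cglm.h>

int main(void) {
  mat4   m = GLM_MAT4_IDENTITY_INIT;
  versor q;
  vec3   axis = {0.0f, 1.0f, 0.0f};

  glm_rotate(m, glm_rad(30.0f), axis);
  glm_translate(m, (vec3){5.0f, 0.0f, 0.0f});

  glm_mat4_quat(m, q); /* translation is ignored; rotation part only */
  return 0;
}
```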

docs/source/opt.rst (new file)

@@ -0,0 +1,42 @@
+.. default-domain:: C
+
+Options
+===============================================================================
+
+A few options are provided via macros.
+
+Alignment Option
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+As default, cglm requires types to be aligned. Alignment requirements:
+
+vec3:   8 byte
+vec4:   16 byte
+mat4:   16 byte
+versor: 16 byte
+
+By starting **v0.4.5** cglm provides an option to disable alignment requirement.
+To enable this option define **CGLM_ALL_UNALIGNED** macro before all headers.
+You can define it in Xcode, Visual Studio (or other IDEs) or you can also prefer
+to define it in build system. If you use pre-compiled verisons then you
+have to compile cglm with **CGLM_ALL_UNALIGNED** macro.
+
+**VERY VERY IMPORTANT:** If you use cglm in multiple projects and
+those projects are depends on each other, then
+
+| *ALWAYS* or *NEVER USE* **CGLM_ALL_UNALIGNED** macro in linked projects
+
+if you do not know what you are doing. Because a cglm header included
+via 'project A' may force types to be aligned and another cglm header
+included via 'project B' may not require alignment. In this case
+cglm functions will read from and write to **INVALID MEMORY LOCATIONs**.
+
+ALWAYS USE SAME CONFIGURATION / OPTION for **cglm** if you have multiple projects.
+For instance if you set CGLM_ALL_UNALIGNED in a project then set it in other projects too
+
+SSE and SSE2 Shuffle Option
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**_mm_shuffle_ps** generates **shufps** instruction even if registers are same.
+You can force it to generate **pshufd** instruction by defining
+**CGLM_USE_INT_DOMAIN** macro. As default it is not defined.
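A minimal sketch (not part of the diff) of enabling the alignment option; the file name demo.c is hypothetical, and the macro only has to be visible before the first cglm include, and defined the same way in every project you link together:

```c
/* build with: cc -DCGLM_ALL_UNALIGNED demo.c
   or define it in source before any cglm header: */
#define CGLM_ALL_UNALIGNED
#include <cglm/cglm.h>

int main(void) {
  mat4 a = GLM_MAT4_IDENTITY_INIT, b = GLM_MAT4_IDENTITY_INIT, out;
  /* loads/stores now use the unaligned SIMD forms, so these matrices
     may live at any address (e.g. in buffers from another library) */
  glm_mat4_mul(a, b, out);
  return 0;
}
```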

docs/source/troubleshooting.rst

@@ -15,10 +15,10 @@ makes calculations then copy the result to dest.
 You are responsible for allocation of **src** and **dest** parameters.
 
-Aligment:
+Alignment:
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-**vec4** and **mat4** types requires 16 byte aligment aligment.
+**vec4** and **mat4** types requires 16 byte alignment.
 These types are marked with align attribute to let compiler know about this
 requirement.
 
@@ -26,7 +26,7 @@ But since MSVC (Windows) throws the error:
 **"formal parameter with requested alignment of 16 won't be aligned"**
 
-The aligment attribute has been commented for MSVC
+The alignment attribute has been commented for MSVC
 
 .. code-block:: c
 
@@ -36,13 +36,16 @@ The aligment attribute has been commented for MSVC
 #  define CGLM_ALIGN(X) __attribute((aligned(X)))
 #endif.
 
-So MSVC may not know about aligment requirements when creating variables.
+So MSVC may not know about alignment requirements when creating variables.
 The interesting thing is that, if I remember correctly Visual Studio 2017
 doesn't throw the above error. So we may uncomment that line for Visual Studio 2017,
 you may do it yourself.
 
 **This MSVC issue is still in TODOs.**
 
+**UPDATE:** By starting v0.4.5 cglm provides an option to disable alignment requirement.
+Also alignment is disabled for older msvc verisons as default. Now alignment is only required in Visual Studio 2017 version 15.6+ if CGLM_ALL_UNALIGNED macro is not defined.
+
 Crashes, Invalid Memory Access:
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


@@ -16,6 +16,7 @@
 
 #include "common.h"
 #include "mat4.h"
+#include "mat3.h"
 
 #ifdef CGLM_SSE_FP
 #  include "simd/sse2/affine.h"

include/cglm/affine.h

@@ -34,57 +34,16 @@
 #define cglm_affine_h
 
 #include "common.h"
-#include "vec4.h"
-#include "affine-mat.h"
 #include "util.h"
+#include "vec3.h"
+#include "vec4.h"
 #include "mat4.h"
+#include "affine-mat.h"
 
 CGLM_INLINE
 void
 glm_mat4_mul(mat4 m1, mat4 m2, mat4 dest);
 
-/*!
- * @brief translate existing transform matrix by v vector
- *        and store result in dest
- *
- * @param[in]  m    affine transfrom
- * @param[in]  v    translate vector [x, y, z]
- * @param[out] dest translated matrix
- */
-CGLM_INLINE
-void
-glm_translate_to(mat4 m, vec3 v, mat4 dest) {
-  mat4 t = GLM_MAT4_IDENTITY_INIT;
-#if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(dest[3],
-               _mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_load_ps(t[0]),
-                                                _mm_set1_ps(v[0])),
-                                     _mm_mul_ps(_mm_load_ps(t[1]),
-                                                _mm_set1_ps(v[1]))),
-                          _mm_add_ps(_mm_mul_ps(_mm_load_ps(t[2]),
-                                                _mm_set1_ps(v[2])),
-                                     _mm_load_ps(t[3]))))
-  ;
-  _mm_store_ps(dest[0], _mm_load_ps(m[0]));
-  _mm_store_ps(dest[1], _mm_load_ps(m[1]));
-  _mm_store_ps(dest[2], _mm_load_ps(m[2]));
-#else
-  vec4 v1, v2, v3;
-
-  glm_vec4_scale(t[0], v[0], v1);
-  glm_vec4_scale(t[1], v[1], v2);
-  glm_vec4_scale(t[2], v[2], v3);
-
-  glm_vec4_add(v1, t[3], t[3]);
-  glm_vec4_add(v2, t[3], t[3]);
-  glm_vec4_add(v3, t[3], t[3]);
-
-  glm__memcpy(float, dest, t, sizeof(mat4));
-#endif
-}
-
 /*!
  * @brief translate existing transform matrix by v vector
  *        and stores result in same matrix
@@ -96,14 +55,14 @@ CGLM_INLINE
 void
 glm_translate(mat4 m, vec3 v) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(m[3],
-               _mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_load_ps(m[0]),
-                                                _mm_set1_ps(v[0])),
-                                     _mm_mul_ps(_mm_load_ps(m[1]),
-                                                _mm_set1_ps(v[1]))),
-                          _mm_add_ps(_mm_mul_ps(_mm_load_ps(m[2]),
-                                                _mm_set1_ps(v[2])),
-                                     _mm_load_ps(m[3]))))
+  glmm_store(m[3],
+             _mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_load(m[0]),
+                                              _mm_set1_ps(v[0])),
+                                   _mm_mul_ps(glmm_load(m[1]),
+                                              _mm_set1_ps(v[1]))),
+                        _mm_add_ps(_mm_mul_ps(glmm_load(m[2]),
+                                              _mm_set1_ps(v[2])),
+                                   glmm_load(m[3]))))
   ;
 #else
   vec4 v1, v2, v3;
@@ -118,6 +77,23 @@ glm_translate(mat4 m, vec3 v) {
 #endif
 }
 
+/*!
+ * @brief translate existing transform matrix by v vector
+ *        and store result in dest
+ *
+ * source matrix will remain same
+ *
+ * @param[in]  m    affine transfrom
+ * @param[in]  v    translate vector [x, y, z]
+ * @param[out] dest translated matrix
+ */
+CGLM_INLINE
+void
+glm_translate_to(mat4 m, vec3 v, mat4 dest) {
+  glm_mat4_copy(m, dest);
+  glm_translate(dest, v);
+}
+
 /*!
  * @brief translate existing transform matrix by x factor
  *
@@ -128,10 +104,10 @@ CGLM_INLINE
 void
 glm_translate_x(mat4 m, float x) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(m[3],
-               _mm_add_ps(_mm_mul_ps(_mm_load_ps(m[0]),
-                                     _mm_set1_ps(x)),
-                          _mm_load_ps(m[3])))
+  glmm_store(m[3],
+             _mm_add_ps(_mm_mul_ps(glmm_load(m[0]),
+                                   _mm_set1_ps(x)),
+                        glmm_load(m[3])))
   ;
 #else
   vec4 v1;
@@ -150,10 +126,10 @@ CGLM_INLINE
 void
 glm_translate_y(mat4 m, float y) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(m[3],
-               _mm_add_ps(_mm_mul_ps(_mm_load_ps(m[1]),
-                                     _mm_set1_ps(y)),
-                          _mm_load_ps(m[3])))
+  glmm_store(m[3],
+             _mm_add_ps(_mm_mul_ps(glmm_load(m[1]),
+                                   _mm_set1_ps(y)),
+                        glmm_load(m[3])))
   ;
 #else
   vec4 v1;
@@ -172,10 +148,10 @@ CGLM_INLINE
 void
 glm_translate_z(mat4 m, float z) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(m[3],
-               _mm_add_ps(_mm_mul_ps(_mm_load_ps(m[2]),
-                                     _mm_set1_ps(z)),
-                          _mm_load_ps(m[3])))
+  glmm_store(m[3],
+             _mm_add_ps(_mm_mul_ps(glmm_load(m[2]),
+                                   _mm_set1_ps(z)),
+                        glmm_load(m[3])))
   ;
 #else
   vec4 v1;
@@ -193,8 +169,8 @@ glm_translate_z(mat4 m, float z) {
 CGLM_INLINE
 void
 glm_translate_make(mat4 m, vec3 v) {
-  mat4 t = GLM_MAT4_IDENTITY_INIT;
-  glm_translate_to(t, v, m);
+  glm_mat4_identity(m);
+  glm_vec_copy(v, m[3]);
 }
 
@@ -224,8 +200,10 @@ glm_scale_to(mat4 m, vec3 v, mat4 dest) {
 CGLM_INLINE
 void
 glm_scale_make(mat4 m, vec3 v) {
-  mat4 t = GLM_MAT4_IDENTITY_INIT;
-  glm_scale_to(t, v, m);
+  glm_mat4_identity(m);
+  m[0][0] = v[0];
+  m[1][1] = v[1];
+  m[2][2] = v[2];
 }
 
@@ -251,7 +229,7 @@ glm_scale(mat4 m, vec3 v) {
 CGLM_INLINE
 void
 glm_scale_uni(mat4 m, float s) {
-  vec3 v = { s, s, s };
+  CGLM_ALIGN(8) vec3 v = { s, s, s };
   glm_scale_to(m, v, m);
 }
 
@@ -266,7 +244,7 @@ glm_scale_uni(mat4 m, float s) {
 CGLM_INLINE
 void
 glm_rotate_x(mat4 m, float angle, mat4 dest) {
-  mat4 t = GLM_MAT4_IDENTITY_INIT;
+  CGLM_ALIGN(16) mat4 t = GLM_MAT4_IDENTITY_INIT;
   float c, s;
 
   c = cosf(angle);
@@ -291,7 +269,7 @@ glm_rotate_x(mat4 m, float angle, mat4 dest) {
 CGLM_INLINE
 void
 glm_rotate_y(mat4 m, float angle, mat4 dest) {
-  mat4 t = GLM_MAT4_IDENTITY_INIT;
+  CGLM_ALIGN(16) mat4 t = GLM_MAT4_IDENTITY_INIT;
   float c, s;
 
   c = cosf(angle);
@@ -316,7 +294,7 @@ glm_rotate_y(mat4 m, float angle, mat4 dest) {
 CGLM_INLINE
 void
 glm_rotate_z(mat4 m, float angle, mat4 dest) {
-  mat4 t = GLM_MAT4_IDENTITY_INIT;
+  CGLM_ALIGN(16) mat4 t = GLM_MAT4_IDENTITY_INIT;
   float c, s;
 
   c = cosf(angle);
@@ -342,7 +320,7 @@ glm_rotate_z(mat4 m, float angle, mat4 dest) {
 CGLM_INLINE
 void
 glm_rotate_make(mat4 m, float angle, vec3 axis) {
-  vec3 axisn, v, vs;
+  CGLM_ALIGN(8) vec3 axisn, v, vs;
   float c;
 
   c = cosf(angle);
@@ -373,7 +351,7 @@ glm_rotate_make(mat4 m, float angle, vec3 axis) {
 CGLM_INLINE
 void
 glm_rotate(mat4 m, float angle, vec3 axis) {
-  mat4 rot;
+  CGLM_ALIGN(16) mat4 rot;
   glm_rotate_make(rot, angle, axis);
   glm_mul_rot(m, rot, m);
 }
@@ -390,7 +368,7 @@ glm_rotate(mat4 m, float angle, vec3 axis) {
 CGLM_INLINE
 void
 glm_rotate_at(mat4 m, vec3 pivot, float angle, vec3 axis) {
-  vec3 pivotInv;
+  CGLM_ALIGN(8) vec3 pivotInv;
 
   glm_vec_inv_to(pivot, pivotInv);
@@ -415,12 +393,11 @@ glm_rotate_at(mat4 m, vec3 pivot, float angle, vec3 axis) {
 CGLM_INLINE
 void
 glm_rotate_atm(mat4 m, vec3 pivot, float angle, vec3 axis) {
-  vec3 pivotInv;
+  CGLM_ALIGN(8) vec3 pivotInv;
 
   glm_vec_inv_to(pivot, pivotInv);
 
-  glm_mat4_identity(m);
-  glm_vec_copy(pivot, m[3]);
+  glm_translate_make(m, pivot);
   glm_rotate(m, angle, axis);
   glm_translate(m, pivotInv);
 }
@@ -450,9 +427,8 @@ glm_decompose_scalev(mat4 m, vec3 s) {
 CGLM_INLINE
 bool
 glm_uniscaled(mat4 m) {
-  vec3 s;
-
+  CGLM_ALIGN(8) vec3 s;
   glm_decompose_scalev(m, s);
   return glm_vec_eq_all(s);
 }
@@ -467,8 +443,8 @@ glm_uniscaled(mat4 m) {
 CGLM_INLINE
 void
 glm_decompose_rs(mat4 m, mat4 r, vec3 s) {
-  vec4 t = {0.0f, 0.0f, 0.0f, 1.0f};
-  vec3 v;
+  CGLM_ALIGN(16) vec4 t = {0.0f, 0.0f, 0.0f, 1.0f};
+  CGLM_ALIGN(8)  vec3 v;
 
   glm_vec4_copy(m[0], r[0]);
   glm_vec4_copy(m[1], r[1]);
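A short sketch (not from the repo) of the reworked translate helpers: the *_make variant builds a matrix from scratch, while glm_translate_to now leaves its source untouched:

```c
#include <cglm/cglm.h>

int main(void) {
  mat4 t, moved;
  vec3 v = {1.0f, 2.0f, 3.0f};

  glm_translate_make(t, v);      /* t = pure translation by v */
  glm_translate_to(t, v, moved); /* moved = t translated again; t intact */
  return 0;
}
```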

include/cglm/box.h

@@ -11,6 +11,7 @@
 #include "common.h"
 #include "vec3.h"
 #include "vec4.h"
+#include "util.h"
 
 /*!
  * @brief apply transform to Axis-Aligned Bounding Box
@@ -199,4 +200,16 @@ glm_aabb_radius(vec3 box[2]) {
   return glm_aabb_size(box) * 0.5f;
 }
 
+/*!
+ * @brief computes center point of AABB
+ *
+ * @param[in]  box  bounding box
+ * @param[out] dest center of bounding box
+ */
+CGLM_INLINE
+void
+glm_aabb_center(vec3 box[2], vec3 dest) {
+  glm_vec_center(box[0], box[1], dest);
+}
+
 #endif /* cglm_box_h */
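Usage sketch (not part of the diff) for the new helper:

```c
#include <cglm/cglm.h>

int main(void) {
  vec3 box[2] = {{-1.0f, -2.0f, -3.0f},   /* min */
                 { 3.0f,  4.0f,  5.0f}};  /* max */
  vec3 c;

  glm_aabb_center(box, c); /* c = (min + max) * 0.5 = {1, 1, 1} */
  return 0;
}
```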

include/cglm/call/affine.h

@@ -97,6 +97,20 @@ CGLM_EXPORT
 void
 glmc_decompose(mat4 m, vec4 t, mat4 r, vec3 s);
 
+/* affine-mat */
+CGLM_EXPORT
+void
+glmc_mul(mat4 m1, mat4 m2, mat4 dest);
+
+CGLM_EXPORT
+void
+glmc_mul_rot(mat4 m1, mat4 m2, mat4 dest);
+
+CGLM_EXPORT
+void
+glmc_inv_tr(mat4 mat);
+
 #ifdef __cplusplus
 }
 #endif
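A sketch (not from the repo) of calling the compiled, non-inline variants; it assumes the call header is included as <cglm/call.h> and the program is linked against the built library (e.g. -lcglm):

```c
#include <cglm/call.h>  /* glmc_*: pre-compiled counterparts of glm_* */

int main(void) {
  mat4 a = GLM_MAT4_IDENTITY_INIT, b = GLM_MAT4_IDENTITY_INIT, dest;
  glmc_mul(a, b, dest);  /* same semantics as the inline glm_mul */
  return 0;
}
```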

include/cglm/call/box.h

@@ -32,6 +32,30 @@ glmc_aabb_crop_until(vec3 box[2],
                      vec3 clampBox[2],
                      vec3 dest[2]);
 
+CGLM_EXPORT
+bool
+glmc_aabb_frustum(vec3 box[2], vec4 planes[6]);
+
+CGLM_EXPORT
+void
+glmc_aabb_invalidate(vec3 box[2]);
+
+CGLM_EXPORT
+bool
+glmc_aabb_isvalid(vec3 box[2]);
+
+CGLM_EXPORT
+float
+glmc_aabb_size(vec3 box[2]);
+
+CGLM_EXPORT
+float
+glmc_aabb_radius(vec3 box[2]);
+
+CGLM_EXPORT
+void
+glmc_aabb_center(vec3 box[2], vec3 dest);
+
 #ifdef __cplusplus
 }
 #endif

include/cglm/call/cam.h

@@ -33,6 +33,26 @@ glmc_ortho(float left,
            float farVal,
            mat4  dest);
 
+CGLM_EXPORT
+void
+glmc_ortho_aabb(vec3 box[2], mat4 dest);
+
+CGLM_EXPORT
+void
+glmc_ortho_aabb_p(vec3 box[2], float padding, mat4 dest);
+
+CGLM_EXPORT
+void
+glmc_ortho_aabb_pz(vec3 box[2], float padding, mat4 dest);
+
+CGLM_EXPORT
+void
+glmc_ortho_default(float aspect, mat4 dest);
+
+CGLM_EXPORT
+void
+glmc_ortho_default_s(float aspect, float size, mat4 dest);
+
 CGLM_EXPORT
 void
 glmc_perspective(float fovy,
@@ -41,6 +61,14 @@ glmc_perspective(float fovy,
                  float farVal,
                  mat4  dest);
 
+CGLM_EXPORT
+void
+glmc_perspective_default(float aspect, mat4 dest);
+
+CGLM_EXPORT
+void
+glmc_perspective_resize(float aspect, mat4 proj);
+
 CGLM_EXPORT
 void
 glmc_lookat(vec3 eye, vec3 center, vec3 up, mat4 dest);
@@ -53,6 +81,58 @@ CGLM_EXPORT
 void
 glmc_look_anyup(vec3 eye, vec3 dir, mat4 dest);
 
+CGLM_EXPORT
+void
+glmc_persp_decomp(mat4 proj,
+                  float * __restrict nearVal,
+                  float * __restrict farVal,
+                  float * __restrict top,
+                  float * __restrict bottom,
+                  float * __restrict left,
+                  float * __restrict right);
+
+CGLM_EXPORT
+void
+glmc_persp_decompv(mat4 proj, float dest[6]);
+
+CGLM_EXPORT
+void
+glmc_persp_decomp_x(mat4 proj,
+                    float * __restrict left,
+                    float * __restrict right);
+
+CGLM_EXPORT
+void
+glmc_persp_decomp_y(mat4 proj,
+                    float * __restrict top,
+                    float * __restrict bottom);
+
+CGLM_EXPORT
+void
+glmc_persp_decomp_z(mat4 proj,
+                    float * __restrict nearVal,
+                    float * __restrict farVal);
+
+CGLM_EXPORT
+void
+glmc_persp_decomp_far(mat4 proj, float * __restrict farVal);
+
+CGLM_EXPORT
+void
+glmc_persp_decomp_near(mat4 proj, float * __restrict nearVal);
+
+CGLM_EXPORT
+float
+glmc_persp_fovy(mat4 proj);
+
+CGLM_EXPORT
+float
+glmc_persp_aspect(mat4 proj);
+
+CGLM_EXPORT
+void
+glmc_persp_sizes(mat4 proj, float fovy, vec4 dest);
+
 #ifdef __cplusplus
 }
 #endif

include/cglm/call/mat3.h

@@ -40,6 +40,10 @@ CGLM_EXPORT
 void
 glmc_mat3_mulv(mat3 m, vec3 v, vec3 dest);
 
+CGLM_EXPORT
+void
+glmc_mat3_quat(mat3 m, versor dest);
+
 CGLM_EXPORT
 void
 glmc_mat3_scale(mat3 m, float s);

include/cglm/call/mat4.h

@@ -53,6 +53,10 @@ CGLM_EXPORT
 void
 glmc_mat4_mulv(mat4 m, vec4 v, vec4 dest);
 
+CGLM_EXPORT
+void
+glmc_mat4_mulv3(mat4 m, vec3 v, float last, vec3 dest);
+
 CGLM_EXPORT
 void
 glmc_mat4_quat(mat4 m, versor dest);
@@ -85,6 +89,10 @@ CGLM_EXPORT
 void
 glmc_mat4_inv_precise(mat4 mat, mat4 dest);
 
+CGLM_EXPORT
+void
+glmc_mat4_inv_fast(mat4 mat, mat4 dest);
+
 CGLM_EXPORT
 void
 glmc_mat4_swap_col(mat4 mat, int col1, int col2);

include/cglm/call/vec3.h

@@ -136,6 +136,10 @@ CGLM_EXPORT
 void
 glmc_vec_rotate_m4(mat4 m, vec3 v, vec3 dest);
 
+CGLM_EXPORT
+void
+glmc_vec_rotate_m3(mat3 m, vec3 v, vec3 dest);
+
 CGLM_EXPORT
 void
 glmc_vec_proj(vec3 a, vec3 b, vec3 dest);

include/cglm/cam.h

@@ -332,7 +332,7 @@ glm_lookat(vec3 eye,
            vec3 center,
            vec3 up,
            mat4 dest) {
-  vec3 f, u, s;
+  CGLM_ALIGN(8) vec3 f, u, s;
 
   glm_vec_sub(center, eye, f);
   glm_vec_normalize(f);
@@ -372,7 +372,7 @@ glm_lookat(vec3 eye,
 CGLM_INLINE
 void
 glm_look(vec3 eye, vec3 dir, vec3 up, mat4 dest) {
-  vec3 target;
+  CGLM_ALIGN(8) vec3 target;
   glm_vec_add(eye, dir, target);
   glm_lookat(eye, target, up, dest);
 }
@@ -390,7 +390,7 @@ glm_look(vec3 eye, vec3 dir, vec3 up, mat4 dest) {
 CGLM_INLINE
 void
 glm_look_anyup(vec3 eye, vec3 dir, mat4 dest) {
-  vec3 up;
+  CGLM_ALIGN(8) vec3 up;
   glm_vec_ortho(dir, up);
   glm_look(eye, dir, up, dest);
 }

include/cglm/common.h

@@ -14,7 +14,7 @@
 #include <math.h>
 #include <float.h>
 
-#if defined(_WIN32)
+#if defined(_MSC_VER)
 #  ifdef CGLM_DLL
 #    define CGLM_EXPORT __declspec(dllexport)
 #  else

include/cglm/frustum.h

@@ -10,6 +10,9 @@
 
 #include "common.h"
 #include "plane.h"
+#include "vec3.h"
+#include "vec4.h"
+#include "mat4.h"
 
 #define GLM_LBN 0 /* left  bottom near */
 #define GLM_LTN 1 /* left  top    near */

include/cglm/mat3.h

@@ -31,6 +31,7 @@
 #define cglm_mat3_h
 
 #include "common.h"
+#include "vec3.h"
 
 #ifdef CGLM_SSE_FP
 #  include "simd/sse2/mat3.h"
@@ -80,7 +81,7 @@ glm_mat3_copy(mat3 mat, mat3 dest) {
 CGLM_INLINE
 void
 glm_mat3_identity(mat3 mat) {
-  mat3 t = GLM_MAT3_IDENTITY_INIT;
+  CGLM_ALIGN(16) mat3 t = GLM_MAT3_IDENTITY_INIT;
   glm_mat3_copy(t, mat);
 }
@@ -154,7 +155,7 @@ glm_mat3_transpose_to(mat3 m, mat3 dest) {
 CGLM_INLINE
 void
 glm_mat3_transpose(mat3 m) {
-  mat3 tmp;
+  CGLM_ALIGN(16) mat3 tmp;
 
   tmp[0][1] = m[1][0];
   tmp[0][2] = m[2][0];
@@ -188,9 +189,9 @@ glm_mat3_mulv(mat3 m, vec3 v, vec3 dest) {
 /*!
- * @brief convert mat4's rotation part to quaternion
+ * @brief convert mat3 to quaternion
  *
- * @param[in]  m    left matrix
+ * @param[in]  m    rotation matrix
  * @param[out] dest destination quaternion
  */
 CGLM_INLINE

include/cglm/mat4.h

@@ -45,7 +45,8 @@
 #define cglm_mat_h
 
 #include "common.h"
-#include "quat.h"
+#include "vec4.h"
+#include "vec3.h"
 
 #ifdef CGLM_SSE_FP
 #  include "simd/sse2/mat4.h"
@@ -109,13 +110,13 @@ CGLM_INLINE
 void
 glm_mat4_copy(mat4 mat, mat4 dest) {
 #ifdef __AVX__
-  _mm256_store_ps(dest[0], _mm256_load_ps(mat[0]));
-  _mm256_store_ps(dest[2], _mm256_load_ps(mat[2]));
+  glmm_store256(dest[0], glmm_load256(mat[0]));
+  glmm_store256(dest[2], glmm_load256(mat[2]));
 #elif defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(dest[0], _mm_load_ps(mat[0]));
-  _mm_store_ps(dest[1], _mm_load_ps(mat[1]));
-  _mm_store_ps(dest[2], _mm_load_ps(mat[2]));
-  _mm_store_ps(dest[3], _mm_load_ps(mat[3]));
+  glmm_store(dest[0], glmm_load(mat[0]));
+  glmm_store(dest[1], glmm_load(mat[1]));
+  glmm_store(dest[2], glmm_load(mat[2]));
+  glmm_store(dest[3], glmm_load(mat[3]));
 #else
   glm_mat4_ucopy(mat, dest);
 #endif
@@ -138,7 +139,7 @@ glm_mat4_copy(mat4 mat, mat4 dest) {
 CGLM_INLINE
 void
 glm_mat4_identity(mat4 mat) {
-  mat4 t = GLM_MAT4_IDENTITY_INIT;
+  CGLM_ALIGN(16) mat4 t = GLM_MAT4_IDENTITY_INIT;
   glm_mat4_copy(t, mat);
 }
@@ -322,7 +323,7 @@ glm_mat4_mulv(mat4 m, vec4 v, vec4 dest) {
 /*!
  * @brief convert mat4's rotation part to quaternion
  *
- * @param[in]  m    left matrix
+ * @param[in]  m    affine matrix
  * @param[out] dest destination quaternion
  */
 CGLM_INLINE
@@ -369,20 +370,20 @@ glm_mat4_quat(mat4 m, versor dest) {
 }
 
 /*!
- * @brief multiply vector with mat4's mat3 part(rotation)
+ * @brief multiply vector with mat4
  *
  * @param[in]  m    mat4(affine transform)
  * @param[in]  v    vec3
- * @param[out] dest vec3
+ * @param[in]  last 4th item to make it vec4
+ * @param[out] dest result vector (vec3)
  */
 CGLM_INLINE
 void
-glm_mat4_mulv3(mat4 m, vec3 v, vec3 dest) {
-  vec3 res;
-  res[0] = m[0][0] * v[0] + m[1][0] * v[1] + m[2][0] * v[2];
-  res[1] = m[0][1] * v[0] + m[1][1] * v[1] + m[2][1] * v[2];
-  res[2] = m[0][2] * v[0] + m[1][2] * v[1] + m[2][2] * v[2];
-  glm_vec_copy(res, dest);
+glm_mat4_mulv3(mat4 m, vec3 v, float last, vec3 dest) {
+  vec4 res;
+  glm_vec4(v, last, res);
+  glm_mat4_mulv(m, res, res);
+  glm_vec3(res, dest);
 }
@@ -585,7 +586,7 @@ glm_mat4_inv_fast(mat4 mat, mat4 dest) {
 CGLM_INLINE
 void
 glm_mat4_swap_col(mat4 mat, int col1, int col2) {
-  vec4 tmp;
+  CGLM_ALIGN(16) vec4 tmp;
   glm_vec4_copy(mat[col1], tmp);
   glm_vec4_copy(mat[col2], mat[col1]);
   glm_vec4_copy(tmp, mat[col2]);
@@ -601,7 +602,7 @@ glm_mat4_swap_col(mat4 mat, int col1, int col2) {
 CGLM_INLINE
 void
 glm_mat4_swap_row(mat4 mat, int row1, int row2) {
-  vec4 tmp;
+  CGLM_ALIGN(16) vec4 tmp;
   tmp[0] = mat[0][row1];
   tmp[1] = mat[1][row1];
   tmp[2] = mat[2][row1];
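A sketch (not from the repo) of the updated glm_mat4_mulv3 API: the new `last` parameter decides whether the vec3 is treated as a point or a direction:

```c
#include <cglm/cglm.h>

int main(void) {
  mat4 m;
  vec3 p = {1.0f, 0.0f, 0.0f}, asPoint, asDir;

  glm_translate_make(m, (vec3){0.0f, 5.0f, 0.0f});

  glm_mat4_mulv3(m, p, 1.0f, asPoint); /* point: translation applied, {1, 5, 0} */
  glm_mat4_mulv3(m, p, 0.0f, asDir);   /* direction: stays {1, 0, 0} */
  return 0;
}
```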

include/cglm/plane.h

@@ -9,9 +9,7 @@
 #define cglm_plane_h
 
 #include "common.h"
-#include "mat4.h"
 #include "vec4.h"
-#include "vec3.h"
 
 /*
  Plane equation:  Ax + By + Cz + D = 0;

include/cglm/project.h

@@ -8,9 +8,9 @@
 #ifndef cglm_project_h
 #define cglm_project_h
 
-#include "mat4.h"
 #include "vec3.h"
 #include "vec4.h"
+#include "mat4.h"
 
 /*!
  * @brief maps the specified viewport coordinates into specified space [1]
@@ -100,7 +100,7 @@ glm_unproject(vec3 pos, mat4 m, vec4 vp, vec3 dest) {
 CGLM_INLINE
 void
 glm_project(vec3 pos, mat4 m, vec4 vp, vec3 dest) {
-  vec4 pos4, vone = GLM_VEC4_ONE_INIT;
+  CGLM_ALIGN(16) vec4 pos4, vone = GLM_VEC4_ONE_INIT;
 
   glm_vec4(pos, 1.0f, pos4);

include/cglm/quat.h

@@ -52,6 +52,7 @@
 #define cglm_quat_h
 
 #include "common.h"
+#include "vec3.h"
 #include "vec4.h"
 #include "mat4.h"
 #include "mat3.h"
@@ -98,7 +99,7 @@ glm_translate(mat4 m, vec3 v);
 CGLM_INLINE
 void
 glm_quat_identity(versor q) {
-  versor v = GLM_QUAT_IDENTITY_INIT;
+  CGLM_ALIGN(16) versor v = GLM_QUAT_IDENTITY_INIT;
   glm_vec4_copy(v, q);
 }
@@ -130,7 +131,7 @@ glm_quat_init(versor q, float x, float y, float z, float w) {
 CGLM_INLINE
 void
 glm_quatv(versor q, float angle, vec3 axis) {
-  vec3 k;
+  CGLM_ALIGN(8) vec3 k;
   float a, c, s;
 
   a = angle * 0.5f;
@@ -157,7 +158,7 @@ glm_quatv(versor q, float angle, vec3 axis) {
 CGLM_INLINE
 void
 glm_quat(versor q, float angle, float x, float y, float z) {
-  vec3 axis = {x, y, z};
+  CGLM_ALIGN(8) vec3 axis = {x, y, z};
   glm_quatv(q, angle, axis);
 }
@@ -197,8 +198,8 @@ glm_quat_normalize_to(versor q, versor dest) {
   __m128 xdot, x0;
   float  dot;
 
-  x0   = _mm_load_ps(q);
-  xdot = glm_simd_dot(x0, x0);
+  x0   = glmm_load(q);
+  xdot = glmm_dot(x0, x0);
   dot  = _mm_cvtss_f32(xdot);
 
   if (dot <= 0.0f) {
@@ -206,7 +207,7 @@ glm_quat_normalize_to(versor q, versor dest) {
     return;
   }
 
-  _mm_store_ps(dest, _mm_div_ps(x0, _mm_sqrt_ps(xdot)));
+  glmm_store(dest, _mm_div_ps(x0, _mm_sqrt_ps(xdot)));
 #else
   float dot;
@@ -266,7 +267,7 @@ glm_quat_conjugate(versor q, versor dest) {
 CGLM_INLINE
 void
 glm_quat_inv(versor q, versor dest) {
-  versor conj;
+  CGLM_ALIGN(8) versor conj;
   glm_quat_conjugate(q, conj);
   glm_vec4_scale(conj, 1.0f / glm_vec4_norm2(q), dest);
 }
@@ -602,7 +603,7 @@ glm_quat_lerp(versor from, versor to, float t, versor dest) {
 CGLM_INLINE
 void
 glm_quat_slerp(versor from, versor to, float t, versor dest) {
-  vec4  q1, q2;
+  CGLM_ALIGN(16) vec4 q1, q2;
   float cosTheta, sinTheta, angle;
 
   cosTheta = glm_quat_dot(from, to);
@@ -645,7 +646,7 @@ glm_quat_slerp(versor from, versor to, float t, versor dest) {
 CGLM_INLINE
 void
 glm_quat_look(vec3 eye, versor ori, mat4 dest) {
-  vec4 t;
+  CGLM_ALIGN(16) vec4 t;
 
   /* orientation */
   glm_quat_mat4t(ori, dest);
@@ -667,7 +668,7 @@ glm_quat_look(vec3 eye, versor ori, mat4 dest) {
 CGLM_INLINE
 void
 glm_quat_for(vec3 dir, vec3 fwd, vec3 up, versor dest) {
-  vec3 axis;
+  CGLM_ALIGN(8) vec3 axis;
   float dot, angle;
 
   dot = glm_vec_dot(dir, fwd);
@@ -701,7 +702,7 @@ glm_quat_for(vec3 dir, vec3 fwd, vec3 up, versor dest) {
 CGLM_INLINE
 void
 glm_quat_forp(vec3 from, vec3 to, vec3 fwd, vec3 up, versor dest) {
-  vec3 dir;
+  CGLM_ALIGN(8) vec3 dir;
   glm_vec_sub(to, from, dir);
   glm_quat_for(dir, fwd, up, dest);
 }
@@ -716,9 +717,9 @@ glm_quat_forp(vec3 from, vec3 to, vec3 fwd, vec3 up, versor dest) {
 CGLM_INLINE
 void
 glm_quat_rotatev(versor q, vec3 v, vec3 dest) {
-  versor p;
-  vec3   u, v1, v2;
+  CGLM_ALIGN(16) versor p;
+  CGLM_ALIGN(8)  vec3   u, v1, v2;
   float  s;
 
   glm_quat_normalize_to(q, p);
   glm_quat_imag(p, u);
@@ -744,7 +745,7 @@ glm_quat_rotatev(versor q, vec3 v, vec3 dest) {
 CGLM_INLINE
 void
 glm_quat_rotate(mat4 m, versor q, mat4 dest) {
-  mat4 rot;
+  CGLM_ALIGN(16) mat4 rot;
   glm_quat_mat4(q, rot);
   glm_mul_rot(m, rot, dest);
 }
@@ -759,7 +760,7 @@ glm_quat_rotate(mat4 m, versor q, mat4 dest) {
 CGLM_INLINE
 void
 glm_quat_rotate_at(mat4 m, versor q, vec3 pivot) {
-  vec3 pivotInv;
+  CGLM_ALIGN(8) vec3 pivotInv;
 
   glm_vec_inv_to(pivot, pivotInv);
@@ -783,12 +784,11 @@ glm_quat_rotate_at(mat4 m, versor q, vec3 pivot) {
 CGLM_INLINE
 void
 glm_quat_rotate_atm(mat4 m, versor q, vec3 pivot) {
-  vec3 pivotInv;
+  CGLM_ALIGN(8) vec3 pivotInv;
 
   glm_vec_inv_to(pivot, pivotInv);
 
-  glm_mat4_identity(m);
-  glm_vec_copy(pivot, m[3]);
+  glm_translate_make(m, pivot);
   glm_quat_rotate(m, q, m);
   glm_translate(m, pivotInv);
 }
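A usage sketch (not from the repo) for glm_quat_rotate_atm, which builds the matrix from scratch (the *_atm variant), rotating about a pivot point:

```c
#include <cglm/cglm.h>

int main(void) {
  mat4   m;
  versor q;
  vec3   pivot = {1.0f, 0.0f, 0.0f};
  vec3   axis  = {0.0f, 1.0f, 0.0f};

  glm_quatv(q, glm_rad(90.0f), axis); /* quaternion from angle + axis */
  glm_quat_rotate_atm(m, q, pivot);   /* no prior matrix needed */
  return 0;
}
```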

include/cglm/simd/avx/affine.h

@@ -21,11 +21,11 @@ glm_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
   __m256 y0, y1, y2, y3, y4, y5, y6, y7, y8, y9;
 
-  y0 = _mm256_load_ps(m2[0]); /* h g f e d c b a */
-  y1 = _mm256_load_ps(m2[2]); /* p o n m l k j i */
-
-  y2 = _mm256_load_ps(m1[0]); /* h g f e d c b a */
-  y3 = _mm256_load_ps(m1[2]); /* p o n m l k j i */
+  y0 = glmm_load256(m2[0]); /* h g f e d c b a */
+  y1 = glmm_load256(m2[2]); /* p o n m l k j i */
+
+  y2 = glmm_load256(m1[0]); /* h g f e d c b a */
+  y3 = glmm_load256(m1[2]); /* p o n m l k j i */
 
   y4 = _mm256_permute2f128_ps(y2, y2, 0b00000011); /* d c b a h g f e */
   y5 = _mm256_permute2f128_ps(y3, y3, 0b00000000); /* l k j i l k j i */
@@ -37,10 +37,10 @@ glm_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
   y6 = _mm256_permutevar_ps(y0, _mm256_set_epi32(1, 1, 1, 1, 0, 0, 0, 0));
   y8 = _mm256_permutevar_ps(y0, _mm256_set_epi32(0, 0, 0, 0, 1, 1, 1, 1));
 
-  _mm256_store_ps(dest[0],
-                  _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
-                                              _mm256_mul_ps(y4, y8)),
-                                _mm256_mul_ps(y5, y7)));
+  glmm_store256(dest[0],
+                _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
+                                            _mm256_mul_ps(y4, y8)),
+                              _mm256_mul_ps(y5, y7)));
 
   /* n n n n i i i i */
@@ -52,11 +52,11 @@ glm_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
   y8 = _mm256_permutevar_ps(y1, _mm256_set_epi32(0, 0, 0, 0, 1, 1, 1, 1));
   y9 = _mm256_permutevar_ps(y1, _mm256_set_epi32(2, 2, 2, 2, 3, 3, 3, 3));
 
-  _mm256_store_ps(dest[2],
-                  _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
-                                              _mm256_mul_ps(y3, y7)),
-                                _mm256_add_ps(_mm256_mul_ps(y4, y8),
-                                              _mm256_mul_ps(y5, y9))));
+  glmm_store256(dest[2],
+                _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
+                                            _mm256_mul_ps(y3, y7)),
+                              _mm256_add_ps(_mm256_mul_ps(y4, y8),
+                                            _mm256_mul_ps(y5, y9))));
 }
 
 #endif

include/cglm/simd/avx/mat4.h

@@ -21,11 +21,11 @@ glm_mat4_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
   __m256 y0, y1, y2, y3, y4, y5, y6, y7, y8, y9;
 
-  y0 = _mm256_load_ps(m2[0]); /* h g f e d c b a */
-  y1 = _mm256_load_ps(m2[2]); /* p o n m l k j i */
-
-  y2 = _mm256_load_ps(m1[0]); /* h g f e d c b a */
-  y3 = _mm256_load_ps(m1[2]); /* p o n m l k j i */
+  y0 = glmm_load256(m2[0]); /* h g f e d c b a */
+  y1 = glmm_load256(m2[2]); /* p o n m l k j i */
+
+  y2 = glmm_load256(m1[0]); /* h g f e d c b a */
+  y3 = glmm_load256(m1[2]); /* p o n m l k j i */
 
   y4 = _mm256_permute2f128_ps(y2, y2, 0b00000011); /* d c b a h g f e */
   y5 = _mm256_permute2f128_ps(y3, y3, 0b00000011); /* l k j i p o n m */
@@ -39,11 +39,11 @@ glm_mat4_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
   y8 = _mm256_permutevar_ps(y0, _mm256_set_epi32(0, 0, 0, 0, 1, 1, 1, 1));
   y9 = _mm256_permutevar_ps(y0, _mm256_set_epi32(2, 2, 2, 2, 3, 3, 3, 3));
 
-  _mm256_store_ps(dest[0],
-                  _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
-                                              _mm256_mul_ps(y3, y7)),
-                                _mm256_add_ps(_mm256_mul_ps(y4, y8),
-                                              _mm256_mul_ps(y5, y9))));
+  glmm_store256(dest[0],
+                _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
+                                            _mm256_mul_ps(y3, y7)),
+                              _mm256_add_ps(_mm256_mul_ps(y4, y8),
+                                            _mm256_mul_ps(y5, y9))));
 
   /* n n n n i i i i */
   /* p p p p k k k k */
@@ -54,11 +54,11 @@ glm_mat4_mul_avx(mat4 m1, mat4 m2, mat4 dest) {
   y8 = _mm256_permutevar_ps(y1, _mm256_set_epi32(0, 0, 0, 0, 1, 1, 1, 1));
   y9 = _mm256_permutevar_ps(y1, _mm256_set_epi32(2, 2, 2, 2, 3, 3, 3, 3));
 
-  _mm256_store_ps(dest[2],
-                  _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
-                                              _mm256_mul_ps(y3, y7)),
-                                _mm256_add_ps(_mm256_mul_ps(y4, y8),
-                                              _mm256_mul_ps(y5, y9))));
+  glmm_store256(dest[2],
+                _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(y2, y6),
+                                            _mm256_mul_ps(y3, y7)),
+                              _mm256_add_ps(_mm256_mul_ps(y4, y8),
+                                            _mm256_mul_ps(y5, y9))));
 }
 
 #endif

include/cglm/simd/intrin.h

@@ -8,11 +8,19 @@
 #ifndef cglm_intrin_h
 #define cglm_intrin_h
 
-#if defined( _WIN32 )
+#if defined( _MSC_VER )
 #  if (defined(_M_AMD64) || defined(_M_X64)) || _M_IX86_FP == 2
-#    define __SSE2__
+#    ifndef __SSE2__
+#      define __SSE2__
+#    endif
 #  elif _M_IX86_FP == 1
-#    define __SSE__
+#    ifndef __SSE__
+#      define __SSE__
+#    endif
+#  endif
+
+/* do not use alignment for older visual studio versions */
+#  if _MSC_VER < 1913 /* Visual Studio 2017 version 15.6 */
+#    define CGLM_ALL_UNALIGNED
 #  endif
 #endif
@@ -20,35 +28,39 @@
 #  include <xmmintrin.h>
 #  include <emmintrin.h>
 
-/* float */
-#  define _mm_shuffle1_ps(a, z, y, x, w) \
-     _mm_shuffle_ps(a, a, _MM_SHUFFLE(z, y, x, w))
-
-#  define _mm_shuffle1_ps1(a, x) \
-     _mm_shuffle_ps(a, a, _MM_SHUFFLE(x, x, x, x))
-
-#  define _mm_shuffle2_ps(a, b, z0, y0, x0, w0, z1, y1, x1, w1) \
-     _mm_shuffle1_ps(_mm_shuffle_ps(a, b, _MM_SHUFFLE(z0, y0, x0, w0)), \
-                     z1, y1, x1, w1)
-
-CGLM_INLINE
-__m128
-glm_simd_dot(__m128 a, __m128 b) {
-  __m128 x0;
-  x0 = _mm_mul_ps(a, b);
-  x0 = _mm_add_ps(x0, _mm_shuffle1_ps(x0, 1, 0, 3, 2));
-  return _mm_add_ps(x0, _mm_shuffle1_ps(x0, 0, 1, 0, 1));
-}
-
-CGLM_INLINE
-__m128
-glm_simd_norm(__m128 a) {
-  return _mm_sqrt_ps(glm_simd_dot(a, a));
-}
-
-static inline
-__m128
-glm_simd_load_v3(vec3 v) {
+/* OPTIONAL: You may save some instructions but latency (not sure) */
+#ifdef CGLM_USE_INT_DOMAIN
+#  define glmm_shuff1(xmm, z, y, x, w) \
+     _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xmm), \
+                                        _MM_SHUFFLE(z, y, x, w)))
+#else
+#  define glmm_shuff1(xmm, z, y, x, w) \
+     _mm_shuffle_ps(xmm, xmm, _MM_SHUFFLE(z, y, x, w))
+#endif
+
+#define glmm_shuff1x(xmm, x) glmm_shuff1(xmm, x, x, x, x)
+#define glmm_shuff2(a, b, z0, y0, x0, w0, z1, y1, x1, w1) \
+   glmm_shuff1(_mm_shuffle_ps(a, b, _MM_SHUFFLE(z0, y0, x0, w0)), \
+               z1, y1, x1, w1)
+
+static inline
+__m128
+glmm_dot(__m128 a, __m128 b) {
+  __m128 x0;
+  x0 = _mm_mul_ps(a, b);
+  x0 = _mm_add_ps(x0, glmm_shuff1(x0, 1, 0, 3, 2));
+  return _mm_add_ps(x0, glmm_shuff1(x0, 0, 1, 0, 1));
+}
+
+static inline
+__m128
+glmm_norm(__m128 a) {
+  return _mm_sqrt_ps(glmm_dot(a, a));
+}
+
+static inline
+__m128
+glmm_load3(float v[3]) {
   __m128i xy;
   __m128  z;
@@ -60,11 +72,19 @@ glm_simd_load_v3(vec3 v) {
 
 static inline
 void
-glm_simd_store_v3(__m128 vx, vec3 v) {
+glmm_store3(__m128 vx, float v[3]) {
   _mm_storel_pi((__m64 *)&v[0], vx);
-  _mm_store_ss(&v[2], _mm_shuffle1_ps(vx, 2, 2, 2, 2));
+  _mm_store_ss(&v[2], glmm_shuff1(vx, 2, 2, 2, 2));
 }
 
+#ifdef CGLM_ALL_UNALIGNED
+#  define glmm_load(p)      _mm_loadu_ps(p)
+#  define glmm_store(p, a)  _mm_storeu_ps(p, a)
+#else
+#  define glmm_load(p)      _mm_load_ps(p)
+#  define glmm_store(p, a)  _mm_store_ps(p, a)
+#endif
+
 #endif
 
 /* x86, x64 */
@@ -74,6 +94,15 @@ glm_simd_store_v3(__m128 vx, vec3 v) {
 #ifdef __AVX__
 #  define CGLM_AVX_FP 1
 
+#ifdef CGLM_ALL_UNALIGNED
+#  define glmm_load256(p)      _mm256_loadu_ps(p)
+#  define glmm_store256(p, a)  _mm256_storeu_ps(p, a)
+#else
+#  define glmm_load256(p)      _mm256_load_ps(p)
+#  define glmm_store256(p, a)  _mm256_store_ps(p, a)
+#endif
+
 #endif
 
 /* ARM Neon */
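For context, a standalone sketch (not from the repo) of what the two glmm_shuff1 expansions above compile to. Both helpers splat lane 2; the int-domain form maps to pshufd (a single instruction even when source and destination registers match), the float form to shufps (which may cost an extra register move):

```c
#include <emmintrin.h>
#include <stdio.h>

static __m128 splat2_int(__m128 x) { /* CGLM_USE_INT_DOMAIN path */
  return _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x),
                                            _MM_SHUFFLE(2, 2, 2, 2)));
}

static __m128 splat2_flt(__m128 x) { /* default path */
  return _mm_shuffle_ps(x, x, _MM_SHUFFLE(2, 2, 2, 2));
}

int main(void) {
  float  out[4];
  __m128 v = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f); /* lanes 0..3 = 1,2,3,4 */

  _mm_storeu_ps(out, splat2_int(v));
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 3 3 3 3 */

  _mm_storeu_ps(out, splat2_flt(v));
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); /* 3 3 3 3 */
  return 0;
}
```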

include/cglm/simd/sse2/affine.h

@@ -18,35 +18,35 @@ glm_mul_sse2(mat4 m1, mat4 m2, mat4 dest) {
   /* D = R * L (Column-Major) */
   __m128 l0, l1, l2, l3, r;
 
-  l0 = _mm_load_ps(m1[0]);
-  l1 = _mm_load_ps(m1[1]);
-  l2 = _mm_load_ps(m1[2]);
-  l3 = _mm_load_ps(m1[3]);
-
-  r = _mm_load_ps(m2[0]);
-  _mm_store_ps(dest[0],
-               _mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
-                                     _mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
-                          _mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2)));
-
-  r = _mm_load_ps(m2[1]);
-  _mm_store_ps(dest[1],
-               _mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
-                                     _mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
-                          _mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2)));
-
-  r = _mm_load_ps(m2[2]);
-  _mm_store_ps(dest[2],
-               _mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
-                                     _mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
-                          _mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2)));
-
-  r = _mm_load_ps(m2[3]);
-  _mm_store_ps(dest[3],
-               _mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
-                                     _mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
-                          _mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2),
-                                     _mm_mul_ps(_mm_shuffle1_ps1(r, 3), l3))));
+  l0 = glmm_load(m1[0]);
+  l1 = glmm_load(m1[1]);
+  l2 = glmm_load(m1[2]);
+  l3 = glmm_load(m1[3]);
+
+  r = glmm_load(m2[0]);
+  glmm_store(dest[0],
+             _mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 0), l0),
+                                   _mm_mul_ps(glmm_shuff1x(r, 1), l1)),
+                        _mm_mul_ps(glmm_shuff1x(r, 2), l2)));
+
+  r = glmm_load(m2[1]);
+  glmm_store(dest[1],
+             _mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 0), l0),
+                                   _mm_mul_ps(glmm_shuff1x(r, 1), l1)),
+                        _mm_mul_ps(glmm_shuff1x(r, 2), l2)));
+
+  r = glmm_load(m2[2]);
+  glmm_store(dest[2],
+             _mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 0), l0),
+                                   _mm_mul_ps(glmm_shuff1x(r, 1), l1)),
+                        _mm_mul_ps(glmm_shuff1x(r, 2), l2)));
+
+  r = glmm_load(m2[3]);
+  glmm_store(dest[3],
+             _mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 0), l0),
+                                   _mm_mul_ps(glmm_shuff1x(r, 1), l1)),
+                        _mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 2), l2),
+                                   _mm_mul_ps(glmm_shuff1x(r, 3), l3))));
 }
 
 CGLM_INLINE
@@ -55,30 +55,30 @@ glm_mul_rot_sse2(mat4 m1, mat4 m2, mat4 dest) {
   /* D = R * L (Column-Major) */
   __m128 l0, l1, l2, l3, r;
 
-  l0 = _mm_load_ps(m1[0]);
-  l1 = _mm_load_ps(m1[1]);
-  l2 = _mm_load_ps(m1[2]);
-  l3 = _mm_load_ps(m1[3]);
-
-  r = _mm_load_ps(m2[0]);
-  _mm_store_ps(dest[0],
-               _mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
-                                     _mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
-                          _mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2)));
-
-  r = _mm_load_ps(m2[1]);
-  _mm_store_ps(dest[1],
-               _mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
-                                     _mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
-                          _mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2)));
-
-  r = _mm_load_ps(m2[2]);
-  _mm_store_ps(dest[2],
-               _mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
-                                     _mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
-                          _mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2)));
-
-  _mm_store_ps(dest[3], l3);
+  l0 = glmm_load(m1[0]);
+  l1 = glmm_load(m1[1]);
+  l2 = glmm_load(m1[2]);
+  l3 = glmm_load(m1[3]);
+
+  r = glmm_load(m2[0]);
+  glmm_store(dest[0],
+             _mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 0), l0),
+                                   _mm_mul_ps(glmm_shuff1x(r, 1), l1)),
+                        _mm_mul_ps(glmm_shuff1x(r, 2), l2)));
+
+  r = glmm_load(m2[1]);
+  glmm_store(dest[1],
+             _mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 0), l0),
+                                   _mm_mul_ps(glmm_shuff1x(r, 1), l1)),
+                        _mm_mul_ps(glmm_shuff1x(r, 2), l2)));
+
+  r = glmm_load(m2[2]);
+  glmm_store(dest[2],
+             _mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 0), l0),
+                                   _mm_mul_ps(glmm_shuff1x(r, 1), l1)),
+                        _mm_mul_ps(glmm_shuff1x(r, 2), l2)));
+
+  glmm_store(dest[3], l3);
 }
 
 CGLM_INLINE
@@ -86,25 +86,25 @@ void
 glm_inv_tr_sse2(mat4 mat) {
   __m128 r0, r1, r2, r3, x0, x1;
 
-  r0 = _mm_load_ps(mat[0]);
-  r1 = _mm_load_ps(mat[1]);
-  r2 = _mm_load_ps(mat[2]);
-  r3 = _mm_load_ps(mat[3]);
+  r0 = glmm_load(mat[0]);
+  r1 = glmm_load(mat[1]);
+  r2 = glmm_load(mat[2]);
+  r3 = glmm_load(mat[3]);
   x1 = _mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f);
 
   _MM_TRANSPOSE4_PS(r0, r1, r2, x1);
 
-  x0 = _mm_add_ps(_mm_mul_ps(r0, _mm_shuffle1_ps(r3, 0, 0, 0, 0)),
-                  _mm_mul_ps(r1, _mm_shuffle1_ps(r3, 1, 1, 1, 1)));
-  x0 = _mm_add_ps(x0, _mm_mul_ps(r2, _mm_shuffle1_ps(r3, 2, 2, 2, 2)));
+  x0 = _mm_add_ps(_mm_mul_ps(r0, glmm_shuff1(r3, 0, 0, 0, 0)),
+                  _mm_mul_ps(r1, glmm_shuff1(r3, 1, 1, 1, 1)));
+  x0 = _mm_add_ps(x0, _mm_mul_ps(r2, glmm_shuff1(r3, 2, 2, 2, 2)));
   x0 = _mm_xor_ps(x0, _mm_set1_ps(-0.f));
 
   x0 = _mm_add_ps(x0, x1);
 
-  _mm_store_ps(mat[0], r0);
-  _mm_store_ps(mat[1], r1);
-  _mm_store_ps(mat[2], r2);
-  _mm_store_ps(mat[3], x0);
+  glmm_store(mat[0], r0);
+  glmm_store(mat[1], r1);
+  glmm_store(mat[2], r2);
+  glmm_store(mat[3], x0);
 }
 
 #endif

include/cglm/simd/sse2/mat3.h

@@ -27,27 +27,25 @@ glm_mat3_mul_sse2(mat3 m1, mat3 m2, mat3 dest) {
   r1 = _mm_loadu_ps(&m2[1][1]);
   r2 = _mm_set1_ps(m2[2][2]);
 
-  x1 = _mm_shuffle2_ps(l0, l1, 1, 0, 3, 3, 0, 3, 2, 0);
-  x2 = _mm_shuffle2_ps(l1, l2, 0, 0, 3, 2, 0, 2, 1, 0);
-
-  x0 = _mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps(l0, 0, 2, 1, 0),
-                             _mm_shuffle1_ps(r0, 3, 0, 0, 0)),
-                  _mm_mul_ps(x1,
-                             _mm_shuffle2_ps(r0, r1, 0, 0, 1, 1, 2, 0, 0, 0)));
-
-  x0 = _mm_add_ps(x0,
-                  _mm_mul_ps(x2,
-                             _mm_shuffle2_ps(r0, r1, 1, 1, 2, 2, 2, 0, 0, 0)));
+  x1 = glmm_shuff2(l0, l1, 1, 0, 3, 3, 0, 3, 2, 0);
+  x2 = glmm_shuff2(l1, l2, 0, 0, 3, 2, 0, 2, 1, 0);
+
+  x0 = _mm_add_ps(_mm_mul_ps(glmm_shuff1(l0, 0, 2, 1, 0),
+                             glmm_shuff1(r0, 3, 0, 0, 0)),
+                  _mm_mul_ps(x1, glmm_shuff2(r0, r1, 0, 0, 1, 1, 2, 0, 0, 0)));
+
+  x0 = _mm_add_ps(x0,
+                  _mm_mul_ps(x2, glmm_shuff2(r0, r1, 1, 1, 2, 2, 2, 0, 0, 0)));
 
   _mm_storeu_ps(dest[0], x0);
 
-  x0 = _mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps(l0, 1, 0, 2, 1),
-                             _mm_shuffle_ps(r0, r1, _MM_SHUFFLE(2, 2, 3, 3))),
-                  _mm_mul_ps(_mm_shuffle1_ps(x1, 1, 0, 2, 1),
-                             _mm_shuffle1_ps(r1, 3, 3, 0, 0)));
-
-  x0 = _mm_add_ps(x0,
-                  _mm_mul_ps(_mm_shuffle1_ps(x2, 1, 0, 2, 1),
-                             _mm_shuffle_ps(r1, r2, _MM_SHUFFLE(0, 0, 1, 1))));
+  x0 = _mm_add_ps(_mm_mul_ps(glmm_shuff1(l0, 1, 0, 2, 1),
+                             _mm_shuffle_ps(r0, r1, _MM_SHUFFLE(2, 2, 3, 3))),
+                  _mm_mul_ps(glmm_shuff1(x1, 1, 0, 2, 1),
+                             glmm_shuff1(r1, 3, 3, 0, 0)));
+
+  x0 = _mm_add_ps(x0,
+                  _mm_mul_ps(glmm_shuff1(x2, 1, 0, 2, 1),
+                             _mm_shuffle_ps(r1, r2, _MM_SHUFFLE(0, 0, 1, 1))));
 
   _mm_storeu_ps(&dest[1][1], x0);

include/cglm/simd/sse2/mat4.h

@@ -20,10 +20,10 @@ glm_mat4_scale_sse2(mat4 m, float s){
   __m128 x0;
   x0 = _mm_set1_ps(s);
 
-  _mm_store_ps(m[0], _mm_mul_ps(_mm_load_ps(m[0]), x0));
-  _mm_store_ps(m[1], _mm_mul_ps(_mm_load_ps(m[1]), x0));
-  _mm_store_ps(m[2], _mm_mul_ps(_mm_load_ps(m[2]), x0));
-  _mm_store_ps(m[3], _mm_mul_ps(_mm_load_ps(m[3]), x0));
+  glmm_store(m[0], _mm_mul_ps(glmm_load(m[0]), x0));
+  glmm_store(m[1], _mm_mul_ps(glmm_load(m[1]), x0));
+  glmm_store(m[2], _mm_mul_ps(glmm_load(m[2]), x0));
+  glmm_store(m[3], _mm_mul_ps(glmm_load(m[3]), x0));
 }
 
 CGLM_INLINE
@@ -31,17 +31,17 @@ void
 glm_mat4_transp_sse2(mat4 m, mat4 dest){
   __m128 r0, r1, r2, r3;
 
-  r0 = _mm_load_ps(m[0]);
-  r1 = _mm_load_ps(m[1]);
-  r2 = _mm_load_ps(m[2]);
-  r3 = _mm_load_ps(m[3]);
+  r0 = glmm_load(m[0]);
+  r1 = glmm_load(m[1]);
+  r2 = glmm_load(m[2]);
+  r3 = glmm_load(m[3]);
 
   _MM_TRANSPOSE4_PS(r0, r1, r2, r3);
 
-  _mm_store_ps(dest[0], r0);
-  _mm_store_ps(dest[1], r1);
-  _mm_store_ps(dest[2], r2);
-  _mm_store_ps(dest[3], r3);
+  glmm_store(dest[0], r0);
+  glmm_store(dest[1], r1);
+  glmm_store(dest[2], r2);
+  glmm_store(dest[3], r3);
 }
 
 CGLM_INLINE
@@ -51,36 +51,36 @@ glm_mat4_mul_sse2(mat4 m1, mat4 m2, mat4 dest) {
   __m128 l0, l1, l2, l3, r;
 
-  l0 = _mm_load_ps(m1[0]);
-  l1 = _mm_load_ps(m1[1]);
-  l2 = _mm_load_ps(m1[2]);
-  l3 = _mm_load_ps(m1[3]);
+  l0 = glmm_load(m1[0]);
+  l1 = glmm_load(m1[1]);
+  l2 = glmm_load(m1[2]);
+  l3 = glmm_load(m1[3]);
 
-  r = _mm_load_ps(m2[0]);
-  _mm_store_ps(dest[0],
-               _mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
-                                     _mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
-                          _mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2),
-                                     _mm_mul_ps(_mm_shuffle1_ps1(r, 3), l3))));
+  r = glmm_load(m2[0]);
+  glmm_store(dest[0],
+             _mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 0), l0),
+                                   _mm_mul_ps(glmm_shuff1x(r, 1), l1)),
+                        _mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 2), l2),
+                                   _mm_mul_ps(glmm_shuff1x(r, 3), l3))));
 
-  r = _mm_load_ps(m2[1]);
-  _mm_store_ps(dest[1],
-               _mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
-                                     _mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
-                          _mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2),
-                                     _mm_mul_ps(_mm_shuffle1_ps1(r, 3), l3))));
+  r = glmm_load(m2[1]);
+  glmm_store(dest[1],
+             _mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 0), l0),
+                                   _mm_mul_ps(glmm_shuff1x(r, 1), l1)),
+                        _mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 2), l2),
+                                   _mm_mul_ps(glmm_shuff1x(r, 3), l3))));
 
-  r = _mm_load_ps(m2[2]);
-  _mm_store_ps(dest[2],
-               _mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
-                                     _mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
-                          _mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2),
-                                     _mm_mul_ps(_mm_shuffle1_ps1(r, 3), l3))));
+  r = glmm_load(m2[2]);
+  glmm_store(dest[2],
+             _mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 0), l0),
+                                   _mm_mul_ps(glmm_shuff1x(r, 1), l1)),
+                        _mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 2), l2),
+                                   _mm_mul_ps(glmm_shuff1x(r, 3), l3))));
 
-  r = _mm_load_ps(m2[3]);
-  _mm_store_ps(dest[3],
-               _mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 0), l0),
-                                     _mm_mul_ps(_mm_shuffle1_ps1(r, 1), l1)),
-                          _mm_add_ps(_mm_mul_ps(_mm_shuffle1_ps1(r, 2), l2),
-                                     _mm_mul_ps(_mm_shuffle1_ps1(r, 3), l3))));
+  r = glmm_load(m2[3]);
+  glmm_store(dest[3],
+             _mm_add_ps(_mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 0), l0),
+                                   _mm_mul_ps(glmm_shuff1x(r, 1), l1)),
+                        _mm_add_ps(_mm_mul_ps(glmm_shuff1x(r, 2), l2),
+                                   _mm_mul_ps(glmm_shuff1x(r, 3), l3))));
 }
 
 CGLM_INLINE
@@ -88,18 +88,14 @@ void
 glm_mat4_mulv_sse2(mat4 m, vec4 v, vec4 dest) {
   __m128 x0, x1, x2;
 
-  x0 = _mm_load_ps(v);
-  x1 = _mm_add_ps(_mm_mul_ps(_mm_load_ps(m[0]),
-                             _mm_shuffle1_ps1(x0, 0)),
-                  _mm_mul_ps(_mm_load_ps(m[1]),
-                             _mm_shuffle1_ps1(x0, 1)));
-  x2 = _mm_add_ps(_mm_mul_ps(_mm_load_ps(m[2]),
-                             _mm_shuffle1_ps1(x0, 2)),
-                  _mm_mul_ps(_mm_load_ps(m[3]),
-                             _mm_shuffle1_ps1(x0, 3)));
-  _mm_store_ps(dest, _mm_add_ps(x1, x2));
+  x0 = glmm_load(v);
+  x1 = _mm_add_ps(_mm_mul_ps(glmm_load(m[0]), glmm_shuff1x(x0, 0)),
+                  _mm_mul_ps(glmm_load(m[1]), glmm_shuff1x(x0, 1)));
+  x2 = _mm_add_ps(_mm_mul_ps(glmm_load(m[2]), glmm_shuff1x(x0, 2)),
+                  _mm_mul_ps(glmm_load(m[3]), glmm_shuff1x(x0, 3)));
+  glmm_store(dest, _mm_add_ps(x1, x2));
 }
 
 CGLM_INLINE
@@ -108,10 +104,10 @@ glm_mat4_det_sse2(mat4 mat) {
   __m128 r0, r1, r2, r3, x0, x1, x2;
 
   /* 127 <- 0, [square] det(A) = det(At) */
-  r0 = _mm_load_ps(mat[0]); /* d c b a */
-  r1 = _mm_load_ps(mat[1]); /* h g f e */
-  r2 = _mm_load_ps(mat[2]); /* l k j i */
-  r3 = _mm_load_ps(mat[3]); /* p o n m */
+  r0 = glmm_load(mat[0]); /* d c b a */
+  r1 = glmm_load(mat[1]); /* h g f e */
+  r2 = glmm_load(mat[2]); /* l k j i */
+  r3 = glmm_load(mat[3]); /* p o n m */
 
   /*
    t[1] = j * p - n * l;
@@ -119,20 +115,20 @@ glm_mat4_det_sse2(mat4 mat) {
    t[3] = i * p - m * l;
    t[4] = i * o - m * k;
   */
-  x0 = _mm_sub_ps(_mm_mul_ps(_mm_shuffle1_ps(r2, 0, 0, 1, 1),
-                             _mm_shuffle1_ps(r3, 2, 3, 2, 3)),
-                  _mm_mul_ps(_mm_shuffle1_ps(r3, 0, 0, 1, 1),
-                             _mm_shuffle1_ps(r2, 2, 3, 2, 3)));
+  x0 = _mm_sub_ps(_mm_mul_ps(glmm_shuff1(r2, 0, 0, 1, 1),
+                             glmm_shuff1(r3, 2, 3, 2, 3)),
+                  _mm_mul_ps(glmm_shuff1(r3, 0, 0, 1, 1),
+                             glmm_shuff1(r2, 2, 3, 2, 3)));
 
   /*
     t[0] = k * p - o * l;
     t[0] = k * p - o * l;
    t[5] = i * n - m * j;
    t[5] = i * n - m * j;
   */
-  x1 = _mm_sub_ps(_mm_mul_ps(_mm_shuffle1_ps(r2, 0, 0, 2, 2),
-                             _mm_shuffle1_ps(r3, 1, 1, 3, 3)),
-                  _mm_mul_ps(_mm_shuffle1_ps(r3, 0, 0, 2, 2),
-                             _mm_shuffle1_ps(r2, 1, 1, 3, 3)));
+  x1 = _mm_sub_ps(_mm_mul_ps(glmm_shuff1(r2, 0, 0, 2, 2),
+                             glmm_shuff1(r3, 1, 1, 3, 3)),
+                  _mm_mul_ps(glmm_shuff1(r3, 0, 0, 2, 2),
+                             glmm_shuff1(r2, 1, 1, 3, 3)));
 
   /*
     a * (f * t[0] - g * t[1] + h * t[2])
@@ -140,19 +136,19 @@ glm_mat4_det_sse2(mat4 mat) {
     + c * (e * t[1] - f * t[3] + h * t[5])
     - d * (e * t[2] - f * t[4] + g * t[5])
   */
-  x2 = _mm_sub_ps(_mm_mul_ps(_mm_shuffle1_ps(r1, 0, 0, 0, 1),
-                             _mm_shuffle_ps(x1, x0, _MM_SHUFFLE(1, 0, 0, 0))),
-                  _mm_mul_ps(_mm_shuffle1_ps(r1, 1, 1, 2, 2),
-                             _mm_shuffle1_ps(x0, 3, 2, 2, 0)));
+  x2 = _mm_sub_ps(_mm_mul_ps(glmm_shuff1(r1, 0, 0, 0, 1),
+                             _mm_shuffle_ps(x1, x0, _MM_SHUFFLE(1, 0, 0, 0))),
+                  _mm_mul_ps(glmm_shuff1(r1, 1, 1, 2, 2),
+                             glmm_shuff1(x0, 3, 2, 2, 0)));
   x2 = _mm_add_ps(x2,
-                  _mm_mul_ps(_mm_shuffle1_ps(r1, 2, 3, 3, 3),
+                  _mm_mul_ps(glmm_shuff1(r1, 2, 3, 3, 3),
                              _mm_shuffle_ps(x0, x1, _MM_SHUFFLE(2, 2, 3, 1))));
 
   x2 = _mm_xor_ps(x2, _mm_set_ps(-0.f, 0.f, -0.f, 0.f));
 
   x0 = _mm_mul_ps(r0, x2);
-  x0 = _mm_add_ps(x0, _mm_shuffle1_ps(x0, 0, 1, 2, 3));
-  x0 = _mm_add_ps(x0, _mm_shuffle1_ps(x0, 1, 3, 3, 1));
+  x0 = _mm_add_ps(x0, glmm_shuff1(x0, 0, 1, 2, 3));
+  x0 = _mm_add_ps(x0, glmm_shuff1(x0, 1, 3, 3, 1));
 
   return _mm_cvtss_f32(x0);
 }
@@ -166,14 +162,14 @@ glm_mat4_inv_fast_sse2(mat4 mat, mat4 dest) {
          x0, x1, x2, x3, x4, x5, x6, x7;
 
   /* 127 <- 0 */
-  r0 = _mm_load_ps(mat[0]); /* d c b a */
-  r1 = _mm_load_ps(mat[1]); /* h g f e */
-  r2 = _mm_load_ps(mat[2]); /* l k j i */
-  r3 = _mm_load_ps(mat[3]); /* p o n m */
+  r0 = glmm_load(mat[0]); /* d c b a */
+  r1 = glmm_load(mat[1]); /* h g f e */
+  r2 = glmm_load(mat[2]); /* l k j i */
+  r3 = glmm_load(mat[3]); /* p o n m */
 
   x0 = _mm_shuffle_ps(r2, r3, _MM_SHUFFLE(3, 2, 3, 2)); /* p o l k */
-  x1 = _mm_shuffle1_ps(x0, 1, 3, 3, 3);                 /* l p p p */
-  x2 = _mm_shuffle1_ps(x0, 0, 2, 2, 2);                 /* k o o o */
+  x1 = glmm_shuff1(x0, 1, 3, 3, 3);                     /* l p p p */
+  x2 = glmm_shuff1(x0, 0, 2, 2, 2);                     /* k o o o */
   x0 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(3, 3, 3, 3)); /* h h l l */
   x3 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(2, 2, 2, 2)); /* g g k k */
 
@@ -184,7 +180,7 @@ glm_mat4_inv_fast_sse2(mat4 mat, mat4 dest) {
   t0 = _mm_sub_ps(_mm_mul_ps(x3, x1), _mm_mul_ps(x2, x0));
 
   x4 = _mm_shuffle_ps(r2, r3, _MM_SHUFFLE(2, 1, 2, 1)); /* o n k j */
-  x4 = _mm_shuffle1_ps(x4, 0, 2, 2, 2);                 /* j n n n */
+  x4 = glmm_shuff1(x4, 0, 2, 2, 2);                     /* j n n n */
   x5 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(1, 1, 1, 1)); /* f f j j */
 
   /* t1[1] = j * p - n * l;
@@ -200,7 +196,7 @@ glm_mat4_inv_fast_sse2(mat4 mat, mat4 dest) {
   t2 = _mm_sub_ps(_mm_mul_ps(x5, x2), _mm_mul_ps(x4, x3));
 
   x6 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(0, 0, 0, 0)); /* e e i i */
-  x7 = _mm_shuffle2_ps(r3, r2, 0, 0, 0, 0, 2, 0, 0, 0); /* i m m m */
+  x7 = glmm_shuff2(r3, r2, 0, 0, 0, 0, 2, 0, 0, 0);     /* i m m m */
 
   /* t1[3] = i * p - m * l;
      t1[3] = i * p - m * l;
@@ -220,10 +216,10 @@ glm_mat4_inv_fast_sse2(mat4 mat, mat4 dest) {
      t3[5] = e * j - i * f; */
   t5 = _mm_sub_ps(_mm_mul_ps(x6, x4), _mm_mul_ps(x7, x5));
 
-  x0 = _mm_shuffle2_ps(r1, r0, 0, 0, 0, 0, 2, 2, 2, 0); /* a a a e */
-  x1 = _mm_shuffle2_ps(r1, r0, 1, 1, 1, 1, 2, 2, 2, 0); /* b b b f */
-  x2 = _mm_shuffle2_ps(r1, r0, 2, 2, 2, 2, 2, 2, 2, 0); /* c c c g */
-  x3 = _mm_shuffle2_ps(r1, r0, 3, 3, 3, 3, 2, 2, 2, 0); /* d d d h */
+  x0 = glmm_shuff2(r1, r0, 0, 0, 0, 0, 2, 2, 2, 0);     /* a a a e */
+  x1 = glmm_shuff2(r1, r0, 1, 1, 1, 1, 2, 2, 2, 0);     /* b b b f */
+  x2 = glmm_shuff2(r1, r0, 2, 2, 2, 2, 2, 2, 2, 0);     /* c c c g */
+  x3 = glmm_shuff2(r1, r0, 3, 3, 3, 3, 2, 2, 2, 0);     /* d d d h */
 
   /*
    dest[0][0] = f * t1[0] - g * t1[1] + h * t1[2];
@@ -271,14 +267,14 @@ glm_mat4_inv_fast_sse2(mat4 mat, mat4 dest) {
   x0 = _mm_shuffle_ps(x0, x1, _MM_SHUFFLE(2, 0, 2, 0));
 
   x0 = _mm_mul_ps(x0, r0);
-  x0 = _mm_add_ps(x0, _mm_shuffle1_ps(x0, 0, 1, 2, 3));
-  x0 = _mm_add_ps(x0, _mm_shuffle1_ps(x0, 1, 0, 0, 1));
+  x0 = _mm_add_ps(x0, glmm_shuff1(x0, 0, 1, 2, 3));
+  x0 = _mm_add_ps(x0, glmm_shuff1(x0, 1, 0, 0, 1));
   x0 = _mm_rcp_ps(x0);
 
-  _mm_store_ps(dest[0], _mm_mul_ps(v0, x0));
-  _mm_store_ps(dest[1], _mm_mul_ps(v1, x0));
-  _mm_store_ps(dest[2], _mm_mul_ps(v2, x0));
-  _mm_store_ps(dest[3], _mm_mul_ps(v3, x0));
+  glmm_store(dest[0], _mm_mul_ps(v0, x0));
+  glmm_store(dest[1], _mm_mul_ps(v1, x0));
+  glmm_store(dest[2], _mm_mul_ps(v2, x0));
+  glmm_store(dest[3], _mm_mul_ps(v3, x0));
 }
 
 CGLM_INLINE
@@ -290,14 +286,14 @@ glm_mat4_inv_sse2(mat4 mat, mat4 dest) {
          x0, x1, x2, x3, x4, x5, x6, x7;
 
   /* 127 <- 0 */
-  r0 = _mm_load_ps(mat[0]); /* d c b a */
-  r1 = _mm_load_ps(mat[1]); /* h g f e */
-  r2 = _mm_load_ps(mat[2]); /* l k j i */
-  r3 = _mm_load_ps(mat[3]); /* p o n m */
+  r0 = glmm_load(mat[0]); /* d c b a */
+  r1 = glmm_load(mat[1]); /* h g f e */
+  r2 = glmm_load(mat[2]); /* l k j i */
+  r3 = glmm_load(mat[3]); /* p o n m */
 
   x0 = _mm_shuffle_ps(r2, r3, _MM_SHUFFLE(3, 2, 3, 2)); /* p o l k */
-  x1 = _mm_shuffle1_ps(x0, 1, 3, 3, 3);                 /* l p p p */
-  x2 = _mm_shuffle1_ps(x0, 0, 2, 2, 2);                 /* k o o o */
+  x1 = glmm_shuff1(x0, 1, 3, 3, 3);                     /* l p p p */
+  x2 = glmm_shuff1(x0, 0, 2, 2, 2);                     /* k o o o */
   x0 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(3, 3, 3, 3)); /* h h l l */
   x3 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(2, 2, 2, 2)); /* g g k k */
 
@@ -308,7 +304,7 @@ glm_mat4_inv_sse2(mat4 mat, mat4 dest) {
   t0 = _mm_sub_ps(_mm_mul_ps(x3, x1), _mm_mul_ps(x2, x0));
 
   x4 = _mm_shuffle_ps(r2, r3, _MM_SHUFFLE(2, 1, 2, 1)); /* o n k j */
-  x4 = _mm_shuffle1_ps(x4, 0, 2, 2, 2);                 /* j n n n */
+  x4 = glmm_shuff1(x4, 0, 2, 2, 2);                     /* j n n n */
   x5 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(1, 1, 1, 1)); /* f f j j */
 
   /* t1[1] = j * p - n * l;
@@ -324,7 +320,7 @@ glm_mat4_inv_sse2(mat4 mat, mat4 dest) {
   t2 = _mm_sub_ps(_mm_mul_ps(x5, x2), _mm_mul_ps(x4, x3));
 
   x6 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(0, 0, 0, 0)); /* e e i i */
-  x7 = _mm_shuffle2_ps(r3, r2, 0, 0, 0, 0, 2, 0, 0, 0); /* i m m m */
+  x7 = glmm_shuff2(r3, r2, 0, 0, 0, 0, 2, 0, 0, 0);     /* i m m m */
 
   /* t1[3] = i * p - m * l;
      t1[3] = i * p - m * l;
@@ -344,10 +340,10 @@ glm_mat4_inv_sse2(mat4 mat, mat4 dest) {
      t3[5] = e * j - i * f; */
   t5 = _mm_sub_ps(_mm_mul_ps(x6, x4), _mm_mul_ps(x7, x5));
 
-  x0 = _mm_shuffle2_ps(r1, r0, 0, 0, 0, 0, 2, 2, 2, 0); /* a a a e */
-  x1 = _mm_shuffle2_ps(r1, r0, 1, 1, 1, 1, 2, 2, 2, 0); /* b b b f */
-  x2 = _mm_shuffle2_ps(r1, r0, 2, 2, 2, 2, 2, 2, 2, 0); /* c c c g */
-  x3 = _mm_shuffle2_ps(r1, r0, 3, 3, 3, 3, 2, 2, 2, 0); /* d d d h */
+  x0 = glmm_shuff2(r1, r0, 0, 0, 0, 0, 2, 2, 2, 0);     /* a a a e */
+  x1 = glmm_shuff2(r1, r0, 1, 1, 1, 1, 2, 2, 2, 0);     /* b b b f */
+  x2 = glmm_shuff2(r1, r0, 2, 2, 2, 2, 2, 2, 2, 0);     /* c c c g */
+  x3 = glmm_shuff2(r1, r0, 3, 3, 3, 3, 2, 2, 2, 0);     /* d d d h */
 
   /*
    dest[0][0] = f * t1[0] - g * t1[1] + h * t1[2];
@@ -395,14 +391,14 @@ glm_mat4_inv_sse2(mat4 mat, mat4 dest) {
   x0 = _mm_shuffle_ps(x0, x1, _MM_SHUFFLE(2, 0, 2, 0));
 
   x0 = _mm_mul_ps(x0, r0);
-  x0 = _mm_add_ps(x0, _mm_shuffle1_ps(x0, 0, 1, 2, 3));
-  x0 = _mm_add_ps(x0, _mm_shuffle1_ps(x0, 1, 0, 0, 1));
+  x0 = _mm_add_ps(x0, glmm_shuff1(x0, 0, 1, 2, 3));
+  x0 = _mm_add_ps(x0, glmm_shuff1(x0, 1, 0, 0, 1));
   x0 = _mm_div_ps(_mm_set1_ps(1.0f), x0);
 
-  _mm_store_ps(dest[0], _mm_mul_ps(v0, x0));
-  _mm_store_ps(dest[1], _mm_mul_ps(v1, x0));
-  _mm_store_ps(dest[2], _mm_mul_ps(v2, x0));
-  _mm_store_ps(dest[3], _mm_mul_ps(v3, x0));
+  glmm_store(dest[0], _mm_mul_ps(v0, x0));
+  glmm_store(dest[1], _mm_mul_ps(v1, x0));
+  glmm_store(dest[2], _mm_mul_ps(v2, x0));
+  glmm_store(dest[3], _mm_mul_ps(v3, x0));
 }
 
 #endif


@@ -24,21 +24,21 @@ glm_quat_mul_sse2(versor p, versor q, versor dest) {
   __m128 xp, xq, x0, r;
 
-  xp = _mm_load_ps(p); /* 3 2 1 0 */
-  xq = _mm_load_ps(q);
+  xp = glmm_load(p); /* 3 2 1 0 */
+  xq = glmm_load(q);
 
-  r  = _mm_mul_ps(_mm_shuffle1_ps1(xp, 3), xq);
+  r  = _mm_mul_ps(glmm_shuff1x(xp, 3), xq);
 
-  x0 = _mm_xor_ps(_mm_shuffle1_ps1(xp, 0), _mm_set_ps(-0.f, 0.f, -0.f, 0.f));
-  r  = _mm_add_ps(r, _mm_mul_ps(x0, _mm_shuffle1_ps(xq, 0, 1, 2, 3)));
+  x0 = _mm_xor_ps(glmm_shuff1x(xp, 0), _mm_set_ps(-0.f, 0.f, -0.f, 0.f));
+  r  = _mm_add_ps(r, _mm_mul_ps(x0, glmm_shuff1(xq, 0, 1, 2, 3)));
 
-  x0 = _mm_xor_ps(_mm_shuffle1_ps1(xp, 1), _mm_set_ps(-0.f, -0.f, 0.f, 0.f));
-  r  = _mm_add_ps(r, _mm_mul_ps(x0, _mm_shuffle1_ps(xq, 1, 0, 3, 2)));
+  x0 = _mm_xor_ps(glmm_shuff1x(xp, 1), _mm_set_ps(-0.f, -0.f, 0.f, 0.f));
+  r  = _mm_add_ps(r, _mm_mul_ps(x0, glmm_shuff1(xq, 1, 0, 3, 2)));
 
-  x0 = _mm_xor_ps(_mm_shuffle1_ps1(xp, 2), _mm_set_ps(-0.f, 0.f, 0.f, -0.f));
-  r  = _mm_add_ps(r, _mm_mul_ps(x0, _mm_shuffle1_ps(xq, 2, 3, 0, 1)));
+  x0 = _mm_xor_ps(glmm_shuff1x(xp, 2), _mm_set_ps(-0.f, 0.f, 0.f, -0.f));
+  r  = _mm_add_ps(r, _mm_mul_ps(x0, glmm_shuff1(xq, 2, 3, 0, 1)));
 
-  _mm_store_ps(dest, r);
+  glmm_store(dest, r);
 }
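Editor's note: for reference, a scalar version of the quaternion product the SSE2 path above computes. The [x, y, z, w] component order follows cglm's versor layout (the /* 3 2 1 0 */ comment above); this sketch is not part of the diff.

/* Hamilton product dest = p * q, scalar reference sketch. */
void quat_mul_ref(const float p[4], const float q[4], float dest[4]) {
  dest[0] = p[3] * q[0] + p[0] * q[3] + p[1] * q[2] - p[2] * q[1]; /* x */
  dest[1] = p[3] * q[1] - p[0] * q[2] + p[1] * q[3] + p[2] * q[0]; /* y */
  dest[2] = p[3] * q[2] + p[0] * q[1] - p[1] * q[0] + p[2] * q[3]; /* z */
  dest[3] = p[3] * q[3] - p[0] * q[0] - p[1] * q[1] - p[2] * q[2]; /* w */
}

Each xor with a signed-zero mask in the SSE2 version flips exactly the lanes that carry a minus sign in one column of this product.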


@@ -9,23 +9,35 @@
 #define cglm_types_h
 
 #if defined(_MSC_VER)
-#  define CGLM_ALIGN(X) /* __declspec(align(X)) */
+/* do not use alignment for older visual studio versions */
+#  if _MSC_VER < 1913 /* Visual Studio 2017 version 15.6 */
+#    define CGLM_ALL_UNALIGNED
+#    define CGLM_ALIGN(X) /* no alignment */
+#  else
+#    define CGLM_ALIGN(X) __declspec(align(X))
+#  endif
 #else
 #  define CGLM_ALIGN(X) __attribute((aligned(X)))
 #endif
 
+#ifndef CGLM_ALL_UNALIGNED
+#  define CGLM_ALIGN_IF(X) CGLM_ALIGN(X)
+#else
+#  define CGLM_ALIGN_IF(X) /* no alignment */
+#endif
+
 typedef float vec2[2];
-typedef CGLM_ALIGN(8) float vec3[3];
+typedef CGLM_ALIGN_IF(8) float vec3[3];
 typedef int ivec3[3];
-typedef CGLM_ALIGN(16) float vec4[4];
+typedef CGLM_ALIGN_IF(16) float vec4[4];
 
 typedef vec3 mat3[3];
-typedef CGLM_ALIGN(16) vec4 mat4[4];
+typedef CGLM_ALIGN_IF(16) vec4 mat4[4];
 
 typedef vec4 versor;
 
-#define CGLM_PI   (float)M_PI
-#define CGLM_PI_2 (float)M_PI_2
-#define CGLM_PI_4 (float)M_PI_4
+#define CGLM_PI   ((float)M_PI)
+#define CGLM_PI_2 ((float)M_PI_2)
+#define CGLM_PI_4 ((float)M_PI_4)
 
 #endif /* cglm_types_h */
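Editor's note: the net effect is that vec3/vec4/mat4 keep their alignment attributes by default, but a build can now opt out globally. A hypothetical consumer-side sketch; the define-before-include pattern follows directly from the #ifndef guard above, but treat the exact header path as an assumption:

/* Compile with -DCGLM_ALL_UNALIGNED, or define it before the include,
   to drop the 8/16-byte alignment requirement; the SIMD paths then
   fall back to unaligned load/store. */
#define CGLM_ALL_UNALIGNED
#include <cglm/cglm.h>

void demo(void) {
  float v[4] = {1.0f, 2.0f, 3.0f, 4.0f}; /* no 16-byte guarantee needed */
  glm_vec4_scale(v, 2.0f, v);
}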


@@ -454,8 +454,7 @@ glm_vec_inv(vec3 v) {
 CGLM_INLINE
 void
 glm_vec_inv_to(vec3 v, vec3 dest) {
-  glm_vec_copy(v, dest);
-  glm_vec_flipsign(dest);
+  glm_vec_flipsign_to(v, dest);
 }
 
 /*!


@@ -42,7 +42,7 @@ CGLM_INLINE
 void
 glm_vec4_mulv(vec4 a, vec4 b, vec4 d) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(d, _mm_mul_ps(_mm_load_ps(a), _mm_load_ps(b)));
+  glmm_store(d, _mm_mul_ps(glmm_load(a), glmm_load(b)));
 #else
   d[0] = a[0] * b[0];
   d[1] = a[1] * b[1];
@@ -61,7 +61,7 @@ CGLM_INLINE
 void
 glm_vec4_broadcast(float val, vec4 d) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(d, _mm_set1_ps(val));
+  glmm_store(d, _mm_set1_ps(val));
 #else
   d[0] = d[1] = d[2] = d[3] = val;
 #endif
@@ -223,14 +223,14 @@ glm_vec4_sign(vec4 v, vec4 dest) {
 #if defined( __SSE2__ ) || defined( __SSE2__ )
   __m128 x0, x1, x2, x3, x4;
 
-  x0 = _mm_load_ps(v);
+  x0 = glmm_load(v);
   x1 = _mm_set_ps(0.0f, 0.0f, 1.0f, -1.0f);
-  x2 = _mm_shuffle1_ps1(x1, 2);
-  x3 = _mm_and_ps(_mm_cmpgt_ps(x0, x2), _mm_shuffle1_ps1(x1, 1));
-  x4 = _mm_and_ps(_mm_cmplt_ps(x0, x2), _mm_shuffle1_ps1(x1, 0));
+  x2 = glmm_shuff1x(x1, 2);
+  x3 = _mm_and_ps(_mm_cmpgt_ps(x0, x2), glmm_shuff1x(x1, 1));
+  x4 = _mm_and_ps(_mm_cmplt_ps(x0, x2), glmm_shuff1x(x1, 0));
 
-  _mm_store_ps(dest, _mm_or_ps(x3, x4));
+  glmm_store(dest, _mm_or_ps(x3, x4));
 #else
   dest[0] = glm_signf(v[0]);
   dest[1] = glm_signf(v[1]);
@@ -249,7 +249,7 @@ CGLM_INLINE
 void
 glm_vec4_sqrt(vec4 v, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(dest, _mm_sqrt_ps(_mm_load_ps(v)));
+  glmm_store(dest, _mm_sqrt_ps(glmm_load(v)));
 #else
   dest[0] = sqrtf(v[0]);
   dest[1] = sqrtf(v[1]);


@@ -111,7 +111,7 @@ CGLM_INLINE
 void
 glm_vec4_copy(vec4 v, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(dest, _mm_load_ps(v));
+  glmm_store(dest, glmm_load(v));
 #else
   dest[0] = v[0];
   dest[1] = v[1];
@@ -129,7 +129,7 @@ CGLM_INLINE
 void
 glm_vec4_zero(vec4 v) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(v, _mm_setzero_ps());
+  glmm_store(v, _mm_setzero_ps());
 #else
   v[0] = 0.0f;
   v[1] = 0.0f;
@@ -147,7 +147,7 @@ CGLM_INLINE
 void
 glm_vec4_one(vec4 v) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(v, _mm_set1_ps(1.0f));
+  glmm_store(v, _mm_set1_ps(1.0f));
 #else
   v[0] = 1.0f;
   v[1] = 1.0f;
@@ -169,9 +169,9 @@ float
 glm_vec4_dot(vec4 a, vec4 b) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   __m128 x0;
-  x0 = _mm_mul_ps(_mm_load_ps(a), _mm_load_ps(b));
-  x0 = _mm_add_ps(x0, _mm_shuffle1_ps(x0, 1, 0, 3, 2));
-  return _mm_cvtss_f32(_mm_add_ss(x0, _mm_shuffle1_ps(x0, 0, 1, 0, 1)));
+  x0 = _mm_mul_ps(glmm_load(a), glmm_load(b));
+  x0 = _mm_add_ps(x0, glmm_shuff1(x0, 1, 0, 3, 2));
+  return _mm_cvtss_f32(_mm_add_ss(x0, glmm_shuff1(x0, 0, 1, 0, 1)));
 #else
   return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
 #endif
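Editor's note: the two-shuffle horizontal add in glm_vec4_dot above (and in glm_vec4_norm2 below) can be read lane by lane. A standalone sketch, not part of the diff, assuming glmm_shuff1(v, z, y, x, w) expands to _mm_shuffle_ps(v, v, _MM_SHUFFLE(z, y, x, w)):

#include <xmmintrin.h>

/* Standalone reduction sketch mirroring the steps above. */
static inline float dot4_sse(const float a[4], const float b[4]) {
  __m128 x0 = _mm_mul_ps(_mm_loadu_ps(a), _mm_loadu_ps(b));
  /* [x, y, z, w] + [z, w, x, y]: lanes 0 and 1 now hold x+z and y+w */
  x0 = _mm_add_ps(x0, _mm_shuffle_ps(x0, x0, _MM_SHUFFLE(1, 0, 3, 2)));
  /* lane0 + lane1 gives (x+z) + (y+w), the full dot product */
  return _mm_cvtss_f32(
           _mm_add_ss(x0, _mm_shuffle_ps(x0, x0, _MM_SHUFFLE(0, 1, 0, 1))));
}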
@@ -193,10 +193,10 @@ float
 glm_vec4_norm2(vec4 v) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   __m128 x0;
-  x0 = _mm_load_ps(v);
+  x0 = glmm_load(v);
   x0 = _mm_mul_ps(x0, x0);
-  x0 = _mm_add_ps(x0, _mm_shuffle1_ps(x0, 1, 0, 3, 2));
-  return _mm_cvtss_f32(_mm_add_ss(x0, _mm_shuffle1_ps(x0, 0, 1, 0, 1)));
+  x0 = _mm_add_ps(x0, glmm_shuff1(x0, 1, 0, 3, 2));
+  return _mm_cvtss_f32(_mm_add_ss(x0, glmm_shuff1(x0, 0, 1, 0, 1)));
 #else
   return v[0] * v[0] + v[1] * v[1] + v[2] * v[2] + v[3] * v[3];
 #endif
@@ -214,8 +214,8 @@ float
 glm_vec4_norm(vec4 vec) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
   __m128 x0;
-  x0 = _mm_load_ps(vec);
-  return _mm_cvtss_f32(_mm_sqrt_ss(glm_simd_dot(x0, x0)));
+  x0 = glmm_load(vec);
+  return _mm_cvtss_f32(_mm_sqrt_ss(glmm_dot(x0, x0)));
 #else
   return sqrtf(glm_vec4_norm2(vec));
 #endif
@@ -232,7 +232,7 @@ CGLM_INLINE
 void
 glm_vec4_add(vec4 a, vec4 b, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(dest, _mm_add_ps(_mm_load_ps(a), _mm_load_ps(b)));
+  glmm_store(dest, _mm_add_ps(glmm_load(a), glmm_load(b)));
 #else
   dest[0] = a[0] + b[0];
   dest[1] = a[1] + b[1];
@@ -252,7 +252,7 @@ CGLM_INLINE
 void
 glm_vec4_adds(vec4 v, float s, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(dest, _mm_add_ps(_mm_load_ps(v), _mm_set1_ps(s)));
+  glmm_store(dest, _mm_add_ps(glmm_load(v), _mm_set1_ps(s)));
 #else
   dest[0] = v[0] + s;
   dest[1] = v[1] + s;
@@ -272,7 +272,7 @@ CGLM_INLINE
 void
 glm_vec4_sub(vec4 a, vec4 b, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(dest, _mm_sub_ps(_mm_load_ps(a), _mm_load_ps(b)));
+  glmm_store(dest, _mm_sub_ps(glmm_load(a), glmm_load(b)));
 #else
   dest[0] = a[0] - b[0];
   dest[1] = a[1] - b[1];
@@ -292,7 +292,7 @@ CGLM_INLINE
 void
 glm_vec4_subs(vec4 v, float s, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(dest, _mm_sub_ps(_mm_load_ps(v), _mm_set1_ps(s)));
+  glmm_store(dest, _mm_sub_ps(glmm_load(v), _mm_set1_ps(s)));
 #else
   dest[0] = v[0] - s;
   dest[1] = v[1] - s;
@@ -312,7 +312,7 @@ CGLM_INLINE
 void
 glm_vec4_mul(vec4 a, vec4 b, vec4 d) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(d, _mm_mul_ps(_mm_load_ps(a), _mm_load_ps(b)));
+  glmm_store(d, _mm_mul_ps(glmm_load(a), glmm_load(b)));
 #else
   d[0] = a[0] * b[0];
   d[1] = a[1] * b[1];
@@ -332,7 +332,7 @@ CGLM_INLINE
 void
 glm_vec4_scale(vec4 v, float s, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(dest, _mm_mul_ps(_mm_load_ps(v), _mm_set1_ps(s)));
+  glmm_store(dest, _mm_mul_ps(glmm_load(v), _mm_set1_ps(s)));
 #else
   dest[0] = v[0] * s;
   dest[1] = v[1] * s;
@@ -373,7 +373,7 @@ CGLM_INLINE
 void
 glm_vec4_div(vec4 a, vec4 b, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(dest, _mm_div_ps(_mm_load_ps(a), _mm_load_ps(b)));
+  glmm_store(dest, _mm_div_ps(glmm_load(a), glmm_load(b)));
 #else
   dest[0] = a[0] / b[0];
   dest[1] = a[1] / b[1];
@@ -393,7 +393,7 @@ CGLM_INLINE
 void
 glm_vec4_divs(vec4 v, float s, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(dest, _mm_div_ps(_mm_load_ps(v), _mm_set1_ps(s)));
+  glmm_store(dest, _mm_div_ps(glmm_load(v), _mm_set1_ps(s)));
 #else
   glm_vec4_scale(v, 1.0f / s, dest);
 #endif
@@ -413,9 +413,9 @@ CGLM_INLINE
 void
 glm_vec4_addadd(vec4 a, vec4 b, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(dest, _mm_add_ps(_mm_load_ps(dest),
-                                _mm_add_ps(_mm_load_ps(a),
-                                           _mm_load_ps(b))));
+  glmm_store(dest, _mm_add_ps(glmm_load(dest),
+                              _mm_add_ps(glmm_load(a),
+                                         glmm_load(b))));
 #else
   dest[0] += a[0] + b[0];
   dest[1] += a[1] + b[1];
@@ -437,9 +437,9 @@ CGLM_INLINE
 void
 glm_vec4_subadd(vec4 a, vec4 b, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(dest, _mm_add_ps(_mm_load_ps(dest),
-                                _mm_sub_ps(_mm_load_ps(a),
-                                           _mm_load_ps(b))));
+  glmm_store(dest, _mm_add_ps(glmm_load(dest),
+                              _mm_sub_ps(glmm_load(a),
+                                         glmm_load(b))));
 #else
   dest[0] += a[0] - b[0];
   dest[1] += a[1] - b[1];
@@ -461,9 +461,9 @@ CGLM_INLINE
 void
 glm_vec4_muladd(vec4 a, vec4 b, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(dest, _mm_add_ps(_mm_load_ps(dest),
-                                _mm_mul_ps(_mm_load_ps(a),
-                                           _mm_load_ps(b))));
+  glmm_store(dest, _mm_add_ps(glmm_load(dest),
+                              _mm_mul_ps(glmm_load(a),
+                                         glmm_load(b))));
 #else
   dest[0] += a[0] * b[0];
   dest[1] += a[1] * b[1];
@@ -485,9 +485,9 @@ CGLM_INLINE
 void
 glm_vec4_muladds(vec4 a, float s, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(dest, _mm_add_ps(_mm_load_ps(dest),
-                                _mm_mul_ps(_mm_load_ps(a),
-                                           _mm_set1_ps(s))));
+  glmm_store(dest, _mm_add_ps(glmm_load(dest),
+                              _mm_mul_ps(glmm_load(a),
+                                         _mm_set1_ps(s))));
 #else
   dest[0] += a[0] * s;
   dest[1] += a[1] * s;
@@ -505,7 +505,7 @@ CGLM_INLINE
 void
 glm_vec4_flipsign(vec4 v) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(v, _mm_xor_ps(_mm_load_ps(v), _mm_set1_ps(-0.0f)));
+  glmm_store(v, _mm_xor_ps(glmm_load(v), _mm_set1_ps(-0.0f)));
 #else
   v[0] = -v[0];
   v[1] = -v[1];
@@ -524,8 +524,7 @@ CGLM_INLINE
 void
 glm_vec4_flipsign_to(vec4 v, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(dest, _mm_xor_ps(_mm_load_ps(v),
-                                _mm_set1_ps(-0.0f)));
+  glmm_store(dest, _mm_xor_ps(glmm_load(v), _mm_set1_ps(-0.0f)));
 #else
   dest[0] = -v[0];
   dest[1] = -v[1];
@@ -571,16 +570,16 @@ glm_vec4_normalize_to(vec4 vec, vec4 dest) {
   __m128 xdot, x0;
   float  dot;
 
-  x0   = _mm_load_ps(vec);
-  xdot = glm_simd_dot(x0, x0);
+  x0   = glmm_load(vec);
+  xdot = glmm_dot(x0, x0);
   dot  = _mm_cvtss_f32(xdot);
 
   if (dot == 0.0f) {
-    _mm_store_ps(dest, _mm_setzero_ps());
+    glmm_store(dest, _mm_setzero_ps());
     return;
   }
 
-  _mm_store_ps(dest, _mm_div_ps(x0, _mm_sqrt_ps(xdot)));
+  glmm_store(dest, _mm_div_ps(x0, _mm_sqrt_ps(xdot)));
 #else
   float norm;
@@ -633,7 +632,7 @@ CGLM_INLINE
 void
 glm_vec4_maxv(vec4 v1, vec4 v2, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(dest, _mm_max_ps(_mm_load_ps(v1), _mm_load_ps(v2)));
+  glmm_store(dest, _mm_max_ps(glmm_load(v1), glmm_load(v2)));
 #else
   dest[0] = glm_max(v1[0], v2[0]);
   dest[1] = glm_max(v1[1], v2[1]);
@@ -653,7 +652,7 @@ CGLM_INLINE
 void
 glm_vec4_minv(vec4 v1, vec4 v2, vec4 dest) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(dest, _mm_min_ps(_mm_load_ps(v1), _mm_load_ps(v2)));
+  glmm_store(dest, _mm_min_ps(glmm_load(v1), glmm_load(v2)));
 #else
   dest[0] = glm_min(v1[0], v2[0]);
   dest[1] = glm_min(v1[1], v2[1]);
@@ -673,8 +672,8 @@ CGLM_INLINE
 void
 glm_vec4_clamp(vec4 v, float minVal, float maxVal) {
 #if defined( __SSE__ ) || defined( __SSE2__ )
-  _mm_store_ps(v, _mm_min_ps(_mm_max_ps(_mm_load_ps(v), _mm_set1_ps(minVal)),
-                             _mm_set1_ps(maxVal)));
+  glmm_store(v, _mm_min_ps(_mm_max_ps(glmm_load(v), _mm_set1_ps(minVal)),
+                           _mm_set1_ps(maxVal)));
 #else
   v[0] = glm_clamp(v[0], minVal, maxVal);
   v[1] = glm_clamp(v[1], minVal, maxVal);


@@ -10,6 +10,6 @@
 #define CGLM_VERSION_MAJOR 0
 #define CGLM_VERSION_MINOR 4
-#define CGLM_VERSION_PATCH 3
+#define CGLM_VERSION_PATCH 6
 
 #endif /* cglm_version_h */


@@ -133,3 +133,21 @@ void
 glmc_decompose(mat4 m, vec4 t, mat4 r, vec3 s) {
   glm_decompose(m, t, r, s);
 }
+
+CGLM_EXPORT
+void
+glmc_mul(mat4 m1, mat4 m2, mat4 dest) {
+  glm_mul(m1, m2, dest);
+}
+
+CGLM_EXPORT
+void
+glmc_mul_rot(mat4 m1, mat4 m2, mat4 dest) {
+  glm_mul_rot(m1, m2, dest);
+}
+
+CGLM_EXPORT
+void
+glmc_inv_tr(mat4 mat) {
+  glm_inv_tr(mat);
+}
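Editor's note: these glmc_ functions are the "missing call functions" from the commit log: pre-compiled wrappers around the inline glm_ API, so the math can be reached through an ordinary linked symbol. A hypothetical caller; the <cglm/call.h> header name follows cglm's layout but treat it as an assumption:

#include <cglm/call.h>  /* linked (non-inline) variants */

void build_model_view(mat4 view, mat4 model, mat4 dest) {
  /* same math as the inline glm_mul, resolved at link time */
  glmc_mul(view, model, dest);
}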


@@ -34,3 +34,39 @@ glmc_aabb_crop_until(vec3 box[2],
                      vec3 dest[2]) {
   glm_aabb_crop_until(box, cropBox, clampBox, dest);
 }
+
+CGLM_EXPORT
+bool
+glmc_aabb_frustum(vec3 box[2], vec4 planes[6]) {
+  return glm_aabb_frustum(box, planes);
+}
+
+CGLM_EXPORT
+void
+glmc_aabb_invalidate(vec3 box[2]) {
+  glm_aabb_invalidate(box);
+}
+
+CGLM_EXPORT
+bool
+glmc_aabb_isvalid(vec3 box[2]) {
+  return glm_aabb_isvalid(box);
+}
+
+CGLM_EXPORT
+float
+glmc_aabb_size(vec3 box[2]) {
+  return glm_aabb_size(box);
+}
+
+CGLM_EXPORT
+float
+glmc_aabb_radius(vec3 box[2]) {
+  return glm_aabb_radius(box);
+}
+
+CGLM_EXPORT
+void
+glmc_aabb_center(vec3 box[2], vec3 dest) {
+  glm_aabb_center(box, dest);
+}
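Editor's note: per the "center of AABB helper" commit, glm_aabb_center is just a wrapper over the vector-center helper applied to the box's min and max corners. A short hypothetical usage sketch:

#include <cglm/call.h>

void aabb_demo(void) {
  vec3  box[2] = {{-1.0f, -2.0f, -3.0f}, {1.0f, 2.0f, 3.0f}};
  vec3  c;
  float r;

  glmc_aabb_center(box, c);  /* c = midpoint of box[0] (min) and box[1] (max) */
  r = glmc_aabb_radius(box); /* radius of the enclosing sphere */
  (void)c; (void)r;
}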

src/cam.c

@@ -44,6 +44,36 @@ glmc_ortho(float left,
             dest);
 }
 
+CGLM_EXPORT
+void
+glmc_ortho_aabb(vec3 box[2], mat4 dest) {
+  glm_ortho_aabb(box, dest);
+}
+
+CGLM_EXPORT
+void
+glmc_ortho_aabb_p(vec3 box[2], float padding, mat4 dest) {
+  glm_ortho_aabb_p(box, padding, dest);
+}
+
+CGLM_EXPORT
+void
+glmc_ortho_aabb_pz(vec3 box[2], float padding, mat4 dest) {
+  glm_ortho_aabb_pz(box, padding, dest);
+}
+
+CGLM_EXPORT
+void
+glmc_ortho_default(float aspect, mat4 dest) {
+  glm_ortho_default(aspect, dest);
+}
+
+CGLM_EXPORT
+void
+glmc_ortho_default_s(float aspect, float size, mat4 dest) {
+  glm_ortho_default_s(aspect, size, dest);
+}
+
 CGLM_EXPORT
 void
 glmc_perspective(float fovy,
@@ -58,6 +88,18 @@ glmc_perspective(float fovy,
                   dest);
 }
 
+CGLM_EXPORT
+void
+glmc_perspective_default(float aspect, mat4 dest) {
+  glm_perspective_default(aspect, dest);
+}
+
+CGLM_EXPORT
+void
+glmc_perspective_resize(float aspect, mat4 proj) {
+  glm_perspective_resize(aspect, proj);
+}
+
 CGLM_EXPORT
 void
 glmc_lookat(vec3 eye,
@@ -78,3 +120,75 @@ void
 glmc_look_anyup(vec3 eye, vec3 dir, mat4 dest) {
   glm_look_anyup(eye, dir, dest);
 }
+
+CGLM_EXPORT
+void
+glmc_persp_decomp(mat4 proj,
+                  float * __restrict nearVal,
+                  float * __restrict farVal,
+                  float * __restrict top,
+                  float * __restrict bottom,
+                  float * __restrict left,
+                  float * __restrict right) {
+  glm_persp_decomp(proj, nearVal, farVal, top, bottom, left, right);
+}
+
+CGLM_EXPORT
+void
+glmc_persp_decompv(mat4 proj, float dest[6]) {
+  glm_persp_decompv(proj, dest);
+}
+
+CGLM_EXPORT
+void
+glmc_persp_decomp_x(mat4 proj,
+                    float * __restrict left,
+                    float * __restrict right) {
+  glm_persp_decomp_x(proj, left, right);
+}
+
+CGLM_EXPORT
+void
+glmc_persp_decomp_y(mat4 proj,
+                    float * __restrict top,
+                    float * __restrict bottom) {
+  glm_persp_decomp_y(proj, top, bottom);
+}
+
+CGLM_EXPORT
+void
+glmc_persp_decomp_z(mat4 proj,
+                    float * __restrict nearVal,
+                    float * __restrict farVal) {
+  glm_persp_decomp_z(proj, nearVal, farVal);
+}
+
+CGLM_EXPORT
+void
+glmc_persp_decomp_far(mat4 proj, float * __restrict farVal) {
+  glm_persp_decomp_far(proj, farVal);
+}
+
+CGLM_EXPORT
+void
+glmc_persp_decomp_near(mat4 proj, float * __restrict nearVal) {
+  glm_persp_decomp_near(proj, nearVal);
+}
+
+CGLM_EXPORT
+float
+glmc_persp_fovy(mat4 proj) {
+  return glm_persp_fovy(proj);
+}
+
+CGLM_EXPORT
+float
+glmc_persp_aspect(mat4 proj) {
+  return glm_persp_aspect(proj);
+}
+
+CGLM_EXPORT
+void
+glmc_persp_sizes(mat4 proj, float fovy, vec4 dest) {
+  glm_persp_sizes(proj, fovy, dest);
+}
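Editor's note: glm_perspective_resize updates an existing projection matrix in place for a new aspect ratio, which fits naturally into a window-resize handler. A hypothetical sketch; the application state is assumed:

#include <cglm/call.h>

static mat4 proj; /* assumed: the app's current projection matrix */

void on_window_resize(int width, int height) {
  /* keep fovy/near/far, update only the aspect ratio */
  glmc_perspective_resize((float)width / (float)height, proj);
}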


@@ -44,6 +44,12 @@ glmc_mat3_mulv(mat3 m, vec3 v, vec3 dest) {
   glm_mat3_mulv(m, v, dest);
 }
 
+CGLM_EXPORT
+void
+glmc_mat3_quat(mat3 m, versor dest) {
+  glm_mat3_quat(m, dest);
+}
+
 CGLM_EXPORT
 void
 glmc_mat3_scale(mat3 m, float s) {


@@ -62,6 +62,12 @@ glmc_mat4_mulv(mat4 m, vec4 v, vec4 dest) {
   glm_mat4_mulv(m, v, dest);
 }
 
+CGLM_EXPORT
+void
+glmc_mat4_mulv3(mat4 m, vec3 v, float last, vec3 dest) {
+  glm_mat4_mulv3(m, v, last, dest);
+}
+
 CGLM_EXPORT
 void
 glmc_mat4_quat(mat4 m, versor dest) {
@@ -110,6 +116,12 @@ glmc_mat4_inv_precise(mat4 mat, mat4 dest) {
   glm_mat4_inv_precise(mat, dest);
 }
 
+CGLM_EXPORT
+void
+glmc_mat4_inv_fast(mat4 mat, mat4 dest) {
+  glm_mat4_inv_fast(mat, dest);
+}
+
 CGLM_EXPORT
 void
 glmc_mat4_swap_col(mat4 mat, int col1, int col2) {
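Editor's note: per the "update mat4_mulv3 api to include translation" commit, the new last parameter is the homogeneous w applied to the 3-component vector, so callers choose point vs. direction semantics. A hypothetical sketch:

#include <cglm/call.h>

void transform_demo(mat4 model, vec3 p, vec3 d) {
  vec3 p_out, d_out;

  /* w = 1.0f: treat p as a point, translation applies */
  glmc_mat4_mulv3(model, p, 1.0f, p_out);

  /* w = 0.0f: treat d as a direction, translation is ignored */
  glmc_mat4_mulv3(model, d, 0.0f, d_out);
}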


@@ -188,6 +188,12 @@ glmc_vec_rotate_m4(mat4 m, vec3 v, vec3 dest) {
   glm_vec_rotate_m4(m, v, dest);
 }
 
+CGLM_EXPORT
+void
+glmc_vec_rotate_m3(mat3 m, vec3 v, vec3 dest) {
+  glm_vec_rotate_m3(m, v, dest);
+}
+
 CGLM_EXPORT
 void
 glmc_vec_proj(vec3 a, vec3 b, vec3 dest) {