Compare commits

...

4 Commits

Author SHA1 Message Date
Recep Aslantas
e34601f578 arm neon: multiply mat4 with vec4 2020-08-29 11:51:07 +03:00
Recep Aslantas
fa01a3077b neon: support transpose mat4 with neon 2020-08-29 11:33:13 +03:00
Recep Aslantas
54f805a62d neon: move neon-scale to simd header 2020-08-29 11:32:44 +03:00
Recep Aslantas
a05b282fad now working on v0.7.9 2020-08-29 10:19:50 +03:00
7 changed files with 61 additions and 12 deletions

View File

@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.8.2)
project(cglm VERSION 0.7.8 LANGUAGES C)
project(cglm VERSION 0.7.9 LANGUAGES C)
set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED YES)

View File

@@ -7,7 +7,7 @@
#*****************************************************************************
AC_PREREQ([2.69])
AC_INIT([cglm], [0.7.8], [info@recp.me])
AC_INIT([cglm], [0.7.9], [info@recp.me])
AM_INIT_AUTOMAKE([-Wall -Werror foreign subdir-objects serial-tests])
# Don't use the default cflags (-O2 -g), we set ours manually in Makefile.am.

View File

@@ -62,9 +62,9 @@ author = u'Recep Aslantas'
# built documents.
#
# The short X.Y version.
version = u'0.7.8'
version = u'0.7.9'
# The full version, including alpha/beta/rc tags.
release = u'0.7.8'
release = u'0.7.9'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.

View File

@@ -358,6 +358,8 @@ void
glm_mat4_mulv(mat4 m, vec4 v, vec4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
glm_mat4_mulv_sse2(m, v, dest);
#elif defined(CGLM_NEON_FP)
glm_mat4_mulv_neon(m, v, dest);
#else
vec4 res;
res[0] = m[0][0] * v[0] + m[1][0] * v[1] + m[2][0] * v[2] + m[3][0] * v[3];
@@ -476,6 +478,8 @@ void
glm_mat4_transpose_to(mat4 m, mat4 dest) {
#if defined( __SSE__ ) || defined( __SSE2__ )
glm_mat4_transp_sse2(m, dest);
#elif defined(CGLM_NEON_FP)
glm_mat4_transp_neon(m, dest);
#else
dest[0][0] = m[0][0]; dest[1][0] = m[0][1];
dest[0][1] = m[1][0]; dest[1][1] = m[1][1];
@@ -498,6 +502,8 @@ void
glm_mat4_transpose(mat4 m) {
#if defined( __SSE__ ) || defined( __SSE2__ )
glm_mat4_transp_sse2(m, m);
#elif defined(CGLM_NEON_FP)
glm_mat4_transp_neon(m, m);
#else
mat4 d;
glm_mat4_transpose_to(m, d);
@@ -536,12 +542,7 @@ glm_mat4_scale(mat4 m, float s) {
#if defined( __SSE__ ) || defined( __SSE2__ )
glm_mat4_scale_sse2(m, s);
#elif defined(CGLM_NEON_FP)
float32x4_t v0;
v0 = vdupq_n_f32(s);
vst1q_f32(m[0], vmulq_f32(vld1q_f32(m[0]), v0));
vst1q_f32(m[1], vmulq_f32(vld1q_f32(m[1]), v0));
vst1q_f32(m[2], vmulq_f32(vld1q_f32(m[2]), v0));
vst1q_f32(m[3], vmulq_f32(vld1q_f32(m[3]), v0));
glm_mat4_scale_neon(m, s);
#else
glm_mat4_scale_p(m, s);
#endif

View File

@@ -12,6 +12,32 @@
#include "../../common.h"
#include "../intrin.h"
/*
 * Multiply every element of a 4x4 matrix by a scalar, in place (NEON).
 *
 * m: matrix to scale; rows m[0]..m[3] are each read and written back
 *    as four floats
 * s: scalar factor applied to every element
 */
CGLM_INLINE
void
glm_mat4_scale_neon(mat4 m, float s) {
  float32x4_t factor;
  int         i;

  /* broadcast the scalar into all four lanes once, reuse it for each row */
  factor = vdupq_n_f32(s);

  for (i = 0; i < 4; i++) {
    vst1q_f32(m[i], vmulq_f32(vld1q_f32(m[i]), factor));
  }
}
/*
 * Transpose a 4x4 matrix with NEON.
 *
 * m:    source matrix
 * dest: destination matrix; it may be the same matrix as m, because the
 *       whole source is loaded into registers before anything is stored
 *
 * NOTE(review): relies on the 16 floats of m being contiguous starting at
 * m[0] -- confirm against the mat4 typedef.
 */
CGLM_INLINE
void
glm_mat4_transp_neon(mat4 m, mat4 dest) {
  float32x4x4_t lanes;
  int           i;

  /* de-interleaving load: lanes.val[i] receives elements i, i+4, i+8, i+12 */
  lanes = vld4q_f32(m[0]);

  for (i = 0; i < 4; i++) {
    vst1q_f32(dest[i], lanes.val[i]);
  }
}
CGLM_INLINE
void
glm_mat4_mul_neon(mat4 m1, mat4 m2, mat4 dest) {
@@ -53,5 +79,27 @@ glm_mat4_mul_neon(mat4 m1, mat4 m2, mat4 dest) {
vst1q_f32(dest[3], d3);
}
/*
 * Multiply a 4x4 matrix with a 4-component vector using NEON.
 *
 * Computes dest = m[0] * v[0] + m[1] * v[1] + m[2] * v[2] + m[3] * v[3],
 * where each m[i] is treated as a four-float vector.
 *
 * m:    matrix (left operand)
 * v:    vector (right operand)
 * dest: result vector; written only after all inputs have been loaded
 */
CGLM_INLINE
void
glm_mat4_mulv_neon(mat4 m, vec4 v, vec4 dest) {
  float32x4_t c0, c1, c2, c3, acc;
  float32x2_t v01, v23;

  c0  = vld1q_f32(m[0]);
  c1  = vld1q_f32(m[1]);
  c2  = vld1q_f32(m[2]);
  c3  = vld1q_f32(m[3]);

  /* split v into two 2-lane halves so each component can be used by lane */
  v01 = vld1_f32(v);
  v23 = vld1_f32(v + 2);

  /* accumulate one scaled row of m per component of v */
  acc = vmulq_lane_f32(c0, v01, 0);
  acc = vmlaq_lane_f32(acc, c1, v01, 1);
  acc = vmlaq_lane_f32(acc, c2, v23, 0);
  acc = vmlaq_lane_f32(acc, c3, v23, 1);

  vst1q_f32(dest, acc);
}
#endif
#endif /* cglm_mat4_neon_h */

View File

@@ -10,6 +10,6 @@
#define CGLM_VERSION_MAJOR 0
#define CGLM_VERSION_MINOR 7
#define CGLM_VERSION_PATCH 8
#define CGLM_VERSION_PATCH 9
#endif /* cglm_version_h */

View File

@@ -1,5 +1,5 @@
project('cglm', 'c',
version : '0.7.8',
version : '0.7.9',
license : 'mit',
default_options : [
'c_std=c11',