Compare commits

...

6 Commits

Author SHA1 Message Date
Recep Aslantas
6d8dd42ac2 simd: use new glmm_min/max in vec4 where possible 2024-01-11 00:14:28 +03:00
Recep Aslantas
dab86796a4 simd: min / max helpers 2024-01-09 21:35:39 +03:00
Recep Aslantas
4b93cb3e05 Merge pull request #378 from recp/win32_intrin
win32, simd: ensure we are on msvc when checking MSVC specific headers
2024-01-08 00:10:43 +03:00
Recep Aslantas
a682b9e6cf win32, tests: fix drand48() error on mingw 2024-01-04 12:49:45 +03:00
Recep Aslantas
bca93a379d win32, simd: ensure we are on msvc when checking MSVC specific headers 2024-01-04 11:54:42 +03:00
Recep Aslantas
34f0d59f5a now working on v0.9.3 2023-12-31 15:19:36 +03:00
11 changed files with 48 additions and 70 deletions

View File

@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.8.2) cmake_minimum_required(VERSION 3.8.2)
project(cglm project(cglm
VERSION 0.9.2 VERSION 0.9.3
HOMEPAGE_URL https://github.com/recp/cglm HOMEPAGE_URL https://github.com/recp/cglm
DESCRIPTION "OpenGL Mathematics (glm) for C" DESCRIPTION "OpenGL Mathematics (glm) for C"
LANGUAGES C LANGUAGES C

View File

@@ -2,7 +2,7 @@ Pod::Spec.new do |s|
# Description # Description
s.name = "cglm" s.name = "cglm"
s.version = "0.9.1" s.version = "0.9.2"
s.summary = "📽 Highly Optimized Graphics Math (glm) for C" s.summary = "📽 Highly Optimized Graphics Math (glm) for C"
s.description = <<-DESC s.description = <<-DESC
cglm is math library for graphics programming for C. See the documentation or README for all features. cglm is math library for graphics programming for C. See the documentation or README for all features.

View File

@@ -7,7 +7,7 @@
#***************************************************************************** #*****************************************************************************
AC_PREREQ([2.69]) AC_PREREQ([2.69])
AC_INIT([cglm], [0.9.2], [info@recp.me]) AC_INIT([cglm], [0.9.3], [info@recp.me])
AM_INIT_AUTOMAKE([-Wall foreign subdir-objects serial-tests]) AM_INIT_AUTOMAKE([-Wall foreign subdir-objects serial-tests])
# Don't use the default cflags (-O2 -g), we set ours manually in Makefile.am. # Don't use the default cflags (-O2 -g), we set ours manually in Makefile.am.

View File

@@ -56,11 +56,9 @@ glmm_float32x4_init(float x, float y, float z, float w) {
#define glmm_float32x4_SIGNMASK_NPNP glmm_float32x4_init(-0.f, 0.f, -0.f, 0.f) #define glmm_float32x4_SIGNMASK_NPNP glmm_float32x4_init(-0.f, 0.f, -0.f, 0.f)
#define glmm_float32x4_SIGNMASK_NPPN glmm_float32x4_init(-0.f, 0.f, 0.f, -0.f) #define glmm_float32x4_SIGNMASK_NPPN glmm_float32x4_init(-0.f, 0.f, 0.f, -0.f)
static inline static inline float32x4_t glmm_abs(float32x4_t v) { return vabsq_f32(v); }
float32x4_t static inline float32x4_t glmm_min(float32x4_t a, float32x4_t b) { return vminq_f32(a, b); }
glmm_abs(float32x4_t v) { static inline float32x4_t glmm_max(float32x4_t a, float32x4_t b) { return vmaxq_f32(a, b); }
return vabsq_f32(v);
}
static inline static inline
float32x4_t float32x4_t

View File

@@ -63,7 +63,7 @@
#endif #endif
/* ARM Neon */ /* ARM Neon */
#if defined(_WIN32) #if defined(_WIN32) && defined(_MSC_VER)
/* TODO: non-ARM stuff already imported, will this be better option */ /* TODO: non-ARM stuff already imported, will this be better option */
/* # include <intrin.h> */ /* # include <intrin.h> */

View File

@@ -36,11 +36,9 @@
#define glmm_float32x4_SIGNMASK_NPPN GLMM__SIGNMASKf(GLMM_NEGZEROf, 0, 0, GLMM_NEGZEROf) #define glmm_float32x4_SIGNMASK_NPPN GLMM__SIGNMASKf(GLMM_NEGZEROf, 0, 0, GLMM_NEGZEROf)
#define glmm_float32x4_SIGNMASK_NEG wasm_i32x4_const_splat(GLMM_NEGZEROf) #define glmm_float32x4_SIGNMASK_NEG wasm_i32x4_const_splat(GLMM_NEGZEROf)
static inline static inline glmm_128 glmm_abs(glmm_128 x) { return wasm_f32x4_abs(x); }
glmm_128 static inline glmm_128 glmm_min(glmm_128 a, glmm_128 b) { return wasm_f32x4_pmin(b, a); }
glmm_abs(glmm_128 x) { static inline glmm_128 glmm_max(glmm_128 a, glmm_128 b) { return wasm_f32x4_pmax(b, a); }
return wasm_f32x4_abs(x);
}
static inline static inline
glmm_128 glmm_128

View File

@@ -74,6 +74,9 @@ glmm_abs(__m128 x) {
return _mm_andnot_ps(glmm_float32x4_SIGNMASK_NEG, x); return _mm_andnot_ps(glmm_float32x4_SIGNMASK_NEG, x);
} }
static inline __m128 glmm_min(__m128 a, __m128 b) { return _mm_min_ps(a, b); }
static inline __m128 glmm_max(__m128 a, __m128 b) { return _mm_max_ps(a, b); }
static inline static inline
__m128 __m128
glmm_vhadd(__m128 v) { glmm_vhadd(__m128 v) {

View File

@@ -653,17 +653,14 @@ CGLM_INLINE
void void
glm_vec4_maxadd(vec4 a, vec4 b, vec4 dest) { glm_vec4_maxadd(vec4 a, vec4 b, vec4 dest) {
#if defined(__wasm__) && defined(__wasm_simd128__) #if defined(__wasm__) && defined(__wasm_simd128__)
glmm_store(dest, wasm_f32x4_add( glmm_store(dest, wasm_f32x4_add(glmm_load(dest),
glmm_load(dest), glmm_max(glmm_load(a), glmm_load(b))));
wasm_f32x4_pmax(glmm_load(a), glmm_load(b))));
#elif defined( __SSE__ ) || defined( __SSE2__ ) #elif defined( __SSE__ ) || defined( __SSE2__ )
glmm_store(dest, _mm_add_ps(glmm_load(dest), glmm_store(dest, _mm_add_ps(glmm_load(dest),
_mm_max_ps(glmm_load(a), glmm_max(glmm_load(a), glmm_load(b))));
glmm_load(b))));
#elif defined(CGLM_NEON_FP) #elif defined(CGLM_NEON_FP)
vst1q_f32(dest, vaddq_f32(vld1q_f32(dest), glmm_store(dest, vaddq_f32(glmm_load(dest),
vmaxq_f32(vld1q_f32(a), glmm_max(glmm_load(a), glmm_load(b))));
vld1q_f32(b))));
#else #else
dest[0] += glm_max(a[0], b[0]); dest[0] += glm_max(a[0], b[0]);
dest[1] += glm_max(a[1], b[1]); dest[1] += glm_max(a[1], b[1]);
@@ -685,17 +682,14 @@ CGLM_INLINE
void void
glm_vec4_minadd(vec4 a, vec4 b, vec4 dest) { glm_vec4_minadd(vec4 a, vec4 b, vec4 dest) {
#if defined(__wasm__) && defined(__wasm_simd128__) #if defined(__wasm__) && defined(__wasm_simd128__)
glmm_store(dest, wasm_f32x4_add( glmm_store(dest, wasm_f32x4_add(glmm_load(dest),
glmm_load(dest), glmm_min(glmm_load(a), glmm_load(b))));
wasm_f32x4_pmin(glmm_load(a), glmm_load(b))));
#elif defined( __SSE__ ) || defined( __SSE2__ ) #elif defined( __SSE__ ) || defined( __SSE2__ )
glmm_store(dest, _mm_add_ps(glmm_load(dest), glmm_store(dest, _mm_add_ps(glmm_load(dest),
_mm_min_ps(glmm_load(a), glmm_min(glmm_load(a), glmm_load(b))));
glmm_load(b))));
#elif defined(CGLM_NEON_FP) #elif defined(CGLM_NEON_FP)
vst1q_f32(dest, vaddq_f32(vld1q_f32(dest), glmm_store(dest, vaddq_f32(glmm_load(dest),
vminq_f32(vld1q_f32(a), glmm_min(glmm_load(a), glmm_load(b))));
vld1q_f32(b))));
#else #else
dest[0] += glm_min(a[0], b[0]); dest[0] += glm_min(a[0], b[0]);
dest[1] += glm_min(a[1], b[1]); dest[1] += glm_min(a[1], b[1]);
@@ -825,17 +819,14 @@ CGLM_INLINE
void void
glm_vec4_maxsub(vec4 a, vec4 b, vec4 dest) { glm_vec4_maxsub(vec4 a, vec4 b, vec4 dest) {
#if defined(__wasm__) && defined(__wasm_simd128__) #if defined(__wasm__) && defined(__wasm_simd128__)
glmm_store(dest, wasm_f32x4_sub( glmm_store(dest, wasm_f32x4_sub(glmm_load(dest),
glmm_load(dest), glmm_max(glmm_load(a), glmm_load(b))));
wasm_f32x4_pmax(glmm_load(a), glmm_load(b))));
#elif defined( __SSE__ ) || defined( __SSE2__ ) #elif defined( __SSE__ ) || defined( __SSE2__ )
glmm_store(dest, _mm_sub_ps(glmm_load(dest), glmm_store(dest, _mm_sub_ps(glmm_load(dest),
_mm_max_ps(glmm_load(a), glmm_max(glmm_load(a), glmm_load(b))));
glmm_load(b))));
#elif defined(CGLM_NEON_FP) #elif defined(CGLM_NEON_FP)
vst1q_f32(dest, vsubq_f32(vld1q_f32(dest), glmm_store(dest, vsubq_f32(glmm_load(dest),
vmaxq_f32(vld1q_f32(a), glmm_max(glmm_load(a), glmm_load(b))));
vld1q_f32(b))));
#else #else
dest[0] -= glm_max(a[0], b[0]); dest[0] -= glm_max(a[0], b[0]);
dest[1] -= glm_max(a[1], b[1]); dest[1] -= glm_max(a[1], b[1]);
@@ -857,17 +848,14 @@ CGLM_INLINE
void void
glm_vec4_minsub(vec4 a, vec4 b, vec4 dest) { glm_vec4_minsub(vec4 a, vec4 b, vec4 dest) {
#if defined(__wasm__) && defined(__wasm_simd128__) #if defined(__wasm__) && defined(__wasm_simd128__)
glmm_store(dest, wasm_f32x4_sub( glmm_store(dest, wasm_f32x4_sub(glmm_load(dest),
glmm_load(dest), glmm_min(glmm_load(a), glmm_load(b))));
wasm_f32x4_pmin(glmm_load(a), glmm_load(b))));
#elif defined( __SSE__ ) || defined( __SSE2__ ) #elif defined( __SSE__ ) || defined( __SSE2__ )
glmm_store(dest, _mm_sub_ps(glmm_load(dest), glmm_store(dest, _mm_sub_ps(glmm_load(dest),
_mm_min_ps(glmm_load(a), glmm_min(glmm_load(a), glmm_load(b))));
glmm_load(b))));
#elif defined(CGLM_NEON_FP) #elif defined(CGLM_NEON_FP)
vst1q_f32(dest, vsubq_f32(vld1q_f32(dest), glmm_store(dest, vsubq_f32(vld1q_f32(dest),
vminq_f32(vld1q_f32(a), glmm_min(glmm_load(a), glmm_load(b))));
vld1q_f32(b))));
#else #else
dest[0] -= glm_min(a[0], b[0]); dest[0] -= glm_min(a[0], b[0]);
dest[1] -= glm_min(a[1], b[1]); dest[1] -= glm_min(a[1], b[1]);
@@ -1031,12 +1019,8 @@ glm_vec4_distance2(vec4 a, vec4 b) {
CGLM_INLINE CGLM_INLINE
void void
glm_vec4_maxv(vec4 a, vec4 b, vec4 dest) { glm_vec4_maxv(vec4 a, vec4 b, vec4 dest) {
#if defined(__wasm__) && defined(__wasm_simd128__) #if defined(CGLM_SIMD)
glmm_store(dest, wasm_f32x4_pmax(glmm_load(a), glmm_load(b))); glmm_store(dest, glmm_max(glmm_load(a), glmm_load(b)));
#elif defined( __SSE__ ) || defined( __SSE2__ )
glmm_store(dest, _mm_max_ps(glmm_load(a), glmm_load(b)));
#elif defined(CGLM_NEON_FP)
vst1q_f32(dest, vmaxq_f32(vld1q_f32(a), vld1q_f32(b)));
#else #else
dest[0] = glm_max(a[0], b[0]); dest[0] = glm_max(a[0], b[0]);
dest[1] = glm_max(a[1], b[1]); dest[1] = glm_max(a[1], b[1]);
@@ -1055,12 +1039,8 @@ glm_vec4_maxv(vec4 a, vec4 b, vec4 dest) {
CGLM_INLINE CGLM_INLINE
void void
glm_vec4_minv(vec4 a, vec4 b, vec4 dest) { glm_vec4_minv(vec4 a, vec4 b, vec4 dest) {
#if defined(__wasm__) && defined(__wasm_simd128__) #if defined(CGLM_SIMD)
glmm_store(dest, wasm_f32x4_pmin(glmm_load(a), glmm_load(b))); glmm_store(dest, glmm_min(glmm_load(a), glmm_load(b)));
#elif defined( __SSE__ ) || defined( __SSE2__ )
glmm_store(dest, _mm_min_ps(glmm_load(a), glmm_load(b)));
#elif defined(CGLM_NEON_FP)
vst1q_f32(dest, vminq_f32(vld1q_f32(a), vld1q_f32(b)));
#else #else
dest[0] = glm_min(a[0], b[0]); dest[0] = glm_min(a[0], b[0]);
dest[1] = glm_min(a[1], b[1]); dest[1] = glm_min(a[1], b[1]);
@@ -1080,14 +1060,13 @@ CGLM_INLINE
void void
glm_vec4_clamp(vec4 v, float minVal, float maxVal) { glm_vec4_clamp(vec4 v, float minVal, float maxVal) {
#if defined(__wasm__) && defined(__wasm_simd128__) #if defined(__wasm__) && defined(__wasm_simd128__)
glmm_store(v, wasm_f32x4_pmin( glmm_store(v, glmm_min(glmm_max(glmm_load(v), wasm_f32x4_splat(minVal)),
wasm_f32x4_pmax(glmm_load(v), wasm_f32x4_splat(minVal)),
wasm_f32x4_splat(maxVal))); wasm_f32x4_splat(maxVal)));
#elif defined( __SSE__ ) || defined( __SSE2__ ) #elif defined( __SSE__ ) || defined( __SSE2__ )
glmm_store(v, _mm_min_ps(_mm_max_ps(glmm_load(v), _mm_set1_ps(minVal)), glmm_store(v, glmm_min(glmm_max(glmm_load(v), _mm_set1_ps(minVal)),
_mm_set1_ps(maxVal))); _mm_set1_ps(maxVal)));
#elif defined(CGLM_NEON_FP) #elif defined(CGLM_NEON_FP)
vst1q_f32(v, vminq_f32(vmaxq_f32(vld1q_f32(v), vdupq_n_f32(minVal)), glmm_store(v, glmm_min(glmm_max(vld1q_f32(v), vdupq_n_f32(minVal)),
vdupq_n_f32(maxVal))); vdupq_n_f32(maxVal)));
#else #else
v[0] = glm_clamp(v[0], minVal, maxVal); v[0] = glm_clamp(v[0], minVal, maxVal);

View File

@@ -10,6 +10,6 @@
#define CGLM_VERSION_MAJOR 0 #define CGLM_VERSION_MAJOR 0
#define CGLM_VERSION_MINOR 9 #define CGLM_VERSION_MINOR 9
#define CGLM_VERSION_PATCH 2 #define CGLM_VERSION_PATCH 3
#endif /* cglm_version_h */ #endif /* cglm_version_h */

View File

@@ -1,5 +1,5 @@
project('cglm', 'c', project('cglm', 'c',
version : '0.9.2', version : '0.9.3',
license : 'mit', license : 'mit',
default_options : [ default_options : [
'c_std=c11', 'c_std=c11',

View File

@@ -145,7 +145,7 @@ typedef struct test_entry_t {
} \ } \
} while(0); } while(0);
#if defined(_WIN32) #if defined(_WIN32) || defined(__MINGW32__) || defined(__MINGW64__)
# define drand48() ((float)(rand() / (RAND_MAX + 1.0))) # define drand48() ((float)(rand() / (RAND_MAX + 1.0)))
# define OK_TEXT "ok:" # define OK_TEXT "ok:"
# define FAIL_TEXT "fail:" # define FAIL_TEXT "fail:"