mirror of
https://github.com/recp/cglm.git
synced 2026-02-17 03:39:05 +00:00
Compare commits
6 Commits
v0.9.2
...
simd_min_m
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6d8dd42ac2 | ||
|
|
dab86796a4 | ||
|
|
4b93cb3e05 | ||
|
|
a682b9e6cf | ||
|
|
bca93a379d | ||
|
|
34f0d59f5a |
@@ -1,6 +1,6 @@
|
|||||||
cmake_minimum_required(VERSION 3.8.2)
|
cmake_minimum_required(VERSION 3.8.2)
|
||||||
project(cglm
|
project(cglm
|
||||||
VERSION 0.9.2
|
VERSION 0.9.3
|
||||||
HOMEPAGE_URL https://github.com/recp/cglm
|
HOMEPAGE_URL https://github.com/recp/cglm
|
||||||
DESCRIPTION "OpenGL Mathematics (glm) for C"
|
DESCRIPTION "OpenGL Mathematics (glm) for C"
|
||||||
LANGUAGES C
|
LANGUAGES C
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ Pod::Spec.new do |s|
|
|||||||
|
|
||||||
# Description
|
# Description
|
||||||
s.name = "cglm"
|
s.name = "cglm"
|
||||||
s.version = "0.9.1"
|
s.version = "0.9.2"
|
||||||
s.summary = "📽 Highly Optimized Graphics Math (glm) for C"
|
s.summary = "📽 Highly Optimized Graphics Math (glm) for C"
|
||||||
s.description = <<-DESC
|
s.description = <<-DESC
|
||||||
cglm is math library for graphics programming for C. See the documentation or README for all features.
|
cglm is math library for graphics programming for C. See the documentation or README for all features.
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
#*****************************************************************************
|
#*****************************************************************************
|
||||||
|
|
||||||
AC_PREREQ([2.69])
|
AC_PREREQ([2.69])
|
||||||
AC_INIT([cglm], [0.9.2], [info@recp.me])
|
AC_INIT([cglm], [0.9.3], [info@recp.me])
|
||||||
AM_INIT_AUTOMAKE([-Wall foreign subdir-objects serial-tests])
|
AM_INIT_AUTOMAKE([-Wall foreign subdir-objects serial-tests])
|
||||||
|
|
||||||
# Don't use the default cflags (-O2 -g), we set ours manually in Makefile.am.
|
# Don't use the default cflags (-O2 -g), we set ours manually in Makefile.am.
|
||||||
|
|||||||
@@ -56,11 +56,9 @@ glmm_float32x4_init(float x, float y, float z, float w) {
|
|||||||
#define glmm_float32x4_SIGNMASK_NPNP glmm_float32x4_init(-0.f, 0.f, -0.f, 0.f)
|
#define glmm_float32x4_SIGNMASK_NPNP glmm_float32x4_init(-0.f, 0.f, -0.f, 0.f)
|
||||||
#define glmm_float32x4_SIGNMASK_NPPN glmm_float32x4_init(-0.f, 0.f, 0.f, -0.f)
|
#define glmm_float32x4_SIGNMASK_NPPN glmm_float32x4_init(-0.f, 0.f, 0.f, -0.f)
|
||||||
|
|
||||||
static inline
|
/*
 * NEON: absolute value of each lane of v.
 *
 * v: input vector of four floats
 * returns: result of vabsq_f32(v)
 */
static inline
float32x4_t
glmm_abs(float32x4_t v) {
  return vabsq_f32(v);
}
|
||||||
float32x4_t
|
/*
 * NEON: lane-wise minimum of a and b.
 *
 * a, b: input vectors of four floats
 * returns: result of vminq_f32(a, b)
 */
static inline
float32x4_t
glmm_min(float32x4_t a, float32x4_t b) {
  return vminq_f32(a, b);
}
|
||||||
glmm_abs(float32x4_t v) {
|
/*
 * NEON: lane-wise maximum of a and b.
 *
 * a, b: input vectors of four floats
 * returns: result of vmaxq_f32(a, b)
 */
static inline
float32x4_t
glmm_max(float32x4_t a, float32x4_t b) {
  return vmaxq_f32(a, b);
}
|
||||||
return vabsq_f32(v);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline
|
static inline
|
||||||
float32x4_t
|
float32x4_t
|
||||||
|
|||||||
@@ -63,7 +63,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* ARM Neon */
|
/* ARM Neon */
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32) && defined(_MSC_VER)
|
||||||
/* TODO: non-ARM stuff is already imported; would this be a better option? */
|
/* TODO: non-ARM stuff is already imported; would this be a better option? */
|
||||||
/* # include <intrin.h> */
|
/* # include <intrin.h> */
|
||||||
|
|
||||||
|
|||||||
@@ -34,13 +34,11 @@
|
|||||||
#define glmm_float32x4_SIGNMASK_PNPN GLMM__SIGNMASKf(0, GLMM_NEGZEROf, 0, GLMM_NEGZEROf)
|
#define glmm_float32x4_SIGNMASK_PNPN GLMM__SIGNMASKf(0, GLMM_NEGZEROf, 0, GLMM_NEGZEROf)
|
||||||
#define glmm_float32x4_SIGNMASK_NPNP GLMM__SIGNMASKf(GLMM_NEGZEROf, 0, GLMM_NEGZEROf, 0)
|
#define glmm_float32x4_SIGNMASK_NPNP GLMM__SIGNMASKf(GLMM_NEGZEROf, 0, GLMM_NEGZEROf, 0)
|
||||||
#define glmm_float32x4_SIGNMASK_NPPN GLMM__SIGNMASKf(GLMM_NEGZEROf, 0, 0, GLMM_NEGZEROf)
|
#define glmm_float32x4_SIGNMASK_NPPN GLMM__SIGNMASKf(GLMM_NEGZEROf, 0, 0, GLMM_NEGZEROf)
|
||||||
#define glmm_float32x4_SIGNMASK_NEG wasm_i32x4_const_splat(GLMM_NEGZEROf)
|
#define glmm_float32x4_SIGNMASK_NEG wasm_i32x4_const_splat(GLMM_NEGZEROf)
|
||||||
|
|
||||||
static inline
|
/*
 * WASM SIMD: absolute value of each lane of x.
 *
 * x: input vector
 * returns: result of wasm_f32x4_abs(x)
 */
static inline
glmm_128
glmm_abs(glmm_128 x) {
  return wasm_f32x4_abs(x);
}
|
||||||
glmm_128
|
/*
 * WASM SIMD: lane-wise minimum of a and b, via the pseudo-minimum intrinsic.
 *
 * a, b: input vectors
 * returns: result of wasm_f32x4_pmin(b, a)
 *
 * NOTE(review): the operands are deliberately forwarded in swapped order
 * (b, a). Presumably this makes NaN / signed-zero handling line up with the
 * other backends' glmm_min(a, b) -- confirm before reordering.
 */
static inline
glmm_128
glmm_min(glmm_128 a, glmm_128 b) {
  return wasm_f32x4_pmin(b, a);
}
|
||||||
glmm_abs(glmm_128 x) {
|
/*
 * WASM SIMD: lane-wise maximum of a and b, via the pseudo-maximum intrinsic.
 *
 * a, b: input vectors
 * returns: result of wasm_f32x4_pmax(b, a)
 *
 * NOTE(review): the operands are deliberately forwarded in swapped order
 * (b, a). Presumably this makes NaN / signed-zero handling line up with the
 * other backends' glmm_max(a, b) -- confirm before reordering.
 */
static inline
glmm_128
glmm_max(glmm_128 a, glmm_128 b) {
  return wasm_f32x4_pmax(b, a);
}
|
||||||
return wasm_f32x4_abs(x);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline
|
static inline
|
||||||
glmm_128
|
glmm_128
|
||||||
@@ -74,7 +72,7 @@ glmm_128
|
|||||||
glmm_vhmin(glmm_128 v) {
|
glmm_vhmin(glmm_128 v) {
|
||||||
glmm_128 x0, x1, x2;
|
glmm_128 x0, x1, x2;
|
||||||
x0 = glmm_shuff1(v, 2, 3, 2, 3); /* [2, 3, 2, 3] */
|
x0 = glmm_shuff1(v, 2, 3, 2, 3); /* [2, 3, 2, 3] */
|
||||||
x1 = wasm_f32x4_pmin(x0, v); /* [0|2, 1|3, 2|2, 3|3] */
|
x1 = wasm_f32x4_pmin(x0, v); /* [0|2, 1|3, 2|2, 3|3] */
|
||||||
x2 = glmm_splat(x1, 1); /* [1|3, 1|3, 1|3, 1|3] */
|
x2 = glmm_splat(x1, 1); /* [1|3, 1|3, 1|3, 1|3] */
|
||||||
return wasm_f32x4_pmin(x1, x2);
|
return wasm_f32x4_pmin(x1, x2);
|
||||||
}
|
}
|
||||||
@@ -90,7 +88,7 @@ glmm_128
|
|||||||
glmm_vhmax(glmm_128 v) {
|
glmm_vhmax(glmm_128 v) {
|
||||||
glmm_128 x0, x1, x2;
|
glmm_128 x0, x1, x2;
|
||||||
x0 = glmm_shuff1(v, 2, 3, 2, 3); /* [2, 3, 2, 3] */
|
x0 = glmm_shuff1(v, 2, 3, 2, 3); /* [2, 3, 2, 3] */
|
||||||
x1 = wasm_f32x4_pmax(x0, v); /* [0|2, 1|3, 2|2, 3|3] */
|
x1 = wasm_f32x4_pmax(x0, v); /* [0|2, 1|3, 2|2, 3|3] */
|
||||||
x2 = glmm_splat(x1, 1); /* [1|3, 1|3, 1|3, 1|3] */
|
x2 = glmm_splat(x1, 1); /* [1|3, 1|3, 1|3, 1|3] */
|
||||||
/* _mm_max_ss */
|
/* _mm_max_ss */
|
||||||
return wasm_i32x4_shuffle(x1, wasm_f32x4_pmax(x1, x2), 4, 1, 2, 3);
|
return wasm_i32x4_shuffle(x1, wasm_f32x4_pmax(x1, x2), 4, 1, 2, 3);
|
||||||
|
|||||||
@@ -74,6 +74,9 @@ glmm_abs(__m128 x) {
|
|||||||
return _mm_andnot_ps(glmm_float32x4_SIGNMASK_NEG, x);
|
return _mm_andnot_ps(glmm_float32x4_SIGNMASK_NEG, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * SSE: lane-wise minimum of a and b.
 *
 * a, b: input vectors of four packed floats
 * returns: result of _mm_min_ps(a, b)
 */
static inline
__m128
glmm_min(__m128 a, __m128 b) {
  return _mm_min_ps(a, b);
}
|
||||||
|
/*
 * SSE: lane-wise maximum of a and b.
 *
 * a, b: input vectors of four packed floats
 * returns: result of _mm_max_ps(a, b)
 */
static inline
__m128
glmm_max(__m128 a, __m128 b) {
  return _mm_max_ps(a, b);
}
|
||||||
|
|
||||||
static inline
|
static inline
|
||||||
__m128
|
__m128
|
||||||
glmm_vhadd(__m128 v) {
|
glmm_vhadd(__m128 v) {
|
||||||
|
|||||||
@@ -653,17 +653,14 @@ CGLM_INLINE
|
|||||||
void
|
void
|
||||||
glm_vec4_maxadd(vec4 a, vec4 b, vec4 dest) {
|
glm_vec4_maxadd(vec4 a, vec4 b, vec4 dest) {
|
||||||
#if defined(__wasm__) && defined(__wasm_simd128__)
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(dest, wasm_f32x4_add(
|
glmm_store(dest, wasm_f32x4_add(glmm_load(dest),
|
||||||
glmm_load(dest),
|
glmm_max(glmm_load(a), glmm_load(b))));
|
||||||
wasm_f32x4_pmax(glmm_load(a), glmm_load(b))));
|
|
||||||
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
glmm_store(dest, _mm_add_ps(glmm_load(dest),
|
glmm_store(dest, _mm_add_ps(glmm_load(dest),
|
||||||
_mm_max_ps(glmm_load(a),
|
glmm_max(glmm_load(a), glmm_load(b))));
|
||||||
glmm_load(b))));
|
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
vst1q_f32(dest, vaddq_f32(vld1q_f32(dest),
|
glmm_store(dest, vaddq_f32(glmm_load(dest),
|
||||||
vmaxq_f32(vld1q_f32(a),
|
glmm_max(glmm_load(a), glmm_load(b))));
|
||||||
vld1q_f32(b))));
|
|
||||||
#else
|
#else
|
||||||
dest[0] += glm_max(a[0], b[0]);
|
dest[0] += glm_max(a[0], b[0]);
|
||||||
dest[1] += glm_max(a[1], b[1]);
|
dest[1] += glm_max(a[1], b[1]);
|
||||||
@@ -685,17 +682,14 @@ CGLM_INLINE
|
|||||||
void
|
void
|
||||||
glm_vec4_minadd(vec4 a, vec4 b, vec4 dest) {
|
glm_vec4_minadd(vec4 a, vec4 b, vec4 dest) {
|
||||||
#if defined(__wasm__) && defined(__wasm_simd128__)
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(dest, wasm_f32x4_add(
|
glmm_store(dest, wasm_f32x4_add(glmm_load(dest),
|
||||||
glmm_load(dest),
|
glmm_min(glmm_load(a), glmm_load(b))));
|
||||||
wasm_f32x4_pmin(glmm_load(a), glmm_load(b))));
|
|
||||||
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
glmm_store(dest, _mm_add_ps(glmm_load(dest),
|
glmm_store(dest, _mm_add_ps(glmm_load(dest),
|
||||||
_mm_min_ps(glmm_load(a),
|
glmm_min(glmm_load(a), glmm_load(b))));
|
||||||
glmm_load(b))));
|
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
vst1q_f32(dest, vaddq_f32(vld1q_f32(dest),
|
glmm_store(dest, vaddq_f32(glmm_load(dest),
|
||||||
vminq_f32(vld1q_f32(a),
|
glmm_min(glmm_load(a), glmm_load(b))));
|
||||||
vld1q_f32(b))));
|
|
||||||
#else
|
#else
|
||||||
dest[0] += glm_min(a[0], b[0]);
|
dest[0] += glm_min(a[0], b[0]);
|
||||||
dest[1] += glm_min(a[1], b[1]);
|
dest[1] += glm_min(a[1], b[1]);
|
||||||
@@ -825,17 +819,14 @@ CGLM_INLINE
|
|||||||
void
|
void
|
||||||
glm_vec4_maxsub(vec4 a, vec4 b, vec4 dest) {
|
glm_vec4_maxsub(vec4 a, vec4 b, vec4 dest) {
|
||||||
#if defined(__wasm__) && defined(__wasm_simd128__)
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(dest, wasm_f32x4_sub(
|
glmm_store(dest, wasm_f32x4_sub(glmm_load(dest),
|
||||||
glmm_load(dest),
|
glmm_max(glmm_load(a), glmm_load(b))));
|
||||||
wasm_f32x4_pmax(glmm_load(a), glmm_load(b))));
|
|
||||||
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
glmm_store(dest, _mm_sub_ps(glmm_load(dest),
|
glmm_store(dest, _mm_sub_ps(glmm_load(dest),
|
||||||
_mm_max_ps(glmm_load(a),
|
glmm_max(glmm_load(a), glmm_load(b))));
|
||||||
glmm_load(b))));
|
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
vst1q_f32(dest, vsubq_f32(vld1q_f32(dest),
|
glmm_store(dest, vsubq_f32(glmm_load(dest),
|
||||||
vmaxq_f32(vld1q_f32(a),
|
glmm_max(glmm_load(a), glmm_load(b))));
|
||||||
vld1q_f32(b))));
|
|
||||||
#else
|
#else
|
||||||
dest[0] -= glm_max(a[0], b[0]);
|
dest[0] -= glm_max(a[0], b[0]);
|
||||||
dest[1] -= glm_max(a[1], b[1]);
|
dest[1] -= glm_max(a[1], b[1]);
|
||||||
@@ -857,17 +848,14 @@ CGLM_INLINE
|
|||||||
void
|
void
|
||||||
glm_vec4_minsub(vec4 a, vec4 b, vec4 dest) {
|
glm_vec4_minsub(vec4 a, vec4 b, vec4 dest) {
|
||||||
#if defined(__wasm__) && defined(__wasm_simd128__)
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(dest, wasm_f32x4_sub(
|
glmm_store(dest, wasm_f32x4_sub(glmm_load(dest),
|
||||||
glmm_load(dest),
|
glmm_min(glmm_load(a), glmm_load(b))));
|
||||||
wasm_f32x4_pmin(glmm_load(a), glmm_load(b))));
|
|
||||||
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
glmm_store(dest, _mm_sub_ps(glmm_load(dest),
|
glmm_store(dest, _mm_sub_ps(glmm_load(dest),
|
||||||
_mm_min_ps(glmm_load(a),
|
glmm_min(glmm_load(a), glmm_load(b))));
|
||||||
glmm_load(b))));
|
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
vst1q_f32(dest, vsubq_f32(vld1q_f32(dest),
|
glmm_store(dest, vsubq_f32(vld1q_f32(dest),
|
||||||
vminq_f32(vld1q_f32(a),
|
glmm_min(glmm_load(a), glmm_load(b))));
|
||||||
vld1q_f32(b))));
|
|
||||||
#else
|
#else
|
||||||
dest[0] -= glm_min(a[0], b[0]);
|
dest[0] -= glm_min(a[0], b[0]);
|
||||||
dest[1] -= glm_min(a[1], b[1]);
|
dest[1] -= glm_min(a[1], b[1]);
|
||||||
@@ -1031,12 +1019,8 @@ glm_vec4_distance2(vec4 a, vec4 b) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_vec4_maxv(vec4 a, vec4 b, vec4 dest) {
|
glm_vec4_maxv(vec4 a, vec4 b, vec4 dest) {
|
||||||
#if defined(__wasm__) && defined(__wasm_simd128__)
|
#if defined(CGLM_SIMD)
|
||||||
glmm_store(dest, wasm_f32x4_pmax(glmm_load(a), glmm_load(b)));
|
glmm_store(dest, glmm_max(glmm_load(a), glmm_load(b)));
|
||||||
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
|
||||||
glmm_store(dest, _mm_max_ps(glmm_load(a), glmm_load(b)));
|
|
||||||
#elif defined(CGLM_NEON_FP)
|
|
||||||
vst1q_f32(dest, vmaxq_f32(vld1q_f32(a), vld1q_f32(b)));
|
|
||||||
#else
|
#else
|
||||||
dest[0] = glm_max(a[0], b[0]);
|
dest[0] = glm_max(a[0], b[0]);
|
||||||
dest[1] = glm_max(a[1], b[1]);
|
dest[1] = glm_max(a[1], b[1]);
|
||||||
@@ -1055,12 +1039,8 @@ glm_vec4_maxv(vec4 a, vec4 b, vec4 dest) {
|
|||||||
CGLM_INLINE
|
CGLM_INLINE
|
||||||
void
|
void
|
||||||
glm_vec4_minv(vec4 a, vec4 b, vec4 dest) {
|
glm_vec4_minv(vec4 a, vec4 b, vec4 dest) {
|
||||||
#if defined(__wasm__) && defined(__wasm_simd128__)
|
#if defined(CGLM_SIMD)
|
||||||
glmm_store(dest, wasm_f32x4_pmin(glmm_load(a), glmm_load(b)));
|
glmm_store(dest, glmm_min(glmm_load(a), glmm_load(b)));
|
||||||
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
|
||||||
glmm_store(dest, _mm_min_ps(glmm_load(a), glmm_load(b)));
|
|
||||||
#elif defined(CGLM_NEON_FP)
|
|
||||||
vst1q_f32(dest, vminq_f32(vld1q_f32(a), vld1q_f32(b)));
|
|
||||||
#else
|
#else
|
||||||
dest[0] = glm_min(a[0], b[0]);
|
dest[0] = glm_min(a[0], b[0]);
|
||||||
dest[1] = glm_min(a[1], b[1]);
|
dest[1] = glm_min(a[1], b[1]);
|
||||||
@@ -1080,14 +1060,13 @@ CGLM_INLINE
|
|||||||
void
|
void
|
||||||
glm_vec4_clamp(vec4 v, float minVal, float maxVal) {
|
glm_vec4_clamp(vec4 v, float minVal, float maxVal) {
|
||||||
#if defined(__wasm__) && defined(__wasm_simd128__)
|
#if defined(__wasm__) && defined(__wasm_simd128__)
|
||||||
glmm_store(v, wasm_f32x4_pmin(
|
glmm_store(v, glmm_min(glmm_max(glmm_load(v), wasm_f32x4_splat(minVal)),
|
||||||
wasm_f32x4_pmax(glmm_load(v), wasm_f32x4_splat(minVal)),
|
wasm_f32x4_splat(maxVal)));
|
||||||
wasm_f32x4_splat(maxVal)));
|
|
||||||
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
#elif defined( __SSE__ ) || defined( __SSE2__ )
|
||||||
glmm_store(v, _mm_min_ps(_mm_max_ps(glmm_load(v), _mm_set1_ps(minVal)),
|
glmm_store(v, glmm_min(glmm_max(glmm_load(v), _mm_set1_ps(minVal)),
|
||||||
_mm_set1_ps(maxVal)));
|
_mm_set1_ps(maxVal)));
|
||||||
#elif defined(CGLM_NEON_FP)
|
#elif defined(CGLM_NEON_FP)
|
||||||
vst1q_f32(v, vminq_f32(vmaxq_f32(vld1q_f32(v), vdupq_n_f32(minVal)),
|
glmm_store(v, glmm_min(glmm_max(vld1q_f32(v), vdupq_n_f32(minVal)),
|
||||||
vdupq_n_f32(maxVal)));
|
vdupq_n_f32(maxVal)));
|
||||||
#else
|
#else
|
||||||
v[0] = glm_clamp(v[0], minVal, maxVal);
|
v[0] = glm_clamp(v[0], minVal, maxVal);
|
||||||
|
|||||||
@@ -10,6 +10,6 @@
|
|||||||
|
|
||||||
#define CGLM_VERSION_MAJOR 0
|
#define CGLM_VERSION_MAJOR 0
|
||||||
#define CGLM_VERSION_MINOR 9
|
#define CGLM_VERSION_MINOR 9
|
||||||
#define CGLM_VERSION_PATCH 2
|
#define CGLM_VERSION_PATCH 3
|
||||||
|
|
||||||
#endif /* cglm_version_h */
|
#endif /* cglm_version_h */
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
project('cglm', 'c',
|
project('cglm', 'c',
|
||||||
version : '0.9.2',
|
version : '0.9.3',
|
||||||
license : 'mit',
|
license : 'mit',
|
||||||
default_options : [
|
default_options : [
|
||||||
'c_std=c11',
|
'c_std=c11',
|
||||||
|
|||||||
@@ -145,7 +145,7 @@ typedef struct test_entry_t {
|
|||||||
} \
|
} \
|
||||||
} while(0);
|
} while(0);
|
||||||
|
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32) || defined(__MINGW32__) || defined(__MINGW64__)
|
||||||
# define drand48() ((float)(rand() / (RAND_MAX + 1.0)))
|
# define drand48() ((float)(rand() / (RAND_MAX + 1.0)))
|
||||||
# define OK_TEXT "ok:"
|
# define OK_TEXT "ok:"
|
||||||
# define FAIL_TEXT "fail:"
|
# define FAIL_TEXT "fail:"
|
||||||
|
|||||||
Reference in New Issue
Block a user