diff --git a/Makefile.am b/Makefile.am
index 3dcf155..e85faa4 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -111,7 +111,8 @@ cglm_simd_avx_HEADERS = include/cglm/simd/avx/mat4.h \
cglm_simd_neondir=$(includedir)/cglm/simd/neon
cglm_simd_neon_HEADERS = include/cglm/simd/neon/mat4.h \
include/cglm/simd/neon/mat2.h \
- include/cglm/simd/neon/affine.h
+ include/cglm/simd/neon/affine.h \
+ include/cglm/simd/neon/quat.h
cglm_structdir=$(includedir)/cglm/struct
cglm_struct_HEADERS = include/cglm/struct/mat4.h \
diff --git a/include/cglm/quat.h b/include/cglm/quat.h
index 6d38f27..8560ec8 100644
--- a/include/cglm/quat.h
+++ b/include/cglm/quat.h
@@ -63,6 +63,10 @@
# include "simd/sse2/quat.h"
#endif
+#ifdef CGLM_NEON_FP
+# include "simd/neon/quat.h"
+#endif
+
CGLM_INLINE
void
glm_mat4_mulv(mat4 m, vec4 v, vec4 dest);
@@ -412,6 +416,8 @@ glm_quat_mul(versor p, versor q, versor dest) {
*/
#if defined( __SSE__ ) || defined( __SSE2__ )
glm_quat_mul_sse2(p, q, dest);
+#elif defined(CGLM_NEON_FP)
+ glm_quat_mul_neon(p, q, dest);
#else
dest[0] = p[3] * q[0] + p[0] * q[3] + p[1] * q[2] - p[2] * q[1];
dest[1] = p[3] * q[1] - p[0] * q[2] + p[1] * q[3] + p[2] * q[0];
diff --git a/include/cglm/simd/neon/quat.h b/include/cglm/simd/neon/quat.h
new file mode 100644
index 0000000..f6b9e99
--- /dev/null
+++ b/include/cglm/simd/neon/quat.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c), Recep Aslantas.
+ *
+ * MIT License (MIT), http://opensource.org/licenses/MIT
+ * Full license can be found in the LICENSE file
+ */
+
+#ifndef cglm_quat_neon_h
+#define cglm_quat_neon_h
+#if defined(__ARM_NEON_FP)
+
+#include "../../common.h"
+#include "../intrin.h"
+
+CGLM_INLINE
+void
+glm_quat_mul_neon(versor p, versor q, versor dest) {
+ /*
+ Quaternion product p * q -> dest; a = [3], b = [0], c = [1], d = [2]; 1 = p, 2 = q:
+ [0] = a1 b2 + b1 a2 + c1 d2 - d1 c2,  [1] = a1 c2 - b1 d2 + c1 a2 + d1 b2
+ [2] = a1 d2 + b1 c2 - c1 b2 + d1 a2,  [3] = a1 a2 - b1 b2 - c1 c2 - d1 d2
+ NOTE(review): glmm_splat_*, glmm_xor, glmm_fmadd assumed: lane broadcast, bitwise XOR, a * b + c
+ */
+
+ glmm_128 xp, xq, xqr, r, x, y, z, s2, s3;
+ glmm_128 s1 = {-0.f, 0.f, 0.f, -0.f}; /* -0.f lanes: products subtracted in the d1 (z) term */
+ float32x2_t qh, ql;
+
+ xp = glmm_load(p); /* 3 2 1 0 */
+ xq = glmm_load(q);
+
+ r = vmulq_f32(glmm_splat_w(xp), xq); /* a1 terms: p[3] * (q0, q1, q2, q3) */
+ x = glmm_splat_x(xp);
+ y = glmm_splat_y(xp);
+ z = glmm_splat_z(xp);
+
+ ql = vget_high_f32(s1); /* { 0, -0 }; ql is a scratch here and is reassigned below */
+ s3 = vcombine_f32(ql, ql); /* { 0, -0, 0, -0 }: sign mask for the b1 (x) term */
+ s2 = vzipq_f32(s3, s3).val[0]; /* { 0, 0, -0, -0 }: sign mask for the c1 (y) term */
+
+ xqr = vrev64q_f32(xq); /* { q1, q0, q3, q2 } */
+ qh = vget_high_f32(xqr); /* { q3, q2 } */
+ ql = vget_low_f32(xqr); /* { q1, q0 } */
+
+ r = glmm_fmadd(glmm_xor(x, s3), vcombine_f32(qh, ql), r); /* b1 terms: x * (+q3, -q2, +q1, -q0) */
+
+ r = glmm_fmadd(glmm_xor(y, s2), vcombine_f32(vget_high_f32(xq),
+ vget_low_f32(xq)), r); /* c1 terms: y * (+q2, +q3, -q0, -q1) */
+
+ r = glmm_fmadd(glmm_xor(z, s1), vcombine_f32(ql, qh), r); /* d1 terms: z * (-q1, +q0, +q3, -q2) */
+
+ glmm_store(dest, r);
+}
+
+#endif
+#endif /* cglm_quat_neon_h */
diff --git a/include/cglm/simd/sse2/quat.h b/include/cglm/simd/sse2/quat.h
index 894d492..ae82885 100644
--- a/include/cglm/simd/sse2/quat.h
+++ b/include/cglm/simd/sse2/quat.h
@@ -41,6 +41,5 @@ glm_quat_mul_sse2(versor p, versor q, versor dest) {
glmm_store(dest, r);
}
-
#endif
#endif /* cglm_quat_simd_h */
diff --git a/win/cglm.vcxproj b/win/cglm.vcxproj
index b166fa7..336ff0b 100644
--- a/win/cglm.vcxproj
+++ b/win/cglm.vcxproj
@@ -93,6 +93,7 @@
+
diff --git a/win/cglm.vcxproj.filters b/win/cglm.vcxproj.filters
index d9f38bb..97c3270 100644
--- a/win/cglm.vcxproj.filters
+++ b/win/cglm.vcxproj.filters
@@ -376,5 +376,8 @@
include\cglm\simd\neon
+
+ include\cglm\simd\neon
+
\ No newline at end of file