diff --git a/include/cglm/simd/arm.h b/include/cglm/simd/arm.h
index c980e54..791f4cb 100644
--- a/include/cglm/simd/arm.h
+++ b/include/cglm/simd/arm.h
@@ -32,36 +32,40 @@
  * I'm not sure if glmm_xxxx(), glmm_yyyy()... is better than glmm_0000()...
  */
 
+/* Memory layout        Register layout (glmm)
+    0  1  2  3    ->    3  2  1  0
+ */
+
 SWIZZLE(glmm_0000) { return vdupq_lane_f32(vget_low_f32(v), 0);  }
 SWIZZLE(glmm_1111) { return vdupq_lane_f32(vget_low_f32(v), 1);  }
 SWIZZLE(glmm_2222) { return vdupq_lane_f32(vget_high_f32(v), 0); }
 SWIZZLE(glmm_3333) { return vdupq_lane_f32(vget_high_f32(v), 1); }
 
-SWIZZLE(glmm_1032) { return vrev64q_f32(v); }
+SWIZZLE(glmm_2301) { return vrev64q_f32(v); }
 
-SWIZZLE(glmm_0101) { float32x2_t vt = vget_low_f32(v);              return vcombine_f32(vt, vt); }
-SWIZZLE(glmm_2323) { float32x2_t vt = vget_high_f32(v);             return vcombine_f32(vt, vt); }
-SWIZZLE(glmm_1010) { float32x2_t vt = vrev64_f32(vget_low_f32(v));  return vcombine_f32(vt, vt); }
-SWIZZLE(glmm_3232) { float32x2_t vt = vrev64_f32(vget_high_f32(v)); return vcombine_f32(vt, vt); }
+SWIZZLE(glmm_1010) { float32x2_t vt = vget_low_f32(v);              return vcombine_f32(vt, vt); }
+SWIZZLE(glmm_3232) { float32x2_t vt = vget_high_f32(v);             return vcombine_f32(vt, vt); }
+SWIZZLE(glmm_0101) { float32x2_t vt = vrev64_f32(vget_low_f32(v));  return vcombine_f32(vt, vt); }
+SWIZZLE(glmm_2323) { float32x2_t vt = vrev64_f32(vget_high_f32(v)); return vcombine_f32(vt, vt); }
 
-SWIZZLE(glmm_0132) { return vcombine_f32(vget_low_f32(v), vrev64_f32(vget_high_f32(v))); }
-SWIZZLE(glmm_1023) { return vcombine_f32(vrev64_f32(vget_low_f32(v)), vget_high_f32(v)); }
-SWIZZLE(glmm_2310) { return vcombine_f32(vget_high_f32(v), vrev64_f32(vget_low_f32(v))); }
-SWIZZLE(glmm_3201) { return vcombine_f32(vrev64_f32(vget_high_f32(v)), vget_low_f32(v)); }
-SWIZZLE(glmm_3210) { return vcombine_f32(vrev64_f32(vget_high_f32(v)), vrev64_f32(vget_low_f32(v))); }
+SWIZZLE(glmm_2310) { return vcombine_f32(vget_low_f32(v), vrev64_f32(vget_high_f32(v))); }
+SWIZZLE(glmm_3201) { return vcombine_f32(vrev64_f32(vget_low_f32(v)), vget_high_f32(v)); }
+SWIZZLE(glmm_0132) { return vcombine_f32(vget_high_f32(v), vrev64_f32(vget_low_f32(v))); }
+SWIZZLE(glmm_1023) { return vcombine_f32(vrev64_f32(vget_high_f32(v)), vget_low_f32(v)); }
+SWIZZLE(glmm_0123) { return vcombine_f32(vrev64_f32(vget_high_f32(v)), vrev64_f32(vget_low_f32(v))); }
 
-SWIZZLE(glmm_0022) { return vtrnq_f32(v, v).val[0]; }
-SWIZZLE(glmm_1133) { return vtrnq_f32(v, v).val[1]; }
+SWIZZLE(glmm_2200) { return vtrnq_f32(v, v).val[0]; }
+SWIZZLE(glmm_3311) { return vtrnq_f32(v, v).val[1]; }
 
-SWIZZLE(glmm_0011) { return vzipq_f32(v, v).val[0]; }
-SWIZZLE(glmm_2233) { return vzipq_f32(v, v).val[1]; }
+SWIZZLE(glmm_1100) { return vzipq_f32(v, v).val[0]; }
+SWIZZLE(glmm_3322) { return vzipq_f32(v, v).val[1]; }
 
-SWIZZLE(glmm_0202) { return vuzpq_f32(v, v).val[0]; }
-SWIZZLE(glmm_1313) { return vuzpq_f32(v, v).val[1]; }
+SWIZZLE(glmm_2020) { return vuzpq_f32(v, v).val[0]; }
+SWIZZLE(glmm_3131) { return vuzpq_f32(v, v).val[1]; }
 
-SWIZZLE(glmm_1230) { return vextq_f32(v, v, 1); }
-SWIZZLE(glmm_2301) { return vextq_f32(v, v, 2); }
-SWIZZLE(glmm_3012) { return vextq_f32(v, v, 3); }
+SWIZZLE(glmm_0321) { return vextq_f32(v, v, 1); }
+SWIZZLE(glmm_1032) { return vextq_f32(v, v, 2); }
+SWIZZLE(glmm_2103) { return vextq_f32(v, v, 3); }
 
 #undef SWIZZLE
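
Below is a minimal, hypothetical sanity check (not part of the patch) illustrating the convention documented by the new comment: under the renamed glmm_ swizzles, the digits are read in register order, high lane to low lane, the way SSE shuffle masks are written, while memory order runs low to high. The file name and build line are assumptions for illustration; the check uses only standard NEON intrinsics (`vld1q_f32`, `vrev64q_f32`, `vst1q_f32`) and verifies the `glmm_2301` case from the diff.

```c
/* swizzle_check.c (hypothetical): verify the high-to-low digit
 * convention for glmm_2301 == vrev64q_f32.
 * Build on an ARM target with NEON, e.g.:
 *   gcc -O2 swizzle_check.c -o swizzle_check
 */
#include <arm_neon.h>
#include <assert.h>
#include <stdio.h>

int main(void) {
  float mem[4] = {1.0f, 2.0f, 3.0f, 4.0f};  /* memory layout: 0 1 2 3 */
  float32x4_t v = vld1q_f32(mem);

  /* vrev64q_f32 reverses the 32-bit elements within each 64-bit half:
   * {1,2,3,4} -> {2,1,4,3}.                                           */
  float32x4_t r = vrev64q_f32(v);

  float out[4];
  vst1q_f32(out, r);

  /* Reading the result lanes from 3 down to 0 gives source indices
   * 2, 3, 0, 1 -> "2301", hence the new name glmm_2301.               */
  assert(out[0] == 2.0f && out[1] == 1.0f &&
         out[2] == 4.0f && out[3] == 3.0f);

  printf("glmm_2301: {%g, %g, %g, %g}\n", out[0], out[1], out[2], out[3]);
  return 0;
}
```

With v = {1, 2, 3, 4} in memory, the same operation was previously named glmm_1032 (memory-order indices: result lane i takes source element name[i]); the rename keeps every intrinsic unchanged and only flips the direction in which the digits are read.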