diff options
author | Jiong Wang <jiong.wang@arm.com> | 2016-07-08 15:26:51 +0000 |
---|---|---|
committer | Jiong Wang <jiwang@gcc.gnu.org> | 2016-07-08 15:26:51 +0000 |
commit | 8beb9a0dc9e0a11bd582bfaef8cbed5a38adc8a1 (patch) | |
tree | efffb983adc44f8e9f26dd797118ec1e0176ca42 | |
parent | cef4b65070a67ba1afa3359f21c48e4529ac154b (diff) | |
download | gcc-8beb9a0dc9e0a11bd582bfaef8cbed5a38adc8a1.zip gcc-8beb9a0dc9e0a11bd582bfaef8cbed5a38adc8a1.tar.gz gcc-8beb9a0dc9e0a11bd582bfaef8cbed5a38adc8a1.tar.bz2 |
[AArch64] Use fmin/fmax for v[min|max]nm{q} intrinsics
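The smin/smax builtins actually honor a quiet NaN operand (the NaN can end up as the result), whereas the v[min|max]nm{q} intrinsics are expected to return the numeric operand when the other operand is a quiet NaN. Switch these intrinsics to the new fmin/fmax builtins.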
gcc/
* config/aarch64/aarch64-simd-builtins.def (smax): Remove float
variants.
(smin): Likewise.
(fmax): New entry.
(fmin): Likewise.
* config/aarch64/arm_neon.h (vmaxnm_f32): Use
__builtin_aarch64_fmaxv2sf.
(vmaxnmq_f32): Likewise.
(vmaxnmq_f64): Likewise.
(vminnm_f32): Likewise.
(vminnmq_f32): Likewise.
(vminnmq_f64): Likewise.
gcc/testsuite/
* gcc.target/aarch64/simd/vminmaxnm_1.c: New.
From-SVN: r238166
-rw-r--r-- | gcc/ChangeLog | 15 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64-simd-builtins.def | 8 | ||||
-rw-r--r-- | gcc/config/aarch64/arm_neon.h | 12 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c | 82 |
5 files changed, 113 insertions, 8 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e2711cc..1cde332 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2016-07-08 Jiong Wang <jiong.wang@arm.com> + + * config/aarch64/aarch64-simd-builtins.def (smax): Remove float + variants. + (smin): Likewise. + (fmax): New entry. + (fmin): Likewise. + * config/aarch64/arm_neon.h (vmaxnm_f32): Use + __builtin_aarch64_fmaxv2sf. + (vmaxnmq_f32): Likewise. + (vmaxnmq_f64): Likewise. + (vminnm_f32): Likewise. + (vminnmq_f32): Likewise. + (vminnmq_f64): Likewise. + 2016-07-08 Michael Meissner <meissner@linux.vnet.ibm.com> PR target/71806 diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 3e4740c..f1ad325 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -244,13 +244,17 @@ /* Implemented by <maxmin><mode>3. smax variants map to fmaxnm, smax_nan variants map to fmax. */ - BUILTIN_VDQIF (BINOP, smax, 3) - BUILTIN_VDQIF (BINOP, smin, 3) + BUILTIN_VDQ_BHSI (BINOP, smax, 3) + BUILTIN_VDQ_BHSI (BINOP, smin, 3) BUILTIN_VDQ_BHSI (BINOP, umax, 3) BUILTIN_VDQ_BHSI (BINOP, umin, 3) BUILTIN_VDQF (BINOP, smax_nan, 3) BUILTIN_VDQF (BINOP, smin_nan, 3) + /* Implemented by <fmaxmin><mode>3. */ + BUILTIN_VDQF (BINOP, fmax, 3) + BUILTIN_VDQF (BINOP, fmin, 3) + /* Implemented by aarch64_<maxmin_uns>p<mode>. 
*/ BUILTIN_VDQ_BHSI (BINOP, smaxp, 0) BUILTIN_VDQ_BHSI (BINOP, sminp, 0) diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index ed24b59..b0ab1d3 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -17588,19 +17588,19 @@ vpminnms_f32 (float32x2_t a) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vmaxnm_f32 (float32x2_t __a, float32x2_t __b) { - return __builtin_aarch64_smaxv2sf (__a, __b); + return __builtin_aarch64_fmaxv2sf (__a, __b); } __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vmaxnmq_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_aarch64_smaxv4sf (__a, __b); + return __builtin_aarch64_fmaxv4sf (__a, __b); } __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) vmaxnmq_f64 (float64x2_t __a, float64x2_t __b) { - return __builtin_aarch64_smaxv2df (__a, __b); + return __builtin_aarch64_fmaxv2df (__a, __b); } /* vmaxv */ @@ -17818,19 +17818,19 @@ vminq_u32 (uint32x4_t __a, uint32x4_t __b) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vminnm_f32 (float32x2_t __a, float32x2_t __b) { - return __builtin_aarch64_sminv2sf (__a, __b); + return __builtin_aarch64_fminv2sf (__a, __b); } __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vminnmq_f32 (float32x4_t __a, float32x4_t __b) { - return __builtin_aarch64_sminv4sf (__a, __b); + return __builtin_aarch64_fminv4sf (__a, __b); } __extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) vminnmq_f64 (float64x2_t __a, float64x2_t __b) { - return __builtin_aarch64_sminv2df (__a, __b); + return __builtin_aarch64_fminv2df (__a, __b); } /* vminv */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ab2537f..0cbcb88 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2016-07-08 Jiong Wang <jiong.wang@arm.com> + + * 
gcc.target/aarch64/simd/vminmaxnm_1.c: New. + 2016-07-08 Michael Meissner <meissner@linux.vnet.ibm.com> PR target/71806 diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c new file mode 100644 index 0000000..96608eb --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c @@ -0,0 +1,82 @@ +/* Test the `v[min|max]nm{q}_f*' AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-O2" } */ + +#include "arm_neon.h" + +extern void abort (); + +#define CHECK(T, N, R, E) \ + {\ + int i = 0;\ + for (; i < N; i++)\ + if (* (T *) &R[i] != * (T *) &E[i])\ + abort ();\ + } + +int +main (int argc, char **argv) +{ + float32x2_t f32x2_input1 = vdup_n_f32 (-1.0); + float32x2_t f32x2_input2 = vdup_n_f32 (0.0); + float32x2_t f32x2_exp_minnm = vdup_n_f32 (-1.0); + float32x2_t f32x2_exp_maxnm = vdup_n_f32 (0.0); + float32x2_t f32x2_ret_minnm = vminnm_f32 (f32x2_input1, f32x2_input2); + float32x2_t f32x2_ret_maxnm = vmaxnm_f32 (f32x2_input1, f32x2_input2); + + CHECK (uint32_t, 2, f32x2_ret_minnm, f32x2_exp_minnm); + CHECK (uint32_t, 2, f32x2_ret_maxnm, f32x2_exp_maxnm); + + f32x2_input1 = vdup_n_f32 (__builtin_nanf ("")); + f32x2_input2 = vdup_n_f32 (1.0); + f32x2_exp_minnm = vdup_n_f32 (1.0); + f32x2_exp_maxnm = vdup_n_f32 (1.0); + f32x2_ret_minnm = vminnm_f32 (f32x2_input1, f32x2_input2); + f32x2_ret_maxnm = vmaxnm_f32 (f32x2_input1, f32x2_input2); + + CHECK (uint32_t, 2, f32x2_ret_minnm, f32x2_exp_minnm); + CHECK (uint32_t, 2, f32x2_ret_maxnm, f32x2_exp_maxnm); + + float32x4_t f32x4_input1 = vdupq_n_f32 (-1024.0); + float32x4_t f32x4_input2 = vdupq_n_f32 (77.0); + float32x4_t f32x4_exp_minnm = vdupq_n_f32 (-1024.0); + float32x4_t f32x4_exp_maxnm = vdupq_n_f32 (77.0); + float32x4_t f32x4_ret_minnm = vminnmq_f32 (f32x4_input1, f32x4_input2); + float32x4_t f32x4_ret_maxnm = vmaxnmq_f32 (f32x4_input1, f32x4_input2); + + CHECK (uint32_t, 4, f32x4_ret_minnm, f32x4_exp_minnm); + CHECK (uint32_t, 
4, f32x4_ret_maxnm, f32x4_exp_maxnm); + + f32x4_input1 = vdupq_n_f32 (-__builtin_nanf ("")); + f32x4_input2 = vdupq_n_f32 (-1.0); + f32x4_exp_minnm = vdupq_n_f32 (-1.0); + f32x4_exp_maxnm = vdupq_n_f32 (-1.0); + f32x4_ret_minnm = vminnmq_f32 (f32x4_input1, f32x4_input2); + f32x4_ret_maxnm = vmaxnmq_f32 (f32x4_input1, f32x4_input2); + + CHECK (uint32_t, 4, f32x4_ret_minnm, f32x4_exp_minnm); + CHECK (uint32_t, 4, f32x4_ret_maxnm, f32x4_exp_maxnm); + + float64x2_t f64x2_input1 = vdupq_n_f64 (1.23); + float64x2_t f64x2_input2 = vdupq_n_f64 (4.56); + float64x2_t f64x2_exp_minnm = vdupq_n_f64 (1.23); + float64x2_t f64x2_exp_maxnm = vdupq_n_f64 (4.56); + float64x2_t f64x2_ret_minnm = vminnmq_f64 (f64x2_input1, f64x2_input2); + float64x2_t f64x2_ret_maxnm = vmaxnmq_f64 (f64x2_input1, f64x2_input2); + + CHECK (uint64_t, 2, f64x2_ret_minnm, f64x2_exp_minnm); + CHECK (uint64_t, 2, f64x2_ret_maxnm, f64x2_exp_maxnm); + + f64x2_input1 = vdupq_n_f64 (-__builtin_nan ("")); + f64x2_input2 = vdupq_n_f64 (1.0); + f64x2_exp_minnm = vdupq_n_f64 (1.0); + f64x2_exp_maxnm = vdupq_n_f64 (1.0); + f64x2_ret_minnm = vminnmq_f64 (f64x2_input1, f64x2_input2); + f64x2_ret_maxnm = vmaxnmq_f64 (f64x2_input1, f64x2_input2); + + CHECK (uint64_t, 2, f64x2_ret_minnm, f64x2_exp_minnm); + CHECK (uint64_t, 2, f64x2_ret_maxnm, f64x2_exp_maxnm); + + return 0; +} |