diff options
author | Matthew Gretton-Dann <matthew.gretton-dann@arm.com> | 2012-02-28 16:13:52 +0000 |
---|---|---|
committer | Matthew Gretton-Dann <mgretton@gcc.gnu.org> | 2012-02-28 16:13:52 +0000 |
commit | fd92bb80a81651c7eb2159da03b39ba098890bc1 (patch) | |
tree | 07787092a84d40ad621a28faededfc880423c242 /gcc/config | |
parent | 1d548381aa6ea98a2e15bc1af0d1ad44f9c6adf0 (diff) | |
download | gcc-fd92bb80a81651c7eb2159da03b39ba098890bc1.zip gcc-fd92bb80a81651c7eb2159da03b39ba098890bc1.tar.gz gcc-fd92bb80a81651c7eb2159da03b39ba098890bc1.tar.bz2 |
re PR target/51534 (Bad code gen for vcgtq_u32 NEON intrinsic)
PR target/51534
* gcc/config/arm/arm.c (neon_builtin_data): Add entries for vcgeu
and vcgtu.
* gcc/config/arm/arm_neon.h: Regenerate.
* gcc/config/arm/neon.md (unspec): Add UNSPEC_VCGEU, and UNSPEC_VCGTU.
(neon_vcgeu): New insn.
(neon_vcgtu): Likewise.
* gcc/config/arm/neon.ml (s_8_32, u_8_32): New lists.
(ops): Unsigned comparison intrinsics call a different
builtin.
* gcc/testsuite/gcc.target/arm/neon/pr51534.c: New testcase.
From-SVN: r184629
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/arm/arm.c | 2 | ||||
-rw-r--r-- | gcc/config/arm/arm_neon.h | 48 | ||||
-rw-r--r-- | gcc/config/arm/neon.md | 26 | ||||
-rw-r--r-- | gcc/config/arm/neon.ml | 32 |
4 files changed, 76 insertions, 32 deletions
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 6923d81..21816d6 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -19104,7 +19104,9 @@ static neon_builtin_datum neon_builtin_data[] = VAR3 (BINOP, vsubhn, v8hi, v4si, v2di), VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), + VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si), VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), + VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si), VAR2 (BINOP, vcage, v2sf, v4sf), VAR2 (BINOP, vcagt, v2sf, v4sf), VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si), diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 9cba0a9..0567895 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -1893,19 +1893,19 @@ vcge_f32 (float32x2_t __a, float32x2_t __b) __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vcge_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint8x8_t)__builtin_neon_vcgev8qi ((int8x8_t) __a, (int8x8_t) __b, 0); + return (uint8x8_t)__builtin_neon_vcgeuv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vcge_u16 (uint16x4_t __a, uint16x4_t __b) { - return (uint16x4_t)__builtin_neon_vcgev4hi ((int16x4_t) __a, (int16x4_t) __b, 0); + return (uint16x4_t)__builtin_neon_vcgeuv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vcge_u32 (uint32x2_t __a, uint32x2_t __b) { - return (uint32x2_t)__builtin_neon_vcgev2si ((int32x2_t) __a, (int32x2_t) __b, 0); + return (uint32x2_t)__builtin_neon_vcgeuv2si ((int32x2_t) __a, (int32x2_t) __b, 0); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -1935,19 +1935,19 @@ vcgeq_f32 (float32x4_t __a, float32x4_t __b) __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vcgeq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint8x16_t)__builtin_neon_vcgev16qi ((int8x16_t) __a, (int8x16_t) __b, 0); + return (uint8x16_t)__builtin_neon_vcgeuv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vcgeq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint16x8_t)__builtin_neon_vcgev8hi ((int16x8_t) __a, (int16x8_t) __b, 0); + return (uint16x8_t)__builtin_neon_vcgeuv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vcgeq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint32x4_t)__builtin_neon_vcgev4si ((int32x4_t) __a, (int32x4_t) __b, 0); + return (uint32x4_t)__builtin_neon_vcgeuv4si ((int32x4_t) __a, (int32x4_t) __b, 0); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -1977,19 +1977,19 @@ vcle_f32 (float32x2_t __a, float32x2_t __b) __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vcle_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint8x8_t)__builtin_neon_vcgev8qi ((int8x8_t) __b, (int8x8_t) __a, 0); + return (uint8x8_t)__builtin_neon_vcgeuv8qi ((int8x8_t) __b, (int8x8_t) __a, 0); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vcle_u16 (uint16x4_t __a, uint16x4_t __b) { - return (uint16x4_t)__builtin_neon_vcgev4hi ((int16x4_t) __b, (int16x4_t) __a, 0); + return (uint16x4_t)__builtin_neon_vcgeuv4hi ((int16x4_t) __b, (int16x4_t) __a, 0); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vcle_u32 (uint32x2_t __a, uint32x2_t __b) { - return (uint32x2_t)__builtin_neon_vcgev2si ((int32x2_t) __b, (int32x2_t) __a, 0); + return (uint32x2_t)__builtin_neon_vcgeuv2si ((int32x2_t) __b, (int32x2_t) __a, 0); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -2019,19 +2019,19 @@ vcleq_f32 (float32x4_t __a, float32x4_t __b) __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vcleq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint8x16_t)__builtin_neon_vcgev16qi ((int8x16_t) __b, (int8x16_t) __a, 0); + return (uint8x16_t)__builtin_neon_vcgeuv16qi ((int8x16_t) __b, (int8x16_t) __a, 0); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vcleq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint16x8_t)__builtin_neon_vcgev8hi ((int16x8_t) __b, (int16x8_t) __a, 0); + return (uint16x8_t)__builtin_neon_vcgeuv8hi ((int16x8_t) __b, (int16x8_t) __a, 0); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vcleq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint32x4_t)__builtin_neon_vcgev4si ((int32x4_t) __b, (int32x4_t) __a, 0); + return (uint32x4_t)__builtin_neon_vcgeuv4si ((int32x4_t) __b, (int32x4_t) __a, 0); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -2061,19 +2061,19 @@ vcgt_f32 (float32x2_t __a, float32x2_t __b) __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vcgt_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint8x8_t)__builtin_neon_vcgtv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); + return (uint8x8_t)__builtin_neon_vcgtuv8qi ((int8x8_t) __a, (int8x8_t) __b, 0); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vcgt_u16 (uint16x4_t __a, uint16x4_t __b) { - return (uint16x4_t)__builtin_neon_vcgtv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); + return (uint16x4_t)__builtin_neon_vcgtuv4hi ((int16x4_t) __a, (int16x4_t) __b, 0); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vcgt_u32 (uint32x2_t __a, uint32x2_t __b) { - return (uint32x2_t)__builtin_neon_vcgtv2si ((int32x2_t) __a, (int32x2_t) __b, 0); + return (uint32x2_t)__builtin_neon_vcgtuv2si ((int32x2_t) __a, (int32x2_t) __b, 0); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -2103,19 +2103,19 @@ vcgtq_f32 (float32x4_t __a, float32x4_t __b) __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vcgtq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint8x16_t)__builtin_neon_vcgtv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); + return (uint8x16_t)__builtin_neon_vcgtuv16qi ((int8x16_t) __a, (int8x16_t) __b, 0); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vcgtq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint16x8_t)__builtin_neon_vcgtv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); + return (uint16x8_t)__builtin_neon_vcgtuv8hi ((int16x8_t) __a, (int16x8_t) __b, 0); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vcgtq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint32x4_t)__builtin_neon_vcgtv4si ((int32x4_t) __a, (int32x4_t) __b, 0); + return (uint32x4_t)__builtin_neon_vcgtuv4si ((int32x4_t) __a, (int32x4_t) __b, 0); } __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) @@ -2145,19 +2145,19 @@ vclt_f32 (float32x2_t __a, float32x2_t __b) __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vclt_u8 (uint8x8_t __a, uint8x8_t __b) { - return (uint8x8_t)__builtin_neon_vcgtv8qi ((int8x8_t) __b, (int8x8_t) __a, 0); + return (uint8x8_t)__builtin_neon_vcgtuv8qi ((int8x8_t) __b, (int8x8_t) __a, 0); } __extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) vclt_u16 (uint16x4_t __a, uint16x4_t __b) { - return (uint16x4_t)__builtin_neon_vcgtv4hi ((int16x4_t) __b, (int16x4_t) __a, 0); + return (uint16x4_t)__builtin_neon_vcgtuv4hi ((int16x4_t) __b, (int16x4_t) __a, 0); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vclt_u32 (uint32x2_t __a, uint32x2_t __b) { - return (uint32x2_t)__builtin_neon_vcgtv2si ((int32x2_t) __b, (int32x2_t) __a, 0); + return (uint32x2_t)__builtin_neon_vcgtuv2si ((int32x2_t) __b, (int32x2_t) __a, 0); } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) @@ -2187,19 +2187,19 @@ vcltq_f32 (float32x4_t __a, float32x4_t __b) __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vcltq_u8 (uint8x16_t __a, uint8x16_t __b) { - return (uint8x16_t)__builtin_neon_vcgtv16qi ((int8x16_t) __b, (int8x16_t) __a, 0); + return (uint8x16_t)__builtin_neon_vcgtuv16qi ((int8x16_t) __b, (int8x16_t) __a, 0); } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vcltq_u16 (uint16x8_t __a, uint16x8_t __b) { - return (uint16x8_t)__builtin_neon_vcgtv8hi ((int16x8_t) __b, (int16x8_t) __a, 0); + return (uint16x8_t)__builtin_neon_vcgtuv8hi ((int16x8_t) __b, (int16x8_t) __a, 0); } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vcltq_u32 (uint32x4_t __a, uint32x4_t __b) { - return (uint32x4_t)__builtin_neon_vcgtv4si ((int32x4_t) __b, (int32x4_t) __a, 0); + return (uint32x4_t)__builtin_neon_vcgtuv4si ((int32x4_t) __b, (int32x4_t) __a, 0); } __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index e0d6a0c..078a8fd 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -34,7 +34,9 @@ UNSPEC_VCAGT UNSPEC_VCEQ UNSPEC_VCGE + UNSPEC_VCGEU UNSPEC_VCGT + UNSPEC_VCGTU UNSPEC_VCLS UNSPEC_VCONCAT UNSPEC_VCVT @@ -2146,6 +2148,18 @@ (const_string "neon_int_5")))] ) +(define_insn "neon_vcgeu<mode>" + [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w") + (unspec:<V_cmp_result> + [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:VDQIW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VCGEU))] + "TARGET_NEON" + "vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" + [(set_attr "neon_type" "neon_int_5")] +) + (define_insn "neon_vcgt<mode>" [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w") (unspec:<V_cmp_result> @@ -2165,6 +2179,18 @@ (const_string "neon_int_5")))] ) +(define_insn "neon_vcgtu<mode>" + [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w") + (unspec:<V_cmp_result> + [(match_operand:VDQIW 1 "s_register_operand" "w") + (match_operand:VDQIW 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + UNSPEC_VCGTU))] + "TARGET_NEON" + "vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" + [(set_attr "neon_type" "neon_int_5")] +) + ;; VCLE and VCLT only support comparisons with immediate zero (register ;; variants are VCGE and VCGT with operands reversed). diff --git a/gcc/config/arm/neon.ml b/gcc/config/arm/neon.ml index b5b9cab..363e55c7 100644 --- a/gcc/config/arm/neon.ml +++ b/gcc/config/arm/neon.ml @@ -700,6 +700,8 @@ let bit_select shape elt = (* Common lists of supported element types. *) +let s_8_32 = [S8; S16; S32] +let u_8_32 = [U8; U16; U32] let su_8_32 = [S8; S16; S32; U8; U16; U32] let su_8_64 = S64 :: U64 :: su_8_32 let su_16_64 = [S16; S32; S64; U16; U32; U64] @@ -777,26 +779,40 @@ let ops = Vceq, [], All (3, Qreg), "vceqQ", cmp_sign_invar, P8 :: F32 :: su_8_32; (* Comparison, greater-than or equal. *) - Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: su_8_32; - Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: su_8_32; + Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: s_8_32; + Vcge, [Builtin_name "vcgeu"], All (3, Dreg), "vcge", cmp_sign_matters, u_8_32; + Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: s_8_32; + Vcge, [Builtin_name "vcgeu"], All (3, Qreg), "vcgeQ", cmp_sign_matters, u_8_32; (* Comparison, less-than or equal. *) Vcle, [Flipped "vcge"], All (3, Dreg), "vcle", cmp_sign_matters, - F32 :: su_8_32; + F32 :: s_8_32; + Vcle, [Flipped "vcgeu"], All (3, Dreg), "vcle", cmp_sign_matters, + u_8_32; Vcle, [Instruction_name ["vcge"]; Flipped "vcgeQ"], All (3, Qreg), "vcleQ", cmp_sign_matters, - F32 :: su_8_32; + F32 :: s_8_32; + Vcle, [Instruction_name ["vcge"]; Flipped "vcgeuQ"], + All (3, Qreg), "vcleQ", cmp_sign_matters, + u_8_32; (* Comparison, greater-than. *) - Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: su_8_32; - Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: su_8_32; + Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: s_8_32; + Vcgt, [Builtin_name "vcgtu"], All (3, Dreg), "vcgt", cmp_sign_matters, u_8_32; + Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: s_8_32; + Vcgt, [Builtin_name "vcgtu"], All (3, Qreg), "vcgtQ", cmp_sign_matters, u_8_32; (* Comparison, less-than. *) Vclt, [Flipped "vcgt"], All (3, Dreg), "vclt", cmp_sign_matters, - F32 :: su_8_32; + F32 :: s_8_32; + Vclt, [Flipped "vcgtu"], All (3, Dreg), "vclt", cmp_sign_matters, + u_8_32; Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtQ"], All (3, Qreg), "vcltQ", cmp_sign_matters, - F32 :: su_8_32; + F32 :: s_8_32; + Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtuQ"], + All (3, Qreg), "vcltQ", cmp_sign_matters, + u_8_32; (* Compare absolute greater-than or equal. *) Vcage, [Instruction_name ["vacge"]], |