aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
authorMatthew Gretton-Dann <matthew.gretton-dann@arm.com>2012-02-28 16:13:52 +0000
committerMatthew Gretton-Dann <mgretton@gcc.gnu.org>2012-02-28 16:13:52 +0000
commitfd92bb80a81651c7eb2159da03b39ba098890bc1 (patch)
tree07787092a84d40ad621a28faededfc880423c242 /gcc/config
parent1d548381aa6ea98a2e15bc1af0d1ad44f9c6adf0 (diff)
downloadgcc-fd92bb80a81651c7eb2159da03b39ba098890bc1.zip
gcc-fd92bb80a81651c7eb2159da03b39ba098890bc1.tar.gz
gcc-fd92bb80a81651c7eb2159da03b39ba098890bc1.tar.bz2
re PR target/51534 (Bad code gen for vcgtq_u32 NEON intrinsic)
PR target/51534 * gcc/config/arm/arm.c (neon_builtin_data): Add entries for vcgeu and vcgtu. * gcc/config/arm/arm_neon.h: Regenerate. * gcc/config/arm/neon.md (unspec): Add UNSPEC_VCGEU, and UNSPEC_VCGTU. (neon_vcgeu): New insn. (neon_vcgtu): Likewise. * gcc/config/arm/neon.ml (s_8_32, u_8_32): New lists. (ops): Unsigned comparison intrinsics call a different builtin. * gcc/testsuite/gcc.target/arm/neon/pr51534.c: New testcase. From-SVN: r184629
Diffstat (limited to 'gcc/config')
-rw-r--r--gcc/config/arm/arm.c2
-rw-r--r--gcc/config/arm/arm_neon.h48
-rw-r--r--gcc/config/arm/neon.md26
-rw-r--r--gcc/config/arm/neon.ml32
4 files changed, 76 insertions, 32 deletions
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 6923d81..21816d6 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -19104,7 +19104,9 @@ static neon_builtin_datum neon_builtin_data[] =
VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+ VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+ VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
VAR2 (BINOP, vcage, v2sf, v4sf),
VAR2 (BINOP, vcagt, v2sf, v4sf),
VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index 9cba0a9..0567895 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -1893,19 +1893,19 @@ vcge_f32 (float32x2_t __a, float32x2_t __b)
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcge_u8 (uint8x8_t __a, uint8x8_t __b)
{
- return (uint8x8_t)__builtin_neon_vcgev8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+ return (uint8x8_t)__builtin_neon_vcgeuv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcge_u16 (uint16x4_t __a, uint16x4_t __b)
{
- return (uint16x4_t)__builtin_neon_vcgev4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+ return (uint16x4_t)__builtin_neon_vcgeuv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcge_u32 (uint32x2_t __a, uint32x2_t __b)
{
- return (uint32x2_t)__builtin_neon_vcgev2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+ return (uint32x2_t)__builtin_neon_vcgeuv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -1935,19 +1935,19 @@ vcgeq_f32 (float32x4_t __a, float32x4_t __b)
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
{
- return (uint8x16_t)__builtin_neon_vcgev16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+ return (uint8x16_t)__builtin_neon_vcgeuv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
{
- return (uint16x8_t)__builtin_neon_vcgev8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+ return (uint16x8_t)__builtin_neon_vcgeuv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
{
- return (uint32x4_t)__builtin_neon_vcgev4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+ return (uint32x4_t)__builtin_neon_vcgeuv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -1977,19 +1977,19 @@ vcle_f32 (float32x2_t __a, float32x2_t __b)
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcle_u8 (uint8x8_t __a, uint8x8_t __b)
{
- return (uint8x8_t)__builtin_neon_vcgev8qi ((int8x8_t) __b, (int8x8_t) __a, 0);
+ return (uint8x8_t)__builtin_neon_vcgeuv8qi ((int8x8_t) __b, (int8x8_t) __a, 0);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcle_u16 (uint16x4_t __a, uint16x4_t __b)
{
- return (uint16x4_t)__builtin_neon_vcgev4hi ((int16x4_t) __b, (int16x4_t) __a, 0);
+ return (uint16x4_t)__builtin_neon_vcgeuv4hi ((int16x4_t) __b, (int16x4_t) __a, 0);
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcle_u32 (uint32x2_t __a, uint32x2_t __b)
{
- return (uint32x2_t)__builtin_neon_vcgev2si ((int32x2_t) __b, (int32x2_t) __a, 0);
+ return (uint32x2_t)__builtin_neon_vcgeuv2si ((int32x2_t) __b, (int32x2_t) __a, 0);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -2019,19 +2019,19 @@ vcleq_f32 (float32x4_t __a, float32x4_t __b)
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
{
- return (uint8x16_t)__builtin_neon_vcgev16qi ((int8x16_t) __b, (int8x16_t) __a, 0);
+ return (uint8x16_t)__builtin_neon_vcgeuv16qi ((int8x16_t) __b, (int8x16_t) __a, 0);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
{
- return (uint16x8_t)__builtin_neon_vcgev8hi ((int16x8_t) __b, (int16x8_t) __a, 0);
+ return (uint16x8_t)__builtin_neon_vcgeuv8hi ((int16x8_t) __b, (int16x8_t) __a, 0);
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
{
- return (uint32x4_t)__builtin_neon_vcgev4si ((int32x4_t) __b, (int32x4_t) __a, 0);
+ return (uint32x4_t)__builtin_neon_vcgeuv4si ((int32x4_t) __b, (int32x4_t) __a, 0);
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -2061,19 +2061,19 @@ vcgt_f32 (float32x2_t __a, float32x2_t __b)
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
{
- return (uint8x8_t)__builtin_neon_vcgtv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
+ return (uint8x8_t)__builtin_neon_vcgtuv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
{
- return (uint16x4_t)__builtin_neon_vcgtv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
+ return (uint16x4_t)__builtin_neon_vcgtuv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
{
- return (uint32x2_t)__builtin_neon_vcgtv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
+ return (uint32x2_t)__builtin_neon_vcgtuv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -2103,19 +2103,19 @@ vcgtq_f32 (float32x4_t __a, float32x4_t __b)
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
{
- return (uint8x16_t)__builtin_neon_vcgtv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
+ return (uint8x16_t)__builtin_neon_vcgtuv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
{
- return (uint16x8_t)__builtin_neon_vcgtv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
+ return (uint16x8_t)__builtin_neon_vcgtuv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
{
- return (uint32x4_t)__builtin_neon_vcgtv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
+ return (uint32x4_t)__builtin_neon_vcgtuv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
@@ -2145,19 +2145,19 @@ vclt_f32 (float32x2_t __a, float32x2_t __b)
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclt_u8 (uint8x8_t __a, uint8x8_t __b)
{
- return (uint8x8_t)__builtin_neon_vcgtv8qi ((int8x8_t) __b, (int8x8_t) __a, 0);
+ return (uint8x8_t)__builtin_neon_vcgtuv8qi ((int8x8_t) __b, (int8x8_t) __a, 0);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclt_u16 (uint16x4_t __a, uint16x4_t __b)
{
- return (uint16x4_t)__builtin_neon_vcgtv4hi ((int16x4_t) __b, (int16x4_t) __a, 0);
+ return (uint16x4_t)__builtin_neon_vcgtuv4hi ((int16x4_t) __b, (int16x4_t) __a, 0);
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclt_u32 (uint32x2_t __a, uint32x2_t __b)
{
- return (uint32x2_t)__builtin_neon_vcgtv2si ((int32x2_t) __b, (int32x2_t) __a, 0);
+ return (uint32x2_t)__builtin_neon_vcgtuv2si ((int32x2_t) __b, (int32x2_t) __a, 0);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
@@ -2187,19 +2187,19 @@ vcltq_f32 (float32x4_t __a, float32x4_t __b)
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
{
- return (uint8x16_t)__builtin_neon_vcgtv16qi ((int8x16_t) __b, (int8x16_t) __a, 0);
+ return (uint8x16_t)__builtin_neon_vcgtuv16qi ((int8x16_t) __b, (int8x16_t) __a, 0);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
{
- return (uint16x8_t)__builtin_neon_vcgtv8hi ((int16x8_t) __b, (int16x8_t) __a, 0);
+ return (uint16x8_t)__builtin_neon_vcgtuv8hi ((int16x8_t) __b, (int16x8_t) __a, 0);
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
{
- return (uint32x4_t)__builtin_neon_vcgtv4si ((int32x4_t) __b, (int32x4_t) __a, 0);
+ return (uint32x4_t)__builtin_neon_vcgtuv4si ((int32x4_t) __b, (int32x4_t) __a, 0);
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index e0d6a0c..078a8fd 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -34,7 +34,9 @@
UNSPEC_VCAGT
UNSPEC_VCEQ
UNSPEC_VCGE
+ UNSPEC_VCGEU
UNSPEC_VCGT
+ UNSPEC_VCGTU
UNSPEC_VCLS
UNSPEC_VCONCAT
UNSPEC_VCVT
@@ -2146,6 +2148,18 @@
(const_string "neon_int_5")))]
)
+(define_insn "neon_vcgeu<mode>"
+ [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
+ (unspec:<V_cmp_result>
+ [(match_operand:VDQIW 1 "s_register_operand" "w")
+ (match_operand:VDQIW 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VCGEU))]
+ "TARGET_NEON"
+ "vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "neon_type" "neon_int_5")]
+)
+
(define_insn "neon_vcgt<mode>"
[(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
(unspec:<V_cmp_result>
@@ -2165,6 +2179,18 @@
(const_string "neon_int_5")))]
)
+(define_insn "neon_vcgtu<mode>"
+ [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
+ (unspec:<V_cmp_result>
+ [(match_operand:VDQIW 1 "s_register_operand" "w")
+ (match_operand:VDQIW 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VCGTU))]
+ "TARGET_NEON"
+ "vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+ [(set_attr "neon_type" "neon_int_5")]
+)
+
;; VCLE and VCLT only support comparisons with immediate zero (register
;; variants are VCGE and VCGT with operands reversed).
diff --git a/gcc/config/arm/neon.ml b/gcc/config/arm/neon.ml
index b5b9cab..363e55c7 100644
--- a/gcc/config/arm/neon.ml
+++ b/gcc/config/arm/neon.ml
@@ -700,6 +700,8 @@ let bit_select shape elt =
(* Common lists of supported element types. *)
+let s_8_32 = [S8; S16; S32]
+let u_8_32 = [U8; U16; U32]
let su_8_32 = [S8; S16; S32; U8; U16; U32]
let su_8_64 = S64 :: U64 :: su_8_32
let su_16_64 = [S16; S32; S64; U16; U32; U64]
@@ -777,26 +779,40 @@ let ops =
Vceq, [], All (3, Qreg), "vceqQ", cmp_sign_invar, P8 :: F32 :: su_8_32;
(* Comparison, greater-than or equal. *)
- Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: su_8_32;
- Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: su_8_32;
+ Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: s_8_32;
+ Vcge, [Builtin_name "vcgeu"], All (3, Dreg), "vcge", cmp_sign_matters, u_8_32;
+ Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: s_8_32;
+ Vcge, [Builtin_name "vcgeu"], All (3, Qreg), "vcgeQ", cmp_sign_matters, u_8_32;
(* Comparison, less-than or equal. *)
Vcle, [Flipped "vcge"], All (3, Dreg), "vcle", cmp_sign_matters,
- F32 :: su_8_32;
+ F32 :: s_8_32;
+ Vcle, [Flipped "vcgeu"], All (3, Dreg), "vcle", cmp_sign_matters,
+ u_8_32;
Vcle, [Instruction_name ["vcge"]; Flipped "vcgeQ"],
All (3, Qreg), "vcleQ", cmp_sign_matters,
- F32 :: su_8_32;
+ F32 :: s_8_32;
+ Vcle, [Instruction_name ["vcge"]; Flipped "vcgeuQ"],
+ All (3, Qreg), "vcleQ", cmp_sign_matters,
+ u_8_32;
(* Comparison, greater-than. *)
- Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: su_8_32;
- Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: su_8_32;
+ Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: s_8_32;
+ Vcgt, [Builtin_name "vcgtu"], All (3, Dreg), "vcgt", cmp_sign_matters, u_8_32;
+ Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: s_8_32;
+ Vcgt, [Builtin_name "vcgtu"], All (3, Qreg), "vcgtQ", cmp_sign_matters, u_8_32;
(* Comparison, less-than. *)
Vclt, [Flipped "vcgt"], All (3, Dreg), "vclt", cmp_sign_matters,
- F32 :: su_8_32;
+ F32 :: s_8_32;
+ Vclt, [Flipped "vcgtu"], All (3, Dreg), "vclt", cmp_sign_matters,
+ u_8_32;
Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtQ"],
All (3, Qreg), "vcltQ", cmp_sign_matters,
- F32 :: su_8_32;
+ F32 :: s_8_32;
+ Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtuQ"],
+ All (3, Qreg), "vcltQ", cmp_sign_matters,
+ u_8_32;
(* Compare absolute greater-than or equal. *)
Vcage, [Instruction_name ["vacge"]],