aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorXionghu Luo <luoxhu@linux.ibm.com>2021-10-27 21:21:20 -0500
committerXionghu Luo <luoxhu@linux.ibm.com>2021-10-27 21:21:20 -0500
commit9222481ffc69a6c0b73ec81e1bf04289fa3db0ed (patch)
tree1f5849ca2b67e1a1233eaebe0b17cc63501cbb19 /gcc
parent5720c450fab749664b32dbcd14d0a66f8ba57e5f (diff)
downloadgcc-9222481ffc69a6c0b73ec81e1bf04289fa3db0ed.zip
gcc-9222481ffc69a6c0b73ec81e1bf04289fa3db0ed.tar.gz
gcc-9222481ffc69a6c0b73ec81e1bf04289fa3db0ed.tar.bz2
rs6000: Fix wrong code generation for vec_sel [PR94613]
The vsel instruction is a bit-wise select instruction. Using an IF_THEN_ELSE to express it in RTL is wrong and leads to wrong code being generated in the combine pass. Per-element selection is a subset of bit-wise selection; with this patch the pattern is written using bit operations. But there are 8 different patterns to define "op0 := (op1 & ~op3) | (op2 & op3)": (~op3&op1) | (op3&op2), (~op3&op1) | (op2&op3), (op3&op2) | (~op3&op1), (op2&op3) | (~op3&op1), (op1&~op3) | (op3&op2), (op1&~op3) | (op2&op3), (op3&op2) | (op1&~op3), (op2&op3) | (op1&~op3). The latter 4 cases do not follow the canonicalisation rules, and non-canonical RTL is invalid RTL in the vregs pass. Secondly, the combine pass will swap (op1&~op3) to (~op3&op1) by commutative canonicalisation, which could reduce it to the first 4 patterns, but it won't swap (op2&op3) | (~op3&op1) to (~op3&op1) | (op2&op3), so this patch handles it with 4 patterns that have the NOT op3 in different positions and check the equality of op3 inside them. Tested on P7, P8 and P9. gcc/ChangeLog: 2021-10-28 Xionghu Luo <luoxhu@linux.ibm.com> PR target/94613 * config/rs6000/altivec.md (*altivec_vsel<mode>): Change to ... (altivec_vsel<mode>): ... this and update define. (*altivec_vsel<mode>_uns): Delete. (altivec_vsel<mode>2): New define_insn. (altivec_vsel<mode>3): Likewise. (altivec_vsel<mode>4): Likewise. * config/rs6000/rs6000-call.c (altivec_expand_vec_sel_builtin): New. (altivec_expand_builtin): Call altivec_expand_vec_sel_builtin to expand vec_sel. * config/rs6000/rs6000.c (rs6000_emit_vector_cond_expr): Use bit-wise selection instead of per element. * config/rs6000/vector.md (vector_select_<mode>): Use bit-wise selection instead of per element. (vector_select_<mode>_uns): Likewise. * config/rs6000/vsx.md (*vsx_xxsel<mode>): Change to ... (vsx_xxsel<mode>): ... this and update define. (*vsx_xxsel<mode>_uns): Delete. (vsx_xxsel<mode>2): New define_insn. (vsx_xxsel<mode>3): Likewise. (vsx_xxsel<mode>4): Likewise. gcc/testsuite/ChangeLog: 2021-10-28 Xionghu Luo <luoxhu@linux.ibm.com> PR target/94613 * gcc.target/powerpc/pr94613.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/rs6000/altivec.md62
-rw-r--r--gcc/config/rs6000/rs6000-call.c62
-rw-r--r--gcc/config/rs6000/rs6000.c19
-rw-r--r--gcc/config/rs6000/vector.md26
-rw-r--r--gcc/config/rs6000/vsx.md60
-rw-r--r--gcc/testsuite/gcc.target/powerpc/pr94613.c47
6 files changed, 221 insertions, 55 deletions
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 51a4760..158b3a7 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -681,26 +681,56 @@
"vcmpgefp %0,%1,%2"
[(set_attr "type" "veccmp")])
-(define_insn "*altivec_vsel<mode>"
+(define_insn "altivec_vsel<mode>"
[(set (match_operand:VM 0 "altivec_register_operand" "=v")
- (if_then_else:VM
- (ne:CC (match_operand:VM 1 "altivec_register_operand" "v")
- (match_operand:VM 4 "zero_constant" ""))
- (match_operand:VM 2 "altivec_register_operand" "v")
- (match_operand:VM 3 "altivec_register_operand" "v")))]
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
- "vsel %0,%3,%2,%1"
+ (ior:VM
+ (and:VM
+ (not:VM (match_operand:VM 3 "altivec_register_operand" "v"))
+ (match_operand:VM 1 "altivec_register_operand" "v"))
+ (and:VM
+ (match_dup 3)
+ (match_operand:VM 2 "altivec_register_operand" "v"))))]
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "vsel %0,%1,%2,%3"
[(set_attr "type" "vecmove")])
-(define_insn "*altivec_vsel<mode>_uns"
+(define_insn "altivec_vsel<mode>2"
[(set (match_operand:VM 0 "altivec_register_operand" "=v")
- (if_then_else:VM
- (ne:CCUNS (match_operand:VM 1 "altivec_register_operand" "v")
- (match_operand:VM 4 "zero_constant" ""))
- (match_operand:VM 2 "altivec_register_operand" "v")
- (match_operand:VM 3 "altivec_register_operand" "v")))]
- "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
- "vsel %0,%3,%2,%1"
+ (ior:VM
+ (and:VM
+ (not:VM (match_operand:VM 3 "altivec_register_operand" "v"))
+ (match_operand:VM 1 "altivec_register_operand" "v"))
+ (and:VM
+ (match_operand:VM 2 "altivec_register_operand" "v")
+ (match_dup 3))))]
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "vsel %0,%1,%2,%3"
+ [(set_attr "type" "vecmove")])
+
+(define_insn "altivec_vsel<mode>3"
+ [(set (match_operand:VM 0 "altivec_register_operand" "=v")
+ (ior:VM
+ (and:VM
+ (match_operand:VM 3 "altivec_register_operand" "v")
+ (match_operand:VM 1 "altivec_register_operand" "v"))
+ (and:VM
+ (not:VM (match_dup 3))
+ (match_operand:VM 2 "altivec_register_operand" "v"))))]
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "vsel %0,%2,%1,%3"
+ [(set_attr "type" "vecmove")])
+
+(define_insn "altivec_vsel<mode>4"
+ [(set (match_operand:VM 0 "altivec_register_operand" "=v")
+ (ior:VM
+ (and:VM
+ (match_operand:VM 1 "altivec_register_operand" "v")
+ (match_operand:VM 3 "altivec_register_operand" "v"))
+ (and:VM
+ (not:VM (match_dup 3))
+ (match_operand:VM 2 "altivec_register_operand" "v"))))]
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
+ "vsel %0,%2,%1,%3"
[(set_attr "type" "vecmove")])
;; Fused multiply add.
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 6d8727c..783c3c5 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -11173,6 +11173,45 @@ altivec_expand_vec_ext_builtin (tree exp, rtx target)
return target;
}
+/* Expand the vec_sel builtin call EXP with insn pattern ICODE.  Call
+   arguments 0, 1 and 2 are passed as insn operands 1, 2 and 3.  The result
+   goes to TARGET when it suits operand 0, otherwise to a fresh pseudo.
+   Return the result rtx, or NULL_RTX if no insn was generated.  */
+static rtx
+altivec_expand_vec_sel_builtin (enum insn_code icode, tree exp, rtx target)
+{
+  rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
+  rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
+  rtx op2 = expand_normal (CALL_EXPR_ARG (exp, 2));
+
+  machine_mode tmode = insn_data[icode].operand[0].mode;
+  machine_mode mode0 = insn_data[icode].operand[1].mode;
+  machine_mode mode1 = insn_data[icode].operand[2].mode;
+  machine_mode mode2 = insn_data[icode].operand[3].mode;
+
+  if (target == 0 || GET_MODE (target) != tmode
+      || !(*insn_data[icode].operand[0].predicate) (target, tmode))
+    target = gen_reg_rtx (tmode);
+
+  /* Copy any argument its operand predicate rejects into a register.  */
+  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
+    op0 = copy_to_mode_reg (mode0, op0);
+  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
+    op1 = copy_to_mode_reg (mode1, op1);
+  if (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
+    op2 = copy_to_mode_reg (mode2, op2);
+
+  /* The vsel patterns have four operands (0-3); the second use of the
+     mask is a match_dup, which the generator does not take as an extra
+     argument, so pass exactly four rtxes.  */
+  rtx pat = GEN_FCN (icode) (target, op0, op1, op2);
+  if (!pat)
+    return NULL_RTX;
+
+  emit_insn (pat);
+  return target;
+}
+
/* Expand the builtin in EXP and store the result in TARGET. Store
true in *EXPANDEDP if we found a builtin to expand. */
static rtx
@@ -11358,6 +11397,29 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
emit_insn (pat);
return NULL_RTX;
+ case ALTIVEC_BUILTIN_VSEL_2DF:
+ return altivec_expand_vec_sel_builtin (CODE_FOR_altivec_vselv2df, exp,
+ target);
+ case ALTIVEC_BUILTIN_VSEL_2DI:
+ case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
+ return altivec_expand_vec_sel_builtin (CODE_FOR_altivec_vselv2di, exp,
+ target);
+ case ALTIVEC_BUILTIN_VSEL_4SF:
+ return altivec_expand_vec_sel_builtin (CODE_FOR_altivec_vselv4sf, exp,
+ target);
+ case ALTIVEC_BUILTIN_VSEL_4SI:
+ case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
+ return altivec_expand_vec_sel_builtin (CODE_FOR_altivec_vselv4si, exp,
+ target);
+ case ALTIVEC_BUILTIN_VSEL_8HI:
+ case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
+ return altivec_expand_vec_sel_builtin (CODE_FOR_altivec_vselv8hi, exp,
+ target);
+ case ALTIVEC_BUILTIN_VSEL_16QI:
+ case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
+ return altivec_expand_vec_sel_builtin (CODE_FOR_altivec_vselv16qi, exp,
+ target);
+
case ALTIVEC_BUILTIN_DSSALL:
emit_insn (gen_altivec_dssall ());
return NULL_RTX;
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index bac959f..1dcb9b1 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -16033,9 +16033,7 @@ rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
machine_mode dest_mode = GET_MODE (dest);
machine_mode mask_mode = GET_MODE (cc_op0);
enum rtx_code rcode = GET_CODE (cond);
- machine_mode cc_mode = CCmode;
rtx mask;
- rtx cond2;
bool invert_move = false;
if (VECTOR_UNIT_NONE_P (dest_mode))
@@ -16075,8 +16073,6 @@ rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
case GEU:
case LTU:
case LEU:
- /* Mark unsigned tests with CCUNSmode. */
- cc_mode = CCUNSmode;
/* Invert condition to avoid compound test if necessary. */
if (rcode == GEU || rcode == LEU)
@@ -16096,6 +16092,9 @@ rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
if (!mask)
return 0;
+ if (mask_mode != dest_mode)
+ mask = simplify_gen_subreg (dest_mode, mask, mask_mode, 0);
+
if (invert_move)
std::swap (op_true, op_false);
@@ -16135,13 +16134,11 @@ rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
if (!REG_P (op_false) && !SUBREG_P (op_false))
op_false = force_reg (dest_mode, op_false);
- cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
- CONST0_RTX (dest_mode));
- emit_insn (gen_rtx_SET (dest,
- gen_rtx_IF_THEN_ELSE (dest_mode,
- cond2,
- op_true,
- op_false)));
+ rtx tmp = gen_rtx_IOR (dest_mode,
+ gen_rtx_AND (dest_mode, gen_rtx_NOT (dest_mode, mask),
+ op_false),
+ gen_rtx_AND (dest_mode, mask, op_true));
+ emit_insn (gen_rtx_SET (dest, tmp));
return 1;
}
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index 7e36c788..062aef7 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -916,23 +916,21 @@
;; which is in the reverse order that we want
(define_expand "vector_select_<mode>"
[(set (match_operand:VEC_L 0 "vlogical_operand")
- (if_then_else:VEC_L
- (ne:CC (match_operand:VEC_L 3 "vlogical_operand")
- (match_dup 4))
- (match_operand:VEC_L 2 "vlogical_operand")
- (match_operand:VEC_L 1 "vlogical_operand")))]
- "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
- "operands[4] = CONST0_RTX (<MODE>mode);")
+ (ior:VEC_L
+ (and:VEC_L (not:VEC_L (match_operand:VEC_L 3 "vlogical_operand"))
+ (match_operand:VEC_L 1 "vlogical_operand"))
+ (and:VEC_L (match_dup 3)
+ (match_operand:VEC_L 2 "vlogical_operand"))))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)")
(define_expand "vector_select_<mode>_uns"
[(set (match_operand:VEC_L 0 "vlogical_operand")
- (if_then_else:VEC_L
- (ne:CCUNS (match_operand:VEC_L 3 "vlogical_operand")
- (match_dup 4))
- (match_operand:VEC_L 2 "vlogical_operand")
- (match_operand:VEC_L 1 "vlogical_operand")))]
- "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
- "operands[4] = CONST0_RTX (<MODE>mode);")
+ (ior:VEC_L
+ (and:VEC_L (not:VEC_L (match_operand:VEC_L 3 "vlogical_operand"))
+ (match_operand:VEC_L 1 "vlogical_operand"))
+ (and:VEC_L (match_dup 3)
+ (match_operand:VEC_L 2 "vlogical_operand"))))]
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)")
;; Expansions that compare vectors producing a vector result and a predicate,
;; setting CR6 to indicate a combined status
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 346f1a9..73fd2ce 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -2185,30 +2185,62 @@
[(set_attr "type" "<VStype_simple>")])
;; Vector select
-(define_insn "*vsx_xxsel<mode>"
+(define_insn "vsx_xxsel<mode>"
[(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (if_then_else:VSX_L
- (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_L 4 "zero_constant" ""))
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
+ (ior:VSX_L
+ (and:VSX_L
+ (not:VSX_L (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa"))
+ (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa"))
+ (and:VSX_L
+ (match_dup 3)
+ (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa"))))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
- "xxsel %x0,%x3,%x2,%x1"
+ "xxsel %x0,%x1,%x2,%x3"
[(set_attr "type" "vecmove")
(set_attr "isa" "<VSisa>")])
-(define_insn "*vsx_xxsel<mode>_uns"
+(define_insn "vsx_xxsel<mode>2"
[(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (if_then_else:VSX_L
- (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_L 4 "zero_constant" ""))
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
+ (ior:VSX_L
+ (and:VSX_L
+ (not:VSX_L (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa"))
+ (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa"))
+ (and:VSX_L
+ (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
+ (match_dup 3))))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
- "xxsel %x0,%x3,%x2,%x1"
+ "xxsel %x0,%x1,%x2,%x3"
[(set_attr "type" "vecmove")
(set_attr "isa" "<VSisa>")])
+(define_insn "vsx_xxsel<mode>3"
+ [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
+ (ior:VSX_L
+ (and:VSX_L
+ (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa"))
+ (and:VSX_L
+ (not:VSX_L (match_dup 3))
+ (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa"))))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxsel %x0,%x2,%x1,%x3"
+ [(set_attr "type" "vecmove")
+ (set_attr "isa" "<VSisa>")])
+
+(define_insn "vsx_xxsel<mode>4"
+ [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
+ (ior:VSX_L
+ (and:VSX_L
+ (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
+ (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa"))
+ (and:VSX_L
+ (not:VSX_L (match_dup 3))
+ (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa"))))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxsel %x0,%x2,%x1,%x3"
+ [(set_attr "type" "vecmove")
+ (set_attr "isa" "<VSisa>")])
+
;; Copy sign
(define_insn "vsx_copysign<mode>3"
[(set (match_operand:VSX_F 0 "vsx_register_operand" "=wa")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr94613.c b/gcc/testsuite/gcc.target/powerpc/pr94613.c
new file mode 100644
index 0000000..13cab13
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr94613.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vmx_hw } */
+/* { dg-options "-O2 -maltivec" } */
+
+#include <altivec.h>
+
+/* The initial implementation of vec_sel used an IF_THEN_ELSE rtx.
+ This did NOT match what the vsel instruction does: vsel is a
+ bit-wise operation. Using IF_THEN_ELSE caused the + operation to be
+ simplified away in combine. A plus operation affects other bits in
+ the same element, hence per-element simplifications are wrong for
+ vsel.
+
+ foo takes bits from C where A has a 1 bit and from A + B elsewhere;
+ main expects element 0 to be 3 when element 0 of every argument is 1. */
+vector unsigned char __attribute__((noinline))
+foo (vector unsigned char a, vector unsigned char b, vector unsigned char c)
+{
+ return vec_sel (a + b, c, a); /* ((a + b) & ~a) | (c & a) */
+}
+
+vector unsigned char __attribute__((noinline))
+foor (vector unsigned char a, vector unsigned char b, vector unsigned char c)
+{
+ return vec_sel (c, a + b, ~a); /* Same selection as foo: mask inverted, data operands swapped. */
+}
+
+vector unsigned char __attribute__((noinline))
+bar (vector unsigned char a, vector unsigned char b, vector unsigned char c)
+{
+ return vec_sel (a | b, c, a); /* IOR variant of foo: ((a | b) & ~a) | (c & a) */
+}
+
+int
+main ()
+{ /* Abort if any selection result differs from the bit-wise expectation. */
+ vector unsigned char v = (vector unsigned char){ 1 }; /* Element 0 is 1; the remaining elements are 0. */
+
+ if (foo (v, v, v)[0] != 3) /* (2 & ~1) | (1 & 1) == 3 */
+ __builtin_abort ();
+
+ if (bar (v, v, v)[0] != 1) /* (1 & ~1) | (1 & 1) == 1 */
+ __builtin_abort ();
+
+ if (foor (v, v, v)[0] != 3) /* (1 & 1) | (2 & ~1) == 3 */
+ __builtin_abort ();
+
+ return 0;
+}
+