diff options
author | Peter Bergner <bergner@linux.ibm.com> | 2021-12-14 14:50:41 -0600 |
---|---|---|
committer | Peter Bergner <bergner@linux.ibm.com> | 2021-12-14 15:00:00 -0600 |
commit | 15c02ab2569b3c4e27d6f133c013b15a9fa70177 (patch) | |
tree | a9b858798df601d4bb12573763514d01f2861fb2 | |
parent | ca39102e10643a6b3f07d06934cc0907ba83d9ee (diff) | |
download | gcc-15c02ab2569b3c4e27d6f133c013b15a9fa70177.zip gcc-15c02ab2569b3c4e27d6f133c013b15a9fa70177.tar.gz gcc-15c02ab2569b3c4e27d6f133c013b15a9fa70177.tar.bz2 |
rs6000: Do not allow combining of multiple assemble quads [PR103548]
The compiler will gladly CSE the result of two __builtin_mma_build_acc
calls with the same four vector arguments, leading to illegal MMA
code being generated. The fix here is to make the mma_assemble_acc
pattern use an unspec_volatile to stop the CSE from happening.
2021-12-14 Peter Bergner <bergner@linux.ibm.com>
gcc/
PR target/103548
* config/rs6000/mma.md (UNSPEC_MMA_ASSEMBLE): Rename unspec from this...
(UNSPEC_VSX_ASSEMBLE): ...to this.
(UNSPECV_MMA_ASSEMBLE): New unspecv.
(vsx_assemble_pair): Use UNSPEC_VSX_ASSEMBLE.
(*vsx_assemble_pair): Likewise.
(mma_assemble_acc): Use UNSPECV_MMA_ASSEMBLE.
(*mma_assemble_acc): Likewise.
* config/rs6000/rs6000.c (rs6000_split_multireg_move): Handle
UNSPEC_VOLATILE. Use UNSPEC_VSX_ASSEMBLE and UNSPECV_MMA_ASSEMBLE.
gcc/testsuite/
PR target/103548
* gcc.target/powerpc/mma-builtin-10-pair.c: New test.
* gcc.target/powerpc/mma-builtin-10-quad.c: New test.
-rw-r--r-- | gcc/config/rs6000/mma.md | 38 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 6 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/mma-builtin-10-pair.c | 21 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/mma-builtin-10-quad.c | 23 |
4 files changed, 68 insertions, 20 deletions
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md index fa08160..8a26205 100644 --- a/gcc/config/rs6000/mma.md +++ b/gcc/config/rs6000/mma.md @@ -29,7 +29,7 @@ ;; Constants for creating unspecs (define_c_enum "unspec" - [UNSPEC_MMA_ASSEMBLE + [UNSPEC_VSX_ASSEMBLE UNSPEC_MMA_EXTRACT UNSPEC_MMA_PMXVBF16GER2 UNSPEC_MMA_PMXVBF16GER2NN @@ -94,7 +94,8 @@ ]) (define_c_enum "unspecv" - [UNSPECV_MMA_XXSETACCZ + [UNSPECV_MMA_ASSEMBLE + UNSPECV_MMA_XXSETACCZ ]) ;; MMA instructions with 1 accumulator argument @@ -333,7 +334,7 @@ { rtx src = gen_rtx_UNSPEC (OOmode, gen_rtvec (2, operands[1], operands[2]), - UNSPEC_MMA_ASSEMBLE); + UNSPEC_VSX_ASSEMBLE); emit_move_insn (operands[0], src); DONE; }) @@ -345,7 +346,7 @@ [(set (match_operand:OO 0 "vsx_register_operand" "=&wa") (unspec:OO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa") (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")] - UNSPEC_MMA_ASSEMBLE))] + UNSPEC_VSX_ASSEMBLE))] "TARGET_MMA" "#" "&& reload_completed" @@ -353,7 +354,7 @@ { rtx src = gen_rtx_UNSPEC (OOmode, gen_rtvec (2, operands[1], operands[2]), - UNSPEC_MMA_ASSEMBLE); + UNSPEC_VSX_ASSEMBLE); rs6000_split_multireg_move (operands[0], src); DONE; }) @@ -399,10 +400,10 @@ (match_operand:V16QI 4 "mma_assemble_input_operand")] "TARGET_MMA" { - rtx src = gen_rtx_UNSPEC (XOmode, - gen_rtvec (4, operands[1], operands[2], - operands[3], operands[4]), - UNSPEC_MMA_ASSEMBLE); + rtx src = gen_rtx_UNSPEC_VOLATILE (XOmode, + gen_rtvec (4, operands[1], operands[2], + operands[3], operands[4]), + UNSPECV_MMA_ASSEMBLE); emit_move_insn (operands[0], src); DONE; }) @@ -412,21 +413,22 @@ (define_insn_and_split "*mma_assemble_acc" [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa") - (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa") - (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa") - (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")] - 
UNSPEC_MMA_ASSEMBLE))] + (unspec_volatile:XO + [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa") + (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa") + (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa") + (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")] + UNSPECV_MMA_ASSEMBLE))] "TARGET_MMA && fpr_reg_operand (operands[0], XOmode)" "#" "&& reload_completed" [(const_int 0)] { - rtx src = gen_rtx_UNSPEC (XOmode, - gen_rtvec (4, operands[1], operands[2], - operands[3], operands[4]), - UNSPEC_MMA_ASSEMBLE); + rtx src = gen_rtx_UNSPEC_VOLATILE (XOmode, + gen_rtvec (4, operands[1], operands[2], + operands[3], operands[4]), + UNSPECV_MMA_ASSEMBLE); rs6000_split_multireg_move (operands[0], src); DONE; }) diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 70df511..9fc1577 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -27071,9 +27071,11 @@ rs6000_split_multireg_move (rtx dst, rtx src) return; } - if (GET_CODE (src) == UNSPEC) + if (GET_CODE (src) == UNSPEC + || GET_CODE (src) == UNSPEC_VOLATILE) { - gcc_assert (XINT (src, 1) == UNSPEC_MMA_ASSEMBLE); + gcc_assert (XINT (src, 1) == UNSPEC_VSX_ASSEMBLE + || XINT (src, 1) == UNSPECV_MMA_ASSEMBLE); gcc_assert (REG_P (dst)); if (GET_MODE (src) == XOmode) gcc_assert (FP_REGNO_P (REGNO (dst))); diff --git a/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-pair.c b/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-pair.c new file mode 100644 index 0000000..d8748d8 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-pair.c @@ -0,0 +1,21 @@ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +typedef unsigned char vec_t __attribute__((vector_size(16))); + +void +foo (__vector_pair *dst, vec_t *src) +{ + __vector_pair pair0, pair1; + /* Adjacent loads should be combined into one lxvp instruction + and identical build pairs should be combined. 
*/ + __builtin_vsx_build_pair (&pair0, src[0], src[1]); + __builtin_vsx_build_pair (&pair1, src[0], src[1]); + dst[0] = pair0; + dst[2] = pair1; +} + +/* { dg-final { scan-assembler-not {\mlxv\M} } } */ +/* { dg-final { scan-assembler-not {\mstxv\M} } } */ +/* { dg-final { scan-assembler-times {\mlxvp\M} 1 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 2 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-quad.c b/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-quad.c new file mode 100644 index 0000000..02342c7 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/mma-builtin-10-quad.c @@ -0,0 +1,23 @@ +/* { dg-require-effective-target power10_ok } */ +/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ + +typedef unsigned char vec_t __attribute__((vector_size(16))); + +void +foo (__vector_quad *dst, vec_t *src) +{ + __vector_quad quad0, quad1; + /* Adjacent loads should be combined into two lxvp instructions. + and identical build accs should not be combined. */ + __builtin_mma_build_acc (&quad0, src[0], src[1], src[2], src[3]); + __builtin_mma_build_acc (&quad1, src[0], src[1], src[2], src[3]); + dst[0] = quad0; + dst[2] = quad1; +} + +/* { dg-final { scan-assembler-not {\mlxv\M} } } */ +/* { dg-final { scan-assembler-not {\mstxv\M} } } */ +/* { dg-final { scan-assembler-times {\mlxvp\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxxmtacc\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxxmfacc\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mstxvp\M} 4 } } */ |