diff options
author | Peter Bergner <bergner@linux.ibm.com> | 2022-05-17 21:09:29 -0500 |
---|---|---|
committer | Peter Bergner <bergner@linux.ibm.com> | 2022-05-17 21:10:27 -0500 |
commit | c6e36f05fbb081abb068958d8900ad34b303a70b (patch) | |
tree | fbb1fbe9de9e09c20aa02d853a0e89c27337f7d1 /gcc | |
parent | 3d9439b1bb76c186958d5b86f0076f8b3017b8a2 (diff) | |
download | gcc-c6e36f05fbb081abb068958d8900ad34b303a70b.zip gcc-c6e36f05fbb081abb068958d8900ad34b303a70b.tar.gz gcc-c6e36f05fbb081abb068958d8900ad34b303a70b.tar.bz2 |
rs6000: Prefer assigning the MMA vector operands to altivec registers [PR105556]
When optimizing the DGEMM kernel in OpenBLAS to use MMA, the MMA code
uses all 8 accumulators, which overlap all vs0-vs31 vector registers.
Current trunk assigns one of the normal vector inputs to one of the MMA
instructions, which forces us to spill one of the accumulators to memory,
leading to poor performance. The solution here is to replace the "wa"
constraints for the vector input operands in the MMA instruction patterns
with "v,?wa" so that we prefer using the altivec registers vs32-vs63
over the vs0-vs31 registers.
2022-05-17 Peter Bergner <bergner@linux.ibm.com>
Segher Boessenkool <segher@kernel.crashing.org>
gcc/
PR target/105556
* config/rs6000/mma.md (mma_<vv>, mma_<avv>, mma_<pv>, mma_<apv>,
mma_<vvi4i4i8>, mma_<avvi4i4i8>, mma_<vvi4i4i2>, mma_<avvi4i4i2>,
mma_<vvi4i4>, mma_<avvi4i4>, mma_<pvi4i2>, mma_<apvi4i2>,
mma_<vvi4i4i4>, mma_<avvi4i4i4>): Replace "wa" constraints with "v,?wa".
Update other operands accordingly.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/rs6000/mma.md | 150 |
1 files changed, 75 insertions, 75 deletions
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md index 907c9d6..a183b6a 100644 --- a/gcc/config/rs6000/mma.md +++ b/gcc/config/rs6000/mma.md @@ -490,50 +490,50 @@ [(set_attr "type" "mma")]) (define_insn "mma_<vv>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa") - (match_operand:V16QI 2 "vsx_register_operand" "wa")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")] MMA_VV))] "TARGET_MMA" "<vv> %A0,%x1,%x2" [(set_attr "type" "mma")]) (define_insn "mma_<avv>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0") - (match_operand:V16QI 2 "vsx_register_operand" "wa") - (match_operand:V16QI 3 "vsx_register_operand" "wa")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")] MMA_AVV))] "TARGET_MMA" "<avv> %A0,%x2,%x3" [(set_attr "type" "mma")]) (define_insn "mma_<pv>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa") - (match_operand:V16QI 2 "vsx_register_operand" "wa")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")] MMA_PV))] "TARGET_MMA" "<pv> %A0,%x1,%x2" [(set_attr "type" "mma")]) (define_insn "mma_<apv>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0") - (match_operand:OO 2 "vsx_register_operand" "wa") - (match_operand:V16QI 3 "vsx_register_operand" "wa")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + (match_operand:OO 2 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")] MMA_APV))] "TARGET_MMA" "<apv> %A0,%x2,%x3" [(set_attr "type" "mma")]) (define_insn "mma_<vvi4i4i8>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa") - (match_operand:V16QI 2 "vsx_register_operand" "wa") - (match_operand:SI 3 "const_0_to_15_operand" "n") - (match_operand:SI 4 "const_0_to_15_operand" "n") - (match_operand:SI 5 "u8bit_cint_operand" "n")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:SI 3 "const_0_to_15_operand" "n,n") + (match_operand:SI 4 "const_0_to_15_operand" "n,n") + (match_operand:SI 5 "u8bit_cint_operand" "n,n")] MMA_VVI4I4I8))] "TARGET_MMA" "<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5" @@ -541,13 +541,13 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_<avvi4i4i8>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0") - (match_operand:V16QI 2 "vsx_register_operand" "wa") - (match_operand:V16QI 3 "vsx_register_operand" "wa") - (match_operand:SI 4 "const_0_to_15_operand" "n") - (match_operand:SI 5 "const_0_to_15_operand" "n") - (match_operand:SI 6 "u8bit_cint_operand" "n")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") + (match_operand:SI 4 "const_0_to_15_operand" "n,n") + (match_operand:SI 5 "const_0_to_15_operand" "n,n") + (match_operand:SI 6 "u8bit_cint_operand" "n,n")] MMA_AVVI4I4I8))] "TARGET_MMA" "<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6" @@ -555,12 +555,12 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_<vvi4i4i2>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa") - (match_operand:V16QI 2 "vsx_register_operand" "wa") - (match_operand:SI 3 "const_0_to_15_operand" "n") - (match_operand:SI 4 "const_0_to_15_operand" "n") - (match_operand:SI 5 "const_0_to_3_operand" "n")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:SI 3 "const_0_to_15_operand" "n,n") + (match_operand:SI 4 "const_0_to_15_operand" "n,n") + (match_operand:SI 5 "const_0_to_3_operand" "n,n")] MMA_VVI4I4I2))] "TARGET_MMA" "<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5" @@ -568,13 +568,13 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_<avvi4i4i2>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0") - (match_operand:V16QI 2 "vsx_register_operand" "wa") - (match_operand:V16QI 3 "vsx_register_operand" "wa") - (match_operand:SI 4 "const_0_to_15_operand" "n") - (match_operand:SI 5 "const_0_to_15_operand" "n") - (match_operand:SI 6 "const_0_to_3_operand" "n")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") + (match_operand:SI 4 "const_0_to_15_operand" "n,n") + (match_operand:SI 5 "const_0_to_15_operand" "n,n") + (match_operand:SI 6 "const_0_to_3_operand" "n,n")] MMA_AVVI4I4I2))] "TARGET_MMA" "<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6" @@ -582,11 +582,11 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_<vvi4i4>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa") - (match_operand:V16QI 2 "vsx_register_operand" "wa") - (match_operand:SI 3 "const_0_to_15_operand" "n") - (match_operand:SI 4 "const_0_to_15_operand" "n")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:SI 3 "const_0_to_15_operand" "n,n") + (match_operand:SI 4 "const_0_to_15_operand" "n,n")] MMA_VVI4I4))] "TARGET_MMA" "<vvi4i4> %A0,%x1,%x2,%3,%4" @@ -594,12 +594,12 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_<avvi4i4>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0") - (match_operand:V16QI 2 "vsx_register_operand" "wa") - (match_operand:V16QI 3 "vsx_register_operand" "wa") - (match_operand:SI 4 "const_0_to_15_operand" "n") - (match_operand:SI 5 "const_0_to_15_operand" "n")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") + (match_operand:SI 4 "const_0_to_15_operand" "n,n") + (match_operand:SI 5 "const_0_to_15_operand" "n,n")] MMA_AVVI4I4))] "TARGET_MMA" "<avvi4i4> %A0,%x2,%x3,%4,%5" @@ -607,11 +607,11 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_<pvi4i2>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "wa") - (match_operand:V16QI 2 "vsx_register_operand" "wa") - (match_operand:SI 3 "const_0_to_15_operand" "n") - (match_operand:SI 4 "const_0_to_3_operand" "n")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:SI 3 "const_0_to_15_operand" "n,n") + (match_operand:SI 4 "const_0_to_3_operand" "n,n")] MMA_PVI4I2))] "TARGET_MMA" "<pvi4i2> %A0,%x1,%x2,%3,%4" @@ -619,12 +619,12 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_<apvi4i2>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0") - (match_operand:OO 2 "vsx_register_operand" "wa") - (match_operand:V16QI 3 "vsx_register_operand" "wa") - (match_operand:SI 4 "const_0_to_15_operand" "n") - (match_operand:SI 5 "const_0_to_3_operand" "n")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + (match_operand:OO 2 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") + (match_operand:SI 4 "const_0_to_15_operand" "n,n") + (match_operand:SI 5 "const_0_to_3_operand" "n,n")] MMA_APVI4I2))] "TARGET_MMA" "<apvi4i2> %A0,%x2,%x3,%4,%5" @@ -632,12 +632,12 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_<vvi4i4i4>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "wa") - (match_operand:V16QI 2 "vsx_register_operand" "wa") - (match_operand:SI 3 "const_0_to_15_operand" "n") - (match_operand:SI 4 "const_0_to_15_operand" "n") - (match_operand:SI 5 "const_0_to_15_operand" "n")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:SI 3 "const_0_to_15_operand" "n,n") + (match_operand:SI 4 "const_0_to_15_operand" "n,n") + (match_operand:SI 5 "const_0_to_15_operand" "n,n")] MMA_VVI4I4I4))] "TARGET_MMA" "<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5" @@ -645,13 +645,13 @@ (set_attr "prefixed" "yes")]) (define_insn "mma_<avvi4i4i4>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0") - (match_operand:V16QI 2 "vsx_register_operand" "wa") - (match_operand:V16QI 3 "vsx_register_operand" "wa") - (match_operand:SI 4 "const_0_to_15_operand" "n") - (match_operand:SI 5 "const_0_to_15_operand" "n") - (match_operand:SI 6 "const_0_to_15_operand" "n")] + [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") + (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") + (match_operand:SI 4 "const_0_to_15_operand" "n,n") + (match_operand:SI 5 "const_0_to_15_operand" "n,n") + (match_operand:SI 6 "const_0_to_15_operand" "n,n")] MMA_AVVI4I4I4))] "TARGET_MMA" "<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6" |