diff options
author | Julian Brown <julian@codesourcery.com> | 2021-06-29 03:57:31 -0700 |
---|---|---|
committer | Julian Brown <julian@codesourcery.com> | 2021-06-29 08:19:56 -0700 |
commit | 8f332122589f97b9c974b168ca5b0b186296f0e4 (patch) | |
tree | f21af58f4219a55bd1d0b5c762f2d8831fd18990 | |
parent | 53b1d1691857a1d3e28566d05bb434fa555c4e8a (diff) | |
download | gcc-8f332122589f97b9c974b168ca5b0b186296f0e4.zip gcc-8f332122589f97b9c974b168ca5b0b186296f0e4.tar.gz gcc-8f332122589f97b9c974b168ca5b0b186296f0e4.tar.bz2 |
amdgcn: Add [us]mulsidi3/muldi3 patterns
This patch improves 64-bit multiplication for AMD GCN: patterns for
unsigned and signed 32x32->64 bit multiplication have been added, and
also 64x64->64 bit multiplication is now open-coded rather than calling
a library function (which may be a win for code size as well as speed:
the function calling sequence isn't particularly concise for GCN).
This version of the patch uses define_insn_and_split in order to keep
multiply operations together during RTL optimisations up to register
allocation: judging by inspection of small test cases, this appears to
produce more compact code than the previous approach using an expander.
The DImode multiply implementation is lost from libgcc if we build it
for DImode/TImode rather than SImode/DImode, a change we make in a later
patch in this series.
2021-06-29 Julian Brown <julian@codesourcery.com>
gcc/
* config/gcn/gcn.md (<su>mulsidi3, <su>mulsidi3_reg, <su>mulsidi3_imm,
muldi3): Add patterns.
-rw-r--r-- | gcc/config/gcn/gcn.md | 94 |
1 files changed, 94 insertions, 0 deletions
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md index d1d4998..82f7a46 100644 --- a/gcc/config/gcn/gcn.md +++ b/gcc/config/gcn/gcn.md @@ -1457,6 +1457,100 @@ (set_attr "length" "4,8,8") (set_attr "gcn_version" "gcn5,gcn5,*")]) +(define_expand "<su>mulsidi3" + [(set (match_operand:DI 0 "register_operand" "") + (mult:DI (any_extend:DI + (match_operand:SI 1 "register_operand" "")) + (any_extend:DI + (match_operand:SI 2 "nonmemory_operand" ""))))] + "" +{ + if (can_create_pseudo_p () + && !TARGET_GCN5 + && !gcn_inline_immediate_operand (operands[2], SImode)) + operands[2] = force_reg (SImode, operands[2]); + + if (REG_P (operands[2])) + emit_insn (gen_<su>mulsidi3_reg (operands[0], operands[1], operands[2])); + else + emit_insn (gen_<su>mulsidi3_imm (operands[0], operands[1], operands[2])); + + DONE; +}) + +(define_insn_and_split "<su>mulsidi3_reg" + [(set (match_operand:DI 0 "register_operand" "=&Sg, &v") + (mult:DI (any_extend:DI + (match_operand:SI 1 "register_operand" "%Sg, v")) + (any_extend:DI + (match_operand:SI 2 "register_operand" "Sg,vSv"))))] + "" + "#" + "reload_completed" + [(const_int 0)] + { + rtx dstlo = gen_lowpart (SImode, operands[0]); + rtx dsthi = gen_highpart_mode (SImode, DImode, operands[0]); + emit_insn (gen_mulsi3 (dstlo, operands[1], operands[2])); + emit_insn (gen_<su>mulsi3_highpart (dsthi, operands[1], operands[2])); + DONE; + } + [(set_attr "gcn_version" "gcn5,*")]) + +(define_insn_and_split "<su>mulsidi3_imm" + [(set (match_operand:DI 0 "register_operand" "=&Sg,&Sg,&v") + (mult:DI (any_extend:DI + (match_operand:SI 1 "register_operand" "Sg, Sg, v")) + (match_operand:DI 2 "gcn_32bit_immediate_operand" + "A, B, A")))] + "TARGET_GCN5 || gcn_inline_immediate_operand (operands[2], SImode)" + "#" + "&& reload_completed" + [(const_int 0)] + { + rtx dstlo = gen_lowpart (SImode, operands[0]); + rtx dsthi = gen_highpart_mode (SImode, DImode, operands[0]); + emit_insn (gen_mulsi3 (dstlo, operands[1], operands[2])); + emit_insn 
(gen_<su>mulsi3_highpart (dsthi, operands[1], operands[2])); + DONE; + } + [(set_attr "gcn_version" "gcn5,gcn5,*")]) + +(define_insn_and_split "muldi3" + [(set (match_operand:DI 0 "register_operand" "=&Sg,&Sg, &v,&v") + (mult:DI (match_operand:DI 1 "register_operand" "%Sg, Sg, v, v") + (match_operand:DI 2 "nonmemory_operand" "Sg, i,vSv, A"))) + (clobber (match_scratch:SI 3 "=&Sg,&Sg,&v,&v")) + (clobber (match_scratch:BI 4 "=cs, cs, X, X")) + (clobber (match_scratch:DI 5 "=X, X,cV,cV"))] + "" + "#" + "reload_completed" + [(const_int 0)] + { + rtx tmp = operands[3]; + rtx dsthi = gen_highpart_mode (SImode, DImode, operands[0]); + rtx op1lo = gcn_operand_part (DImode, operands[1], 0); + rtx op1hi = gcn_operand_part (DImode, operands[1], 1); + rtx op2lo = gcn_operand_part (DImode, operands[2], 0); + rtx op2hi = gcn_operand_part (DImode, operands[2], 1); + emit_insn (gen_umulsidi3 (operands[0], op1lo, op2lo)); + emit_insn (gen_mulsi3 (tmp, op1lo, op2hi)); + rtx add = gen_rtx_SET (dsthi, gen_rtx_PLUS (SImode, dsthi, tmp)); + rtx clob1 = gen_rtx_CLOBBER (VOIDmode, operands[4]); + rtx clob2 = gen_rtx_CLOBBER (VOIDmode, operands[5]); + add = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, add, clob1, clob2)); + emit_insn (add); + emit_insn (gen_mulsi3 (tmp, op1hi, op2lo)); + add = gen_rtx_SET (dsthi, gen_rtx_PLUS (SImode, dsthi, tmp)); + clob1 = gen_rtx_CLOBBER (VOIDmode, operands[4]); + clob2 = gen_rtx_CLOBBER (VOIDmode, operands[5]); + add = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, add, clob1, clob2)); + emit_insn (add); + DONE; + } + [(set_attr "gcn_version" "gcn5,gcn5,*,*")]) + (define_insn "<u>mulhisi3" [(set (match_operand:SI 0 "register_operand" "=v") (mult:SI |