diff options
author | Andrew Stubbs <ams@codesourcery.com> | 2020-07-31 11:27:24 +0100 |
---|---|---|
committer | Andrew Stubbs <ams@codesourcery.com> | 2020-08-04 15:45:22 +0100 |
commit | 19fad46721af338b24aff48127ae654673bffa7b (patch) | |
tree | eb86e68fa4014f08e0a04d800877d3898a9e6b25 /gcc | |
parent | 843710c037c1396dcdbc23e5b6b456b8ae6e2b8b (diff) | |
download | gcc-19fad46721af338b24aff48127ae654673bffa7b.zip gcc-19fad46721af338b24aff48127ae654673bffa7b.tar.gz gcc-19fad46721af338b24aff48127ae654673bffa7b.tar.bz2 |
amdgcn: TImode shifts
Implement TImode shifts in the backend.
The middle-end support that does it for other architectures doesn't work for
GCN because BITS_PER_WORD==32, meaning that TImode is quad-word, not
double-word.
gcc/ChangeLog:
* config/gcn/gcn.md ("<expander>ti3"): New.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/gcn/gcn.md | 105 |
1 file changed, 105 insertions(+), 0 deletions(-)
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index 8cfb3a8..ed98d2d 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -1539,6 +1539,111 @@
    (set_attr "length" "8")])

 ;; }}}
+;; {{{ ALU: generic 128-bit binop
+
+; TImode shifts can't be synthesized by the middle-end
+(define_expand "<expander>ti3"
+  [(set (match_operand:TI 0 "register_operand")
+	(vec_and_scalar_nocom:TI
+	  (match_operand:TI 1 "gcn_alu_operand")
+	  (match_operand:SI 2 "gcn_alu_operand")))]
+  ""
+  {
+    rtx dest = operands[0];
+    rtx src = operands[1];
+    rtx shift = operands[2];
+
+    enum {ashr, lshr, ashl} shiftop = <expander>;
+    rtx (*inverse_shift_fn) (rtx, rtx, rtx)
+      = (shiftop == ashl ? gen_lshrdi3 : gen_ashldi3);
+    rtx (*logical_shift_fn) (rtx, rtx, rtx)
+      = (shiftop == ashl ? gen_ashldi3 : gen_lshrdi3);
+
+    /* We shift "from" one subreg "to" the other, according to shiftop.  */
+    int from = (shiftop == ashl ? 0 : 8);
+    int to = (shiftop == ashl ? 8 : 0);
+    rtx destfrom = simplify_gen_subreg (DImode, dest, TImode, from);
+    rtx destto = simplify_gen_subreg (DImode, dest, TImode, to);
+    rtx srcfrom = simplify_gen_subreg (DImode, src, TImode, from);
+    rtx srcto = simplify_gen_subreg (DImode, src, TImode, to);
+
+    int shiftval = (CONST_INT_P (shift) ? INTVAL (shift) : -1);
+    enum {RUNTIME, ZERO, SMALL, LARGE} shiftcomparison
+      = (!CONST_INT_P (shift) ? RUNTIME
+	 : shiftval == 0 ? ZERO
+	 : shiftval < 64 ? SMALL
+	 : LARGE);
+
+    rtx large_label, zero_label, exit_label;
+
+    if (shiftcomparison == RUNTIME)
+      {
+	zero_label = gen_label_rtx ();
+	large_label = gen_label_rtx ();
+	exit_label = gen_label_rtx ();
+
+	rtx cond = gen_rtx_EQ (VOIDmode, shift, const0_rtx);
+	emit_insn (gen_cbranchsi4 (cond, shift, const0_rtx, zero_label));
+
+	rtx sixtyfour = GEN_INT (64);
+	cond = gen_rtx_GE (VOIDmode, shift, sixtyfour);
+	emit_insn (gen_cbranchsi4 (cond, shift, sixtyfour, large_label));
+      }
+
+    if (shiftcomparison == SMALL || shiftcomparison == RUNTIME)
+      {
+	/* Shift both parts by the same amount, then patch in the bits that
+	   cross the boundary.
+	   This does *not* work for zero-length shifts.  */
+	rtx tmpto1 = gen_reg_rtx (DImode);
+	rtx tmpto2 = gen_reg_rtx (DImode);
+	emit_insn (gen_<expander>di3 (destfrom, srcfrom, shift));
+	emit_insn (logical_shift_fn (tmpto1, srcto, shift));
+	rtx lessershiftval = gen_reg_rtx (SImode);
+	emit_insn (gen_subsi3 (lessershiftval, GEN_INT (64), shift));
+	emit_insn (inverse_shift_fn (tmpto2, srcfrom, lessershiftval));
+	emit_insn (gen_iordi3 (destto, tmpto1, tmpto2));
+      }
+
+    if (shiftcomparison == RUNTIME)
+      {
+	emit_jump_insn (gen_jump (exit_label));
+	emit_barrier ();
+
+	emit_label (zero_label);
+      }
+
+    if (shiftcomparison == ZERO || shiftcomparison == RUNTIME)
+      emit_move_insn (dest, src);
+
+    if (shiftcomparison == RUNTIME)
+      {
+	emit_jump_insn (gen_jump (exit_label));
+	emit_barrier ();
+
+	emit_label (large_label);
+      }
+
+    if (shiftcomparison == LARGE || shiftcomparison == RUNTIME)
+      {
+	/* Do the shift within one part, and set the other part appropriately.
+	   Shifts of 128+ bits are an error.  */
+	rtx lessershiftval = gen_reg_rtx (SImode);
+	emit_insn (gen_subsi3 (lessershiftval, shift, GEN_INT (64)));
+	emit_insn (gen_<expander>di3 (destto, srcfrom, lessershiftval));
+	if (shiftop == ashr)
+	  emit_insn (gen_ashrdi3 (destfrom, srcfrom, GEN_INT (63)));
+	else
+	  emit_move_insn (destfrom, const0_rtx);
+      }
+
+    if (shiftcomparison == RUNTIME)
+      emit_label (exit_label);
+
+    DONE;
+  })
+
+;; }}}
 ;; {{{ Atomics

 ; Each compute unit has it's own L1 cache. The L2 cache is shared between