aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorAndrew Stubbs <ams@baylibre.com>2025-06-12 16:58:33 +0000
committerAndrew Stubbs <ams@baylibre.com>2025-07-30 13:00:37 +0000
commit4ff3d0d40af181140ecad42fb23047e374967f4d (patch)
treeb0e462b57d1586fd2dc1719ceb879c1994440b39 /gcc
parent9ebe1b5d26fda4fffb014d55c84db4239a955e27 (diff)
downloadgcc-4ff3d0d40af181140ecad42fb23047e374967f4d.zip
gcc-4ff3d0d40af181140ecad42fb23047e374967f4d.tar.gz
gcc-4ff3d0d40af181140ecad42fb23047e374967f4d.tar.bz2
amdgcn: add DImode offsets for gather/scatter
Add new variant of he gather_load and scatter_store instructions that take the offsets in DImode. This is not the natural width for offsets in the instruction set, but we can use them to compute a vector of absolute addresses, which does work. This enables the autovectorizer to use gather/scatter in a number of additional scenarios (one of which shows up in the SPEC HPC lbm benchmark). gcc/ChangeLog: * config/gcn/gcn-valu.md (gather_load<mode><vndi>): New. (scatter_store<mode><vndi>): New. (mask_gather_load<mode><vndi>): New. (mask_scatter_store<mode><vndi>): New. * config/gcn/gcn.cc (gcn_expand_scaled_offsets): Support DImode. (cherry picked from commit 351fa55c58a036f148d13bca972e687a0bacd113)
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/gcn/gcn-valu.md81
-rw-r--r--gcc/config/gcn/gcn.cc34
2 files changed, 103 insertions, 12 deletions
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index dfa6b15..3899117 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -1133,6 +1133,23 @@
DONE;
})
+(define_expand "gather_load<mode><vndi>"
+ [(match_operand:V_MOV 0 "register_operand")
+ (match_operand:DI 1 "register_operand")
+ (match_operand:<VnDI> 2 "register_operand")
+ (match_operand 3 "immediate_operand")
+ (match_operand:SI 4 "gcn_alu_operand")]
+ ""
+ {
+ rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
+ operands[2], operands[4],
+ INTVAL (operands[3]), NULL);
+
+ emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
+ const0_rtx, const0_rtx));
+ DONE;
+ })
+
; Allow any address expression
(define_expand "gather<mode>_expr<exec>"
[(set (match_operand:V_MOV 0 "register_operand")
@@ -1259,6 +1276,23 @@
DONE;
})
+(define_expand "scatter_store<mode><vndi>"
+ [(match_operand:DI 0 "register_operand")
+ (match_operand:<VnDI> 1 "register_operand")
+ (match_operand 2 "immediate_operand")
+ (match_operand:SI 3 "gcn_alu_operand")
+ (match_operand:V_MOV 4 "register_operand")]
+ ""
+ {
+ rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
+ operands[1], operands[3],
+ INTVAL (operands[2]), NULL);
+
+ emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
+ const0_rtx, const0_rtx));
+ DONE;
+ })
+
; Allow any address expression
(define_expand "scatter<mode>_expr<exec_scatter>"
[(set (mem:BLK (scratch))
@@ -4222,6 +4256,32 @@
DONE;
})
+(define_expand "mask_gather_load<mode><vndi>"
+ [(set:V_MOV (match_operand:V_MOV 0 "register_operand")
+ (unspec:V_MOV
+ [(match_operand:DI 1 "register_operand")
+ (match_operand:<VnDI> 2 "register_operand")
+ (match_operand 3 "immediate_operand")
+ (match_operand:SI 4 "gcn_alu_operand")
+ (match_operand:DI 5 "")
+ (match_operand:V_MOV 6 "maskload_else_operand")]
+ UNSPEC_GATHER))]
+ ""
+ {
+ rtx exec = force_reg (DImode, operands[5]);
+
+ rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
+ operands[2], operands[4],
+ INTVAL (operands[3]), exec);
+
+ emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
+ const0_rtx, const0_rtx,
+ const0_rtx,
+ gcn_gen_undef (<MODE>mode),
+ exec));
+ DONE;
+ })
+
(define_expand "mask_scatter_store<mode><vnsi>"
[(match_operand:DI 0 "register_operand")
(match_operand:<VnSI> 1 "register_operand")
@@ -4250,6 +4310,27 @@
DONE;
})
+(define_expand "mask_scatter_store<mode><vndi>"
+ [(match_operand:DI 0 "register_operand")
+ (match_operand:<VnDI> 1 "register_operand")
+ (match_operand 2 "immediate_operand")
+ (match_operand:SI 3 "gcn_alu_operand")
+ (match_operand:V_MOV 4 "register_operand")
+ (match_operand:DI 5 "")]
+ ""
+ {
+ rtx exec = force_reg (DImode, operands[5]);
+
+ rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
+ operands[1], operands[3],
+ INTVAL (operands[2]), exec);
+
+ emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
+ operands[4], const0_rtx,
+ const0_rtx, exec));
+ DONE;
+ })
+
(define_code_iterator cond_op [plus minus mult])
(define_expand "cond_<expander><mode>"
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 702c7cc..5c0f764 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -2308,36 +2308,46 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem,
Return values.
ADDR_SPACE_FLAT - return VnDImode vector of absolute addresses.
- ADDR_SPACE_GLOBAL - return VnSImode vector of offsets. */
+ ADDR_SPACE_GLOBAL - return VnSImode vector of offsets.
+ 64-bit offsets - return VnDImode vector of absolute addresses. */
rtx
gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets, rtx scale,
bool unsigned_p, rtx exec)
{
int vf = GET_MODE_NUNITS (GET_MODE (offsets));
- rtx tmpsi = gen_reg_rtx (VnMODE (vf, SImode));
- rtx tmpdi = gen_reg_rtx (VnMODE (vf, DImode));
+ rtx scaled_offsets = gen_reg_rtx (GET_MODE (offsets));
+ rtx abs_addr = gen_reg_rtx (VnMODE (vf, DImode));
+ bool use_di = GET_MODE_INNER (GET_MODE (scaled_offsets)) == DImode;
if (CONST_INT_P (scale)
&& INTVAL (scale) > 0
&& exact_log2 (INTVAL (scale)) >= 0)
- emit_insn (gen_ashlvNsi3 (tmpsi, offsets,
- GEN_INT (exact_log2 (INTVAL (scale))),
- NULL, exec));
+ emit_insn (gen_ashlvNm3 (scaled_offsets, offsets,
+ GEN_INT (exact_log2 (INTVAL (scale))),
+ NULL, exec));
else
- emit_insn (gen_mulvNsi3_dup (tmpsi, offsets, scale, NULL, exec));
+ emit_insn (gen_mulvNm3_dup (scaled_offsets, scale, offsets, NULL, exec));
+ /* No instructions support DImode offsets. */
+ if (use_di)
+ {
+ emit_insn (gen_addvNdi3_dup (abs_addr, base, scaled_offsets, NULL, exec));
+ return abs_addr;
+ }
/* "Global" instructions do not support negative register offsets. */
- if (as == ADDR_SPACE_FLAT || !unsigned_p)
+ else if (as == ADDR_SPACE_FLAT || !unsigned_p)
{
if (unsigned_p)
- emit_insn (gen_addvNdi3_zext_dup2 (tmpdi, tmpsi, base, NULL, exec));
+ emit_insn (gen_addvNdi3_zext_dup2 (abs_addr, scaled_offsets, base,
+ NULL, exec));
else
- emit_insn (gen_addvNdi3_sext_dup2 (tmpdi, tmpsi, base, NULL, exec));
- return tmpdi;
+ emit_insn (gen_addvNdi3_sext_dup2 (abs_addr, scaled_offsets, base,
+ NULL, exec));
+ return abs_addr;
}
else if (as == ADDR_SPACE_GLOBAL)
- return tmpsi;
+ return scaled_offsets;
gcc_unreachable ();
}