aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Stubbs <ams@codesourcery.com>2023-03-01 15:32:50 +0000
committerAndrew Stubbs <ams@codesourcery.com>2023-03-17 13:03:00 +0000
commit61453f32d28c60bb0f996a6d60b2fdc52e67c093 (patch)
tree06cb5fcc348272a0c666dc376fcb421f23d0c6ef
parentc64e8a52af5f05d03b6d68e2fe6deb60f71adc6c (diff)
downloadgcc-61453f32d28c60bb0f996a6d60b2fdc52e67c093.zip
gcc-61453f32d28c60bb0f996a6d60b2fdc52e67c093.tar.gz
gcc-61453f32d28c60bb0f996a6d60b2fdc52e67c093.tar.bz2
amdgcn: vec_extract no-op insns
Just using a move insn for no-op conversions triggers special move handling in IRA, which declares that subregs of vectors aren't valid and routes everything through memory. These patterns make the vec_select explicit, and all is well. gcc/ChangeLog: * config/gcn/gcn-protos.h (gcn_stepped_zero_int_parallel_p): New. * config/gcn/gcn-valu.md (V_1REG_ALT): New. (V_2REG_ALT): New. (vec_extract<V_1REG:mode><V_1REG_ALT:mode>_nop): New. (vec_extract<V_2REG:mode><V_2REG_ALT:mode>_nop): New. (vec_extract<V_ALL:mode><V_ALL_ALT:mode>): Use new patterns. * config/gcn/gcn.cc (gcn_stepped_zero_int_parallel_p): New. * config/gcn/predicates.md (ascending_zero_int_parallel): New.
-rw-r--r--gcc/ChangeLog.omp11
-rw-r--r--gcc/config/gcn/gcn-protos.h1
-rw-r--r--gcc/config/gcn/gcn-valu.md61
-rw-r--r--gcc/config/gcn/gcn.cc18
-rw-r--r--gcc/config/gcn/predicates.md7
5 files changed, 89 insertions, 9 deletions
diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp
index 8326798..6d88f48 100644
--- a/gcc/ChangeLog.omp
+++ b/gcc/ChangeLog.omp
@@ -1,3 +1,14 @@
+2023-03-17 Andrew Stubbs <ams@codesourcery.com>
+
+ * config/gcn/gcn-protos.h (gcn_stepped_zero_int_parallel_p): New.
+ * config/gcn/gcn-valu.md (V_1REG_ALT): New.
+ (V_2REG_ALT): New.
+ (vec_extract<V_1REG:mode><V_1REG_ALT:mode>_nop): New.
+ (vec_extract<V_2REG:mode><V_2REG_ALT:mode>_nop): New.
+ (vec_extract<V_ALL:mode><V_ALL_ALT:mode>): Use new patterns.
+ * config/gcn/gcn.cc (gcn_stepped_zero_int_parallel_p): New.
+ * config/gcn/predicates.md (ascending_zero_int_parallel): New.
+
2023-03-06 Paul-Antoine Arras <pa@codesourcery.com>
Backported from master:
diff --git a/gcc/config/gcn/gcn-protos.h b/gcc/config/gcn/gcn-protos.h
index f9a1fc0..3625a58 100644
--- a/gcc/config/gcn/gcn-protos.h
+++ b/gcc/config/gcn/gcn-protos.h
@@ -70,6 +70,7 @@ extern reg_class gcn_regno_reg_class (int regno);
extern bool gcn_scalar_flat_address_p (rtx);
extern bool gcn_scalar_flat_mem_p (rtx);
extern bool gcn_sgpr_move_p (rtx, rtx);
+extern bool gcn_stepped_zero_int_parallel_p (rtx op, int step);
extern bool gcn_valid_move_p (machine_mode, rtx, rtx);
extern rtx gcn_vec_constant (machine_mode, int);
extern rtx gcn_vec_constant (machine_mode, rtx);
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index 35c1087..1bfd358 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -49,6 +49,13 @@
V16QI V16HI V16SI V16HF V16SF
V32QI V32HI V32SI V32HF V32SF
V64QI V64HI V64SI V64HF V64SF])
+(define_mode_iterator V_1REG_ALT
+ [V2QI V2HI V2SI V2HF V2SF
+ V4QI V4HI V4SI V4HF V4SF
+ V8QI V8HI V8SI V8HF V8SF
+ V16QI V16HI V16SI V16HF V16SF
+ V32QI V32HI V32SI V32HF V32SF
+ V64QI V64HI V64SI V64HF V64SF])
(define_mode_iterator V_INT_1REG
[V2QI V2HI V2SI
@@ -80,6 +87,13 @@
V16DI V16DF
V32DI V32DF
V64DI V64DF])
+(define_mode_iterator V_2REG_ALT
+ [V2DI V2DF
+ V4DI V4DF
+ V8DI V8DF
+ V16DI V16DF
+ V32DI V32DF
+ V64DI V64DF])
; Vector modes with native support
(define_mode_iterator V_noQI
@@ -788,11 +802,36 @@
(set_attr "exec" "none")
(set_attr "laneselect" "yes")])
+(define_insn "vec_extract<V_1REG:mode><V_1REG_ALT:mode>_nop"
+ [(set (match_operand:V_1REG_ALT 0 "register_operand" "=v,v")
+ (vec_select:V_1REG_ALT
+ (match_operand:V_1REG 1 "register_operand" " 0,v")
+ (match_operand 2 "ascending_zero_int_parallel" "")))]
+ "MODE_VF (<V_1REG_ALT:MODE>mode) < MODE_VF (<V_1REG:MODE>mode)
+ && <V_1REG_ALT:SCALAR_MODE>mode == <V_1REG:SCALAR_MODE>mode"
+ "@
+ ; in-place extract %0
+ v_mov_b32\t%L0, %L1"
+ [(set_attr "type" "vmult")
+ (set_attr "length" "0,8")])
+
+(define_insn "vec_extract<V_2REG:mode><V_2REG_ALT:mode>_nop"
+ [(set (match_operand:V_2REG_ALT 0 "register_operand" "=v,v")
+ (vec_select:V_2REG_ALT
+ (match_operand:V_2REG 1 "register_operand" " 0,v")
+ (match_operand 2 "ascending_zero_int_parallel" "")))]
+ "MODE_VF (<V_2REG_ALT:MODE>mode) < MODE_VF (<V_2REG:MODE>mode)
+ && <V_2REG_ALT:SCALAR_MODE>mode == <V_2REG:SCALAR_MODE>mode"
+ "@
+ ; in-place extract %0
+ v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
+ [(set_attr "type" "vmult")
+ (set_attr "length" "0,8")])
+
(define_expand "vec_extract<V_ALL:mode><V_ALL_ALT:mode>"
- [(set (match_operand:V_ALL_ALT 0 "register_operand")
- (vec_select:V_ALL_ALT
- (match_operand:V_ALL 1 "register_operand")
- (parallel [(match_operand 2 "immediate_operand")])))]
+ [(match_operand:V_ALL_ALT 0 "register_operand")
+ (match_operand:V_ALL 1 "register_operand")
+ (match_operand 2 "immediate_operand")]
"MODE_VF (<V_ALL_ALT:MODE>mode) < MODE_VF (<V_ALL:MODE>mode)
&& <V_ALL_ALT:SCALAR_MODE>mode == <V_ALL:SCALAR_MODE>mode"
{
@@ -802,8 +841,12 @@
if (firstlane == 0)
{
- /* A plain move will do. */
- tmp = operands[1];
+ rtx parallel = gen_rtx_PARALLEL (<V_ALL:MODE>mode,
+ rtvec_alloc (numlanes));
+ for (int i = 0; i < numlanes; i++)
+ XVECEXP (parallel, 0, i) = GEN_INT (i);
+ emit_insn (gen_vec_extract<V_ALL:mode><V_ALL_ALT:mode>_nop
+ (operands[0], operands[1], parallel));
} else {
/* FIXME: optimize this by using DPP where available. */
@@ -815,10 +858,10 @@
tmp = gen_reg_rtx (<V_ALL:MODE>mode);
emit_insn (gen_ds_bpermute<V_ALL:mode> (tmp, permutation, operands[1],
get_exec (<V_ALL:MODE>mode)));
- }
- emit_move_insn (operands[0],
- gen_rtx_SUBREG (<V_ALL_ALT:MODE>mode, tmp, 0));
+ emit_move_insn (operands[0],
+ gen_rtx_SUBREG (<V_ALL_ALT:MODE>mode, tmp, 0));
+ }
DONE;
})
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index a0c55fb..7403dfe 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -1433,6 +1433,24 @@ CODE_FOR_OP (reload_out)
#undef CODE_FOR_OP
#undef CODE_FOR
+/* Return true if OP is a PARALLEL of CONST_INTs that form a linear series
+ starting at zero with step STEP. */
+
+bool
+gcn_stepped_zero_int_parallel_p (rtx op, int step)
+{
+ if (GET_CODE (op) != PARALLEL || !CONST_INT_P (XVECEXP (op, 0, 0)))
+ return false;
+
+ unsigned HOST_WIDE_INT base = 0;
+ for (int i = 0; i < XVECLEN (op, 0); ++i)
+ if (!CONST_INT_P (XVECEXP (op, 0, i))
+ || UINTVAL (XVECEXP (op, 0, i)) != base + i * step)
+ return false;
+
+ return true;
+}
+
/* }}} */
/* {{{ Addresses, pointers and moves. */
diff --git a/gcc/config/gcn/predicates.md b/gcc/config/gcn/predicates.md
index 030c280..e567fed 100644
--- a/gcc/config/gcn/predicates.md
+++ b/gcc/config/gcn/predicates.md
@@ -197,3 +197,10 @@
(ior (match_operand 0 "gcn_ds_memory_operand")
(and (match_code "unspec")
(match_test "XINT (op, 1) == UNSPEC_VECTOR")))))
+
+(define_predicate "ascending_zero_int_parallel"
+ (match_code "parallel")
+{
+ return gcn_stepped_zero_int_parallel_p (op, 1);
+})
+