diff options
author | Andrew Stubbs <ams@codesourcery.com> | 2023-03-01 15:32:50 +0000 |
---|---|---|
committer | Andrew Stubbs <ams@codesourcery.com> | 2023-03-17 13:03:00 +0000 |
commit | 61453f32d28c60bb0f996a6d60b2fdc52e67c093 (patch) | |
tree | 06cb5fcc348272a0c666dc376fcb421f23d0c6ef | |
parent | c64e8a52af5f05d03b6d68e2fe6deb60f71adc6c (diff) | |
download | gcc-61453f32d28c60bb0f996a6d60b2fdc52e67c093.zip gcc-61453f32d28c60bb0f996a6d60b2fdc52e67c093.tar.gz gcc-61453f32d28c60bb0f996a6d60b2fdc52e67c093.tar.bz2 |
amdgcn: vec_extract no-op insns
Just using move insn for no-op conversions triggers special move handling in
IRA which declares that subregs of vectors aren't valid and routes everything
through memory. These patterns make the vec_select explicit and all is well.
gcc/ChangeLog:
* config/gcn/gcn-protos.h (gcn_stepped_zero_int_parallel_p): New.
* config/gcn/gcn-valu.md (V_1REG_ALT): New.
(V_2REG_ALT): New.
(vec_extract<V_1REG:mode><V_1REG_ALT:mode>_nop): New.
(vec_extract<V_2REG:mode><V_2REG_ALT:mode>_nop): New.
(vec_extract<V_ALL:mode><V_ALL_ALT:mode>): Use new patterns.
* config/gcn/gcn.cc (gcn_stepped_zero_int_parallel_p): New.
* config/gcn/predicates.md (ascending_zero_int_parallel): New.
-rw-r--r-- | gcc/ChangeLog.omp | 11 | ||||
-rw-r--r-- | gcc/config/gcn/gcn-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/gcn/gcn-valu.md | 61 | ||||
-rw-r--r-- | gcc/config/gcn/gcn.cc | 18 | ||||
-rw-r--r-- | gcc/config/gcn/predicates.md | 7 |
5 files changed, 89 insertions, 9 deletions
diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp index 8326798..6d88f48 100644 --- a/gcc/ChangeLog.omp +++ b/gcc/ChangeLog.omp @@ -1,3 +1,14 @@ +2023-03-17 Andrew Stubbs <ams@codesourcery.com> + + * config/gcn/gcn-protos.h (gcn_stepped_zero_int_parallel_p): New. + * config/gcn/gcn-valu.md (V_1REG_ALT): New. + (V_2REG_ALT): New. + (vec_extract<V_1REG:mode><V_1REG_ALT:mode>_nop): New. + (vec_extract<V_2REG:mode><V_2REG_ALT:mode>_nop): New. + (vec_extract<V_ALL:mode><V_ALL_ALT:mode>): Use new patterns. + * config/gcn/gcn.cc (gcn_stepped_zero_int_parallel_p): New. + * config/gcn/predicates.md (ascending_zero_int_parallel): New. + 2023-03-06 Paul-Antoine Arras <pa@codesourcery.com> Backported from master: diff --git a/gcc/config/gcn/gcn-protos.h b/gcc/config/gcn/gcn-protos.h index f9a1fc0..3625a58 100644 --- a/gcc/config/gcn/gcn-protos.h +++ b/gcc/config/gcn/gcn-protos.h @@ -70,6 +70,7 @@ extern reg_class gcn_regno_reg_class (int regno); extern bool gcn_scalar_flat_address_p (rtx); extern bool gcn_scalar_flat_mem_p (rtx); extern bool gcn_sgpr_move_p (rtx, rtx); +extern bool gcn_stepped_zero_int_parallel_p (rtx op, int step); extern bool gcn_valid_move_p (machine_mode, rtx, rtx); extern rtx gcn_vec_constant (machine_mode, int); extern rtx gcn_vec_constant (machine_mode, rtx); diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index 35c1087..1bfd358 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -49,6 +49,13 @@ V16QI V16HI V16SI V16HF V16SF V32QI V32HI V32SI V32HF V32SF V64QI V64HI V64SI V64HF V64SF]) +(define_mode_iterator V_1REG_ALT + [V2QI V2HI V2SI V2HF V2SF + V4QI V4HI V4SI V4HF V4SF + V8QI V8HI V8SI V8HF V8SF + V16QI V16HI V16SI V16HF V16SF + V32QI V32HI V32SI V32HF V32SF + V64QI V64HI V64SI V64HF V64SF]) (define_mode_iterator V_INT_1REG [V2QI V2HI V2SI @@ -80,6 +87,13 @@ V16DI V16DF V32DI V32DF V64DI V64DF]) +(define_mode_iterator V_2REG_ALT + [V2DI V2DF + V4DI V4DF + V8DI V8DF + V16DI V16DF + V32DI V32DF + V64DI 
V64DF]) ; Vector modes with native support (define_mode_iterator V_noQI @@ -788,11 +802,36 @@ (set_attr "exec" "none") (set_attr "laneselect" "yes")]) +(define_insn "vec_extract<V_1REG:mode><V_1REG_ALT:mode>_nop" + [(set (match_operand:V_1REG_ALT 0 "register_operand" "=v,v") + (vec_select:V_1REG_ALT + (match_operand:V_1REG 1 "register_operand" " 0,v") + (match_operand 2 "ascending_zero_int_parallel" "")))] + "MODE_VF (<V_1REG_ALT:MODE>mode) < MODE_VF (<V_1REG:MODE>mode) + && <V_1REG_ALT:SCALAR_MODE>mode == <V_1REG:SCALAR_MODE>mode" + "@ + ; in-place extract %0 + v_mov_b32\t%L0, %L1" + [(set_attr "type" "vmult") + (set_attr "length" "0,8")]) + +(define_insn "vec_extract<V_2REG:mode><V_2REG_ALT:mode>_nop" + [(set (match_operand:V_2REG_ALT 0 "register_operand" "=v,v") + (vec_select:V_2REG_ALT + (match_operand:V_2REG 1 "register_operand" " 0,v") + (match_operand 2 "ascending_zero_int_parallel" "")))] + "MODE_VF (<V_2REG_ALT:MODE>mode) < MODE_VF (<V_2REG:MODE>mode) + && <V_2REG_ALT:SCALAR_MODE>mode == <V_2REG:SCALAR_MODE>mode" + "@ + ; in-place extract %0 + v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1" + [(set_attr "type" "vmult") + (set_attr "length" "0,8")]) + (define_expand "vec_extract<V_ALL:mode><V_ALL_ALT:mode>" - [(set (match_operand:V_ALL_ALT 0 "register_operand") - (vec_select:V_ALL_ALT - (match_operand:V_ALL 1 "register_operand") - (parallel [(match_operand 2 "immediate_operand")])))] + [(match_operand:V_ALL_ALT 0 "register_operand") + (match_operand:V_ALL 1 "register_operand") + (match_operand 2 "immediate_operand")] "MODE_VF (<V_ALL_ALT:MODE>mode) < MODE_VF (<V_ALL:MODE>mode) && <V_ALL_ALT:SCALAR_MODE>mode == <V_ALL:SCALAR_MODE>mode" { @@ -802,8 +841,12 @@ if (firstlane == 0) { - /* A plain move will do. 
*/ - tmp = operands[1]; + rtx parallel = gen_rtx_PARALLEL (<V_ALL:MODE>mode, + rtvec_alloc (numlanes)); + for (int i = 0; i < numlanes; i++) + XVECEXP (parallel, 0, i) = GEN_INT (i); + emit_insn (gen_vec_extract<V_ALL:mode><V_ALL_ALT:mode>_nop + (operands[0], operands[1], parallel)); } else { /* FIXME: optimize this by using DPP where available. */ @@ -815,10 +858,10 @@ tmp = gen_reg_rtx (<V_ALL:MODE>mode); emit_insn (gen_ds_bpermute<V_ALL:mode> (tmp, permutation, operands[1], get_exec (<V_ALL:MODE>mode))); - } - emit_move_insn (operands[0], - gen_rtx_SUBREG (<V_ALL_ALT:MODE>mode, tmp, 0)); + emit_move_insn (operands[0], + gen_rtx_SUBREG (<V_ALL_ALT:MODE>mode, tmp, 0)); + } DONE; }) diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index a0c55fb..7403dfe 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -1433,6 +1433,24 @@ CODE_FOR_OP (reload_out) #undef CODE_FOR_OP #undef CODE_FOR +/* Return true if OP is a PARALLEL of CONST_INTs that form a linear + series with step STEP. */ + +bool +gcn_stepped_zero_int_parallel_p (rtx op, int step) +{ + if (GET_CODE (op) != PARALLEL || !CONST_INT_P (XVECEXP (op, 0, 0))) + return false; + + unsigned HOST_WIDE_INT base = 0; + for (int i = 0; i < XVECLEN (op, 0); ++i) + if (!CONST_INT_P (XVECEXP (op, 0, i)) + || UINTVAL (XVECEXP (op, 0, i)) != base + i * step) + return false; + + return true; +} + /* }}} */ /* {{{ Addresses, pointers and moves. */ diff --git a/gcc/config/gcn/predicates.md b/gcc/config/gcn/predicates.md index 030c280..e567fed 100644 --- a/gcc/config/gcn/predicates.md +++ b/gcc/config/gcn/predicates.md @@ -197,3 +197,10 @@ (ior (match_operand 0 "gcn_ds_memory_operand") (and (match_code "unspec") (match_test "XINT (op, 1) == UNSPEC_VECTOR"))))) + +(define_predicate "ascending_zero_int_parallel" + (match_code "parallel") +{ + return gcn_stepped_zero_int_parallel_p (op, 1); +}) + |