diff options
author | Andrew Stubbs <ams@codesourcery.com> | 2023-03-01 15:32:50 +0000 |
---|---|---|
committer | Andrew Stubbs <ams@codesourcery.com> | 2023-03-23 11:00:54 +0000 |
commit | db80ccd34365c14e529111c94b93d3fb201b6eef (patch) | |
tree | 40155abe9aeb7e9f81e91ea15484c9fafc9e26bf /gcc | |
parent | 484c41c747d95f9cee15a33b75b32ae2e7eb45f3 (diff) | |
download | gcc-db80ccd34365c14e529111c94b93d3fb201b6eef.zip gcc-db80ccd34365c14e529111c94b93d3fb201b6eef.tar.gz gcc-db80ccd34365c14e529111c94b93d3fb201b6eef.tar.bz2 |
amdgcn: vec_extract no-op insns
Just using a move insn for no-op conversions triggers special move handling in
IRA, which declares that subregs of vectors aren't valid and routes everything
through memory. These patterns make the vec_select explicit and all is well.
gcc/ChangeLog:
* config/gcn/gcn-protos.h (gcn_stepped_zero_int_parallel_p): New.
* config/gcn/gcn-valu.md (V_1REG_ALT): New.
(V_2REG_ALT): New.
(vec_extract<V_1REG:mode><V_1REG_ALT:mode>_nop): New.
(vec_extract<V_2REG:mode><V_2REG_ALT:mode>_nop): New.
(vec_extract<V_ALL:mode><V_ALL_ALT:mode>): Use new patterns.
* config/gcn/gcn.cc (gcn_stepped_zero_int_parallel_p): New.
* config/gcn/predicates.md (ascending_zero_int_parallel): New.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/gcn/gcn-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/gcn/gcn-valu.md | 61 | ||||
-rw-r--r-- | gcc/config/gcn/gcn.cc | 18 | ||||
-rw-r--r-- | gcc/config/gcn/predicates.md | 7 |
4 files changed, 78 insertions, 9 deletions
diff --git a/gcc/config/gcn/gcn-protos.h b/gcc/config/gcn/gcn-protos.h index d7862b2..287ce17 100644 --- a/gcc/config/gcn/gcn-protos.h +++ b/gcc/config/gcn/gcn-protos.h @@ -75,6 +75,7 @@ extern reg_class gcn_regno_reg_class (int regno); extern bool gcn_scalar_flat_address_p (rtx); extern bool gcn_scalar_flat_mem_p (rtx); extern bool gcn_sgpr_move_p (rtx, rtx); +extern bool gcn_stepped_zero_int_parallel_p (rtx op, int step); extern bool gcn_valid_move_p (machine_mode, rtx, rtx); extern rtx gcn_vec_constant (machine_mode, int); extern rtx gcn_vec_constant (machine_mode, rtx); diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index 787d770..334b6b0 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -49,6 +49,13 @@ V16QI V16HI V16SI V16HF V16SF V32QI V32HI V32SI V32HF V32SF V64QI V64HI V64SI V64HF V64SF]) +(define_mode_iterator V_1REG_ALT + [V2QI V2HI V2SI V2HF V2SF + V4QI V4HI V4SI V4HF V4SF + V8QI V8HI V8SI V8HF V8SF + V16QI V16HI V16SI V16HF V16SF + V32QI V32HI V32SI V32HF V32SF + V64QI V64HI V64SI V64HF V64SF]) (define_mode_iterator V_INT_1REG [V2QI V2HI V2SI @@ -80,6 +87,13 @@ V16DI V16DF V32DI V32DF V64DI V64DF]) +(define_mode_iterator V_2REG_ALT + [V2DI V2DF + V4DI V4DF + V8DI V8DF + V16DI V16DF + V32DI V32DF + V64DI V64DF]) ; Vector modes with native support (define_mode_iterator V_noQI @@ -788,11 +802,36 @@ (set_attr "exec" "none") (set_attr "laneselect" "yes")]) +(define_insn "vec_extract<V_1REG:mode><V_1REG_ALT:mode>_nop" + [(set (match_operand:V_1REG_ALT 0 "register_operand" "=v,v") + (vec_select:V_1REG_ALT + (match_operand:V_1REG 1 "register_operand" " 0,v") + (match_operand 2 "ascending_zero_int_parallel" "")))] + "MODE_VF (<V_1REG_ALT:MODE>mode) < MODE_VF (<V_1REG:MODE>mode) + && <V_1REG_ALT:SCALAR_MODE>mode == <V_1REG:SCALAR_MODE>mode" + "@ + ; in-place extract %0 + v_mov_b32\t%L0, %L1" + [(set_attr "type" "vmult") + (set_attr "length" "0,8")]) + +(define_insn "vec_extract<V_2REG:mode><V_2REG_ALT:mode>_nop" + 
[(set (match_operand:V_2REG_ALT 0 "register_operand" "=v,v") + (vec_select:V_2REG_ALT + (match_operand:V_2REG 1 "register_operand" " 0,v") + (match_operand 2 "ascending_zero_int_parallel" "")))] + "MODE_VF (<V_2REG_ALT:MODE>mode) < MODE_VF (<V_2REG:MODE>mode) + && <V_2REG_ALT:SCALAR_MODE>mode == <V_2REG:SCALAR_MODE>mode" + "@ + ; in-place extract %0 + v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1" + [(set_attr "type" "vmult") + (set_attr "length" "0,8")]) + (define_expand "vec_extract<V_ALL:mode><V_ALL_ALT:mode>" - [(set (match_operand:V_ALL_ALT 0 "register_operand") - (vec_select:V_ALL_ALT - (match_operand:V_ALL 1 "register_operand") - (parallel [(match_operand 2 "immediate_operand")])))] + [(match_operand:V_ALL_ALT 0 "register_operand") + (match_operand:V_ALL 1 "register_operand") + (match_operand 2 "immediate_operand")] "MODE_VF (<V_ALL_ALT:MODE>mode) < MODE_VF (<V_ALL:MODE>mode) && <V_ALL_ALT:SCALAR_MODE>mode == <V_ALL:SCALAR_MODE>mode" { @@ -802,8 +841,12 @@ if (firstlane == 0) { - /* A plain move will do. */ - tmp = operands[1]; + rtx parallel = gen_rtx_PARALLEL (<V_ALL:MODE>mode, + rtvec_alloc (numlanes)); + for (int i = 0; i < numlanes; i++) + XVECEXP (parallel, 0, i) = GEN_INT (i); + emit_insn (gen_vec_extract<V_ALL:mode><V_ALL_ALT:mode>_nop + (operands[0], operands[1], parallel)); } else { /* FIXME: optimize this by using DPP where available. 
*/ @@ -815,10 +858,10 @@ tmp = gen_reg_rtx (<V_ALL:MODE>mode); emit_insn (gen_ds_bpermute<V_ALL:mode> (tmp, permutation, operands[1], get_exec (<V_ALL:MODE>mode))); - } - emit_move_insn (operands[0], - gen_rtx_SUBREG (<V_ALL_ALT:MODE>mode, tmp, 0)); + emit_move_insn (operands[0], + gen_rtx_SUBREG (<V_ALL_ALT:MODE>mode, tmp, 0)); + } DONE; }) diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index aca17a1..5bf88e9 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -1422,6 +1422,24 @@ CODE_FOR_OP (reload_out) #undef CODE_FOR_OP #undef CODE_FOR +/* Return true if OP is a PARALLEL of CONST_INTs that form a linear + series with step STEP. */ + +bool +gcn_stepped_zero_int_parallel_p (rtx op, int step) +{ + if (GET_CODE (op) != PARALLEL || !CONST_INT_P (XVECEXP (op, 0, 0))) + return false; + + unsigned HOST_WIDE_INT base = 0; + for (int i = 0; i < XVECLEN (op, 0); ++i) + if (!CONST_INT_P (XVECEXP (op, 0, i)) + || UINTVAL (XVECEXP (op, 0, i)) != base + i * step) + return false; + + return true; +} + /* }}} */ /* {{{ Addresses, pointers and moves. */ diff --git a/gcc/config/gcn/predicates.md b/gcc/config/gcn/predicates.md index a20acf7..5554a06 100644 --- a/gcc/config/gcn/predicates.md +++ b/gcc/config/gcn/predicates.md @@ -197,3 +197,10 @@ (ior (match_operand 0 "gcn_ds_memory_operand") (and (match_code "unspec") (match_test "XINT (op, 1) == UNSPEC_VECTOR"))))) + +(define_predicate "ascending_zero_int_parallel" + (match_code "parallel") +{ + return gcn_stepped_zero_int_parallel_p (op, 1); +}) + |