aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Stubbs <ams@codesourcery.com>2022-10-28 12:38:43 +0100
committerAndrew Stubbs <ams@codesourcery.com>2022-10-31 12:20:52 +0000
commitf539029c1ce6fb9163422d1a8b6ac12a2554eaa2 (patch)
tree137b983b21f3fa14ce7244b62d4978753f5623d7
parent12a1085644c6c5446eece41d255ca1fd569149d4 (diff)
downloadgcc-f539029c1ce6fb9163422d1a8b6ac12a2554eaa2.zip
gcc-f539029c1ce6fb9163422d1a8b6ac12a2554eaa2.tar.gz
gcc-f539029c1ce6fb9163422d1a8b6ac12a2554eaa2.tar.bz2
amdgcn: multi-size vector reductions
Add support for vector reductions for any vector width by switching
iterators and generalising the code slightly.  There's no one-instruction
way to move an item from lane 31 to lane 0 (63, 15, 7, 3, and 1 are all
fine though), and vec_extract is probably fewer cycles anyway, so now we
always reduce to an SGPR.

gcc/ChangeLog:

	* config/gcn/gcn-valu.md (V64_SI): Delete iterator.
	(V64_DI): Likewise.
	(V64_1REG): Likewise.
	(V64_INT_1REG): Likewise.
	(V64_2REG): Likewise.
	(V64_ALL): Likewise.
	(V64_FP): Likewise.
	(reduc_<reduc_op>_scal_<mode>): Use V_ALL.  Use gen_vec_extract.
	(fold_left_plus_<mode>): Use V_FP.
	(*<reduc_op>_dpp_shr_<mode>): Use V_1REG.
	(*<reduc_op>_dpp_shr_<mode>): Use V_DI.
	(*plus_carry_dpp_shr_<mode>): Use V_INT_1REG.
	(*plus_carry_in_dpp_shr_<mode>): Use V_SI.
	(*plus_carry_dpp_shr_<mode>): Use V_DI.
	(mov_from_lane63_<mode>): Delete.
	(mov_from_lane63_<mode>): Delete.
	* config/gcn/gcn.cc (gcn_expand_reduc_scalar): Support partial
	vectors.
	* config/gcn/gcn.md (unspec): Remove UNSPEC_MOV_FROM_LANE63.
-rw-r--r--gcc/config/gcn/gcn-valu.md111
-rw-r--r--gcc/config/gcn/gcn.cc27
-rw-r--r--gcc/config/gcn/gcn.md1
3 files changed, 45 insertions, 94 deletions
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index 00c0e3b..6274d2e 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -32,11 +32,6 @@
(define_mode_iterator V_DF
[V2DF V4DF V8DF V16DF V32DF V64DF])
-(define_mode_iterator V64_SI
- [V64SI])
-(define_mode_iterator V64_DI
- [V64DI])
-
; Vector modes for sub-dword modes
(define_mode_iterator V_QIHI
[V2QI V2HI
@@ -77,13 +72,6 @@
V32HF V32SF
V64HF V64SF])
-; V64_* modes are for where more general support is unimplemented
-; (e.g. reductions)
-(define_mode_iterator V64_1REG
- [V64QI V64HI V64SI V64HF V64SF])
-(define_mode_iterator V64_INT_1REG
- [V64QI V64HI V64SI])
-
; Vector modes for two vector registers
(define_mode_iterator V_2REG
[V2DI V2DF
@@ -93,9 +81,6 @@
V32DI V32DF
V64DI V64DF])
-(define_mode_iterator V64_2REG
- [V64DI V64DF])
-
; Vector modes with native support
(define_mode_iterator V_noQI
[V2HI V2HF V2SI V2SF V2DI V2DF
@@ -158,11 +143,6 @@
V32HF V32SF V32DF
V64HF V64SF V64DF])
-(define_mode_iterator V64_ALL
- [V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
-(define_mode_iterator V64_FP
- [V64HF V64SF V64DF])
-
(define_mode_attr scalar_mode
[(V2QI "qi") (V2HI "hi") (V2SI "si")
(V2HF "hf") (V2SF "sf") (V2DI "di") (V2DF "df")
@@ -3528,15 +3508,16 @@
(define_expand "reduc_<reduc_op>_scal_<mode>"
[(set (match_operand:<SCALAR_MODE> 0 "register_operand")
(unspec:<SCALAR_MODE>
- [(match_operand:V64_ALL 1 "register_operand")]
+ [(match_operand:V_ALL 1 "register_operand")]
REDUC_UNSPEC))]
""
{
rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
<reduc_unspec>);
- /* The result of the reduction is in lane 63 of tmp. */
- emit_insn (gen_mov_from_lane63_<mode> (operands[0], tmp));
+ rtx last_lane = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1);
+ emit_insn (gen_vec_extract<mode><scalar_mode> (operands[0], tmp,
+ last_lane));
DONE;
})
@@ -3547,7 +3528,7 @@
(define_expand "fold_left_plus_<mode>"
[(match_operand:<SCALAR_MODE> 0 "register_operand")
(match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
- (match_operand:V64_FP 2 "gcn_alu_operand")]
+ (match_operand:V_FP 2 "gcn_alu_operand")]
"can_create_pseudo_p ()
&& (flag_openacc || flag_openmp
|| flag_associative_math)"
@@ -3563,11 +3544,11 @@
})
(define_insn "*<reduc_op>_dpp_shr_<mode>"
- [(set (match_operand:V64_1REG 0 "register_operand" "=v")
- (unspec:V64_1REG
- [(match_operand:V64_1REG 1 "register_operand" "v")
- (match_operand:V64_1REG 2 "register_operand" "v")
- (match_operand:SI 3 "const_int_operand" "n")]
+ [(set (match_operand:V_1REG 0 "register_operand" "=v")
+ (unspec:V_1REG
+ [(match_operand:V_1REG 1 "register_operand" "v")
+ (match_operand:V_1REG 2 "register_operand" "v")
+ (match_operand:SI 3 "const_int_operand" "n")]
REDUC_UNSPEC))]
; GCN3 requires a carry out, GCN5 not
"!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
@@ -3580,11 +3561,11 @@
(set_attr "length" "8")])
(define_insn_and_split "*<reduc_op>_dpp_shr_<mode>"
- [(set (match_operand:V64_DI 0 "register_operand" "=v")
- (unspec:V64_DI
- [(match_operand:V64_DI 1 "register_operand" "v")
- (match_operand:V64_DI 2 "register_operand" "v")
- (match_operand:SI 3 "const_int_operand" "n")]
+ [(set (match_operand:V_DI 0 "register_operand" "=v")
+ (unspec:V_DI
+ [(match_operand:V_DI 1 "register_operand" "v")
+ (match_operand:V_DI 2 "register_operand" "v")
+ (match_operand:SI 3 "const_int_operand" "n")]
REDUC_2REG_UNSPEC))]
""
"#"
@@ -3609,10 +3590,10 @@
; Special cases for addition.
(define_insn "*plus_carry_dpp_shr_<mode>"
- [(set (match_operand:V64_INT_1REG 0 "register_operand" "=v")
- (unspec:V64_INT_1REG
- [(match_operand:V64_INT_1REG 1 "register_operand" "v")
- (match_operand:V64_INT_1REG 2 "register_operand" "v")
+ [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
+ (unspec:V_INT_1REG
+ [(match_operand:V_INT_1REG 1 "register_operand" "v")
+ (match_operand:V_INT_1REG 2 "register_operand" "v")
(match_operand:SI 3 "const_int_operand" "n")]
UNSPEC_PLUS_CARRY_DPP_SHR))
(clobber (reg:DI VCC_REG))]
@@ -3626,12 +3607,12 @@
(set_attr "length" "8")])
(define_insn "*plus_carry_in_dpp_shr_<mode>"
- [(set (match_operand:V64_SI 0 "register_operand" "=v")
- (unspec:V64_SI
- [(match_operand:V64_SI 1 "register_operand" "v")
- (match_operand:V64_SI 2 "register_operand" "v")
- (match_operand:SI 3 "const_int_operand" "n")
- (match_operand:DI 4 "register_operand" "cV")]
+ [(set (match_operand:V_SI 0 "register_operand" "=v")
+ (unspec:V_SI
+ [(match_operand:V_SI 1 "register_operand" "v")
+ (match_operand:V_SI 2 "register_operand" "v")
+ (match_operand:SI 3 "const_int_operand" "n")
+ (match_operand:DI 4 "register_operand" "cV")]
UNSPEC_PLUS_CARRY_IN_DPP_SHR))
(clobber (reg:DI VCC_REG))]
""
@@ -3644,11 +3625,11 @@
(set_attr "length" "8")])
(define_insn_and_split "*plus_carry_dpp_shr_<mode>"
- [(set (match_operand:V64_DI 0 "register_operand" "=v")
- (unspec:V64_DI
- [(match_operand:V64_DI 1 "register_operand" "v")
- (match_operand:V64_DI 2 "register_operand" "v")
- (match_operand:SI 3 "const_int_operand" "n")]
+ [(set (match_operand:V_DI 0 "register_operand" "=v")
+ (unspec:V_DI
+ [(match_operand:V_DI 1 "register_operand" "v")
+ (match_operand:V_DI 2 "register_operand" "v")
+ (match_operand:SI 3 "const_int_operand" "n")]
UNSPEC_PLUS_CARRY_DPP_SHR))
(clobber (reg:DI VCC_REG))]
""
@@ -3675,38 +3656,6 @@
[(set_attr "type" "vmult")
(set_attr "length" "16")])
-; Instructions to move a scalar value from lane 63 of a vector register.
-(define_insn "mov_from_lane63_<mode>"
- [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
- (unspec:<SCALAR_MODE>
- [(match_operand:V64_1REG 1 "register_operand" " v,v")]
- UNSPEC_MOV_FROM_LANE63))]
- ""
- "@
- v_readlane_b32\t%0, %1, 63
- v_mov_b32\t%0, %1 wave_ror:1"
- [(set_attr "type" "vop3a,vop_dpp")
- (set_attr "exec" "none,*")
- (set_attr "length" "8")])
-
-(define_insn "mov_from_lane63_<mode>"
- [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
- (unspec:<SCALAR_MODE>
- [(match_operand:V64_2REG 1 "register_operand" " v,v")]
- UNSPEC_MOV_FROM_LANE63))]
- ""
- "@
- v_readlane_b32\t%L0, %L1, 63\;v_readlane_b32\t%H0, %H1, 63
- * if (REGNO (operands[0]) <= REGNO (operands[1])) \
- return \"v_mov_b32\t%L0, %L1 wave_ror:1\;\" \
- \"v_mov_b32\t%H0, %H1 wave_ror:1\"; \
- else \
- return \"v_mov_b32\t%H0, %H1 wave_ror:1\;\" \
- \"v_mov_b32\t%L0, %L1 wave_ror:1\";"
- [(set_attr "type" "vop3a,vop_dpp")
- (set_attr "exec" "none,*")
- (set_attr "length" "8")])
-
;; }}}
;; {{{ Miscellaneous
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index a561976..b9d9170f 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -4918,23 +4918,25 @@ gcn_expand_dpp_shr_insn (machine_mode mode, const char *insn,
The vector register SRC of mode MODE is reduced using the operation given
by UNSPEC, and the scalar result is returned in lane 63 of a vector
- register. */
-/* FIXME: Implement reductions for sizes other than V64.
- (They're currently disabled in the machine description.) */
+ register (or lane 31, 15, 7, 3, 1 for partial vectors). */
rtx
gcn_expand_reduc_scalar (machine_mode mode, rtx src, int unspec)
{
machine_mode orig_mode = mode;
+ machine_mode scalar_mode = GET_MODE_INNER (mode);
+ int vf = GET_MODE_NUNITS (mode);
bool use_moves = (((unspec == UNSPEC_SMIN_DPP_SHR
|| unspec == UNSPEC_SMAX_DPP_SHR
|| unspec == UNSPEC_UMIN_DPP_SHR
|| unspec == UNSPEC_UMAX_DPP_SHR)
- && (mode == V64DImode
- || mode == V64DFmode))
+ && (scalar_mode == DImode
+ || scalar_mode == DFmode))
|| (unspec == UNSPEC_PLUS_DPP_SHR
- && mode == V64DFmode));
+ && scalar_mode == DFmode));
rtx_code code = (unspec == UNSPEC_SMIN_DPP_SHR ? SMIN
: unspec == UNSPEC_SMAX_DPP_SHR ? SMAX
: unspec == UNSPEC_UMIN_DPP_SHR ? UMIN
: unspec == UNSPEC_UMAX_DPP_SHR ? UMAX
@@ -4944,23 +4946,23 @@ gcn_expand_reduc_scalar (machine_mode mode, rtx src, int unspec)
|| unspec == UNSPEC_SMAX_DPP_SHR
|| unspec == UNSPEC_UMIN_DPP_SHR
|| unspec == UNSPEC_UMAX_DPP_SHR)
- && (mode == V64QImode
- || mode == V64HImode));
+ && (scalar_mode == QImode
+ || scalar_mode == HImode));
bool unsignedp = (unspec == UNSPEC_UMIN_DPP_SHR
|| unspec == UNSPEC_UMAX_DPP_SHR);
bool use_plus_carry = unspec == UNSPEC_PLUS_DPP_SHR
&& GET_MODE_CLASS (mode) == MODE_VECTOR_INT
- && (TARGET_GCN3 || mode == V64DImode);
+ && (TARGET_GCN3 || scalar_mode == DImode);
if (use_plus_carry)
unspec = UNSPEC_PLUS_CARRY_DPP_SHR;
if (use_extends)
{
- rtx tmp = gen_reg_rtx (V64SImode);
+ mode = VnMODE (vf, SImode);
+ rtx tmp = gen_reg_rtx (mode);
convert_move (tmp, src, unsignedp);
src = tmp;
- mode = V64SImode;
}
/* Perform reduction by first performing the reduction operation on every
@@ -4968,7 +4970,8 @@ gcn_expand_reduc_scalar (machine_mode mode, rtx src, int unspec)
iteration (thereby effectively reducing every 4 lanes) and so on until
all lanes are reduced. */
rtx in, out = force_reg (mode, src);
- for (int i = 0, shift = 1; i < 6; i++, shift <<= 1)
+ int iterations = exact_log2 (vf);
+ for (int i = 0, shift = 1; i < iterations; i++, shift <<= 1)
{
rtx shift_val = gen_rtx_CONST_INT (VOIDmode, shift);
in = out;
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index a3c9523..6c1a438 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -78,7 +78,6 @@
UNSPEC_PLUS_CARRY_DPP_SHR UNSPEC_PLUS_CARRY_IN_DPP_SHR
UNSPEC_AND_DPP_SHR UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR
UNSPEC_MOV_DPP_SHR
- UNSPEC_MOV_FROM_LANE63
UNSPEC_GATHER
UNSPEC_SCATTER
UNSPEC_RCP