;; Copyright (C) 2016-2025 Free Software Foundation, Inc.
;; This file is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3 of the License, or (at your option)
;; any later version.
;; This file is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
;; for more details.
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
;; {{{ Vector iterators
; SV iterators include both scalar and vector modes.
; Vector modes for specific types
(define_mode_iterator V_QI
[V2QI V4QI V8QI V16QI V32QI V64QI])
(define_mode_iterator V_HI
[V2HI V4HI V8HI V16HI V32HI V64HI])
(define_mode_iterator V_HF
[V2HF V4HF V8HF V16HF V32HF V64HF])
(define_mode_iterator V_SI
[V2SI V4SI V8SI V16SI V32SI V64SI])
(define_mode_iterator V_SF
[V2SF V4SF V8SF V16SF V32SF V64SF])
(define_mode_iterator V_DI
[V2DI V4DI V8DI V16DI V32DI V64DI])
(define_mode_iterator V_DF
[V2DF V4DF V8DF V16DF V32DF V64DF])
; Vector modes for sub-dword modes
(define_mode_iterator V_QIHI
[V2QI V2HI
V4QI V4HI
V8QI V8HI
V16QI V16HI
V32QI V32HI
V64QI V64HI])
; Vector modes for one vector register
(define_mode_iterator V_1REG
[V2QI V2HI V2SI V2HF V2SF
V4QI V4HI V4SI V4HF V4SF
V8QI V8HI V8SI V8HF V8SF
V16QI V16HI V16SI V16HF V16SF
V32QI V32HI V32SI V32HF V32SF
V64QI V64HI V64SI V64HF V64SF])
(define_mode_iterator V_1REG_ALT
[V2QI V2HI V2SI V2HF V2SF
V4QI V4HI V4SI V4HF V4SF
V8QI V8HI V8SI V8HF V8SF
V16QI V16HI V16SI V16HF V16SF
V32QI V32HI V32SI V32HF V32SF
V64QI V64HI V64SI V64HF V64SF])
(define_mode_iterator V_INT_1REG
[V2QI V2HI V2SI
V4QI V4HI V4SI
V8QI V8HI V8SI
V16QI V16HI V16SI
V32QI V32HI V32SI
V64QI V64HI V64SI])
(define_mode_iterator V_INT_1REG_ALT
[V2QI V2HI V2SI
V4QI V4HI V4SI
V8QI V8HI V8SI
V16QI V16HI V16SI
V32QI V32HI V32SI
V64QI V64HI V64SI])
(define_mode_iterator V_FP_1REG
[V2HF V2SF
V4HF V4SF
V8HF V8SF
V16HF V16SF
V32HF V32SF
V64HF V64SF])
; Vector modes for two vector registers
(define_mode_iterator V_2REG
[V2DI V2DF
V4DI V4DF
V8DI V8DF
V16DI V16DF
V32DI V32DF
V64DI V64DF])
(define_mode_iterator V_2REG_ALT
[V2DI V2DF
V4DI V4DF
V8DI V8DF
V16DI V16DF
V32DI V32DF
V64DI V64DF])
; Vector modes for four vector registers
(define_mode_iterator V_4REG [V2TI V4TI V8TI V16TI V32TI V64TI])
(define_mode_iterator V_4REG_ALT [V2TI V4TI V8TI V16TI V32TI V64TI])
; Vector modes with native support
(define_mode_iterator V_noQI
[V2HI V2HF V2SI V2SF V2DI V2DF
V4HI V4HF V4SI V4SF V4DI V4DF
V8HI V8HF V8SI V8SF V8DI V8DF
V16HI V16HF V16SI V16SF V16DI V16DF
V32HI V32HF V32SI V32SF V32DI V32DF
V64HI V64HF V64SI V64SF V64DI V64DF])
(define_mode_iterator V_noHI
[V2HF V2SI V2SF V2DI V2DF
V4HF V4SI V4SF V4DI V4DF
V8HF V8SI V8SF V8DI V8DF
V16HF V16SI V16SF V16DI V16DF
V32HF V32SI V32SF V32DI V32DF
V64HF V64SI V64SF V64DI V64DF])
(define_mode_iterator V_INT_noQI
[V2HI V2SI V2DI
V4HI V4SI V4DI
V8HI V8SI V8DI
V16HI V16SI V16DI
V32HI V32SI V32DI
V64HI V64SI V64DI])
(define_mode_iterator V_INT_noHI
[V2SI V2DI
V4SI V4DI
V8SI V8DI
V16SI V16DI
V32SI V32DI
V64SI V64DI])
(define_mode_iterator SV_SFDF
[SF DF
V2SF V2DF
V4SF V4DF
V8SF V8DF
V16SF V16DF
V32SF V32DF
V64SF V64DF])
; All modes in which we want to do more than just moves.
(define_mode_iterator V_ALL
[V2QI V2HI V2HF V2SI V2SF V2DI V2DF
V4QI V4HI V4HF V4SI V4SF V4DI V4DF
V8QI V8HI V8HF V8SI V8SF V8DI V8DF
V16QI V16HI V16HF V16SI V16SF V16DI V16DF
V32QI V32HI V32HF V32SI V32SF V32DI V32DF
V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
(define_mode_iterator V_ALL_ALT
[V2QI V2HI V2HF V2SI V2SF V2DI V2DF
V4QI V4HI V4HF V4SI V4SF V4DI V4DF
V8QI V8HI V8HF V8SI V8SF V8DI V8DF
V16QI V16HI V16HF V16SI V16SF V16DI V16DF
V32QI V32HI V32HF V32SI V32SF V32DI V32DF
V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
(define_mode_iterator V_INT
[V2QI V2HI V2SI V2DI
V4QI V4HI V4SI V4DI
V8QI V8HI V8SI V8DI
V16QI V16HI V16SI V16DI
V32QI V32HI V32SI V32DI
V64QI V64HI V64SI V64DI])
(define_mode_iterator V_FP
[V2HF V2SF V2DF
V4HF V4SF V4DF
V8HF V8SF V8DF
V16HF V16SF V16DF
V32HF V32SF V32DF
V64HF V64SF V64DF])
(define_mode_iterator SV_FP
[HF SF DF
V2HF V2SF V2DF
V4HF V4SF V4DF
V8HF V8SF V8DF
V16HF V16SF V16DF
V32HF V32SF V32DF
V64HF V64SF V64DF])
; All modes that need moves, including those without many insns.
(define_mode_iterator V_MOV
[V2QI V2HI V2HF V2SI V2SF V2DI V2DF V2TI
V4QI V4HI V4HF V4SI V4SF V4DI V4DF V4TI
V8QI V8HI V8HF V8SI V8SF V8DI V8DF V8TI
V16QI V16HI V16HF V16SI V16SF V16DI V16DF V16TI
V32QI V32HI V32HF V32SI V32SF V32DI V32DF V32TI
V64QI V64HI V64HF V64SI V64SF V64DI V64DF V64TI])
(define_mode_iterator V_MOV_ALT
[V2QI V2HI V2HF V2SI V2SF V2DI V2DF V2TI
V4QI V4HI V4HF V4SI V4SF V4DI V4DF V4TI
V8QI V8HI V8HF V8SI V8SF V8DI V8DF V8TI
V16QI V16HI V16HF V16SI V16SF V16DI V16DF V16TI
V32QI V32HI V32HF V32SI V32SF V32DI V32DF V32TI
V64QI V64HI V64HF V64SI V64SF V64DI V64DF V64TI])
(define_mode_attr scalar_mode
[(QI "qi") (HI "hi") (SI "si") (TI "ti")
(HF "hf") (SF "sf") (DI "di") (DF "df")
(V2QI "qi") (V2HI "hi") (V2SI "si") (V2TI "ti")
(V2HF "hf") (V2SF "sf") (V2DI "di") (V2DF "df")
(V4QI "qi") (V4HI "hi") (V4SI "si") (V4TI "ti")
(V4HF "hf") (V4SF "sf") (V4DI "di") (V4DF "df")
(V8QI "qi") (V8HI "hi") (V8SI "si") (V8TI "ti")
(V8HF "hf") (V8SF "sf") (V8DI "di") (V8DF "df")
(V16QI "qi") (V16HI "hi") (V16SI "si") (V16TI "ti")
(V16HF "hf") (V16SF "sf") (V16DI "di") (V16DF "df")
(V32QI "qi") (V32HI "hi") (V32SI "si") (V32TI "ti")
(V32HF "hf") (V32SF "sf") (V32DI "di") (V32DF "df")
(V64QI "qi") (V64HI "hi") (V64SI "si") (V64TI "ti")
(V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])
(define_mode_attr SCALAR_MODE
[(QI "QI") (HI "HI") (SI "SI") (TI "TI")
(HF "HF") (SF "SF") (DI "DI") (DF "DF")
(V2QI "QI") (V2HI "HI") (V2SI "SI") (V2TI "TI")
(V2HF "HF") (V2SF "SF") (V2DI "DI") (V2DF "DF")
(V4QI "QI") (V4HI "HI") (V4SI "SI") (V4TI "TI")
(V4HF "HF") (V4SF "SF") (V4DI "DI") (V4DF "DF")
(V8QI "QI") (V8HI "HI") (V8SI "SI") (V8TI "TI")
(V8HF "HF") (V8SF "SF") (V8DI "DI") (V8DF "DF")
(V16QI "QI") (V16HI "HI") (V16SI "SI") (V16TI "TI")
(V16HF "HF") (V16SF "SF") (V16DI "DI") (V16DF "DF")
(V32QI "QI") (V32HI "HI") (V32SI "SI") (V32TI "TI")
(V32HF "HF") (V32SF "SF") (V32DI "DI") (V32DF "DF")
(V64QI "QI") (V64HI "HI") (V64SI "SI") (V64TI "TI")
(V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])
(define_mode_attr vnsi
[(QI "si") (HI "si") (SI "si") (TI "si")
(HF "si") (SF "si") (DI "si") (DF "si")
(V2QI "v2si") (V2HI "v2si") (V2HF "v2si") (V2SI "v2si")
(V2SF "v2si") (V2DI "v2si") (V2DF "v2si") (V2TI "v2si")
(V4QI "v4si") (V4HI "v4si") (V4HF "v4si") (V4SI "v4si")
(V4SF "v4si") (V4DI "v4si") (V4DF "v4si") (V4TI "v4si")
(V8QI "v8si") (V8HI "v8si") (V8HF "v8si") (V8SI "v8si")
(V8SF "v8si") (V8DI "v8si") (V8DF "v8si") (V8TI "v8si")
(V16QI "v16si") (V16HI "v16si") (V16HF "v16si") (V16SI "v16si")
(V16SF "v16si") (V16DI "v16si") (V16DF "v16si") (V16TI "v16si")
(V32QI "v32si") (V32HI "v32si") (V32HF "v32si") (V32SI "v32si")
(V32SF "v32si") (V32DI "v32si") (V32DF "v32si") (V32TI "v32si")
(V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si")
(V64SF "v64si") (V64DI "v64si") (V64DF "v64si") (V64TI "v64si")])
(define_mode_attr VnSI
[(QI "SI") (HI "SI") (SI "SI") (TI "SI")
(HF "SI") (SF "SI") (DI "SI") (DF "SI")
(V2QI "V2SI") (V2HI "V2SI") (V2HF "V2SI") (V2SI "V2SI")
(V2SF "V2SI") (V2DI "V2SI") (V2DF "V2SI") (V2TI "V2SI")
(V4QI "V4SI") (V4HI "V4SI") (V4HF "V4SI") (V4SI "V4SI")
(V4SF "V4SI") (V4DI "V4SI") (V4DF "V4SI") (V4TI "V4SI")
(V8QI "V8SI") (V8HI "V8SI") (V8HF "V8SI") (V8SI "V8SI")
(V8SF "V8SI") (V8DI "V8SI") (V8DF "V8SI") (V8TI "V8SI")
(V16QI "V16SI") (V16HI "V16SI") (V16HF "V16SI") (V16SI "V16SI")
(V16SF "V16SI") (V16DI "V16SI") (V16DF "V16SI") (V16TI "V16SI")
(V32QI "V32SI") (V32HI "V32SI") (V32HF "V32SI") (V32SI "V32SI")
(V32SF "V32SI") (V32DI "V32SI") (V32DF "V32SI") (V32TI "V32SI")
(V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI")
(V64SF "V64SI") (V64DI "V64SI") (V64DF "V64SI") (V64TI "V64SI")])
(define_mode_attr vndi
[(V2QI "v2di") (V2HI "v2di") (V2HF "v2di") (V2SI "v2di")
(V2SF "v2di") (V2DI "v2di") (V2DF "v2di") (V2TI "v2di")
(V4QI "v4di") (V4HI "v4di") (V4HF "v4di") (V4SI "v4di")
(V4SF "v4di") (V4DI "v4di") (V4DF "v4di") (V4TI "v4di")
(V8QI "v8di") (V8HI "v8di") (V8HF "v8di") (V8SI "v8di")
(V8SF "v8di") (V8DI "v8di") (V8DF "v8di") (V8TI "v8di")
(V16QI "v16di") (V16HI "v16di") (V16HF "v16di") (V16SI "v16di")
(V16SF "v16di") (V16DI "v16di") (V16DF "v16di") (V16TI "v16di")
(V32QI "v32di") (V32HI "v32di") (V32HF "v32di") (V32SI "v32di")
(V32SF "v32di") (V32DI "v32di") (V32DF "v32di") (V32TI "v32di")
(V64QI "v64di") (V64HI "v64di") (V64HF "v64di") (V64SI "v64di")
(V64SF "v64di") (V64DI "v64di") (V64DF "v64di") (V64TI "v64di")])
(define_mode_attr VnDI
[(V2QI "V2DI") (V2HI "V2DI") (V2HF "V2DI") (V2SI "V2DI")
(V2SF "V2DI") (V2DI "V2DI") (V2DF "V2DI") (V2TI "V2DI")
(V4QI "V4DI") (V4HI "V4DI") (V4HF "V4DI") (V4SI "V4DI")
(V4SF "V4DI") (V4DI "V4DI") (V4DF "V4DI") (V4TI "V4DI")
(V8QI "V8DI") (V8HI "V8DI") (V8HF "V8DI") (V8SI "V8DI")
(V8SF "V8DI") (V8DI "V8DI") (V8DF "V8DI") (V8TI "V8DI")
(V16QI "V16DI") (V16HI "V16DI") (V16HF "V16DI") (V16SI "V16DI")
(V16SF "V16DI") (V16DI "V16DI") (V16DF "V16DI") (V16TI "V16DI")
(V32QI "V32DI") (V32HI "V32DI") (V32HF "V32DI") (V32SI "V32DI")
(V32SF "V32DI") (V32DI "V32DI") (V32DF "V32DI") (V32TI "V32DI")
(V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI")
(V64SF "V64DI") (V64DI "V64DI") (V64DF "V64DI") (V64TI "V64DI")])
(define_mode_attr sdwa
[(V2QI "BYTE_0") (V2HI "WORD_0") (V2SI "DWORD")
(V4QI "BYTE_0") (V4HI "WORD_0") (V4SI "DWORD")
(V8QI "BYTE_0") (V8HI "WORD_0") (V8SI "DWORD")
(V16QI "BYTE_0") (V16HI "WORD_0") (V16SI "DWORD")
(V32QI "BYTE_0") (V32HI "WORD_0") (V32SI "DWORD")
(V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")])
;; }}}
;; {{{ Substitutions
(define_subst_attr "exec" "vec_merge"
"" "_exec")
(define_subst_attr "exec_clobber" "vec_merge_with_clobber"
"" "_exec")
(define_subst_attr "exec_vcc" "vec_merge_with_vcc"
"" "_exec")
(define_subst_attr "exec_scatter" "scatter_store"
"" "_exec")
(define_subst "vec_merge"
[(set (match_operand:V_MOV 0)
(match_operand:V_MOV 1))]
""
[(set (match_dup 0)
(vec_merge:V_MOV
(match_dup 1)
(match_operand:V_MOV 3 "gcn_register_or_unspec_operand" "U0")
(match_operand:DI 4 "gcn_exec_reg_operand" "e")))])
(define_subst "vec_merge_with_clobber"
[(set (match_operand:V_MOV 0)
(match_operand:V_MOV 1))
(clobber (match_operand 2))]
""
[(set (match_dup 0)
(vec_merge:V_MOV
(match_dup 1)
(match_operand:V_MOV 3 "gcn_register_or_unspec_operand" "U0")
(match_operand:DI 4 "gcn_exec_reg_operand" "e")))
(clobber (match_dup 2))])
(define_subst "vec_merge_with_vcc"
[(set (match_operand:V_MOV 0)
(match_operand:V_MOV 1))
(set (match_operand:DI 2)
(match_operand:DI 3))]
""
[(parallel
[(set (match_dup 0)
(vec_merge:V_MOV
(match_dup 1)
(match_operand:V_MOV 4 "gcn_register_or_unspec_operand" "U0")
(match_operand:DI 5 "gcn_exec_reg_operand" "e")))
(set (match_dup 2)
(and:DI (match_dup 3)
(reg:DI EXEC_REG)))])])
(define_subst "scatter_store"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_operand 0)
(match_operand 1)
(match_operand 2)
(match_operand 3)]
UNSPEC_SCATTER))]
""
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_dup 0)
(match_dup 1)
(match_dup 2)
(match_dup 3)
(match_operand:DI 4 "gcn_exec_reg_operand" "e")]
UNSPEC_SCATTER))])
;; }}}
;; {{{ Vector moves
; This is the entry point for all vector register moves. Memory accesses can
; come this way also, but will more usually use the reload_in/out,
; gather/scatter, maskload/store, etc.
(define_expand "mov"
[(set (match_operand:V_MOV 0 "nonimmediate_operand")
(match_operand:V_MOV 1 "general_operand"))]
""
{
/* Bitwise reinterpret casts via SUBREG don't work with GCN vector
registers, but we can convert the MEM to a mode that does work. */
if (MEM_P (operands[0]) && !SUBREG_P (operands[0])
&& SUBREG_P (operands[1])
&& GET_MODE_SIZE (GET_MODE (operands[1]))
== GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))))
{
rtx src = SUBREG_REG (operands[1]);
rtx mem = copy_rtx (operands[0]);
PUT_MODE_RAW (mem, GET_MODE (src));
emit_move_insn (mem, src);
DONE;
}
if (MEM_P (operands[1]) && !SUBREG_P (operands[1])
&& SUBREG_P (operands[0])
&& GET_MODE_SIZE (GET_MODE (operands[0]))
== GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[0]))))
{
rtx dest = SUBREG_REG (operands[0]);
rtx mem = copy_rtx (operands[1]);
PUT_MODE_RAW (mem, GET_MODE (dest));
emit_move_insn (dest, mem);
DONE;
}
/* SUBREG of MEM is not supported. */
gcc_assert ((!SUBREG_P (operands[0])
|| !MEM_P (SUBREG_REG (operands[0])))
&& (!SUBREG_P (operands[1])
|| !MEM_P (SUBREG_REG (operands[1]))));
if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed)
{
operands[1] = force_reg (<MODE>mode, operands[1]);
rtx scratch = gen_rtx_SCRATCH (<VnDI>mode);
rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
operands[0],
scratch);
emit_insn (gen_scatter<mode>_expr (expr, operands[1], a, v));
DONE;
}
else if (MEM_P (operands[1]) && !lra_in_progress && !reload_completed)
{
rtx scratch = gen_rtx_SCRATCH (<VnDI>mode);
rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
operands[1],
scratch);
emit_insn (gen_gather<mode>_expr (operands[0], expr, a, v));
DONE;
}
else if ((MEM_P (operands[0]) || MEM_P (operands[1])))
{
gcc_assert (!reload_completed);
rtx scratch = gen_reg_rtx (<VnDI>mode);
emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], scratch));
DONE;
}
})
; A pseudo instruction that helps LRA use the "U0" constraint.
(define_insn "mov_unspec"
[(set (match_operand:V_MOV 0 "nonimmediate_operand" "=v")
(match_operand:V_MOV 1 "gcn_unspec_operand" " U"))]
""
""
[(set_attr "type" "unknown")
(set_attr "length" "0")])
(define_insn "*mov"
[(set (match_operand:V_1REG 0 "nonimmediate_operand")
(match_operand:V_1REG 1 "general_operand"))]
""
{@ [cons: =0, 1; attrs: type, length, cdna]
[v ,vA;vop1 ,4,* ] v_mov_b32\t%0, %1
[v ,B ;vop1 ,8,* ] ^
[v ,a ;vop3p_mai,8,* ] v_accvgpr_read_b32\t%0, %1
[$a ,v ;vop3p_mai,8,* ] v_accvgpr_write_b32\t%0, %1
[a ,a ;vop1 ,4,cdna2] v_accvgpr_mov_b32\t%0, %1
})
(define_insn "mov_exec"
[(set (match_operand:V_1REG 0 "nonimmediate_operand")
(vec_merge:V_1REG
(match_operand:V_1REG 1 "general_operand")
(match_operand:V_1REG 2 "gcn_alu_or_unspec_operand")
(match_operand:DI 3 "register_operand")))
(clobber (match_scratch:<VnDI> 4))]
"!MEM_P (operands[0]) || REG_P (operands[1])"
{@ [cons: =0, 1, 2, 3, =4; attrs: type, length]
[v,vA,U0,e ,X ;vop1 ,4 ] v_mov_b32\t%0, %1
[v,B ,U0,e ,X ;vop1 ,8 ] v_mov_b32\t%0, %1
[v,v ,vA,cV,X ;vop2 ,4 ] v_cndmask_b32\t%0, %2, %1, vcc
[v,vA,vA,Sv,X ;vop3a,8 ] v_cndmask_b32\t%0, %2, %1, %3
[v,m ,U0,e ,&v;* ,16] #
[m,v ,U0,e ,&v;* ,16] #
})
; This variant does not accept an unspec, but does permit MEM
; read/modify/write which is necessary for maskstore.
;(define_insn "*mov_exec_match"
; [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v, v, m")
; (vec_merge:V_1REG
; (match_operand:V_1REG 1 "general_operand" "vA,B, m, v")
; (match_dup 0)
; (match_operand:DI 2 "gcn_exec_reg_operand" " e,e, e, e")))
; (clobber (match_scratch: 3 "=X,X,&v,&v"))]
; "!MEM_P (operands[0]) || REG_P (operands[1])"
; "@
; v_mov_b32\t%0, %1
; v_mov_b32\t%0, %1
; #
; #"
; [(set_attr "type" "vop1,vop1,*,*")
; (set_attr "length" "4,8,16,16")])
(define_insn "*mov"
[(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v, v,$a,a")
(match_operand:V_2REG 1 "general_operand" "vDB,a, v,a"))]
""
"@
* if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
else \
return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
* if (REGNO (operands[0]) <= REGNO (operands[1])) \
return \"v_accvgpr_read_b32\t%L0, %L1\;v_accvgpr_read_b32\t%H0, %H1\"; \
else \
return \"v_accvgpr_read_b32\t%H0, %H1\;v_accvgpr_read_b32\t%L0, %L1\";
* if (REGNO (operands[0]) <= REGNO (operands[1])) \
return \"v_accvgpr_write_b32\t%L0, %L1\;v_accvgpr_write_b32\t%H0, %H1\"; \
else \
return \"v_accvgpr_write_b32\t%H0, %H1\;v_accvgpr_write_b32\t%L0, %L1\";
* if (REGNO (operands[0]) <= REGNO (operands[1])) \
return \"v_accvgpr_mov_b32\t%L0, %L1\;v_accvgpr_mov_b32\t%H0, %H1\"; \
else \
return \"v_accvgpr_mov_b32\t%H0, %H1\;v_accvgpr_mov_b32\t%L0, %L1\";"
[(set_attr "type" "vmult,vmult,vmult,vmult")
(set_attr "length" "16,16,16,8")
(set_attr "cdna" "*,*,*,cdna2")])
(define_insn "mov_exec"
[(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, v, v, m")
(vec_merge:V_2REG
(match_operand:V_2REG 1 "general_operand" "vDB, v0, v0, m, v")
(match_operand:V_2REG 2 "gcn_alu_or_unspec_operand"
" U0,vDA0,vDA0,U0,U0")
(match_operand:DI 3 "register_operand" " e, cV, Sv, e, e")))
(clobber (match_scratch: 4 "= X, X, X,&v,&v"))]
"!MEM_P (operands[0]) || REG_P (operands[1])"
{
if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
switch (which_alternative)
{
case 0:
return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
case 1:
return "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
"v_cndmask_b32\t%H0, %H2, %H1, vcc";
case 2:
return "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
"v_cndmask_b32\t%H0, %H2, %H1, %3";
}
else
switch (which_alternative)
{
case 0:
return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
case 1:
return "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
"v_cndmask_b32\t%L0, %L2, %L1, vcc";
case 2:
return "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
"v_cndmask_b32\t%L0, %L2, %L1, %3";
}
return "#";
}
[(set_attr "type" "vmult,vmult,vmult,*,*")
(set_attr "length" "16,16,16,16,16")])
(define_insn "*mov_4reg"
[(set (match_operand:V_4REG 0 "nonimmediate_operand")
(match_operand:V_4REG 1 "general_operand"))]
""
{@ [cons: =0, 1; attrs: type, length, cdna]
[v ,vDB;vmult,16,* ] v_mov_b32\t%L0, %L1\; v_mov_b32\t%H0, %H1\; v_mov_b32\t%J0, %J1\; v_mov_b32\t%K0, %K1
[v ,a ;vmult,32,* ] v_accvgpr_read_b32\t%L0, %L1\; v_accvgpr_read_b32\t%H0, %H1\; v_accvgpr_read_b32\t%J0, %J1\; v_accvgpr_read_b32\t%K0, %K1
[$a,v ;vmult,32,* ] v_accvgpr_write_b32\t%L0, %L1\;v_accvgpr_write_b32\t%H0, %H1\;v_accvgpr_write_b32\t%J0, %J1\;v_accvgpr_write_b32\t%K0, %K1
[a ,a ;vmult,32,cdna2] v_accvgpr_mov_b32\t%L0, %L1\; v_accvgpr_mov_b32\t%H0, %H1\; v_accvgpr_mov_b32\t%J0, %J1\; v_accvgpr_mov_b32\t%K0, %K1
})
(define_insn "mov_exec"
[(set (match_operand:V_4REG 0 "nonimmediate_operand" "= v, v, v, v, m")
(vec_merge:V_4REG
(match_operand:V_4REG 1 "general_operand" "vDB, v0, v0, m, v")
(match_operand:V_4REG 2 "gcn_alu_or_unspec_operand"
" U0,vDA0,vDA0,U0,U0")
(match_operand:DI 3 "register_operand" " e, cV, Sv, e, e")))
(clobber (match_scratch: 4 "= X, X, X,&v,&v"))]
"!MEM_P (operands[0]) || REG_P (operands[1])"
{
if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
switch (which_alternative)
{
case 0:
return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;"
"v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1";
case 1:
return "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
"v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
"v_cndmask_b32\t%J0, %J2, %J1, vcc\;"
"v_cndmask_b32\t%K0, %K2, %K1, vcc";
case 2:
return "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
"v_cndmask_b32\t%H0, %H2, %H1, %3\;"
"v_cndmask_b32\t%J0, %J2, %J1, %3\;"
"v_cndmask_b32\t%K0, %K2, %K1, %3";
}
else
switch (which_alternative)
{
case 0:
return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\;"
"v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1";
case 1:
return "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
"v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
"v_cndmask_b32\t%J0, %J2, %J1, vcc\;"
"v_cndmask_b32\t%K0, %K2, %K1, vcc";
case 2:
return "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
"v_cndmask_b32\t%L0, %L2, %L1, %3\;"
"v_cndmask_b32\t%J0, %J2, %J1, %3\;"
"v_cndmask_b32\t%K0, %K2, %K1, %3";
}
return "#";
}
[(set_attr "type" "vmult,vmult,vmult,*,*")
(set_attr "length" "32")])
; This variant does not accept an unspec, but does permit MEM
; read/modify/write which is necessary for maskstore.
;(define_insn "*mov_exec_match"
; [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v, v, m")
; (vec_merge:V_2REG
; (match_operand:V_2REG 1 "general_operand" "vDB, m, v")
; (match_dup 0)
; (match_operand:DI 2 "gcn_exec_reg_operand" " e, e, e")))
; (clobber (match_scratch: 3 "=X,&v,&v"))]
; "!MEM_P (operands[0]) || REG_P (operands[1])"
; "@
; * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
; return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
; else \
; return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
; #
; #"
; [(set_attr "type" "vmult,*,*")
; (set_attr "length" "16,16,16")])
; An SGPR-base load looks like:
;   <load> v, Sv
;
; There's no hardware instruction that corresponds to this, but vector base
; addresses are placed in an SGPR because it is easier to add to a vector.
; We also have a temporary vT, and the vector v1 holding numbered lanes.
;
; Rewrite as:
; vT = v1 << log2(element-size)
; vT += Sv
; flat_load v, vT
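;
; For example, with 32-bit elements in a flat address space this comes out
; roughly as follows (illustrative only; the exact opcodes and the 64-bit
; address carry handling depend on the ISA generation and address space):
;   v_lshlrev_b32   vT, 2, v1       ; lane number * 4 (the element size)
;   v_add_co_u32    vT, vcc, Sv, vT ; add scalar base, plus a carry add for
;                                   ; the high 32 bits of the 64-bit address
;   flat_load_dword v, vT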
(define_insn "@mov_sgprbase"
[(set (match_operand:V_1REG 0 "nonimmediate_operand")
(unspec:V_1REG
[(match_operand:V_1REG 1 "general_operand")]
UNSPEC_SGPRBASE))
(clobber (match_operand: 2 "register_operand"))]
"lra_in_progress || reload_completed"
{@ [cons: =0, 1, =2; attrs: type, length, cdna]
[v,vA,&v;vop1,4 ,* ] v_mov_b32\t%0, %1
[v,vB,&v;vop1,8 ,* ] ^
[v,m ,&v;* ,12,* ] #
[m,v ,&v;* ,12,* ] #
[a,m ,&v;* ,12,cdna2] #
[m,a ,&v;* ,12,cdna2] #
})
(define_insn "@mov_sgprbase"
[(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, m, a, m")
(unspec:V_2REG
[(match_operand:V_2REG 1 "general_operand" "vDB, m, v, m, a")]
UNSPEC_SGPRBASE))
(clobber (match_operand: 2 "register_operand" "=&v,&v,&v,&v,&v"))]
"lra_in_progress || reload_completed"
"@
* if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
else \
return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
#
#
#
#"
[(set_attr "type" "vmult,*,*,*,*")
(set_attr "length" "8,12,12,12,12")
(set_attr "cdna" "*,*,*,cdna2,cdna2")])
(define_insn "@mov_sgprbase"
[(set (match_operand:V_4REG 0 "nonimmediate_operand")
(unspec:V_4REG
[(match_operand:V_4REG 1 "general_operand")]
UNSPEC_SGPRBASE))
(clobber (match_operand: 2 "register_operand"))]
"lra_in_progress || reload_completed"
{@ [cons: =0, 1, =2; attrs: type, length]
[v,vDB,&v;vmult,8 ] v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1
[v,m ,&v;* ,12] #
[m,v ,&v;* ,12] #
})
; Expand scalar addresses into gather/scatter patterns
(define_split
[(set (match_operand:V_MOV 0 "memory_operand")
(unspec:V_MOV
[(match_operand:V_MOV 1 "general_operand")]
UNSPEC_SGPRBASE))
(clobber (match_scratch:<VnDI> 2))]
""
[(set (mem:BLK (scratch))
(unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)]
UNSPEC_SCATTER))]
{
operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
operands[0],
operands[2]);
operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
})
(define_split
[(set (match_operand:V_MOV 0 "memory_operand")
(vec_merge:V_MOV
(match_operand:V_MOV 1 "general_operand")
(match_operand:V_MOV 2 "")
(match_operand:DI 3 "gcn_exec_reg_operand")))
(clobber (match_scratch:<VnDI> 4))]
""
[(set (mem:BLK (scratch))
(unspec:BLK [(match_dup 5) (match_dup 1)
(match_dup 6) (match_dup 7) (match_dup 3)]
UNSPEC_SCATTER))]
{
operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
operands[3],
operands[0],
operands[4]);
operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
})
(define_split
[(set (match_operand:V_MOV 0 "nonimmediate_operand")
(unspec:V_MOV
[(match_operand:V_MOV 1 "memory_operand")]
UNSPEC_SGPRBASE))
(clobber (match_scratch:<VnDI> 2))]
""
[(set (match_dup 0)
(unspec:V_MOV [(match_dup 5) (match_dup 6) (match_dup 7)
(mem:BLK (scratch))]
UNSPEC_GATHER))]
{
operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
operands[1],
operands[2]);
operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
})
(define_split
[(set (match_operand:V_MOV 0 "nonimmediate_operand")
(vec_merge:V_MOV
(match_operand:V_MOV 1 "memory_operand")
(match_operand:V_MOV 2 "")
(match_operand:DI 3 "gcn_exec_reg_operand")))
(clobber (match_scratch:<VnDI> 4))]
""
[(set (match_dup 0)
(vec_merge:V_MOV
(unspec:V_MOV [(match_dup 5) (match_dup 6) (match_dup 7)
(mem:BLK (scratch))]
UNSPEC_GATHER)
(match_dup 2)
(match_dup 3)))]
{
operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
operands[3],
operands[1],
operands[4]);
operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
})
; TODO: Add zero/sign extending variants.
;; }}}
;; {{{ Lane moves
; v_writelane and v_readlane work regardless of exec flags.
; We allow source to be scratch.
;
; FIXME these should take A immediates
(define_insn "*vec_set"
[(set (match_operand:V_1REG 0 "register_operand" "= v")
(vec_merge:V_1REG
(vec_duplicate:V_1REG
(match_operand: 1 "register_operand" " Sv"))
(match_operand:V_1REG 3 "gcn_register_or_unspec_operand" " U0")
(ashift (const_int 1)
(match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
""
"v_writelane_b32 %0, %1, %2"
[(set_attr "type" "vop3a")
(set_attr "length" "8")
(set_attr "exec" "none")
(set_attr "laneselect" "yes")])
; FIXME: 64bit operations really should be splitters, but I am not sure how
; to represent vertical subregs.
(define_insn "*vec_set"
[(set (match_operand:V_2REG 0 "register_operand" "= v")
(vec_merge:V_2REG
(vec_duplicate:V_2REG
(match_operand: 1 "register_operand" " Sv"))
(match_operand:V_2REG 3 "gcn_register_or_unspec_operand" " U0")
(ashift (const_int 1)
(match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
""
"v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
[(set_attr "type" "vmult")
(set_attr "length" "16")
(set_attr "exec" "none")
(set_attr "laneselect" "yes")])
(define_expand "vec_set"
[(set (match_operand:V_MOV 0 "register_operand")
(vec_merge:V_MOV
(vec_duplicate:V_MOV
(match_operand: 1 "register_operand"))
(match_dup 0)
(ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
"")
(define_insn "*vec_set_1"
[(set (match_operand:V_1REG 0 "register_operand" "=v")
(vec_merge:V_1REG
(vec_duplicate:V_1REG
(match_operand: 1 "register_operand" "Sv"))
(match_operand:V_1REG 3 "gcn_register_or_unspec_operand" "U0")
(match_operand:SI 2 "const_int_operand" " i")))]
"((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (mode))"
{
operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
return "v_writelane_b32 %0, %1, %2";
}
[(set_attr "type" "vop3a")
(set_attr "length" "8")
(set_attr "exec" "none")
(set_attr "laneselect" "yes")])
(define_insn "*vec_set_1"
[(set (match_operand:V_2REG 0 "register_operand" "=v")
(vec_merge:V_2REG
(vec_duplicate:V_2REG
(match_operand: 1 "register_operand" "Sv"))
(match_operand:V_2REG 3 "gcn_register_or_unspec_operand" "U0")
(match_operand:SI 2 "const_int_operand" " i")))]
"((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (mode))"
{
operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2";
}
[(set_attr "type" "vmult")
(set_attr "length" "16")
(set_attr "exec" "none")
(set_attr "laneselect" "yes")])
(define_insn "vec_duplicate"
[(set (match_operand:V_1REG 0 "register_operand" "=v")
(vec_duplicate:V_1REG
(match_operand: 1 "gcn_alu_operand" "SvB")))]
""
"v_mov_b32\t%0, %1"
[(set_attr "type" "vop3a")
(set_attr "length" "8")])
(define_insn "vec_duplicate"
[(set (match_operand:V_2REG 0 "register_operand" "= v")
(vec_duplicate:V_2REG
(match_operand: 1 "gcn_alu_operand" "SvDB")))]
""
"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
[(set_attr "type" "vop3a")
(set_attr "length" "16")])
(define_insn "vec_duplicate"
[(set (match_operand:V_4REG 0 "register_operand" "= v")
(vec_duplicate:V_4REG
(match_operand: 1 "gcn_alu_operand" "SvDB")))]
""
"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1"
[(set_attr "type" "mult")
(set_attr "length" "32")])
(define_insn "vec_extract"
[(set (match_operand: 0 "register_operand" "=Sg")
(vec_select:
(match_operand:V_1REG 1 "register_operand" " v")
(parallel [(match_operand:SI 2 "gcn_alu_operand" "SvB")])))]
""
"v_readlane_b32 %0, %1, %2"
[(set_attr "type" "vop3a")
(set_attr "length" "8")
(set_attr "exec" "none")
(set_attr "laneselect" "yes")])
(define_insn "vec_extract"
[(set (match_operand: 0 "register_operand" "=&Sg")
(vec_select:
(match_operand:V_2REG 1 "register_operand" " v")
(parallel [(match_operand:SI 2 "gcn_alu_operand" " SvB")])))]
""
"v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2"
[(set_attr "type" "vmult")
(set_attr "length" "16")
(set_attr "exec" "none")
(set_attr "laneselect" "yes")])
(define_insn "vec_extract"
[(set (match_operand: 0 "register_operand" "=&Sg")
(vec_select:
(match_operand:V_4REG 1 "register_operand" " v")
(parallel [(match_operand:SI 2 "gcn_alu_operand" " SvB")])))]
""
"v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2\;v_readlane_b32 %J0, %J1, %2\;v_readlane_b32 %K0, %K1, %2"
[(set_attr "type" "vmult")
(set_attr "length" "32")
(set_attr "exec" "none")
(set_attr "laneselect" "yes")])
(define_insn "vec_extract_nop"
[(set (match_operand:V_1REG_ALT 0 "register_operand" "=v,v")
(vec_select:V_1REG_ALT
(match_operand:V_1REG 1 "register_operand" " 0,v")
(match_operand 2 "ascending_zero_int_parallel" "")))]
"MODE_VF (mode) < MODE_VF (mode)
&& mode == mode
/* This comment silences a warning for operands[2]. */"
"@
; in-place extract %0
v_mov_b32\t%L0, %L1"
[(set_attr "type" "vmult")
(set_attr "length" "0,8")])
(define_insn "vec_extract_nop"
[(set (match_operand:V_2REG_ALT 0 "register_operand" "=v,v")
(vec_select:V_2REG_ALT
(match_operand:V_2REG 1 "register_operand" " 0,v")
(match_operand 2 "ascending_zero_int_parallel" "")))]
"MODE_VF (mode) < MODE_VF (mode)
&& mode == mode
/* This comment silences a warning for operands[2]. */"
"@
; in-place extract %0
v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
[(set_attr "type" "vmult")
(set_attr "length" "0,8")])
(define_insn "vec_extract_nop"
[(set (match_operand:V_4REG_ALT 0 "register_operand" "=v,v")
(vec_select:V_4REG_ALT
(match_operand:V_4REG 1 "register_operand" " 0,v")
(match_operand 2 "ascending_zero_int_parallel" "")))]
"MODE_VF (mode) < MODE_VF (mode)
&& mode == mode"
"@
; in-place extract %0
v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1"
[(set_attr "type" "vmult")
(set_attr "length" "0,16")])
(define_expand "vec_extract"
[(match_operand:V_MOV_ALT 0 "register_operand")
(match_operand:V_MOV 1 "register_operand")
(match_operand 2 "immediate_operand")]
"MODE_VF (mode) < MODE_VF (mode)
&& mode == mode
&& (!TARGET_WAVE64_COMPAT || MODE_VF (mode) <= 32)"
{
int numlanes = GET_MODE_NUNITS (<V_MOV_ALT:MODE>mode);
int firstlane = INTVAL (operands[2]) * numlanes;
rtx tmp;
if (firstlane == 0)
{
rtx parallel = gen_rtx_PARALLEL (<V_MOV:MODE>mode,
rtvec_alloc (numlanes));
for (int i = 0; i < numlanes; i++)
XVECEXP (parallel, 0, i) = GEN_INT (i);
emit_insn (gen_vec_extract<V_MOV:mode><V_MOV_ALT:mode>_nop
(operands[0], operands[1], parallel));
} else {
/* FIXME: optimize this by using DPP where available. */
rtx permutation = gen_reg_rtx (<V_MOV:VnSI>mode);
emit_insn (gen_vec_series<V_MOV:vnsi> (permutation,
GEN_INT (firstlane*4),
GEN_INT (4)));
tmp = gen_reg_rtx (<V_MOV:MODE>mode);
emit_insn (gen_ds_bpermute<V_MOV:mode> (tmp, permutation, operands[1],
get_exec (<V_MOV:MODE>mode)));
emit_move_insn (operands[0],
gen_rtx_SUBREG (<V_MOV_ALT:MODE>mode, tmp, 0));
}
DONE;
})
(define_expand "extract_last_"
[(match_operand: 0 "register_operand")
(match_operand:DI 1 "gcn_alu_operand")
(match_operand:V_MOV 2 "register_operand")]
"can_create_pseudo_p ()"
{
rtx dst = operands[0];
rtx mask = operands[1];
rtx vect = operands[2];
rtx tmpreg = gen_reg_rtx (SImode);
emit_insn (gen_clzdi2 (tmpreg, mask));
emit_insn (gen_subsi3 (tmpreg, GEN_INT (63), tmpreg));
emit_insn (gen_vec_extract<mode><scalar_mode> (dst, vect, tmpreg));
DONE;
})
(define_expand "fold_extract_last_"
[(match_operand: 0 "register_operand")
(match_operand: 1 "gcn_alu_operand")
(match_operand:DI 2 "gcn_alu_operand")
(match_operand:V_MOV 3 "register_operand")]
"can_create_pseudo_p ()"
{
rtx dst = operands[0];
rtx default_value = operands[1];
rtx mask = operands[2];
rtx vect = operands[3];
rtx else_label = gen_label_rtx ();
rtx end_label = gen_label_rtx ();
rtx cond = gen_rtx_EQ (VOIDmode, mask, const0_rtx);
emit_jump_insn (gen_cbranchdi4 (cond, mask, const0_rtx, else_label));
emit_insn (gen_extract_last_<mode> (dst, mask, vect));
emit_jump_insn (gen_jump (end_label));
emit_barrier ();
emit_label (else_label);
emit_move_insn (dst, default_value);
emit_label (end_label);
DONE;
})
(define_expand "vec_init"
[(match_operand:V_MOV 0 "register_operand")
(match_operand 1)]
""
{
gcn_expand_vector_init (operands[0], operands[1]);
DONE;
})
(define_expand "vec_init"
[(match_operand:V_MOV 0 "register_operand")
(match_operand:V_MOV_ALT 1)]
"mode == mode
&& MODE_VF (mode) < MODE_VF (mode)"
{
gcn_expand_vector_init (operands[0], operands[1]);
DONE;
})
;; }}}
;; {{{ Scatter / Gather
;; GCN does not have an instruction for loading a vector from contiguous
;; memory so *all* loads and stores are eventually converted to scatter
;; or gather.
;;
;; GCC does not permit MEM to hold vectors of addresses, so we must use an
;; unspec. The unspec formats are as follows:
;;
;; (unspec:V??
;; [(<address expression>)
;; (<addr_space_t>)
;; (<use_glc>)
;; (mem:BLK (scratch))]
;; UNSPEC_GATHER)
;;
;; (unspec:BLK
;; [(<address expression>)
;; (<source register>)