/* ACLE support for AArch64 SME.
Copyright (C) 2023-2024 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "memmodel.h"
#include "insn-codes.h"
#include "optabs.h"
#include "recog.h"
#include "expr.h"
#include "basic-block.h"
#include "function.h"
#include "fold-const.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "explow.h"
#include "emit-rtl.h"
#include "aarch64-sve-builtins.h"
#include "aarch64-sve-builtins-shapes.h"
#include "aarch64-sve-builtins-base.h"
#include "aarch64-sve-builtins-sme.h"
#include "aarch64-sve-builtins-functions.h"
using namespace aarch64_sve;
namespace {
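/* A base class for the ZA and ZT0 load and store intrinsics.  The base
   pointer accesses a variable number of bytes with no fixed element
   type, so describe the memory as having void scalar type.  */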
class load_store_za_zt0_base : public function_base
{
public:
tree
memory_scalar_type (const function_instance &) const override
{
return void_type_node;
}
};
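/* A base class for intrinsics that read from and write to ZA and whose
   expansion maps directly onto an aarch64_sme pattern selected by the
   unspec code passed to the constructor.  A VNx1TImode ZA suffix means
   that the instruction operates on the whole of ZA, in which case the
   pattern is parameterized by the ZA mode as well as the tuple mode.  */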
class read_write_za_base : public function_base
{
public:
constexpr read_write_za_base (int unspec) : m_unspec (unspec) {}
rtx
expand (function_expander &e) const override
{
auto za_mode = e.vector_mode (0);
auto z_mode = e.tuple_mode (1);
auto icode = (za_mode == VNx1TImode
? code_for_aarch64_sme (m_unspec, za_mode, z_mode)
: code_for_aarch64_sme (m_unspec, z_mode, z_mode));
return e.use_exact_insn (icode);
}
int m_unspec;
};
using load_za_base = add_call_properties<load_store_za_zt0_base,
					 CP_READ_MEMORY | CP_READ_ZA
					 | CP_WRITE_ZA>;
using store_za_base = add_call_properties<load_store_za_zt0_base,
					  CP_WRITE_MEMORY | CP_READ_ZA>;
/* E is a load or store intrinsic that accesses a ZA slice of mode MEM_MODE.
   The intrinsic has a vnum parameter at index ARGNO.  Return true if the
   vnum argument is a constant that is a valid ZA offset for the underlying
   instruction.  (The instruction accepts offsets in the range
   [0, 16 / element-size - 1], so byte accesses, for example, allow
   vnum values 0 to 15.)  */
static bool
has_in_range_vnum_arg (function_expander &e, machine_mode mem_mode,
unsigned int argno)
{
return (e.mode_suffix_id == MODE_vnum
&& CONST_INT_P (e.args[argno])
&& UINTVAL (e.args[argno]) < 16 / GET_MODE_UNIT_SIZE (mem_mode));
}
/* E is a ZA load or store intrinsic that uses instruction ICODE. Add a
32-bit operand that gives the total ZA slice. (The instruction hard-codes
the constant offset to 0, so there is no operand for that.)
Argument ARGNO is the intrinsic's slice argument. If the intrinsic is
a _vnum intrinsic, argument VNUM_ARGNO is the intrinsic's vnum operand,
which must be added to the slice argument. */
static void
add_load_store_slice_operand (function_expander &e, insn_code icode,
unsigned int argno, unsigned int vnum_argno)
{
rtx base = e.args[argno];
if (e.mode_suffix_id == MODE_vnum)
{
rtx vnum = force_lowpart_subreg (SImode, e.args[vnum_argno], DImode);
base = simplify_gen_binary (PLUS, SImode, base, vnum);
}
e.add_input_operand (icode, base);
}
/* Add a memory operand for ZA LD1 or ST1 intrinsic E. BASE_ARGNO is
the index of the base argument. */
static void
add_load_store_operand (function_expander &e, unsigned int base_argno)
{
auto mode = e.vector_mode (0);
rtx base = e.get_contiguous_base (mode, base_argno, base_argno + 1,
AARCH64_FL_SM_ON);
auto mem = gen_rtx_MEM (mode, force_reg (Pmode, base));
set_mem_align (mem, BITS_PER_UNIT);
e.add_fixed_operand (mem);
}
/* Expand ZA LDR or STR intrinsic E. There are two underlying instructions:
- BASE_CODE has a zero ZA slice offset
- VNUM_CODE has a constant operand for the ZA slice offset. */
static rtx
expand_ldr_str_za (function_expander &e, insn_code base_code,
insn_code vnum_code)
{
if (has_in_range_vnum_arg (e, VNx16QImode, 2))
{
rtx mem_offset = aarch64_sme_vq_immediate (Pmode,
UINTVAL (e.args[2]) * 16,
AARCH64_ISA_MODE);
e.add_input_operand (vnum_code, e.args[0]);
e.add_input_operand (vnum_code, e.args[2]);
e.add_input_operand (vnum_code, e.args[1]);
e.add_input_operand (vnum_code, mem_offset);
return e.generate_insn (vnum_code);
}
else
{
rtx base = e.get_contiguous_base (VNx16QImode, 1, 2, AARCH64_FL_SM_ON);
add_load_store_slice_operand (e, base_code, 0, 2);
e.add_input_operand (base_code, base);
return e.generate_insn (base_code);
}
}
/* Use instruction ICODE to expand ZT0 load or store E. */
static rtx
expand_ldr_str_zt0 (function_expander &e, insn_code icode)
{
rtx base = e.convert_to_pmode (e.args[1]);
rtx mem = gen_rtx_MEM (V8DImode, force_reg (Pmode, base));
e.add_fixed_operand (mem);
return e.generate_insn (icode);
}
/* Expand ZA LD1 or ST1 intrinsic E. UNSPEC is the load or store unspec.
IS_LOAD is true if E is a load, false if it is a store. */
static rtx
expand_ld1_st1 (function_expander &e, int unspec, bool is_load)
{
bool is_vnum = has_in_range_vnum_arg (e, e.vector_mode (0), 4);
auto icode = (is_vnum
? code_for_aarch64_sme_plus (unspec, e.vector_mode (0))
: code_for_aarch64_sme (unspec, e.vector_mode (0)));
if (!is_load)
add_load_store_operand (e, 3);
e.add_input_operand (icode, e.args[0]);
if (is_vnum)
{
e.add_input_operand (icode, e.args[1]);
e.add_input_operand (icode, e.args[4]);
}
else
add_load_store_slice_operand (e, icode, 1, 4);
e.add_input_operand (icode, e.args[2]);
if (is_load)
add_load_store_operand (e, 3);
return e.generate_insn (icode);
}
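/* Implements arm_has_sme.  The call folds to 1 when SME is known to be
   available at compile time; otherwise bit 63 of the state returned by
   aarch64_get_sme_state gives the answer at run time.  */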
class arm_has_sme_impl : public function_base
{
gimple *
fold (gimple_folder &f) const override
{
if (TARGET_SME)
return f.fold_to_cstu (1);
return nullptr;
}
rtx
expand (function_expander &e) const override
{
if (TARGET_SME)
return const1_rtx;
emit_insn (gen_aarch64_get_sme_state ());
return expand_simple_binop (DImode, LSHIFTRT,
gen_rtx_REG (DImode, R0_REGNUM),
gen_int_mode (63, QImode),
e.possible_target, true, OPTAB_LIB_WIDEN);
}
};
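/* Implements arm_in_streaming_mode.  The call folds to a constant when
   the streaming state is known at compile time; otherwise bit 0 of SVCR
   gives the answer, read directly when SME is available and via
   aarch64_get_sme_state when it is not.  */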
class arm_in_streaming_mode_impl : public function_base
{
gimple *
fold (gimple_folder &f) const override
{
if (TARGET_STREAMING)
return f.fold_to_cstu (1);
if (TARGET_NON_STREAMING)
return f.fold_to_cstu (0);
return nullptr;
}
rtx
expand (function_expander &e) const override
{
if (TARGET_STREAMING)
return const1_rtx;
if (TARGET_NON_STREAMING)
return const0_rtx;
rtx reg;
if (TARGET_SME)
{
reg = gen_reg_rtx (DImode);
emit_insn (gen_aarch64_read_svcr (reg));
}
else
{
emit_insn (gen_aarch64_get_sme_state ());
reg = gen_rtx_REG (DImode, R0_REGNUM);
}
return expand_simple_binop (DImode, AND, reg, gen_int_mode (1, DImode),
e.possible_target, true, OPTAB_LIB_WIDEN);
}
};
/* Implements svcnts[bhwd]. */
class svcnts_bhwd_impl : public function_base
{
public:
constexpr svcnts_bhwd_impl (machine_mode ref_mode) : m_ref_mode (ref_mode) {}
unsigned int
get_shift () const
{
return exact_log2 (GET_MODE_UNIT_SIZE (m_ref_mode));
}
gimple *
fold (gimple_folder &f) const override
{
if (TARGET_STREAMING)
return f.fold_to_cstu (GET_MODE_NUNITS (m_ref_mode));
return nullptr;
}
rtx
expand (function_expander &e) const override
{
rtx cntsb = aarch64_sme_vq_immediate (DImode, 16, AARCH64_ISA_MODE);
auto shift = get_shift ();
if (!shift)
return cntsb;
return expand_simple_binop (DImode, LSHIFTRT, cntsb,
gen_int_mode (shift, QImode),
e.possible_target, true, OPTAB_LIB_WIDEN);
}
/* The mode of the vector associated with the [bhwd] suffix. */
machine_mode m_ref_mode;
};
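/* Implements svld1_hor_za and svld1_ver_za.  */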
class svld1_za_impl : public load_za_base
{
public:
constexpr svld1_za_impl (int unspec) : m_unspec (unspec) {}
rtx
expand (function_expander &e) const override
{
return expand_ld1_st1 (e, m_unspec, true);
}
int m_unspec;
};
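/* Implements svldr_za.  */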
class svldr_za_impl : public load_za_base
{
public:
rtx
expand (function_expander &e) const override
{
return expand_ldr_str_za (e, CODE_FOR_aarch64_sme_ldr0,
code_for_aarch64_sme_ldrn (Pmode));
}
};
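/* Implements svldr_zt.  */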
class svldr_zt_impl : public load_store_za_zt0_base
{
public:
unsigned int
call_properties (const function_instance &) const override
{
return CP_READ_MEMORY | CP_WRITE_ZT0;
}
rtx
expand (function_expander &e) const override
{
return expand_ldr_str_zt0 (e, CODE_FOR_aarch64_sme_ldr_zt0);
}
};
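/* Implements svluti2_lane_zt and svluti4_lane_zt.  */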
class svluti_lane_zt_impl : public read_zt0<function_base>
{
public:
constexpr svluti_lane_zt_impl (unsigned int bits) : m_bits (bits) {}
rtx
expand (function_expander &e) const override
{
auto mode = e.tuple_mode (0);
e.args.ordered_remove (0);
return e.use_exact_insn (code_for_aarch64_sme_lut (m_bits, mode));
}
unsigned int m_bits;
};
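/* Implements svread_za.  The underlying instruction reads consecutive
   ZA slices into two or four vectors of 64-bit elements, so reinterpret
   the result as the requested tuple type.  */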
class svread_za_impl : public function_base
{
public:
unsigned int
call_properties (const function_instance &) const override
{
return CP_READ_ZA;
}
rtx
expand (function_expander &e) const override
{
machine_mode mode = e.vectors_per_tuple () == 4 ? VNx8DImode : VNx4DImode;
rtx res = e.use_exact_insn (code_for_aarch64_sme_read (mode));
return aarch64_sve_reinterpret (e.result_mode (), res);
}
};
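/* Implements svread_hor_za and svread_ver_za.  */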
using svread_za_tile_impl = add_call_properties<read_write_za_base,
						CP_READ_ZA>;
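/* Implements svst1_hor_za and svst1_ver_za.  */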
class svst1_za_impl : public store_za_base
{
public:
constexpr svst1_za_impl (int unspec) : m_unspec (unspec) {}
rtx
expand (function_expander &e) const override
{
return expand_ld1_st1 (e, m_unspec, false);
}
int m_unspec;
};
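/* Implements svstr_za.  */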
class svstr_za_impl : public store_za_base
{
public:
rtx
expand (function_expander &e) const override
{
return expand_ldr_str_za (e, CODE_FOR_aarch64_sme_str0,
code_for_aarch64_sme_strn (Pmode));
}
};
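/* Implements svstr_zt.  */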
class svstr_zt_impl : public load_store_za_zt0_base
{
public:
unsigned int
call_properties (const function_instance &) const override
{
return CP_WRITE_MEMORY | CP_READ_ZT0;
}
rtx
expand (function_expander &e) const override
{
return expand_ldr_str_zt0 (e, CODE_FOR_aarch64_sme_str_zt0);
}
};
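/* Implements svsudot_za.  The _single form has a dedicated sudot
   pattern; the other forms commute the multiplication operands and
   use usdot instead.  */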
class svsudot_za_impl : public read_write_za<function_base>
{
public:
rtx
expand (function_expander &e) const override
{
if (e.mode_suffix_id == MODE_single)
{
auto icode = code_for_aarch64_sme_single_sudot (e.vector_mode (0),
e.tuple_mode (1));
return e.use_exact_insn (icode);
}
std::swap (e.args[1], e.args[2]);
return e.use_exact_insn (code_for_aarch64_sme (UNSPEC_SME_USDOT,
e.vector_mode (0),
e.tuple_mode (1)));
}
};
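/* Implements svundef_za, by clobbering the whole of ZA.  */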
class svundef_za_impl : public write_za<function_base>
{
public:
rtx
expand (function_expander &) const override
{
rtx target = gen_rtx_REG (VNx16QImode, ZA_REGNUM);
emit_clobber (copy_rtx (target));
return const0_rtx;
}
};
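/* Implements svwrite_za.  The underlying instruction expects two or
   four vectors of 64-bit elements, so reinterpret the tuple argument
   accordingly.  */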
class svwrite_za_impl : public function_base
{
public:
unsigned int
call_properties (const function_instance &) const override
{
return CP_WRITE_ZA;
}
rtx
expand (function_expander &e) const override
{
machine_mode mode = e.vectors_per_tuple () == 4 ? VNx8DImode : VNx4DImode;
e.args[1] = aarch64_sve_reinterpret (mode, e.args[1]);
return e.use_exact_insn (code_for_aarch64_sme_write (mode));
}
};
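/* Implements svwrite_hor_za and svwrite_ver_za.  */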
using svwrite_za_tile_impl = add_call_properties<read_write_za_base,
						 CP_READ_ZA | CP_WRITE_ZA>;
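/* Implements svzero_mask_za.  */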
class svzero_mask_za_impl : public write_za<function_base>
{
public:
rtx
expand (function_expander &e) const override
{
return e.use_exact_insn (CODE_FOR_aarch64_sme_zero_za);
}
};
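/* Implements svzero_za.  A tile mask of 0xff selects every tile and
   therefore zeroes the whole of ZA.  */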
class svzero_za_impl : public write_za<function_base>
{
public:
rtx
expand (function_expander &) const override
{
emit_insn (gen_aarch64_sme_zero_za (gen_int_mode (0xff, SImode)));
return const0_rtx;
}
};
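/* Implements svzero_zt.  */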
class svzero_zt_impl : public write_zt0<function_base>
{
public:
rtx
expand (function_expander &) const override
{
emit_insn (gen_aarch64_sme_zero_zt0 ());
return const0_rtx;
}
};
} /* end anonymous namespace */
namespace aarch64_sve {
FUNCTION (arm_has_sme, arm_has_sme_impl, )
FUNCTION (arm_in_streaming_mode, arm_in_streaming_mode_impl, )
FUNCTION (svadd_za, sme_1mode_function, (UNSPEC_SME_ADD, UNSPEC_SME_ADD,
UNSPEC_SME_FADD))
FUNCTION (svadd_write_za, sme_1mode_function, (UNSPEC_SME_ADD_WRITE,
UNSPEC_SME_ADD_WRITE, -1))
FUNCTION (svaddha_za, sme_1mode_function, (UNSPEC_SME_ADDHA,
UNSPEC_SME_ADDHA, -1))
FUNCTION (svaddva_za, sme_1mode_function, (UNSPEC_SME_ADDVA,
UNSPEC_SME_ADDVA, -1))
FUNCTION (svbmopa_za, sme_2mode_function, (-1, UNSPEC_SME_BMOPA, -1))
FUNCTION (svbmops_za, sme_2mode_function, (-1, UNSPEC_SME_BMOPS, -1))
FUNCTION (svcntsb, svcnts_bhwd_impl, (VNx16QImode))
FUNCTION (svcntsd, svcnts_bhwd_impl, (VNx2DImode))
FUNCTION (svcntsh, svcnts_bhwd_impl, (VNx8HImode))
FUNCTION (svcntsw, svcnts_bhwd_impl, (VNx4SImode))
FUNCTION (svdot_za, sme_2mode_function, (UNSPEC_SME_SDOT, UNSPEC_SME_UDOT,
UNSPEC_SME_FDOT))
FUNCTION (svdot_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SDOT,
UNSPEC_SME_UDOT,
UNSPEC_SME_FDOT))
FUNCTION (svld1_hor_za, svld1_za_impl, (UNSPEC_SME_LD1_HOR))
FUNCTION (svld1_ver_za, svld1_za_impl, (UNSPEC_SME_LD1_VER))
FUNCTION (svldr_za, svldr_za_impl, )
FUNCTION (svldr_zt, svldr_zt_impl, )
FUNCTION (svluti2_lane_zt, svluti_lane_zt_impl, (2))
FUNCTION (svluti4_lane_zt, svluti_lane_zt_impl, (4))
FUNCTION (svmla_za, sme_2mode_function, (UNSPEC_SME_SMLA, UNSPEC_SME_UMLA,
UNSPEC_SME_FMLA))
FUNCTION (svmla_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SMLA,
UNSPEC_SME_UMLA,
UNSPEC_SME_FMLA))
FUNCTION (svmls_za, sme_2mode_function, (UNSPEC_SME_SMLS, UNSPEC_SME_UMLS,
UNSPEC_SME_FMLS))
FUNCTION (svmls_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SMLS,
UNSPEC_SME_UMLS,
UNSPEC_SME_FMLS))
FUNCTION (svmopa_za, sme_2mode_function, (UNSPEC_SME_SMOPA, UNSPEC_SME_UMOPA,
UNSPEC_SME_FMOPA))
FUNCTION (svmops_za, sme_2mode_function, (UNSPEC_SME_SMOPS, UNSPEC_SME_UMOPS,
UNSPEC_SME_FMOPS))
FUNCTION (svread_za, svread_za_impl, )
FUNCTION (svread_hor_za, svread_za_tile_impl, (UNSPEC_SME_READ_HOR))
FUNCTION (svread_ver_za, svread_za_tile_impl, (UNSPEC_SME_READ_VER))
FUNCTION (svst1_hor_za, svst1_za_impl, (UNSPEC_SME_ST1_HOR))
FUNCTION (svst1_ver_za, svst1_za_impl, (UNSPEC_SME_ST1_VER))
FUNCTION (svstr_za, svstr_za_impl, )
FUNCTION (svstr_zt, svstr_zt_impl, )
FUNCTION (svsub_za, sme_1mode_function, (UNSPEC_SME_SUB, UNSPEC_SME_SUB,
UNSPEC_SME_FSUB))
FUNCTION (svsub_write_za, sme_1mode_function, (UNSPEC_SME_SUB_WRITE,
UNSPEC_SME_SUB_WRITE, -1))
FUNCTION (svsudot_za, svsudot_za_impl, )
FUNCTION (svsudot_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SUDOT, -1, -1))
FUNCTION (svsuvdot_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SUVDOT,
-1, -1))
FUNCTION (svsumopa_za, sme_2mode_function, (UNSPEC_SME_SUMOPA, -1, -1))
FUNCTION (svsumops_za, sme_2mode_function, (UNSPEC_SME_SUMOPS, -1, -1))
FUNCTION (svundef_za, svundef_za_impl, )
FUNCTION (svusdot_za, sme_2mode_function, (-1, UNSPEC_SME_USDOT, -1))
FUNCTION (svusdot_lane_za, sme_2mode_lane_function, (-1, UNSPEC_SME_USDOT, -1))
FUNCTION (svusvdot_lane_za, sme_2mode_lane_function, (-1, UNSPEC_SME_USVDOT,
-1))
FUNCTION (svusmopa_za, sme_2mode_function, (-1, UNSPEC_SME_USMOPA, -1))
FUNCTION (svusmops_za, sme_2mode_function, (-1, UNSPEC_SME_USMOPS, -1))
FUNCTION (svvdot_lane_za, sme_2mode_lane_function, (UNSPEC_SME_SVDOT,
UNSPEC_SME_UVDOT,
UNSPEC_SME_FVDOT))
FUNCTION (svwrite_za, svwrite_za_impl, )
FUNCTION (svwrite_hor_za, svwrite_za_tile_impl, (UNSPEC_SME_WRITE_HOR))
FUNCTION (svwrite_ver_za, svwrite_za_tile_impl, (UNSPEC_SME_WRITE_VER))
FUNCTION (svzero_mask_za, svzero_mask_za_impl, )
FUNCTION (svzero_za, svzero_za_impl, )
FUNCTION (svzero_zt, svzero_zt_impl, )
} /* end namespace aarch64_sve */