aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/aarch64/aarch64-ldp-fusion.cc
blob: ced14cee5519d2a8c145f7a3872370e5e6e0da48 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
// LoadPair fusion optimization pass for AArch64.
// Copyright (C) 2023-2025 Free Software Foundation, Inc.
//
// This file is part of GCC.
//
// GCC is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// GCC is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with GCC; see the file COPYING3.  If not see
// <http://www.gnu.org/licenses/>.

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "tm_p.h"
#include "rtl-iter.h"
#include "tree-pass.h"
#include "insn-attr.h"
#include "pair-fusion.h"

// Limits on the offset accepted by pair_mem_in_range_p: the values
// representable in a signed field of LDP_IMM_BITS bits, i.e. the
// inclusive range [LDP_MIN_IMM, LDP_MAX_IMM] = [-64, 63].
static constexpr HOST_WIDE_INT LDP_IMM_BITS = 7;
static constexpr HOST_WIDE_INT LDP_IMM_SIGN_BIT = 1 << (LDP_IMM_BITS - 1);
static constexpr HOST_WIDE_INT LDP_MIN_IMM = -LDP_IMM_SIGN_BIT;
static constexpr HOST_WIDE_INT LDP_MAX_IMM = -LDP_MIN_IMM - 1;

struct aarch64_pair_fusion : public pair_fusion
{
  bool fpsimd_op_p (rtx reg_op, machine_mode mem_mode,
		    bool load_p) override final
  {
    // Before RA, we use the modes, noting that stores of constant zero
    // operands use GPRs (even in non-integer modes).  After RA, we use
    // the hard register numbers.
    return reload_completed
      ? (REG_P (reg_op) && FP_REGNUM_P (REGNO (reg_op)))
      : (GET_MODE_CLASS (mem_mode) != MODE_INT
	 && (load_p || !aarch64_const_zero_rtx_p (reg_op)));
  }

  bool pair_mem_insn_p (rtx_insn *rti, bool &load_p) override final;

  bool pair_mem_ok_with_policy (rtx base_mem, bool load_p) override final
  {
    return aarch64_mem_ok_with_ldpstp_policy_model (base_mem,
						    load_p,
						    GET_MODE (base_mem));
  }

  bool pair_operand_mode_ok_p (machine_mode mode) override final;

  rtx gen_pair (rtx *pats, rtx writeback, bool load_p) override final;

  bool pair_reg_operand_ok_p (bool load_p, rtx reg_op,
			      machine_mode mode) override final
  {
    return (load_p
	    ? aarch64_ldp_reg_operand (reg_op, mode)
	    : aarch64_stp_reg_operand (reg_op, mode));
  }

  int pair_mem_alias_check_limit () override final
  {
    return aarch64_ldp_alias_check_limit;
  }

  bool should_handle_writeback (writeback_type which) override final
  {
    if (which == writeback_type::ALL)
      return aarch64_ldp_writeback > 1;
    else
      return aarch64_ldp_writeback;
  }

  bool track_loads_p () override final
  {
    return aarch64_tune_params.ldp_policy_model
	   != AARCH64_LDP_STP_POLICY_NEVER;
  }

  bool track_stores_p () override final
  {
    return aarch64_tune_params.stp_policy_model
	   != AARCH64_LDP_STP_POLICY_NEVER;
  }

  bool pair_mem_in_range_p (HOST_WIDE_INT offset) override final
  {
    return (offset >= LDP_MIN_IMM && offset <= LDP_MAX_IMM);
  }

  rtx gen_promote_writeback_pair (rtx wb_effect, rtx mem, rtx regs[2],
				  bool load_p) override final;

  rtx destructure_pair (rtx regs[2], rtx pattern, bool load_p) override final;
};

// Return true if RTI is a load/store pair insn: its pattern must be a
// two-element PARALLEL and its ldpstp attribute must not be
// LDPSTP_NONE.  On success, LOAD_P is set to true for an ldp and false
// for an stp; otherwise LOAD_P is left untouched.
bool
aarch64_pair_fusion::pair_mem_insn_p (rtx_insn *rti, bool &load_p)
{
  rtx pat = PATTERN (rti);
  if (GET_CODE (pat) != PARALLEL || XVECLEN (pat, 0) != 2)
    return false;

  const auto ldpstp_kind = get_attr_ldpstp (rti);
  if (ldpstp_kind == LDPSTP_NONE)
    return false;

  load_p = (ldpstp_kind == LDPSTP_LDP);

  // Anything that is neither NONE nor LDP is expected to be STP.
  gcc_checking_assert (load_p || ldpstp_kind == LDPSTP_STP);
  return true;
}

// Build the pattern for a pair insn from the two single-access patterns
// PATS[0] and PATS[1].
//
// If WRITEBACK is non-null, the result is a three-element PARALLEL of
// WRITEBACK followed by the two patterns unchanged.  Otherwise operand
// 0 and operand 1 of each pattern are taken to be its destination and
// source respectively (as for a SET), and:
//  - for loads (LOAD_P), we pass both destinations and the first source
//    to aarch64_gen_load_pair;
//  - for stores, we pass the first destination and both sources to
//    aarch64_gen_store_pair.
rtx
aarch64_pair_fusion::gen_pair (rtx *pats, rtx writeback, bool load_p)
{
  if (writeback)
    {
      auto patvec = gen_rtvec (3, writeback, pats[0], pats[1]);
      return gen_rtx_PARALLEL (VOIDmode, patvec);
    }

  if (load_p)
    return aarch64_gen_load_pair (XEXP (pats[0], 0),
				  XEXP (pats[1], 0),
				  XEXP (pats[0], 1));

  return aarch64_gen_store_pair (XEXP (pats[0], 0),
				 XEXP (pats[0], 1),
				 XEXP (pats[1], 1));
}

// Return true if memory accesses whose operand mode is MODE are
// candidates for ldp/stp formation at the current point in compilation.
bool
aarch64_pair_fusion::pair_operand_mode_ok_p (machine_mode mode)
{
  const bool supported_p = aarch64_ldpstp_operand_mode_p (mode);

  // TImode is special in that it can be allocated to a pair of GPRs or
  // a single FPR.  The RA is best placed to make that decision, so we
  // don't pair up TImode accesses before RA.
  const bool ti_before_ra_p = (mode == TImode) && !reload_completed;

  return supported_p && !ti_before_ra_p;
}

// Map the pair mode MODE to a canonical mode for one operand of such a
// pair.  This is currently only needed when promoting a non-writeback
// pair into a writeback pair, where it would otherwise be unclear which
// mode to use when storing a modeless CONST_INT.  Any mode other than
// the three pair modes handled here is a bug in the caller.
static machine_mode
aarch64_operand_mode_for_pair_mode (machine_mode mode)
{
  if (mode == E_V2x4QImode)
    return SImode;

  if (mode == E_V2x8QImode)
    return DImode;

  if (mode == E_V2x16QImode)
    return V16QImode;

  gcc_unreachable ();
}

// Unpack the load pair pattern PATTERN.  Each of its first two elements
// is expected to have a register as operand 0 and an UNSPEC wrapping
// the pair mem as operand 1.  The registers are stored in REGS and the
// mem (which must be the same for both elements) is returned.
static rtx
aarch64_destructure_load_pair (rtx regs[2], rtx pattern)
{
  rtx pair_mem = NULL_RTX;

  for (int idx = 0; idx < 2; idx++)
    {
      rtx elt = XVECEXP (pattern, 0, idx);
      regs[idx] = XEXP (elt, 0);

      rtx src = XEXP (elt, 1);
      gcc_checking_assert (GET_CODE (src) == UNSPEC);

      rtx elt_mem = XVECEXP (src, 0, 0);
      if (!pair_mem)
	{
	  // First element: remember its mem.
	  gcc_checking_assert (MEM_P (elt_mem));
	  pair_mem = elt_mem;
	}
      else
	// Second element: it must refer to the same mem.
	gcc_checking_assert (rtx_equal_p (pair_mem, elt_mem));
    }

  return pair_mem;
}

// Unpack the store pair pattern PATTERN, whose operand 0 is the mem and
// whose operand 1 is an UNSPEC holding the two register operands.  The
// register operands are stored in REGS and the mem is returned.
static rtx
aarch64_destructure_store_pair (rtx regs[2], rtx pattern)
{
  rtx pair_mem = XEXP (pattern, 0);
  rtx src = XEXP (pattern, 1);
  gcc_checking_assert (GET_CODE (src) == UNSPEC);

  regs[0] = XVECEXP (src, 0, 0);
  regs[1] = XVECEXP (src, 0, 1);
  return pair_mem;
}

// Unpack the pair insn pattern PATTERN, storing its register operands
// in REGS and returning its mem.  LOAD_P selects between the load pair
// and store pair layouts.
rtx
aarch64_pair_fusion::destructure_pair (rtx regs[2], rtx pattern, bool load_p)
{
  return (load_p
	  ? aarch64_destructure_load_pair (regs, pattern)
	  : aarch64_destructure_store_pair (regs, pattern));
}

// Build a writeback pair pattern: a three-element PARALLEL consisting
// of WB_EFFECT followed by two SETs, one per register operand in REGS.
// The SETs access PAIR_MEM at offsets zero and one operand size
// respectively, loading into the registers if LOAD_P and storing from
// them otherwise.
rtx
aarch64_pair_fusion::gen_promote_writeback_pair (rtx wb_effect, rtx pair_mem,
						 rtx regs[2],
						 bool load_p)
{
  // Mode to fall back on for store operands that have no mode of their
  // own (e.g. a CONST_INT).
  const machine_mode fallback_mode
    = aarch64_operand_mode_for_pair_mode (GET_MODE (pair_mem));

  machine_mode modes[2];
  for (int idx = 0; idx < 2; idx++)
    {
      const machine_mode reg_mode = GET_MODE (regs[idx]);
      if (!load_p)
	modes[idx] = (reg_mode == VOIDmode) ? fallback_mode : reg_mode;
      else
	{
	  // Load destinations are expected to always have a mode.
	  gcc_checking_assert (reg_mode != VOIDmode);
	  modes[idx] = reg_mode;
	}
    }

  // Both operands must be the same size, since that size is also the
  // offset of the second access.
  const auto op_size = GET_MODE_SIZE (modes[0]);
  gcc_checking_assert (known_eq (op_size, GET_MODE_SIZE (modes[1])));

  rtx pats[2];
  for (int idx = 0; idx < 2; idx++)
    {
      rtx mem = adjust_address_nv (pair_mem, modes[idx], op_size * idx);
      if (load_p)
	pats[idx] = gen_rtx_SET (regs[idx], mem);
      else
	pats[idx] = gen_rtx_SET (mem, regs[idx]);
    }

  auto patvec = gen_rtvec (3, wb_effect, pats[0], pats[1]);
  return gen_rtx_PARALLEL (VOIDmode, patvec);
}

namespace {

const pass_data pass_data_ldp_fusion =
{
  RTL_PASS, /* type */
  "ldp_fusion", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_ldp_fusion : public rtl_opt_pass
{
public:
  pass_ldp_fusion (gcc::context *ctx)
    : rtl_opt_pass (pass_data_ldp_fusion, ctx)
    {}

  opt_pass *clone () override { return new pass_ldp_fusion (m_ctxt); }

  bool gate (function *) final override
    {
      if (!optimize || optimize_debug)
	return false;

      if (reload_completed)
	return flag_aarch64_late_ldp_fusion;
      else
	return flag_aarch64_early_ldp_fusion;
    }

  unsigned execute (function *) final override
    {
      aarch64_pair_fusion pass;
      pass.run ();
      return 0;
    }
};

} // anon namespace

// Allocate a new instance of the ldp_fusion pass for context CTX and
// return it.
rtl_opt_pass *
make_pass_ldp_fusion (gcc::context *ctx)
{
  pass_ldp_fusion *new_pass = new pass_ldp_fusion (ctx);
  return new_pass;
}