1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
|
/* A representation of vector permutation indices.
Copyright (C) 2017-2023 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "vec-perm-indices.h"
#include "tree.h"
#include "fold-const.h"
#include "tree-vector-builder.h"
#include "backend.h"
#include "rtl.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "selftest.h"
#include "rtx-vector-builder.h"
/* Switch to a new permutation vector that selects between NINPUTS vector
inputs that have NELTS_PER_INPUT elements each. Take the elements of the
new permutation vector from ELEMENTS, clamping each one to be in range. */
void
vec_perm_indices::new_vector (const vec_perm_builder &elements,
unsigned int ninputs,
poly_uint64 nelts_per_input)
{
m_ninputs = ninputs;
m_nelts_per_input = nelts_per_input;
/* If the vector has a constant number of elements, expand the
encoding and clamp each element. E.g. { 0, 2, 4, ... } might
wrap halfway if there is only one vector input, and we want
the wrapped form to be the canonical one.
If the vector has a variable number of elements, just copy
the encoding. In that case the unwrapped form is canonical
and there is no way of representing the wrapped form. */
poly_uint64 full_nelts = elements.full_nelts ();
unsigned HOST_WIDE_INT copy_nelts;
if (full_nelts.is_constant (©_nelts))
m_encoding.new_vector (full_nelts, copy_nelts, 1);
else
{
copy_nelts = elements.encoded_nelts ();
m_encoding.new_vector (full_nelts, elements.npatterns (),
elements.nelts_per_pattern ());
}
unsigned int npatterns = m_encoding.npatterns ();
for (unsigned int i = 0; i < npatterns; ++i)
m_encoding.quick_push (clamp (elements.elt (i)));
/* Use the fact that:
(a + b) % c == ((a % c) + (b % c)) % c
to simplify the clamping of variable-length vectors. */
for (unsigned int i = npatterns; i < copy_nelts; ++i)
{
element_type step = clamp (elements.elt (i)
- elements.elt (i - npatterns));
m_encoding.quick_push (clamp (m_encoding[i - npatterns] + step));
}
m_encoding.finalize ();
}
/* Switch to a new permutation vector that selects the same input elements
as ORIG, but with each element split into FACTOR pieces. For example,
if ORIG is { 1, 2, 0, 3 } and FACTOR is 2, the new permutation is
{ 2, 3, 4, 5, 0, 1, 6, 7 }. */
void
vec_perm_indices::new_expanded_vector (const vec_perm_indices &orig,
unsigned int factor)
{
m_ninputs = orig.m_ninputs;
m_nelts_per_input = orig.m_nelts_per_input * factor;
m_encoding.new_vector (orig.m_encoding.full_nelts () * factor,
orig.m_encoding.npatterns () * factor,
orig.m_encoding.nelts_per_pattern ());
unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
for (unsigned int i = 0; i < encoded_nelts; ++i)
{
element_type base = orig.m_encoding[i] * factor;
for (unsigned int j = 0; j < factor; ++j)
m_encoding.quick_push (base + j);
}
m_encoding.finalize ();
}
/* Check whether we can switch to a new permutation vector that
selects the same input elements as ORIG, but with each element
built up from FACTOR pieces. Return true if yes, otherwise
return false. Every FACTOR permutation indexes should be
continuous separately and the first one of each batch should
be able to exactly modulo FACTOR. For example, if ORIG is
{ 2, 3, 4, 5, 0, 1, 6, 7 } and FACTOR is 2, the new permutation
is { 1, 2, 0, 3 }. */
bool
vec_perm_indices::new_shrunk_vector (const vec_perm_indices &orig,
unsigned int factor)
{
gcc_assert (factor > 0);
if (maybe_lt (orig.m_nelts_per_input, factor))
return false;
poly_uint64 nelts;
/* Invalid if vector units number isn't multiple of factor. */
if (!multiple_p (orig.m_nelts_per_input, factor, &nelts))
return false;
/* Only handle the case that npatterns is multiple of factor.
FIXME: Try to see whether we can reshape it by factor npatterns. */
if (orig.m_encoding.npatterns () % factor != 0)
return false;
unsigned int encoded_nelts = orig.m_encoding.encoded_nelts ();
auto_vec<element_type, 32> encoding (encoded_nelts);
/* Separate all encoded elements into batches by size factor,
then ensure the first element of each batch is multiple of
factor and all elements in each batch is consecutive from
the first one. */
for (unsigned int i = 0; i < encoded_nelts; i += factor)
{
element_type first = orig.m_encoding[i];
element_type new_index;
if (!multiple_p (first, factor, &new_index))
return false;
for (unsigned int j = 1; j < factor; ++j)
if (maybe_ne (first + j, orig.m_encoding[i + j]))
return false;
encoding.quick_push (new_index);
}
m_ninputs = orig.m_ninputs;
m_nelts_per_input = nelts;
poly_uint64 full_nelts = exact_div (orig.m_encoding.full_nelts (), factor);
unsigned int npatterns = orig.m_encoding.npatterns () / factor;
m_encoding.new_vector (full_nelts, npatterns,
orig.m_encoding.nelts_per_pattern ());
m_encoding.splice (encoding);
m_encoding.finalize ();
return true;
}
/* Rotate the inputs of the permutation right by DELTA inputs. This changes
the values of the permutation vector but it doesn't change the way that
the elements are encoded. */
void
vec_perm_indices::rotate_inputs (int delta)
{
element_type element_delta = delta * m_nelts_per_input;
for (unsigned int i = 0; i < m_encoding.length (); ++i)
m_encoding[i] = clamp (m_encoding[i] + element_delta);
}
/* Return true if index OUT_BASE + I * OUT_STEP selects input
element IN_BASE + I * IN_STEP. For example, the call to test
whether a permute reverses a vector of N elements would be:
series_p (0, 1, N - 1, -1)
which would return true for { N - 1, N - 2, N - 3, ... }.
The calls to test for an interleaving of elements starting
at N1 and N2 would be:
series_p (0, 2, N1, 1) && series_p (1, 2, N2, 1).
which would return true for { N1, N2, N1 + 1, N2 + 1, ... }. */
bool
vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step,
element_type in_base, element_type in_step) const
{
/* Check the base value. */
if (maybe_ne (clamp (m_encoding.elt (out_base)), clamp (in_base)))
return false;
element_type full_nelts = m_encoding.full_nelts ();
unsigned int npatterns = m_encoding.npatterns ();
/* Calculate which multiple of OUT_STEP elements we need to get
back to the same pattern. */
unsigned int cycle_length = least_common_multiple (out_step, npatterns);
/* Check the steps. */
in_step = clamp (in_step);
out_base += out_step;
unsigned int limit = 0;
for (;;)
{
/* Succeed if we've checked all the elements in the vector. */
if (known_ge (out_base, full_nelts))
return true;
if (out_base >= npatterns)
{
/* We've got to the end of the "foreground" values. Check
2 elements from each pattern in the "background" values. */
if (limit == 0)
limit = out_base + cycle_length * 2;
else if (out_base >= limit)
return true;
}
element_type v0 = m_encoding.elt (out_base - out_step);
element_type v1 = m_encoding.elt (out_base);
if (maybe_ne (clamp (v1 - v0), in_step))
return false;
out_base += out_step;
}
}
/* Return true if all elements of the permutation vector are in the range
[START, START + SIZE). */
bool
vec_perm_indices::all_in_range_p (element_type start, element_type size) const
{
/* Check the first two elements of each pattern. */
unsigned int npatterns = m_encoding.npatterns ();
unsigned int nelts_per_pattern = m_encoding.nelts_per_pattern ();
unsigned int base_nelts = npatterns * MIN (nelts_per_pattern, 2);
for (unsigned int i = 0; i < base_nelts; ++i)
if (!known_in_range_p (m_encoding[i], start, size))
return false;
/* For stepped encodings, check the full range of the series. */
if (nelts_per_pattern == 3)
{
element_type limit = input_nelts ();
/* The number of elements in each pattern beyond the first two
that we checked above. */
poly_int64 step_nelts = exact_div (m_encoding.full_nelts (),
npatterns) - 2;
for (unsigned int i = 0; i < npatterns; ++i)
{
/* BASE1 has been checked but BASE2 hasn't. */
element_type base1 = m_encoding[i + npatterns];
element_type base2 = m_encoding[i + base_nelts];
/* The step to add to get from BASE1 to each subsequent value. */
element_type step = clamp (base2 - base1);
/* STEP has no inherent sign, so a value near LIMIT can
act as a negative step. The series is in range if it
is in range according to one of the two interpretations.
Since we're dealing with clamped values, ELEMENT_TYPE is
wide enough for overflow not to be a problem. */
element_type headroom_down = base1 - start;
element_type headroom_up = size - headroom_down - 1;
HOST_WIDE_INT diff;
if ((!step.is_constant (&diff)
|| maybe_lt (headroom_up, diff * step_nelts))
&& (!(limit - step).is_constant (&diff)
|| maybe_lt (headroom_down, diff * step_nelts)))
return false;
}
}
return true;
}
/* Try to read the contents of VECTOR_CST CST as a constant permutation
vector. Return true and add the elements to BUILDER on success,
otherwise return false without modifying BUILDER. */
bool
tree_to_vec_perm_builder (vec_perm_builder *builder, tree cst)
{
unsigned int encoded_nelts = vector_cst_encoded_nelts (cst);
for (unsigned int i = 0; i < encoded_nelts; ++i)
if (!tree_fits_poly_int64_p (VECTOR_CST_ENCODED_ELT (cst, i)))
return false;
builder->new_vector (TYPE_VECTOR_SUBPARTS (TREE_TYPE (cst)),
VECTOR_CST_NPATTERNS (cst),
VECTOR_CST_NELTS_PER_PATTERN (cst));
for (unsigned int i = 0; i < encoded_nelts; ++i)
builder->quick_push (tree_to_poly_int64 (VECTOR_CST_ENCODED_ELT (cst, i)));
return true;
}
/* Return a VECTOR_CST of type TYPE for the permutation vector in INDICES. */
tree
vec_perm_indices_to_tree (tree type, const vec_perm_indices &indices)
{
gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), indices.length ()));
tree_vector_builder sel (type, indices.encoding ().npatterns (),
indices.encoding ().nelts_per_pattern ());
unsigned int encoded_nelts = sel.encoded_nelts ();
for (unsigned int i = 0; i < encoded_nelts; i++)
sel.quick_push (build_int_cst (TREE_TYPE (type), indices[i]));
return sel.build ();
}
/* Return a CONST_VECTOR of mode MODE that contains the elements of
INDICES. */
rtx
vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices)
{
gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
&& known_eq (GET_MODE_NUNITS (mode), indices.length ()));
rtx_vector_builder sel (mode, indices.encoding ().npatterns (),
indices.encoding ().nelts_per_pattern ());
unsigned int encoded_nelts = sel.encoded_nelts ();
for (unsigned int i = 0; i < encoded_nelts; i++)
sel.quick_push (gen_int_mode (indices[i], GET_MODE_INNER (mode)));
return sel.build ();
}
#if CHECKING_P
namespace selftest {
/* Test a 12-element vector. */
static void
test_vec_perm_12 (void)
{
vec_perm_builder builder (12, 12, 1);
for (unsigned int i = 0; i < 4; ++i)
{
builder.quick_push (i * 5);
builder.quick_push (3 + i);
builder.quick_push (2 + 3 * i);
}
vec_perm_indices indices (builder, 1, 12);
ASSERT_TRUE (indices.series_p (0, 3, 0, 5));
ASSERT_FALSE (indices.series_p (0, 3, 3, 5));
ASSERT_FALSE (indices.series_p (0, 3, 0, 8));
ASSERT_TRUE (indices.series_p (1, 3, 3, 1));
ASSERT_TRUE (indices.series_p (2, 3, 2, 3));
ASSERT_TRUE (indices.series_p (0, 4, 0, 4));
ASSERT_FALSE (indices.series_p (1, 4, 3, 4));
ASSERT_TRUE (indices.series_p (0, 6, 0, 10));
ASSERT_FALSE (indices.series_p (0, 6, 0, 100));
ASSERT_FALSE (indices.series_p (1, 10, 3, 7));
ASSERT_TRUE (indices.series_p (1, 10, 3, 8));
ASSERT_TRUE (indices.series_p (0, 12, 0, 10));
ASSERT_TRUE (indices.series_p (0, 12, 0, 11));
ASSERT_TRUE (indices.series_p (0, 12, 0, 100));
}
/* Run selftests for this file. */
void
vec_perm_indices_cc_tests ()
{
test_vec_perm_12 ();
}
} // namespace selftest
#endif
|