diff options
author | Tamar Christina <tamar.christina@arm.com> | 2025-10-18 08:22:50 +0100 |
---|---|---|
committer | Tamar Christina <tamar.christina@arm.com> | 2025-10-18 08:24:18 +0100 |
commit | 75fb400d2950e1f743f133ece8fb3abe815faf13 (patch) | |
tree | 8840d4593e42e92aac7b2d227002fe947ac0951e /libgomp/testsuite/libgomp.c/declare-variant-4-gfx1101.c | |
parent | 25c8a8d4318d0fa25d79b4b9f60865da2d6c5e60 (diff) | |
download | gcc-master.zip gcc-master.tar.gz gcc-master.tar.bz2 |
SVE2p1 adds a 2-way dot product, which we can use when we have to do a single-step
widening addition. This is useful, for instance, when the value to be widened
does not come from a load. For example, for
int foo2_int(unsigned short *x, unsigned short * restrict y) {
int sum = 0;
for (int i = 0; i < 8000; i++)
{
x[i] = x[i] + y[i];
sum += x[i];
}
return sum;
}
we used to generate
.L12:
ld1h z30.h, p7/z, [x0, x2, lsl 1]
ld1h z29.h, p7/z, [x1, x2, lsl 1]
add z30.h, z30.h, z29.h
uaddwb z31.s, z31.s, z30.h
uaddwt z31.s, z31.s, z30.h
st1h z30.h, p7, [x0, x2, lsl 1]
mov x3, x2
inch x2
cmp w2, w4
bls .L12
inch x3
uaddv d31, p7, z31.s
but with +sve2p1 we now generate
.L12:
ld1h z31.h, p7/z, [x0, x2, lsl 1]
ld1h z29.h, p7/z, [x1, x2, lsl 1]
add z31.h, z31.h, z29.h
udot z30.s, z31.h, z28.h
st1h z31.h, p7, [x0, x2, lsl 1]
mov x3, x2
inch x2
cmp w2, w4
bls .L12
inch x3
uaddv d30, p7, z30.s
gcc/ChangeLog:
PR middle-end/122069
* config/aarch64/aarch64-sve2.md
(widen_ssum<mode><Vnarrow>3): Update.
(widen_usum<mode><Vnarrow>3): Update.
gcc/testsuite/ChangeLog:
PR middle-end/122069
* gcc.target/aarch64/sve2/pr122069_3.c: New test.
* gcc.target/aarch64/sve2/pr122069_4.c: New test.
Diffstat (limited to 'libgomp/testsuite/libgomp.c/declare-variant-4-gfx1101.c')
0 files changed, 0 insertions, 0 deletions