diff options
author | Richard Biener <rguenther@suse.de> | 2021-04-01 09:29:14 +0200 |
---|---|---|
committer | Richard Biener <rguenther@suse.de> | 2021-04-01 10:23:25 +0200 |
commit | b75c4e1384c021ca94fc8e8db8e517e802b820f3 (patch) | |
tree | fd4e8bbf8d3e46b610e4684f7dc90569f7fc962a | |
parent | 19d71674616e6494a60432a2a28adcd762a6c877 (diff) | |
download | gcc-b75c4e1384c021ca94fc8e8db8e517e802b820f3.zip gcc-b75c4e1384c021ca94fc8e8db8e517e802b820f3.tar.gz gcc-b75c4e1384c021ca94fc8e8db8e517e802b820f3.tar.bz2 |
tree-optimization/99856 - fix over-widening pattern creation
This fixes a missing promotion of the required bit precision to a
vector element precision.
2021-04-01 Richard Biener <rguenther@suse.de>
PR tree-optimization/99856
* tree-vect-patterns.c (vect_recog_over_widening_pattern): Promote
precision to vector element precision.
* gcc.dg/vect/pr99856.c: New testcase.
-rw-r--r-- | gcc/testsuite/gcc.dg/vect/pr99856.c | 33 | ||||
-rw-r--r-- | gcc/tree-vect-patterns.c | 1 |
2 files changed, 34 insertions, 0 deletions
diff --git a/gcc/testsuite/gcc.dg/vect/pr99856.c b/gcc/testsuite/gcc.dg/vect/pr99856.c new file mode 100644 index 0000000..e5d2a45 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr99856.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target vect_unpack } */ +/* { dg-require-effective-target vect_pack_trunc } */ + +#define SHIFTFORDIV255(a)\ + ((((a) >> 8) + a) >> 8) + +#define DIV255(a)\ + SHIFTFORDIV255(a + 0x80) + +typedef unsigned char uint8_t; + +void +opSourceOver_premul(uint8_t* restrict Rrgba, + const uint8_t* restrict Srgba, + const uint8_t* restrict Drgba, int len) +{ + Rrgba = __builtin_assume_aligned (Rrgba, __BIGGEST_ALIGNMENT__); + Srgba = __builtin_assume_aligned (Rrgba, __BIGGEST_ALIGNMENT__); + Drgba = __builtin_assume_aligned (Rrgba, __BIGGEST_ALIGNMENT__); + int i = 0; + for (; i < len*4; i += 4) + { + uint8_t Sa = Srgba[i + 3]; + Rrgba[i + 0] = DIV255(Srgba[i + 0] * 255 + Drgba[i + 0] * (255 - Sa)); + Rrgba[i + 1] = DIV255(Srgba[i + 1] * 255 + Drgba[i + 1] * (255 - Sa)); + Rrgba[i + 2] = DIV255(Srgba[i + 2] * 255 + Drgba[i + 2] * (255 - Sa)); + Rrgba[i + 3] = DIV255(Srgba[i + 3] * 255 + Drgba[i + 3] * (255 - Sa)); + } +} + +/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */ diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index b575b45..803de3f 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -1705,6 +1705,7 @@ vect_recog_over_widening_pattern (vec_info *vinfo, /* Apply the minimum efficient precision we just calculated. */ if (new_precision < min_precision) new_precision = min_precision; + new_precision = vect_element_precision (new_precision); if (new_precision >= TYPE_PRECISION (type)) return NULL; |