aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author liuhongt <hongtao.liu@intel.com> 2023-12-19 10:39:06 +0800
committer liuhongt <hongtao.liu@intel.com> 2024-01-11 09:20:17 +0800
commit 6686e16fda419067b91614835dd743ebb82717ea (patch)
tree fac882981a734386575829fe9bb93df289e8748d /gcc
parent 96a9355a3d5b24f010fa6ad0b51bba5cc3f334f1 (diff)
download gcc-6686e16fda419067b91614835dd743ebb82717ea.zip
gcc-6686e16fda419067b91614835dd743ebb82717ea.tar.gz
gcc-6686e16fda419067b91614835dd743ebb82717ea.tar.bz2
Optimize A < B ? A : B to MIN_EXPR.

Similarly, optimize A < B ? B : A to MAX_EXPR.

The frontend already has code to optimize such patterns, but it fails to
handle the testcase in the PR because the pattern is only exposed at the
GIMPLE level, when backend builtins are folded.

pr95906 can now be optimized to MAX_EXPR, as anticipated by the comment
in that testcase:

  // FIXME: this should further optimize to a MAX_EXPR
  typedef signed char v16i8 __attribute__((vector_size(16)));
  v16i8 f(v16i8 a, v16i8 b)

gcc/ChangeLog:

	PR target/104401
	* match.pd (VEC_COND_EXPR: A < B ? A : B -> MIN_EXPR): New pattern match.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr104401.c: New test.
	* gcc.dg/tree-ssa/pr95906.c: Adjust testcase.

Diffstat (limited to 'gcc')
-rw-r--r-- gcc/match.pd | 21
-rw-r--r-- gcc/testsuite/gcc.dg/tree-ssa/pr95906.c | 3
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr104401.c | 27
3 files changed, 49 insertions, 2 deletions
diff --git a/gcc/match.pd b/gcc/match.pd
index d75babd..876a9d1 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -5684,6 +5684,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(if (VECTOR_TYPE_P (type))
(view_convert @c0)
(convert @c0))))))))
+
+/* This is for VEC_COND_EXPR
+ Optimize A < B ? A : B to MIN (A, B)
+ A > B ? A : B to MAX (A, B). */
+(for cmp (lt le ungt unge gt ge unlt unle)
+ minmax (min min min min max max max max)
+ MINMAX (MIN_EXPR MIN_EXPR MIN_EXPR MIN_EXPR MAX_EXPR MAX_EXPR MAX_EXPR MAX_EXPR)
+ (simplify
+ (vec_cond (cmp @0 @1) @0 @1)
+ (if (VECTOR_INTEGER_TYPE_P (type)
+ && target_supports_op_p (type, MINMAX, optab_vector))
+ (minmax @0 @1))))
+
+(for cmp (lt le ungt unge gt ge unlt unle)
+ minmax (max max max max min min min min)
+ MINMAX (MAX_EXPR MAX_EXPR MAX_EXPR MAX_EXPR MIN_EXPR MIN_EXPR MIN_EXPR MIN_EXPR)
+ (simplify
+ (vec_cond (cmp @0 @1) @1 @0)
+ (if (VECTOR_INTEGER_TYPE_P (type)
+ && target_supports_op_p (type, MINMAX, optab_vector))
+ (minmax @0 @1))))
#endif
(for cnd (cond vec_cond)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c b/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c
index 3d820a5..d15670f 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr95906.c
@@ -1,7 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-forwprop3-raw -w -Wno-psabi" } */
-// FIXME: this should further optimize to a MAX_EXPR
typedef signed char v16i8 __attribute__((vector_size(16)));
v16i8 f(v16i8 a, v16i8 b)
{
@@ -10,4 +9,4 @@ v16i8 f(v16i8 a, v16i8 b)
}
/* { dg-final { scan-tree-dump-not "bit_(and|ior)_expr" "forwprop3" } } */
-/* { dg-final { scan-tree-dump-times "vec_cond_expr" 1 "forwprop3" } } */
+/* { dg-final { scan-tree-dump-times "max_expr" 1 "forwprop3" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr104401.c b/gcc/testsuite/gcc.target/i386/pr104401.c
new file mode 100644
index 0000000..8ce7ff8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr104401.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse4.1" } */
+/* { dg-final { scan-assembler-times "pminsd" 2 } } */
+/* { dg-final { scan-assembler-times "pmaxsd" 2 } } */
+
+#include <smmintrin.h>
+
+__m128i min32(__m128i value, __m128i input)
+{
+ return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(value, input));
+}
+
+__m128i max32(__m128i value, __m128i input)
+{
+ return _mm_blendv_epi8(input, value, _mm_cmpgt_epi32(value, input));
+}
+
+__m128i min32_1(__m128i value, __m128i input)
+{
+ return _mm_blendv_epi8(input, value, _mm_cmpgt_epi32(input, value));
+}
+
+__m128i max32_1(__m128i value, __m128i input)
+{
+ return _mm_blendv_epi8(input, value, _mm_cmplt_epi32(input, value));
+}
+