aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTamar Christina <tamar.christina@arm.com>2023-10-18 09:32:55 +0100
committerTamar Christina <tamar.christina@arm.com>2023-10-18 09:53:47 +0100
commit4b39aeef594f311e2c1715f15608f1d7ebc2d868 (patch)
treebc0f36b93c71bb595b54948fd82d3ecc4cc6bb7d
parentb588dcb77e96d77777eb5647cba9e8f454e314dc (diff)
downloadgcc-4b39aeef594f311e2c1715f15608f1d7ebc2d868.zip
gcc-4b39aeef594f311e2c1715f15608f1d7ebc2d868.tar.gz
gcc-4b39aeef594f311e2c1715f15608f1d7ebc2d868.tar.bz2
middle-end: Fold vec_cond into conditional ternary or binary operation when sharing operand [PR109154]
When we have a vector conditional on a masked target which is doing a selection on the result of a conditional operation where one of the operands of the conditional operation is the other operand of the select, then we can fold the vector conditional into the operation. Concretely this transforms c = mask1 ? (masked_op mask2 a b) : b into c = masked_op (mask1 & mask2) a b The mask is then propagated upwards by the compiler. In the SVE case we don't end up needing a mask AND here since `mask2` will end up in the instruction creating `mask` which gives us a natural &. Such transformations are more common now in GCC 13+ as PRE has not started unsharing of common code in case it can make one branch fully independent. e.g. in this case `b` becomes a loop invariant value after PRE. This transformation removes the extra select for masked architectures but doesn't fix the general case. gcc/ChangeLog: PR tree-optimization/109154 * match.pd: Add new cond_op rule. gcc/testsuite/ChangeLog: PR tree-optimization/109154 * gcc.target/aarch64/sve/pre_cond_share_1.c: New test.
-rw-r--r--gcc/match.pd24
-rw-r--r--gcc/testsuite/gcc.target/aarch64/sve/pre_cond_share_1.c132
2 files changed, 156 insertions, 0 deletions
diff --git a/gcc/match.pd b/gcc/match.pd
index 067328a..a56838f 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -8970,6 +8970,30 @@ and,
&& fold_real_zero_addition_p (type, NULL_TREE, @5, 0)))
(IFN_COND_LEN_ADD @1 @0 @2 @0 @3 @4)))
+/* Detect simplification for vector condition folding where
+
+ c = mask1 ? (masked_op mask2 a b) : b
+
+ into
+
+ c = masked_op (mask1 & mask2) a b
+
+ where the operation can be partially applied to one operand. */
+
+(for cond_op (COND_BINARY)
+ (simplify
+ (vec_cond @0
+ (cond_op:s @1 @2 @3 @4) @3)
+ (cond_op (bit_and @1 @0) @2 @3 @4)))
+
+/* And same for ternary expressions. */
+
+(for cond_op (COND_TERNARY)
+ (simplify
+ (vec_cond @0
+ (cond_op:s @1 @2 @3 @4 @5) @4)
+ (cond_op (bit_and @1 @0) @2 @3 @4 @5)))
+
/* For pointers @0 and @2 and nonnegative constant offset @1, look for
expressions like:
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pre_cond_share_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pre_cond_share_1.c
new file mode 100644
index 0000000..b51d0f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pre_cond_share_1.c
@@ -0,0 +1,132 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fdump-tree-optimized" } */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <math.h>
+#include <float.h>
+
+typedef struct __attribute__((__packed__)) _Atom {
+ float x, y, z;
+ int32_t type;
+} Atom;
+
+typedef struct __attribute__((__packed__)) _FFParams {
+ int32_t hbtype;
+ float radius;
+ float hphb;
+ float elsc;
+} FFParams;
+
+#ifndef PPWI
+#define PPWI (64)
+#endif
+
+#ifndef ITERS
+#define ITERS 8
+#endif
+
+#define DIFF_TOLERANCE_PCT 0.025f
+
+#define POSES_SIZE 393216
+#define PROTEIN_SIZE 938
+#define LIGAND_SIZE 26
+#define FORCEFIELD_SIZE 34
+
+#define ZERO 0.0f
+#define QUARTER 0.25f
+#define HALF 0.5f
+#define ONE 1.0f
+#define TWO 2.0f
+#define FOUR 4.0f
+#define CNSTNT 45.0f
+
+// Energy evaluation parameters
+#define HBTYPE_F 70
+#define HBTYPE_E 69
+#define HARDNESS 38.0f
+#define NPNPDIST 5.5f
+#define NPPDIST 1.0f
+
+void
+fasten_main(size_t group, size_t ntypes, size_t nposes, size_t natlig, size_t natpro, //
+ const Atom *protein, const Atom *ligand, //
+ const float *transforms_0, const float *transforms_1, const float *transforms_2, //
+ const float *transforms_3, const float *transforms_4, const float *transforms_5, //
+ const FFParams *forcefield, float *energies //
+) {
+
+ float etot[PPWI];
+ float lpos_x[PPWI];
+
+ for (int l = 0; l < PPWI; l++) {
+ etot[l] = 0.f;
+ lpos_x[l] = 0.f;
+ }
+
+ // Loop over ligand atoms
+ for (int il = 0; il < natlig; il++) {
+ // Load ligand atom data
+ const Atom l_atom = ligand[il];
+ const FFParams l_params = forcefield[l_atom.type];
+ const int lhphb_ltz = l_params.hphb < 0.f;
+ const int lhphb_gtz = l_params.hphb > 0.f;
+
+ // Transform ligand atom
+
+ // Loop over protein atoms
+ for (int ip = 0; ip < natpro; ip++) {
+ // Load protein atom data
+ const Atom p_atom = protein[ip];
+ const FFParams p_params = forcefield[p_atom.type];
+
+ const float radij = p_params.radius + l_params.radius;
+ const float r_radij = ONE / radij;
+
+ const float elcdst = (p_params.hbtype == HBTYPE_F && l_params.hbtype == HBTYPE_F) ? FOUR
+ : TWO;
+ const float elcdst1 = (p_params.hbtype == HBTYPE_F && l_params.hbtype == HBTYPE_F)
+ ? QUARTER : HALF;
+ const int type_E = ((p_params.hbtype == HBTYPE_E || l_params.hbtype == HBTYPE_E));
+
+ const int phphb_ltz = p_params.hphb < 0.f;
+ const int phphb_gtz = p_params.hphb > 0.f;
+ const int phphb_nz = p_params.hphb != 0.f;
+ const float p_hphb = p_params.hphb * (phphb_ltz && lhphb_gtz ? -ONE : ONE);
+ const float l_hphb = l_params.hphb * (phphb_gtz && lhphb_ltz ? -ONE : ONE);
+ const float distdslv = (phphb_ltz ? (lhphb_ltz ? NPNPDIST : NPPDIST) : (lhphb_ltz
+ ? NPPDIST
+ : -FLT_MAX));
+ const float r_distdslv = ONE / distdslv;
+
+ const float chrg_init = l_params.elsc * p_params.elsc;
+ const float dslv_init = p_hphb + l_hphb;
+
+ for (int l = 0; l < PPWI; l++) {
+ // Calculate distance between atoms
+ const float x = lpos_x[l] - p_atom.x;
+ const float distij = (x * x);
+
+ // Calculate the sum of the sphere radii
+ const float distbb = distij - radij;
+
+ const int zone1 = (distbb < ZERO);
+
+ // Calculate formal and dipole charge interactions
+ float chrg_e = chrg_init * ((zone1 ? ONE : (ONE - distbb * elcdst1)) *
+ (distbb < elcdst ? ONE : ZERO));
+ float neg_chrg_e = -fabsf(chrg_e);
+ chrg_e = type_E ? neg_chrg_e : chrg_e;
+ etot[l] += chrg_e * CNSTNT;
+ }
+ }
+ }
+
+ // Write result
+ for (int l = 0; l < PPWI; l++) {
+ energies[group * PPWI + l] = etot[l] * HALF;
+ }
+}
+
+/* { dg-final { scan-tree-dump-times {\.COND_MUL} 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times {\.VCOND} 1 "optimized" } } */