aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Vladimir Makarov <vmakarov@redhat.com> 2018-01-12 17:00:36 +0000
committer: Vladimir Makarov <vmakarov@gcc.gnu.org> 2018-01-12 17:00:36 +0000
commit: 0550a77b6e45a2a1c7da160ea32c518e25d5ca97 (patch)
tree: 941b5cfcf890eabde74a4a4c072bc64d1c2a0868 /gcc
parent: f7aad330fdf138d1bc9c2744c983b9734430a856 (diff)
download: gcc-0550a77b6e45a2a1c7da160ea32c518e25d5ca97.zip
download: gcc-0550a77b6e45a2a1c7da160ea32c518e25d5ca97.tar.gz
download: gcc-0550a77b6e45a2a1c7da160ea32c518e25d5ca97.tar.bz2
re PR rtl-optimization/80481 (Unoptimal additional copy instructions)
2018-01-12  Vladimir Makarov  <vmakarov@redhat.com>

    PR rtl-optimization/80481
    * ira-color.c (get_cap_member): New function.
    (allocnos_conflict_by_live_ranges_p): Use it.
    (slot_coalesced_allocno_live_ranges_intersect_p): Add assert.
    (setup_slot_coalesced_allocno_live_ranges): Ditto.

2018-01-12  Vladimir Makarov  <vmakarov@redhat.com>

    PR rtl-optimization/80481
    * g++.dg/pr80481.C: New.

From-SVN: r256590
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 8
-rw-r--r-- gcc/ira-color.c 19
-rw-r--r-- gcc/testsuite/ChangeLog 5
-rw-r--r-- gcc/testsuite/g++.dg/pr80481.C 70
4 files changed, 101 insertions, 1 deletion
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 05cc049..7483549 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2018-01-12 Vladimir Makarov <vmakarov@redhat.com>
+
+ PR rtl-optimization/80481
+ * ira-color.c (get_cap_member): New function.
+ (allocnos_conflict_by_live_ranges_p): Use it.
+ (slot_coalesced_allocno_live_ranges_intersect_p): Add assert.
+ (setup_slot_coalesced_allocno_live_ranges): Ditto.
+
2018-01-12 Uros Bizjak <ubizjak@gmail.com>
PR target/83628
diff --git a/gcc/ira-color.c b/gcc/ira-color.c
index 43f5d57..c8b6ab4 100644
--- a/gcc/ira-color.c
+++ b/gcc/ira-color.c
@@ -1905,6 +1905,18 @@ assign_hard_reg (ira_allocno_t a, bool retry_p)
/* An array used to sort copies. */
static ira_copy_t *sorted_copies;
+/* Follow ALLOCNO_CAP_MEMBER links from A until one is NULL. Return the
+ non-cap allocno from which cap A was created, or A if it is not a cap. */
+static ira_allocno_t
+get_cap_member (ira_allocno_t a)
+{
+ ira_allocno_t member;
+
+ while ((member = ALLOCNO_CAP_MEMBER (a)) != NULL)
+ a = member;
+ return a;
+}
+
/* Return TRUE if live ranges of allocnos A1 and A2 intersect. It is
used to find a conflict for new allocnos or allocnos with the
different allocno classes. */
@@ -1924,6 +1936,10 @@ allocnos_conflict_by_live_ranges_p (ira_allocno_t a1, ira_allocno_t a2)
&& ORIGINAL_REGNO (reg1) == ORIGINAL_REGNO (reg2))
return false;
+ /* We don't keep live ranges for caps because they can be quite big.
+ Use ranges of non-cap allocno from which caps are created. */
+ a1 = get_cap_member (a1);
+ a2 = get_cap_member (a2);
for (i = 0; i < n1; i++)
{
ira_object_t c1 = ALLOCNO_OBJECT (a1, i);
@@ -4027,7 +4043,7 @@ slot_coalesced_allocno_live_ranges_intersect_p (ira_allocno_t allocno, int n)
{
int i;
int nr = ALLOCNO_NUM_OBJECTS (a);
-
+ gcc_assert (ALLOCNO_CAP_MEMBER (a) == NULL);
for (i = 0; i < nr; i++)
{
ira_object_t obj = ALLOCNO_OBJECT (a, i);
@@ -4057,6 +4073,7 @@ setup_slot_coalesced_allocno_live_ranges (ira_allocno_t allocno)
a = ALLOCNO_COALESCE_DATA (a)->next)
{
int nr = ALLOCNO_NUM_OBJECTS (a);
+ gcc_assert (ALLOCNO_CAP_MEMBER (a) == NULL);
for (i = 0; i < nr; i++)
{
ira_object_t obj = ALLOCNO_OBJECT (a, i);
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 64e14f2..b843cf6 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2018-01-12 Vladimir Makarov <vmakarov@redhat.com>
+
+ PR rtl-optimization/80481
+ * g++.dg/pr80481.C: New.
+
2018-01-12 Uros Bizjak <ubizjak@gmail.com>
PR target/83628
diff --git a/gcc/testsuite/g++.dg/pr80481.C b/gcc/testsuite/g++.dg/pr80481.C
new file mode 100644
index 0000000..316da0f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr80481.C
@@ -0,0 +1,70 @@
+// { dg-do compile { target i?86-*-* x86_64-*-* } }
+// { dg-options "-Ofast -funroll-loops -fopenmp -march=knl" }
+// { dg-final { scan-assembler-not "vmovaps" } }
+
+#include <math.h>
+
+#include <xmmintrin.h>
+
+#define max(a, b) ( (a) > (b) ? (a) : (b) ) /* function-like macro: the selected operand is evaluated twice */
+
+struct Sdata { // input record; foo() below reads all five fields of in[k]
+ float w;
+ float s;
+ float r;
+ float t;
+ float v;
+};
+ extern int N1, N2, N3; // defined elsewhere; N1 sizes foo's buffers, N2 and N3 bound its loops
+
+#define func(p, up, down) ((p)*(up) + (1.0f-(p)) * (down)) /* weighted average: weight p on up, (1 - p) on down */
+
+void foo (Sdata *in, int idx, float *out) // for each k in [idx, idx + N3): reads in[k], writes out[k]
+{
+ float* y1 = (float*)_mm_malloc(sizeof(float) * N1,16); // four scratch arrays of N1 floats each, 16-byte alignment requested
+ float* y2 = (float*)_mm_malloc(sizeof(float) * N1,16);
+ float* y3 = (float*)_mm_malloc(sizeof(float) * N1,16);
+ float* y4 = (float*)_mm_malloc(sizeof(float) * N1,16); // NOTE(review): results are not NULL-checked and indices below reach N2 -- presumably N1 > N2; confirm
+
+ for (int k = idx; k < idx + N3; k++) {
+ float x1 = in[k].r;
+ float x2 = in[k].s;
+ float x3 = in[k].w;
+ float x4 = in[k].v;
+ float x5 = in[k].t;
+ x5 /= N2; // x5 is now t / N2
+ float u = exp(x4 * sqrt(x5));
+ float d = exp(-x4 * sqrt(x5));
+ float a = exp(x1 * x5);
+ float m = exp(-x1 * x5);
+ float p = (a - d) / (u - d);
+ y2[0] = x2; // seeds for the two recurrences filled in by the next loop
+ y3[0] = float(1.f);
+ for (int i = 1; i <= N2; i++) { // each element depends on its predecessor, so this loop is serial
+ y2[i] = u * y2[i - 1];
+ y3[i] = d * y3[i - 1];
+ }
+#pragma omp simd
+ for (int i = 0; i <= N2; i++) { // initial y1[0..N2], clamped below at zero
+ y1[i] =
+ max((x3 - y2[N2 - i] * y3[i]), float(0.f));
+ }
+ for (int i = N2 - 1; i >= 0; i--) { // i counts down from N2 - 1 to 0; each pass rewrites y1[0..i]
+#pragma omp simd
+ for (int j = 0; j <= i; j++) { // y4[j] combines neighbours y1[j] and y1[j+1], scaled by m
+ y4[j] = func(p,y1[j],y1[j+1]) * m;
+ }
+#pragma omp simd
+ for (int j = 0; j <= i; j++) { // y1[j] becomes the larger of the clamped difference and y4[j]
+ float t1 = y2[i - j] * y3[j];
+ float t2 = max(x3 - t1, float(0.f));
+ y1[j] = max(t2, y4[j]);
+ }
+ }
+ out[k] = y1[0]; // result for record k
+ }
+ _mm_free(y1); // release the scratch arrays
+ _mm_free(y2);
+ _mm_free(y3);
+ _mm_free(y4);
+}