about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: H.J. Lu <hongjiu.lu@intel.com> 2010-01-04 15:14:31 +0000
committer: H.J. Lu <hjl@gcc.gnu.org> 2010-01-04 07:14:31 -0800
commit: 22554cf9376fc1253adb75a290c4a38f82562b5c (patch)
tree: b2043ea8607dfc88c5b0002568c4d524baae9392
parent: 7152f51efdcd5f6d9643dc182055873f3eb5a4bd (diff)
download: gcc-22554cf9376fc1253adb75a290c4a38f82562b5c.zip
          gcc-22554cf9376fc1253adb75a290c4a38f82562b5c.tar.gz
          gcc-22554cf9376fc1253adb75a290c4a38f82562b5c.tar.bz2
Don't convert GTU to GT for V4SI and V2DI
gcc/

2010-01-04  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/42542
	* config/i386/i386.c (ix86_expand_int_vcond): Don't convert
	GTU to GT for V4SI and V2DI.

	* config/i386/sse.md (umaxv4si3): Enabled for SSE4.1 and XOP.
	(umin<mode>3): Removed.
	(uminv8hi3): New.
	(uminv4si3): Likewise.

gcc/testsuite/

2010-01-04  H.J. Lu  <hongjiu.lu@intel.com>

	PR target/42542
	* gcc.target/i386/pr42542-1.c: New.
	* gcc.target/i386/pr42542-1a.c: Likewise.
	* gcc.target/i386/pr42542-1b.c: Likewise.
	* gcc.target/i386/pr42542-2.c: Likewise.
	* gcc.target/i386/pr42542-2a.c: Likewise.
	* gcc.target/i386/pr42542-2b.c: Likewise.
	* gcc.target/i386/pr42542-3.c: Likewise.
	* gcc.target/i386/pr42542-3a.c: Likewise.

From-SVN: r155618
-rw-r--r--  gcc/ChangeLog                                11
-rw-r--r--  gcc/config/i386/i386.c                       35
-rw-r--r--  gcc/config/i386/sse.md                       39
-rw-r--r--  gcc/testsuite/ChangeLog                      12
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr42542-1.c    77
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr42542-1a.c    8
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr42542-1b.c   10
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr42542-2.c    77
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr42542-2a.c    8
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr42542-2b.c   10
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr42542-3.c    85
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr42542-3a.c    7
12 files changed, 339 insertions, 40 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 807a34b..11adcd0 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,16 @@
2010-01-04 H.J. Lu <hongjiu.lu@intel.com>
+ PR target/42542
+ * config/i386/i386.c (ix86_expand_int_vcond): Don't convert
+ GTU to GT for V4SI and V2DI.
+
+ * config/i386/sse.md (umaxv4si3): Enabled for SSE4.1 and XOP.
+ (umin<mode>3): Removed.
+ (uminv8hi3): New.
+ (uminv4si3): Likewise.
+
+2010-01-04 H.J. Lu <hongjiu.lu@intel.com>
+
PR lto/42581
* collect2.c (main): Turn on trace in collect2 if -v is passed
to gcc with LTO.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 3e15b9d..4d6d51c 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1,6 +1,6 @@
/* Subroutines used for code generation on IA-32.
- Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
- 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+ Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+ 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
Free Software Foundation, Inc.
This file is part of GCC.
@@ -16252,37 +16252,6 @@ ix86_expand_int_vcond (rtx operands[])
switch (mode)
{
- case V4SImode:
- case V2DImode:
- {
- rtx t1, t2, mask;
-
- /* Perform a parallel modulo subtraction. */
- t1 = gen_reg_rtx (mode);
- emit_insn ((mode == V4SImode
- ? gen_subv4si3
- : gen_subv2di3) (t1, cop0, cop1));
-
- /* Extract the original sign bit of op0. */
- mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
- true, false);
- t2 = gen_reg_rtx (mode);
- emit_insn ((mode == V4SImode
- ? gen_andv4si3
- : gen_andv2di3) (t2, cop0, mask));
-
- /* XOR it back into the result of the subtraction.
- This results in the sign bit set iff we saw
- unsigned underflow. */
- x = gen_reg_rtx (mode);
- emit_insn ((mode == V4SImode
- ? gen_xorv4si3
- : gen_xorv2di3) (x, t1, t2));
-
- code = GT;
- }
- break;
-
case V16QImode:
case V8HImode:
/* Perform a parallel unsigned saturating subtraction. */
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 9bbea80..3461b8d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1,5 +1,5 @@
;; GCC machine description for SSE instructions
-;; Copyright (C) 2005, 2006, 2007, 2008, 2009
+;; Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
;; Free Software Foundation, Inc.
;;
;; This file is part of GCC.
@@ -6138,7 +6138,7 @@
[(set (match_operand:V4SI 0 "register_operand" "")
(umax:V4SI (match_operand:V4SI 1 "register_operand" "")
(match_operand:V4SI 2 "register_operand" "")))]
- "TARGET_SSE2"
+ "TARGET_SSE4_1 || TARGET_XOP"
{
if (TARGET_SSE4_1)
ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
@@ -6195,14 +6195,39 @@
}
})
-(define_expand "umin<mode>3"
- [(set (match_operand:SSEMODE24 0 "register_operand" "")
- (umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
- (match_operand:SSEMODE24 2 "register_operand" "")))]
+(define_expand "uminv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (umin:V8HI (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "register_operand" "")))]
"TARGET_SSE2"
{
if (TARGET_SSE4_1)
- ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
+ ix86_fixup_binary_operands_no_copy (UMIN, V8HImode, operands);
+ else
+ {
+ rtx xops[6];
+ bool ok;
+
+ xops[0] = operands[0];
+ xops[1] = operands[2];
+ xops[2] = operands[1];
+ xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+ ok = ix86_expand_int_vcond (xops);
+ gcc_assert (ok);
+ DONE;
+ }
+})
+
+(define_expand "uminv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (umin:V4SI (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")))]
+ "TARGET_SSE4_1 || TARGET_XOP"
+{
+ if (TARGET_SSE4_1)
+ ix86_fixup_binary_operands_no_copy (UMIN, V4SImode, operands);
else
{
rtx xops[6];
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index c16e2d4..44f692c 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,15 @@
+2010-01-04 H.J. Lu <hongjiu.lu@intel.com>
+
+ PR target/42542
+ * gcc.target/i386/pr42542-1.c: New.
+ * gcc.target/i386/pr42542-1a.c: Likewise.
+ * gcc.target/i386/pr42542-1b.c: Likewise.
+ * gcc.target/i386/pr42542-2.c: Likewise.
+ * gcc.target/i386/pr42542-2a.c: Likewise.
+ * gcc.target/i386/pr42542-2b.c: Likewise.
+ * gcc.target/i386/pr42542-3.c: Likewise.
+ * gcc.target/i386/pr42542-3a.c: Likewise.
+
2009-01-04 Tobias Burnus <burnus@net-b.de>
PR fortran/41872
diff --git a/gcc/testsuite/gcc.target/i386/pr42542-1.c b/gcc/testsuite/gcc.target/i386/pr42542-1.c
new file mode 100644
index 0000000..6e115c3f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr42542-1.c
@@ -0,0 +1,77 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+unsigned int v1[] __attribute__ ((aligned(16))) =
+{
+ 0x80000000, 1, 0xa0000000, 2,
+ 3, 0xd0000000, 0xf0000000, 0xe0000000
+};
+unsigned int v2[] __attribute__ ((aligned(16))) =
+{
+ 4, 0xb0000000, 5, 0xc0000000,
+ 0xd0000000, 6, 7, 8
+};
+
+unsigned int max[] =
+{
+ 0x80000000, 0xb0000000, 0xa0000000, 0xc0000000,
+ 0xd0000000, 0xd0000000, 0xf0000000, 0xe0000000
+};
+
+unsigned int min[] =
+{
+ 4, 1, 5, 2,
+ 3, 6, 7, 8
+};
+
+unsigned int res[16] __attribute__ ((aligned(16)));
+
+extern void abort (void);
+
+void
+find_max (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
+}
+
+void
+find_min (void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++)
+ res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
+}
+
+static void
+TEST (void)
+{
+ int i;
+ int err = 0;
+
+ find_max ();
+ for (i = 0; i < 8; i++)
+ if (res[i] != max[i])
+ err++;
+
+ find_min ();
+ for (i = 0; i < 8; i++)
+ if (res[i] != min[i])
+ err++;
+
+ if (err)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr42542-1a.c b/gcc/testsuite/gcc.target/i386/pr42542-1a.c
new file mode 100644
index 0000000..cd77175
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr42542-1a.c
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "pr42542-1.c"
diff --git a/gcc/testsuite/gcc.target/i386/pr42542-1b.c b/gcc/testsuite/gcc.target/i386/pr42542-1b.c
new file mode 100644
index 0000000..7651f07
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr42542-1b.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "pr42542-1.c"
+
+/* { dg-final { scan-assembler "pmaxud" } } */
+/* { dg-final { scan-assembler "pminud" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr42542-2.c b/gcc/testsuite/gcc.target/i386/pr42542-2.c
new file mode 100644
index 0000000..fc59534
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr42542-2.c
@@ -0,0 +1,77 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+unsigned short v1[] __attribute__ ((aligned(16))) =
+{
+ 0x8000, 0x9000, 1, 10, 0xa000, 0xb000, 2, 20,
+ 3, 30, 0xd000, 0xe000, 0xf000, 0xe000, 25, 30
+};
+unsigned short v2[] __attribute__ ((aligned(16))) =
+{
+ 4, 40, 0xb000, 0x8000, 5, 50, 0xc000, 0xf000,
+ 0xd000, 0xa000, 6, 65, 7, 75, 0xe000, 0xc000
+};
+
+unsigned short max[] =
+{
+ 0x8000, 0x9000, 0xb000, 0x8000, 0xa000, 0xb000, 0xc000, 0xf000,
+ 0xd000, 0xa000, 0xd000, 0xe000, 0xf000, 0xe000, 0xe000, 0xc000
+};
+
+unsigned short min[] =
+{
+ 4, 40, 1, 10, 5, 50, 2, 20,
+ 3, 30, 6, 65, 7, 75, 25, 30
+};
+
+unsigned short res[16] __attribute__ ((aligned(16)));
+
+extern void abort (void);
+
+void
+find_max (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
+}
+
+void
+find_min (void)
+{
+ int i;
+
+ for (i = 0; i < 16; i++)
+ res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
+}
+
+static void
+TEST (void)
+{
+ int i;
+ int err = 0;
+
+ find_max ();
+ for (i = 0; i < 16; i++)
+ if (res[i] != max[i])
+ err++;
+
+ find_min ();
+ for (i = 0; i < 16; i++)
+ if (res[i] != min[i])
+ err++;
+
+ if (err)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr42542-2a.c b/gcc/testsuite/gcc.target/i386/pr42542-2a.c
new file mode 100644
index 0000000..bcefa9c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr42542-2a.c
@@ -0,0 +1,8 @@
+/* { dg-do run } */
+/* { dg-require-effective-target sse4 } */
+/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "pr42542-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/pr42542-2b.c b/gcc/testsuite/gcc.target/i386/pr42542-2b.c
new file mode 100644
index 0000000..ddb539b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr42542-2b.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -msse4.1 -ftree-vectorize" } */
+
+#define CHECK_H "sse4_1-check.h"
+#define TEST sse4_1_test
+
+#include "pr42542-2.c"
+
+/* { dg-final { scan-assembler "pmaxuw" } } */
+/* { dg-final { scan-assembler "pminuw" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr42542-3.c b/gcc/testsuite/gcc.target/i386/pr42542-3.c
new file mode 100644
index 0000000..028d2f8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr42542-3.c
@@ -0,0 +1,85 @@
+/* { dg-do run } */
+/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
+
+#ifndef CHECK_H
+#define CHECK_H "sse2-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse2_test
+#endif
+
+#include CHECK_H
+
+unsigned char v1[] __attribute__ ((aligned(16))) =
+{
+ 0x80, 0xd0, 0x90, 0xa0, 1, 15, 10, 15,
+ 0xa0, 0xc0, 0xb0, 0xf0, 2, 25, 20, 35,
+ 3, 34, 30, 36, 0xd0, 0x80, 0xe0, 0xb0,
+ 0xf0, 0xe0, 0xe0, 0x80, 25, 34, 30, 40
+};
+unsigned char v2[] __attribute__ ((aligned(16))) =
+{
+ 4, 44, 40, 48, 0xb0, 0x80, 0x80, 0x90,
+ 5, 55, 50, 51, 0xc0, 0xb0, 0xf0, 0xd0,
+ 0xd0, 0x80, 0xa0, 0xf0, 6, 61, 65, 68,
+ 7, 76, 75, 81, 0xe0, 0xf0, 0xc0, 0x90
+};
+
+unsigned char max[] =
+{
+ 0x80, 0xd0, 0x90, 0xa0, 0xb0, 0x80, 0x80, 0x90,
+ 0xa0, 0xc0, 0xb0, 0xf0, 0xc0, 0xb0, 0xf0, 0xd0,
+ 0xd0, 0x80, 0xa0, 0xf0, 0xd0, 0x80, 0xe0, 0xb0,
+ 0xf0, 0xe0, 0xe0, 0x80, 0xe0, 0xf0, 0xc0, 0x90
+};
+
+unsigned char min[] =
+{
+ 4, 44, 40, 48, 1, 15, 10, 15,
+ 5, 55, 50, 51, 2, 25, 20, 35,
+ 3, 34, 30, 36, 6, 61, 65, 68,
+ 7, 76, 75, 81, 25, 34, 30, 40
+};
+
+unsigned char res[32] __attribute__ ((aligned(16)));
+
+extern void abort (void);
+
+void
+find_max (void)
+{
+ int i;
+
+ for (i = 0; i < 32; i++)
+ res[i] = v1[i] < v2[i] ? v2[i] : v1[i];
+}
+
+void
+find_min (void)
+{
+ int i;
+
+ for (i = 0; i < 32; i++)
+ res[i] = v1[i] > v2[i] ? v2[i] : v1[i];
+}
+
+static void
+TEST (void)
+{
+ int i;
+ int err = 0;
+
+ find_max ();
+ for (i = 0; i < 32; i++)
+ if (res[i] != max[i])
+ err++;
+
+ find_min ();
+ for (i = 0; i < 32; i++)
+ if (res[i] != min[i])
+ err++;
+
+ if (err)
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr42542-3a.c b/gcc/testsuite/gcc.target/i386/pr42542-3a.c
new file mode 100644
index 0000000..754e59e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr42542-3a.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -msse2 -ftree-vectorize" } */
+
+#include "pr42542-3.c"
+
+/* { dg-final { scan-assembler "pmaxub" } } */
+/* { dg-final { scan-assembler "pminub" } } */