aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r--gcc/DATESTAMP2
-rw-r--r--gcc/config/i386/i386-expand.cc44
-rw-r--r--gcc/testsuite/ChangeLog6
-rw-r--r--gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c71
-rw-r--r--gcc/testsuite/gcc.dg/rtl/i386/vector_eq-3.c74
-rw-r--r--gcc/testsuite/gcc.target/i386/recip-vec-divf-fma.c12
6 files changed, 196 insertions, 13 deletions
diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP
index f21e9cc..2aaf995 100644
--- a/gcc/DATESTAMP
+++ b/gcc/DATESTAMP
@@ -1 +1 @@
-20250420
+20250421
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index cdfd94d..36f71eb 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -19256,8 +19256,6 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
e1 = gen_reg_rtx (mode);
x1 = gen_reg_rtx (mode);
- /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
-
b = force_reg (mode, b);
/* x0 = rcp(b) estimate */
@@ -19270,20 +19268,42 @@ ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
UNSPEC_RCP)));
- /* e0 = x0 * b */
- emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
+ unsigned vector_size = GET_MODE_SIZE (mode);
+
+ /* (a - (rcp(b) * a * b)) * rcp(b) + rcp(b) * a
+ N-R step with 2 fma implementation. */
+ if (TARGET_FMA
+ || (TARGET_AVX512F && vector_size == 64)
+ || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16)))
+ {
+ /* e0 = x0 * a */
+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, a)));
+ /* e1 = e0 * b - a */
+ emit_insn (gen_rtx_SET (e1, gen_rtx_FMA (mode, e0, b,
+ gen_rtx_NEG (mode, a))));
+ /* res = - e1 * x0 + e0 */
+ emit_insn (gen_rtx_SET (res, gen_rtx_FMA (mode,
+ gen_rtx_NEG (mode, e1),
+ x0, e0)));
+ }
+ else
+ /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
+ {
+ /* e0 = x0 * b */
+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, b)));
- /* e0 = x0 * e0 */
- emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
+ /* e1 = x0 + x0 */
+ emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
- /* e1 = x0 + x0 */
- emit_insn (gen_rtx_SET (e1, gen_rtx_PLUS (mode, x0, x0)));
+ /* e0 = x0 * e0 */
+ emit_insn (gen_rtx_SET (e0, gen_rtx_MULT (mode, x0, e0)));
- /* x1 = e1 - e0 */
- emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
+ /* x1 = e1 - e0 */
+ emit_insn (gen_rtx_SET (x1, gen_rtx_MINUS (mode, e1, e0)));
- /* res = a * x1 */
- emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
+ /* res = a * x1 */
+ emit_insn (gen_rtx_SET (res, gen_rtx_MULT (mode, a, x1)));
+ }
}
/* Output code to perform a Newton-Rhapson approximation of a
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 3b3c0c1..521176a 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2025-04-20 H.J. Lu <hjl.tools@gmail.com>
+
+ PR target/117863
+ * gcc.dg/rtl/i386/vector_eq-2.c: New test.
+ * gcc.dg/rtl/i386/vector_eq-3.c: Likewise.
+
2025-04-19 Thomas Schwinge <tschwinge@baylibre.com>
PR testsuite/119508
diff --git a/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c
new file mode 100644
index 0000000..871d489
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-2.c
@@ -0,0 +1,71 @@
+/* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-additional-options "-O2 -march=x86-64-v3" } */
+
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v8si __attribute__((vector_size(32)));
+typedef int v2di __attribute__((vector_size(16)));
+
+v4si __RTL (startwith ("vregs1")) foo1 (void)
+{
+(function "foo1"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cnote 2 NOTE_INSN_FUNCTION_BEG)
+ (cinsn 3 (set (reg:V4SI <0>) (const_vector:V4SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1)])))
+ (cinsn 4 (set (reg:V4SI <1>) (const_vector:V4SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1)])))
+ (cinsn 5 (set (reg:V4SI <2>)
+ (eq:V4SI (reg:V4SI <0>) (reg:V4SI <1>))))
+ (cinsn 6 (set (reg:V4SI <3>) (reg:V4SI <2>)))
+ (cinsn 7 (set (reg:V4SI xmm0) (reg:V4SI <3>)))
+ (edge-to exit (flags "FALLTHRU"))
+ )
+ )
+ (crtl (return_rtx (reg/i:V4SI xmm0)))
+)
+}
+
+v8si __RTL (startwith ("vregs1")) foo2 (void)
+{
+(function "foo2"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cnote 2 NOTE_INSN_FUNCTION_BEG)
+ (cinsn 3 (set (reg:V8SI <0>) (const_vector:V8SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1)])))
+ (cinsn 4 (set (reg:V8SI <1>) (const_vector:V8SI [(const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1) (const_int -1)])))
+ (cinsn 5 (set (reg:V8SI <2>)
+ (eq:V8SI (reg:V8SI <0>) (reg:V8SI <1>))))
+ (cinsn 6 (set (reg:V8SI <3>) (reg:V8SI <2>)))
+ (cinsn 7 (set (reg:V8SI xmm0) (reg:V8SI <3>)))
+ (edge-to exit (flags "FALLTHRU"))
+ )
+ )
+ (crtl (return_rtx (reg/i:V8SI xmm0)))
+)
+}
+
+v2di __RTL (startwith ("vregs1")) foo3 (void)
+{
+(function "foo3"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cnote 2 NOTE_INSN_FUNCTION_BEG)
+ (cinsn 3 (set (reg:V2DI <0>) (const_vector:V2DI [(const_int -1) (const_int -1)])))
+ (cinsn 4 (set (reg:V2DI <1>) (const_vector:V2DI [(const_int -1) (const_int -1)])))
+ (cinsn 5 (set (reg:V2DI <2>)
+ (eq:V2DI (reg:V2DI <0>) (reg:V2DI <1>))))
+ (cinsn 6 (set (reg:V2DI <3>) (reg:V2DI <2>)))
+ (cinsn 7 (set (reg:V2DI xmm0) (reg:V2DI <3>)))
+ (edge-to exit (flags "FALLTHRU"))
+ )
+ )
+ (crtl (return_rtx (reg/i:V2DI xmm0)))
+)
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeq" 3 } } */
diff --git a/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-3.c b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-3.c
new file mode 100644
index 0000000..276c4c2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/i386/vector_eq-3.c
@@ -0,0 +1,74 @@
+/* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */
+/* { dg-additional-options "-O2 -march=x86-64-v3" } */
+
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v8si __attribute__((vector_size(32)));
+typedef int v2di __attribute__((vector_size(16)));
+
+v4si __RTL (startwith ("vregs1")) foo1 (void)
+{
+(function "foo1"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cnote 2 NOTE_INSN_FUNCTION_BEG)
+ (cinsn 3 (set (reg:V4SI <1>)
+ (mem:V4SI (reg:SI di) [0 ptr S128 A128])))
+ (cinsn 4 (set (reg:V4SI <2>)
+ (eq:V4SI (reg:V4SI <1>)
+ (mem:V4SI (reg:SI di) [0 ptr S128 A128]))))
+ (cinsn 5 (set (reg:V4SI <3>) (reg:V4SI <2>)))
+ (cinsn 6 (set (reg:V4SI xmm0) (reg:V4SI <3>)))
+ (edge-to exit (flags "FALLTHRU"))
+ )
+ )
+ (crtl (return_rtx (reg/i:V4SI xmm0)))
+)
+}
+
+v8si __RTL (startwith ("vregs1")) foo2 (void)
+{
+(function "foo2"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cnote 2 NOTE_INSN_FUNCTION_BEG)
+ (cinsn 3 (set (reg:V8SI <1>)
+ (mem:V8SI (reg:SI di) [0 ptr S256 A256])))
+ (cinsn 4 (set (reg:V8SI <2>)
+ (eq:V8SI (mem:V8SI (reg:SI di) [0 ptr S256 A256])
+ (reg:V8SI <1>))))
+ (cinsn 5 (set (reg:V8SI <3>) (reg:V8SI <2>)))
+ (cinsn 6 (set (reg:V8SI xmm0) (reg:V8SI <3>)))
+ (edge-to exit (flags "FALLTHRU"))
+ )
+ )
+ (crtl (return_rtx (reg/i:V8SI xmm0)))
+)
+}
+
+v2di __RTL (startwith ("vregs1")) foo3 (void)
+{
+(function "foo3"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 1 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cnote 2 NOTE_INSN_FUNCTION_BEG)
+ (cinsn 3 (set (reg:V2DI <1>)
+ (mem:V2DI (reg:SI di) [0 ptr S128 A128])))
+ (cinsn 4 (set (reg:V2DI <2>)
+ (eq:V2DI (reg:V2DI <1>)
+ (mem:V2DI (reg:SI di) [0 ptr S128 A128]))))
+ (cinsn 5 (set (reg:V2DI <3>) (reg:V2DI <2>)))
+ (cinsn 6 (set (reg:V2DI xmm0) (reg:V2DI <3>)))
+ (edge-to exit (flags "FALLTHRU"))
+ )
+ )
+ (crtl (return_rtx (reg/i:V2DI xmm0)))
+)
+}
+
+/* { dg-final { scan-assembler-times "vpcmpeq" 3 } } */
diff --git a/gcc/testsuite/gcc.target/i386/recip-vec-divf-fma.c b/gcc/testsuite/gcc.target/i386/recip-vec-divf-fma.c
new file mode 100644
index 0000000..ad9e07b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/recip-vec-divf-fma.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mfma -mavx2" } */
+/* { dg-final { scan-assembler-times {(?n)vfn?m(add|sub)[1-3]*ps} 2 } } */
+
+typedef float v4sf __attribute__((vector_size(16)));
+/* (a - (rcp(b) * a * b)) * rcp(b) + rcp(b) * a */
+
+v4sf
+foo (v4sf a, v4sf b)
+{
+ return a / b;
+}