From 48f0f297774bbc38ae8cb8bf12212e124fe479ad Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Mon, 17 Apr 2023 13:52:56 +0200 Subject: Bump BASE-VER. 2023-04-17 Jakub Jelinek * BASE-VER: Set to 14.0.0. --- gcc/BASE-VER | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/BASE-VER b/gcc/BASE-VER index 5cb7d85..4b964e9 100644 --- a/gcc/BASE-VER +++ b/gcc/BASE-VER @@ -1 +1 @@ -13.0.1 +14.0.0 -- cgit v1.1 From 2a7f0eb7340599386daf48dbea0892056eaabea2 Mon Sep 17 00:00:00 2001 From: Martin Liska Date: Mon, 17 Apr 2023 14:50:19 +0200 Subject: ada: bump Library_Version to 14. gcc/ada/ChangeLog: * gnatvsn.ads: Bump Library_Version to 14. --- gcc/ada/gnatvsn.ads | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ada/gnatvsn.ads b/gcc/ada/gnatvsn.ads index b6edc9d..a0e61e1 100644 --- a/gcc/ada/gnatvsn.ads +++ b/gcc/ada/gnatvsn.ads @@ -32,7 +32,7 @@ package Gnatvsn is -- Static string identifying this version, that can be used as an argument -- to e.g. pragma Ident. - Library_Version : constant String := "13"; + Library_Version : constant String := "14"; -- Library version. It needs to be updated whenever the major version -- number is changed. -- -- cgit v1.1 From 0ccf520d349a82dafca0deb3d307a1080e8589a0 Mon Sep 17 00:00:00 2001 From: Feng Wang Date: Sat, 15 Apr 2023 10:11:15 -0600 Subject: RISC-V: Optimize the reverse conditions of rotate shift gcc/ChangeLog: * config/riscv/bitmanip.md (rotrsi3_sext): Support generating roriw for constant counts. * rtl.h (reverse_rotate_by_imm_p): Add function declaration. * simplify-rtx.cc (reverse_rotate_by_imm_p): New function. (simplify_context::simplify_binary_operation_1): Use it. * expmed.cc (expand_shift_1): Likewise. gcc/testsuite/ChangeLog: * gcc.target/riscv/zbb-rol-ror-04.c: New test. * gcc.target/riscv/zbb-rol-ror-05.c: New test. * gcc.target/riscv/zbb-rol-ror-06.c: New test. * gcc.target/riscv/zbb-rol-ror-07.c: New test.
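Before the diff, one note on the transformation itself: it rests on the identity that rotating an N-bit value left by K is the same as rotating it right by N - K. A minimal standalone sketch of that identity (my own illustration, not part of the patch):

#include <assert.h>
#include <stdint.h>

/* Illustrative helpers; the count must stay in 1..63 so the
   complementary shift by 64 - n never becomes a shift by 64.  */
static uint64_t rotl64 (uint64_t x, unsigned n)
{ return (x << n) | (x >> (64 - n)); }
static uint64_t rotr64 (uint64_t x, unsigned n)
{ return (x >> n) | (x << (64 - n)); }

int main (void)
{
  uint64_t x = 0x0123456789abcdefULL;
  /* A rotate left by 30 equals a rotate right by 34, so the compiler
     may emit whichever direction has a usable immediate form (Zbb has
     rori/roriw but no rotate-left-by-immediate).  */
  assert (rotl64 (x, 30) == rotr64 (x, 34));
  return 0;
}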
--- gcc/config/riscv/bitmanip.md | 4 +- gcc/expmed.cc | 12 ++--- gcc/rtl.h | 1 + gcc/simplify-rtx.cc | 49 +++++++++++++++---- gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c | 52 ++++++++++++++++++++ gcc/testsuite/gcc.target/riscv/zbb-rol-ror-05.c | 24 ++++++++++ gcc/testsuite/gcc.target/riscv/zbb-rol-ror-06.c | 36 ++++++++++++++ gcc/testsuite/gcc.target/riscv/zbb-rol-ror-07.c | 64 +++++++++++++++++++++++++ 8 files changed, 224 insertions(+), 18 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-rol-ror-05.c create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-rol-ror-06.c create mode 100644 gcc/testsuite/gcc.target/riscv/zbb-rol-ror-07.c (limited to 'gcc') diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index 7aa5916..062968d 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -322,9 +322,9 @@ (define_insn "*rotrsi3_sext" [(set (match_operand:DI 0 "register_operand" "=r") (sign_extend:DI (rotatert:SI (match_operand:SI 1 "register_operand" "r") - (match_operand:QI 2 "register_operand" "r"))))] + (match_operand:QI 2 "arith_operand" "rI"))))] "TARGET_64BIT && (TARGET_ZBB || TARGET_ZBKB)" - "rorw\t%0,%1,%2" + "ror%i2%~\t%0,%1,%2" [(set_attr "type" "bitmanip")]) (define_insn "rotlsi3" diff --git a/gcc/expmed.cc b/gcc/expmed.cc index 1553ea8..fbd4ce2 100644 --- a/gcc/expmed.cc +++ b/gcc/expmed.cc @@ -2535,14 +2535,10 @@ expand_shift_1 (enum tree_code code, machine_mode mode, rtx shifted, op1 = SUBREG_REG (op1); } - /* Canonicalize rotates by constant amount. If op1 is bitsize / 2, - prefer left rotation, if op1 is from bitsize / 2 + 1 to - bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1 - amount instead. */ - if (rotate - && CONST_INT_P (op1) - && IN_RANGE (INTVAL (op1), GET_MODE_BITSIZE (scalar_mode) / 2 + left, - GET_MODE_BITSIZE (scalar_mode) - 1)) + /* Canonicalize rotates by constant amount. We may canonicalize + to reduce the immediate or if the ISA can rotate by constants + in only on direction. */ + if (rotate && reverse_rotate_by_imm_p (scalar_mode, left, op1)) { op1 = gen_int_shift_amount (mode, (GET_MODE_BITSIZE (scalar_mode) - INTVAL (op1))); diff --git a/gcc/rtl.h b/gcc/rtl.h index 52f0419..60852ae 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -3566,6 +3566,7 @@ extern bool val_signbit_known_set_p (machine_mode, unsigned HOST_WIDE_INT); extern bool val_signbit_known_clear_p (machine_mode, unsigned HOST_WIDE_INT); +extern bool reverse_rotate_by_imm_p (machine_mode, unsigned int, rtx); /* In reginfo.cc */ extern machine_mode choose_hard_reg_mode (unsigned int, unsigned int, diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc index ee75079..c57ff33 100644 --- a/gcc/simplify-rtx.cc +++ b/gcc/simplify-rtx.cc @@ -2741,6 +2741,44 @@ simplify_context::simplify_distributive_operation (rtx_code code, return NULL_RTX; } +/* Return TRUE if a rotate in mode MODE with a constant count in OP1 + should be reversed. + + If the rotate should not be reversed, return FALSE. + + LEFT indicates if this is a rotate left or a rotate right. */ + +bool +reverse_rotate_by_imm_p (machine_mode mode, unsigned int left, rtx op1) +{ + if (!CONST_INT_P (op1)) + return false; + + /* Some targets may only be able to rotate by a constant + in one direction. So we need to query the optab interface + to see what is possible. */ + optab binoptab = left ? rotl_optab : rotr_optab; + optab re_binoptab = left ? 
rotr_optab : rotl_optab; + enum insn_code icode = optab_handler (binoptab, mode); + enum insn_code re_icode = optab_handler (re_binoptab, mode); + + /* If the target can not support the reversed optab, then there + is nothing to do. */ + if (re_icode == CODE_FOR_nothing) + return false; + + /* If the target does not support the requested rotate-by-immediate, + then we want to try reversing the rotate. We also want to try + reversing to minimize the count. */ + if ((icode == CODE_FOR_nothing) + || (!insn_operand_matches (icode, 2, op1)) + || (IN_RANGE (INTVAL (op1), + GET_MODE_UNIT_PRECISION (mode) / 2 + left, + GET_MODE_UNIT_PRECISION (mode) - 1))) + return (insn_operand_matches (re_icode, 2, op1)); + return false; +} + /* Subroutine of simplify_binary_operation. Simplify a binary operation CODE with result mode MODE, operating on OP0 and OP1. If OP0 and/or OP1 are constant pool references, TRUEOP0 and TRUEOP1 represent the @@ -4098,15 +4136,10 @@ simplify_context::simplify_binary_operation_1 (rtx_code code, case ROTATE: if (trueop1 == CONST0_RTX (mode)) return op0; - /* Canonicalize rotates by constant amount. If op1 is bitsize / 2, - prefer left rotation, if op1 is from bitsize / 2 + 1 to - bitsize - 1, use other direction of rotate with 1 .. bitsize / 2 - 1 - amount instead. */ + /* Canonicalize rotates by constant amount. If the condition of + reversing direction is met, then reverse the direction. */ #if defined(HAVE_rotate) && defined(HAVE_rotatert) - if (CONST_INT_P (trueop1) - && IN_RANGE (INTVAL (trueop1), - GET_MODE_UNIT_PRECISION (mode) / 2 + (code == ROTATE), - GET_MODE_UNIT_PRECISION (mode) - 1)) + if (reverse_rotate_by_imm_p (mode, (code == ROTATE), trueop1)) { int new_amount = GET_MODE_UNIT_PRECISION (mode) - INTVAL (trueop1); rtx new_amount_rtx = gen_int_shift_amount (mode, new_amount); diff --git a/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c new file mode 100644 index 0000000..0805348 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-04.c @@ -0,0 +1,52 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zbb -mabi=lp64d -fno-lto -O2" } */ +/* { dg-skip-if "" { *-*-* } { "-g" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +/* +**foo1: +** rori a0,a0,34 +** ret +*/ +unsigned long foo1 (unsigned long rs1) +{ return (rs1 >> (34)) | (rs1 << 30); } + +/* +**foo2: +** rori a0,a0,54 +** ret +*/ +unsigned long foo2(unsigned long rs1) +{ + return (rs1 << 10) | (rs1 >> 54); +} + +/* +**foo3: +** roriw a0,a0,20 +** ret +*/ +unsigned int foo3(unsigned int rs1) +{ + return (rs1 >> 20) | (rs1 << 12); +} + +/* +**foo4: +** roriw a0,a0,22 +** ret +*/ +unsigned int foo4(unsigned int rs1) +{ + return (rs1 << 10) | (rs1 >> 22); +} + +/* +**foo5: +** rorw a0,a0,a1 +** ret +*/ +unsigned int foo5(unsigned int rs1, unsigned int rs2) +{ + return (rs1 >> rs2) | (rs1 << (32 - rs2)); +} diff --git a/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-05.c b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-05.c new file mode 100644 index 0000000..85090b1 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-05.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc_zbb -mabi=ilp32 -fno-lto -O2" } */ +/* { dg-skip-if "" { *-*-* } { "-g" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +/* +**foo1: +** rori a0,a0,20 +** ret +*/ +unsigned int foo1(unsigned int rs1) +{ + return (rs1 >> 20) | (rs1 << 12); +} + +/* +**foo2: +** rori a0,a0,22 +** ret +*/ +unsigned int foo2(unsigned int rs1) 
+{ + return (rs1 << 10) | (rs1 >> 22); +} diff --git a/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-06.c b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-06.c new file mode 100644 index 0000000..70b79ab --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-06.c @@ -0,0 +1,36 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zbb -mabi=lp64d -fno-lto -O2" } */ +/* { dg-skip-if "" { *-*-* } { "-g" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +/* +**foo1: +** roriw a0,a0,14 +** ret +*/ +unsigned int foo1 (unsigned int rs1) +{ return ((rs1 >> 14) | (rs1 << 18)); } + +/* +**foo2: +** roriw a0,a0,18 +** ret +*/ +unsigned int foo2 (unsigned int rs1) +{ return ((rs1 >> 18) | (rs1 << 14)); } + +/* +**foo3: +** roriw a0,a0,18 +** ret +*/ +unsigned int foo3 (unsigned int rs1) +{ return ((rs1 << 14) | (rs1 >> 18)); } + +/* +**foo4: +** roriw a0,a0,14 +** ret +*/ +unsigned int foo4 (unsigned int rs1) +{ return ((rs1 << 18) | (rs1 >> 14)); } \ No newline at end of file diff --git a/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-07.c b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-07.c new file mode 100644 index 0000000..3b6ab38 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zbb-rol-ror-07.c @@ -0,0 +1,64 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zbb -mabi=lp64d -fno-lto -O2" } */ +/* { dg-skip-if "" { *-*-* } { "-g" } } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +/* +**foo1: +** rori a0,a0,34 +** ret +*/ +unsigned long foo1 (unsigned long rs1) +{ + unsigned long tempt; + tempt = rs1 >> 30; + tempt = tempt << 2; + tempt = tempt >> 6; + rs1 = tempt | (rs1 << 30); + return rs1 ; +} + +/* +**foo2: +** rori a0,a0,24 +** ret +*/ +unsigned long foo2 (unsigned long rs1) +{ + unsigned long tempt; + tempt = rs1 >> 20; + tempt = tempt << 2; + tempt = tempt >> 6; + rs1 = tempt | (rs1 << 40); + return rs1 ; +} + +/* +**foo3: +** rori a0,a0,40 +** ret +*/ +unsigned long foo3 (unsigned long rs1) +{ + unsigned long tempt; + tempt = rs1 << 20; + tempt = tempt >> 2; + tempt = tempt << 6; + rs1 = tempt | (rs1 >> 40); + return rs1 ; +} + +/* +**foo4: +** rori a0,a0,20 +** ret +*/ +unsigned long foo4 (unsigned long rs1) +{ + unsigned long tempt; + tempt = rs1 << 40; + tempt = tempt >> 2; + tempt = tempt << 6; + rs1 = tempt | (rs1 >> 20); + return rs1 ; +} \ No newline at end of file -- cgit v1.1 From a782346757c54a5a3cfb9f416a7ebe3554a617d7 Mon Sep 17 00:00:00 2001 From: Fei Gao Date: Mon, 17 Apr 2023 12:07:01 -0600 Subject: RISC-V: add a new parameter in riscv_first_stack_step. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_first_stack_step): Add a new function parameter remaining_size. (riscv_compute_frame_info): Adapt new riscv_first_stack_step interface. (riscv_expand_prologue): Likewise. (riscv_expand_epilogue): Likewise. --- gcc/config/riscv/riscv.cc | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'gcc') diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index e88fa2d..e4937d1 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -4834,7 +4834,7 @@ riscv_save_libcall_count (unsigned mask) They decrease stack_pointer_rtx but leave frame_pointer_rtx and hard_frame_pointer_rtx unchanged. */ -static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame); +static HOST_WIDE_INT riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size); /* Handle stack align for poly_int. 
*/ static poly_int64 @@ -4863,7 +4863,7 @@ riscv_compute_frame_info (void) save/restore t0. We check for this before clearing the frame struct. */ if (cfun->machine->interrupt_handler_p) { - HOST_WIDE_INT step1 = riscv_first_stack_step (frame); + HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size); if (! POLY_SMALL_OPERAND_P ((frame->total_size - step1))) interrupt_save_prologue_temp = true; } @@ -5182,31 +5182,31 @@ riscv_for_each_saved_reg (poly_int64 sp_offset, riscv_save_restore_fn fn, without adding extra instructions. */ static HOST_WIDE_INT -riscv_first_stack_step (struct riscv_frame_info *frame) +riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_size) { - HOST_WIDE_INT frame_total_constant_size; - if (!frame->total_size.is_constant ()) - frame_total_constant_size - = riscv_stack_align (frame->total_size.coeffs[0]) - - riscv_stack_align (frame->total_size.coeffs[1]); + HOST_WIDE_INT remaining_const_size; + if (!remaining_size.is_constant ()) + remaining_const_size + = riscv_stack_align (remaining_size.coeffs[0]) + - riscv_stack_align (remaining_size.coeffs[1]); else - frame_total_constant_size = frame->total_size.to_constant (); + remaining_const_size = remaining_size.to_constant (); - if (SMALL_OPERAND (frame_total_constant_size)) - return frame_total_constant_size; + if (SMALL_OPERAND (remaining_const_size)) + return remaining_const_size; HOST_WIDE_INT min_first_step = - RISCV_STACK_ALIGN ((frame->total_size - frame->frame_pointer_offset).to_constant()); + riscv_stack_align ((remaining_size - frame->frame_pointer_offset).to_constant()); HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8; - HOST_WIDE_INT min_second_step = frame_total_constant_size - max_first_step; + HOST_WIDE_INT min_second_step = remaining_const_size - max_first_step; gcc_assert (min_first_step <= max_first_step); /* As an optimization, use the least-significant bits of the total frame size, so that the second adjustment step is just LUI + ADD. */ if (!SMALL_OPERAND (min_second_step) - && frame_total_constant_size % IMM_REACH < IMM_REACH / 2 - && frame_total_constant_size % IMM_REACH >= min_first_step) - return frame_total_constant_size % IMM_REACH; + && remaining_const_size % IMM_REACH < IMM_REACH / 2 + && remaining_const_size % IMM_REACH >= min_first_step) + return remaining_const_size % IMM_REACH; if (TARGET_RVC) { @@ -5214,13 +5214,13 @@ riscv_first_stack_step (struct riscv_frame_info *frame) loads and stores, then put that one first. */ if (IN_RANGE (min_second_step, 0, (TARGET_64BIT ? SDSP_REACH : SWSP_REACH))) - return MAX (min_second_step, min_first_step); + return MAX (min_second_step, min_first_step); /* If we need LUI + ADDI + ADD for the second adjustment step, then start with the minimum first step, so that we can get compressed loads and stores. */ else if (!SMALL_OPERAND (min_second_step)) - return min_first_step; + return min_first_step; } return max_first_step; @@ -5307,7 +5307,7 @@ riscv_expand_prologue (void) /* Save the registers. */ if ((frame->mask | frame->fmask) != 0) { - HOST_WIDE_INT step1 = riscv_first_stack_step (frame); + HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size); if (size.is_constant ()) step1 = MIN (size.to_constant(), step1); @@ -5487,7 +5487,7 @@ riscv_expand_epilogue (int style) possible in the second step without going out of range. 
*/ if ((frame->mask | frame->fmask) != 0) { - step2 = riscv_first_stack_step (frame); + step2 = riscv_first_stack_step (frame, frame->total_size); step1 -= step2; } -- cgit v1.1 From 8c010f6fe5ebe80d2e054b31e04ae0e9f12ae368 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 11 Apr 2023 11:04:56 -0700 Subject: RISC-V: Clean up the pr106602.c testcase The test case that was added is rv64i-specific, as there's better ways to generate this code on rv32i (where the long/int cast is a NOP) and on rv64i_zba (where we have word shifts). This renames the original test case and adds two more for those targets. gcc/testsuite/ChangeLog: PR target/106602 * gcc.target/riscv/pr106602.c: Moved to... * gcc.target/riscv/pr106602-rv64i.c: ...here. * gcc.target/riscv/pr106602-rv32i.c: New test. * gcc.target/riscv/pr106602-rv64i_zba.c: New test. --- gcc/testsuite/gcc.target/riscv/pr106602-rv32i.c | 14 ++++++++++++++ gcc/testsuite/gcc.target/riscv/pr106602-rv64i.c | 14 ++++++++++++++ gcc/testsuite/gcc.target/riscv/pr106602-rv64i_zba.c | 15 +++++++++++++++ gcc/testsuite/gcc.target/riscv/pr106602.c | 14 -------------- 4 files changed, 43 insertions(+), 14 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/pr106602-rv32i.c create mode 100644 gcc/testsuite/gcc.target/riscv/pr106602-rv64i.c create mode 100644 gcc/testsuite/gcc.target/riscv/pr106602-rv64i_zba.c delete mode 100644 gcc/testsuite/gcc.target/riscv/pr106602.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/riscv/pr106602-rv32i.c b/gcc/testsuite/gcc.target/riscv/pr106602-rv32i.c new file mode 100644 index 0000000..05b54db --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr106602-rv32i.c @@ -0,0 +1,14 @@ +/* { dg-do compile { target { riscv64*-*-* } } } */ +/* { dg-options "-O2 -march=rv32i -mabi=ilp32" } */ + +unsigned long +foo2 (unsigned long a) +{ + return (unsigned long)(unsigned int) a << 6; +} + +/* { dg-final { scan-assembler-times "slli\t" 1 } } */ +/* { dg-final { scan-assembler-not "srli\t" } } */ +/* { dg-final { scan-assembler-not "\tli\t" } } */ +/* { dg-final { scan-assembler-not "addi\t" } } */ +/* { dg-final { scan-assembler-not "and\t" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/pr106602-rv64i.c b/gcc/testsuite/gcc.target/riscv/pr106602-rv64i.c new file mode 100644 index 0000000..ef0719f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr106602-rv64i.c @@ -0,0 +1,14 @@ +/* { dg-do compile { target { riscv64*-*-* } } } */ +/* { dg-options "-O2 -march=rv64i -mabi=lp64" } */ + +unsigned long +foo2 (unsigned long a) +{ + return (unsigned long)(unsigned int) a << 6; +} + +/* { dg-final { scan-assembler-times "slli\t" 1 } } */ +/* { dg-final { scan-assembler-times "srli\t" 1 } } */ +/* { dg-final { scan-assembler-not "\tli\t" } } */ +/* { dg-final { scan-assembler-not "addi\t" } } */ +/* { dg-final { scan-assembler-not "and\t" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/pr106602-rv64i_zba.c b/gcc/testsuite/gcc.target/riscv/pr106602-rv64i_zba.c new file mode 100644 index 0000000..23b9f1e --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/pr106602-rv64i_zba.c @@ -0,0 +1,15 @@ +/* { dg-do compile { target { riscv64*-*-* } } } */ +/* { dg-options "-O2 -march=rv64i_zba -mabi=lp64" } */ + +unsigned long +foo2 (unsigned long a) +{ + return (unsigned long)(unsigned int) a << 6; +} + +/* { dg-final { scan-assembler-times "slli.uw\t" 1 } } */ +/* { dg-final { scan-assembler-not "slli\t" } } */ +/* { dg-final { scan-assembler-not "srli\t" } } */ +/* { dg-final { scan-assembler-not "\tli\t" } } */ +/* { dg-final 
{ scan-assembler-not "addi\t" } } */ +/* { dg-final { scan-assembler-not "and\t" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/pr106602.c b/gcc/testsuite/gcc.target/riscv/pr106602.c deleted file mode 100644 index 825b1a1..0000000 --- a/gcc/testsuite/gcc.target/riscv/pr106602.c +++ /dev/null @@ -1,14 +0,0 @@ -/* { dg-do compile { target { riscv64*-*-* } } } */ -/* { dg-options "-O2" } */ - -unsigned long -foo2 (unsigned long a) -{ - return (unsigned long)(unsigned int) a << 6; -} - -/* { dg-final { scan-assembler-times "slli\t" 1 } } */ -/* { dg-final { scan-assembler-times "srli\t" 1 } } */ -/* { dg-final { scan-assembler-not "\tli\t" } } */ -/* { dg-final { scan-assembler-not "addi\t" } } */ -/* { dg-final { scan-assembler-not "and\t" } } */ -- cgit v1.1 From e6b050da8a4513ab37fd3699c7a963421fbe4d81 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 11 Apr 2023 15:18:20 -0700 Subject: RISC-V: Set the ABI for the RVV tests The RVV test harness currently sets the ISA according to the target tuple, but doesn't also set the ABI. This just sets the ABI to match the ISA, though we should really also be respecting the user's specific ISA to test. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/rvv.exp (gcc_mabi): New variable. --- gcc/testsuite/gcc.target/riscv/rvv/rvv.exp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp index 7a9a2b6..4b5509d 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp +++ b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp @@ -31,15 +31,17 @@ if ![info exists DEFAULT_CFLAGS] then { } set gcc_march "rv64gcv_zfh" +set gcc_mabi "lp64d" if [istarget riscv32-*-*] then { set gcc_march "rv32gcv_zfh" + set gcc_mabi "ilp32d" } # Initialize `dg'. dg-init # Main loop. -set CFLAGS "$DEFAULT_CFLAGS -march=$gcc_march -O3" +set CFLAGS "$DEFAULT_CFLAGS -march=$gcc_march -mabi=$gcc_mabi -O3" dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/base/*.\[cS\]]] \ "" $CFLAGS gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/vsetvl/*.\[cS\]]] \ -- cgit v1.1 From c16848ed8f30be952ac0167fd464ae794fa5ac67 Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Tue, 11 Apr 2023 12:59:48 -0700 Subject: RISC-V: Force ilp32d for the T-Head FMV test These functions are NOPs on the soft-float ABIs. Since we're already forcing the ISA, let's just force the ABI too. gcc/testsuite/ChangeLog: * gcc.target/riscv/xtheadfmv-fmv.c: Force the ilp32d ABI. --- gcc/testsuite/gcc.target/riscv/xtheadfmv-fmv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/riscv/xtheadfmv-fmv.c b/gcc/testsuite/gcc.target/riscv/xtheadfmv-fmv.c index 10d035e..1036044 100644 --- a/gcc/testsuite/gcc.target/riscv/xtheadfmv-fmv.c +++ b/gcc/testsuite/gcc.target/riscv/xtheadfmv-fmv.c @@ -1,5 +1,5 @@ /* { dg-do compile { target { rv32 } } } */ -/* { dg-options "-march=rv32gc_xtheadfmv" } */ +/* { dg-options "-march=rv32gc_xtheadfmv -mabi=ilp32d" } */ /* { dg-skip-if "" { *-*-* } { "-O0" } } */ double -- cgit v1.1 From e11533e2f3dd91eef663f08f8326eefb72680cdf Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Mon, 17 Apr 2023 17:05:38 +0200 Subject: Do not export global ranges from -Walloca pass. A warning pass should not be exporting global ranges it finds along the way, because that will alter the behavior of future passes. The reason the present behavior was there was because of some long ago forgotten regression in another pass. 
This regression is no longer there, and if there's ever any fallout from cleaning this up, we can address it in the pass that is missing some information. gcc/ChangeLog: * gimple-ssa-warn-alloca.cc (pass_walloca::execute): Do not export global ranges. --- gcc/gimple-ssa-warn-alloca.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/gimple-ssa-warn-alloca.cc b/gcc/gimple-ssa-warn-alloca.cc index 5b220a5..4374f57 100644 --- a/gcc/gimple-ssa-warn-alloca.cc +++ b/gcc/gimple-ssa-warn-alloca.cc @@ -256,7 +256,7 @@ in_loop_p (gimple *stmt) unsigned int pass_walloca::execute (function *fun) { - gimple_ranger *ranger = enable_ranger (fun); + enable_ranger (fun); basic_block bb; FOR_EACH_BB_FN (bb, fun) { @@ -379,7 +379,6 @@ pass_walloca::execute (function *fun) } } } - ranger->export_global_ranges (); disable_ranger (fun); return 0; } -- cgit v1.1 From 6d4ad4cca5d2b15d01a50a893348cbcfc340cdd5 Mon Sep 17 00:00:00 2001 From: Andrew Pinski Date: Fri, 31 Mar 2023 01:37:20 +0000 Subject: PHIOPT: Remove gate_hoist_loads prototype gate_hoist_loads is defined before its usage so there is no reason for the declaration (prototype) to be there. Committed as obvious after a bootstrap/test on x86_64-linux-gnu with no regressions. gcc/ChangeLog: * tree-ssa-phiopt.cc (gate_hoist_loads): Remove prototype. --- gcc/tree-ssa-phiopt.cc | 1 - 1 file changed, 1 deletion(-) (limited to 'gcc') diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc index a7ab6ce..16acd2f 100644 --- a/gcc/tree-ssa-phiopt.cc +++ b/gcc/tree-ssa-phiopt.cc @@ -77,7 +77,6 @@ static bool cond_if_else_store_replacement (basic_block, basic_block, basic_bloc static hash_set * get_non_trapping (); static void hoist_adjacent_loads (basic_block, basic_block, basic_block, basic_block); -static bool gate_hoist_loads (void); /* This pass tries to transform conditional stores into unconditional ones, enabling further simplifications with the simpler then and else -- cgit v1.1 From 60524be1e3929d83e15fceac6e2aa053c8a6fb20 Mon Sep 17 00:00:00 2001 From: Fei Gao Date: Mon, 17 Apr 2023 16:47:23 -0600 Subject: RISC-V: optimize stack manipulation in save-restore The stack space that save-restore reserves is not properly accounted for in stack allocation and deallocation. This patch allows fewer instructions to be used for stack allocation and deallocation when save-restore is enabled. before patch: bar: call t0,__riscv_save_4 addi sp,sp,-64 ... li t0,-12288 addi t0,t0,-1968 # optimized out after patch add sp,sp,t0 # prologue ... li t0,12288 # epilogue addi t0,t0,2000 # optimized out after patch add sp,sp,t0 ... addi sp,sp,32 tail __riscv_restore_4 after patch: bar: call t0,__riscv_save_4 addi sp,sp,-2032 ... li t0,-12288 add sp,sp,t0 # prologue ... li t0,12288 # epilogue add sp,sp,t0 ... addi sp,sp,2032 tail __riscv_restore_4 gcc/ * config/riscv/riscv.cc (riscv_expand_prologue): Consider save-restore in stack allocation. (riscv_expand_epilogue): Consider save-restore in stack deallocation. gcc/testsuite * gcc.target/riscv/stack_save_restore.c: New test.
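The before/after assembly above is from the commit message; the arithmetic behind the improved split can be checked with a small host-side sketch (mine, with an assumed frame size picked to reproduce the li t0,-12288 step shown above):

#include <stdio.h>

/* addi takes a signed 12-bit immediate and sp stays 16-byte aligned,
   so 2032 is the largest single addi adjustment.  */
#define MAX_ADDI_STEP 2032

int main (void)
{
  long frame = 14320;              /* assumed total frame size */
  long step1 = MAX_ADDI_STEP;      /* peeled off with one addi */
  long rest = frame - step1;       /* 12288: needs li + add */
  printf ("addi sp,sp,-%ld\n", step1);
  printf ("li   t0,-%ld\n", rest);
  printf ("add  sp,sp,t0\n");
  return 0;
}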
--- gcc/config/riscv/riscv.cc | 50 +++++++++++----------- .../gcc.target/riscv/stack_save_restore.c | 40 +++++++++++++++++ 2 files changed, 66 insertions(+), 24 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/stack_save_restore.c (limited to 'gcc') diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index e4937d1..418b29e 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -5280,12 +5280,12 @@ void riscv_expand_prologue (void) { struct riscv_frame_info *frame = &cfun->machine->frame; - poly_int64 size = frame->total_size; + poly_int64 remaining_size = frame->total_size; unsigned mask = frame->mask; rtx insn; if (flag_stack_usage_info) - current_function_static_stack_size = constant_lower_bound (size); + current_function_static_stack_size = constant_lower_bound (remaining_size); if (cfun->machine->naked_p) return; @@ -5296,7 +5296,7 @@ riscv_expand_prologue (void) rtx dwarf = NULL_RTX; dwarf = riscv_adjust_libcall_cfi_prologue (); - size -= frame->save_libcall_adjustment; + remaining_size -= frame->save_libcall_adjustment; insn = emit_insn (riscv_gen_gpr_save_insn (frame)); frame->mask = 0; /* Temporarily fib that we need not save GPRs. */ @@ -5307,16 +5307,14 @@ riscv_expand_prologue (void) /* Save the registers. */ if ((frame->mask | frame->fmask) != 0) { - HOST_WIDE_INT step1 = riscv_first_stack_step (frame, frame->total_size); - if (size.is_constant ()) - step1 = MIN (size.to_constant(), step1); + HOST_WIDE_INT step1 = riscv_first_stack_step (frame, remaining_size); insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-step1)); RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; - size -= step1; - riscv_for_each_saved_reg (size, riscv_save_reg, false, false); + remaining_size -= step1; + riscv_for_each_saved_reg (remaining_size, riscv_save_reg, false, false); } frame->mask = mask; /* Undo the above fib. */ @@ -5325,29 +5323,29 @@ riscv_expand_prologue (void) if (frame_pointer_needed) { insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx, - GEN_INT ((frame->hard_frame_pointer_offset - size).to_constant ())); + GEN_INT ((frame->hard_frame_pointer_offset - remaining_size).to_constant ())); RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; riscv_emit_stack_tie (); } /* Allocate the rest of the frame. */ - if (known_gt (size, 0)) + if (known_gt (remaining_size, 0)) { /* Two step adjustment: 1.scalable frame. 2.constant frame. */ poly_int64 scalable_frame (0, 0); - if (!size.is_constant ()) + if (!remaining_size.is_constant ()) { /* First for scalable frame. */ - poly_int64 scalable_frame = size; - scalable_frame.coeffs[0] = size.coeffs[1]; + poly_int64 scalable_frame = remaining_size; + scalable_frame.coeffs[0] = remaining_size.coeffs[1]; riscv_v_adjust_scalable_frame (stack_pointer_rtx, scalable_frame, false); - size -= scalable_frame; + remaining_size -= scalable_frame; } /* Second step for constant frame. */ - HOST_WIDE_INT constant_frame = size.to_constant (); + HOST_WIDE_INT constant_frame = remaining_size.to_constant (); if (constant_frame == 0) return; @@ -5413,6 +5411,8 @@ riscv_expand_epilogue (int style) HOST_WIDE_INT step2 = 0; bool use_restore_libcall = ((style == NORMAL_RETURN) && riscv_use_save_libcall (frame)); + unsigned libcall_size = (use_restore_libcall + ? 
frame->save_libcall_adjustment : 0); rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); rtx insn; @@ -5483,13 +5483,18 @@ riscv_expand_epilogue (int style) REG_NOTES (insn) = dwarf; } + if (use_restore_libcall) + frame->mask = 0; /* Temporarily fib for GPRs. */ + /* If we need to restore registers, deallocate as much stack as possible in the second step without going out of range. */ if ((frame->mask | frame->fmask) != 0) - { - step2 = riscv_first_stack_step (frame, frame->total_size); - step1 -= step2; - } + step2 = riscv_first_stack_step (frame, frame->total_size - libcall_size); + + if (use_restore_libcall) + frame->mask = mask; /* Undo the above fib. */ + + step1 -= step2 + libcall_size; /* Set TARGET to BASE + STEP1. */ if (known_gt (step1, 0)) @@ -5543,15 +5548,12 @@ riscv_expand_epilogue (int style) frame->mask = 0; /* Temporarily fib that we need not save GPRs. */ /* Restore the registers. */ - riscv_for_each_saved_reg (frame->total_size - step2, riscv_restore_reg, + riscv_for_each_saved_reg (frame->total_size - step2 - libcall_size, + riscv_restore_reg, true, style == EXCEPTION_RETURN); if (use_restore_libcall) - { frame->mask = mask; /* Undo the above fib. */ - gcc_assert (step2 >= frame->save_libcall_adjustment); - step2 -= frame->save_libcall_adjustment; - } if (need_barrier_p) riscv_emit_stack_tie (); diff --git a/gcc/testsuite/gcc.target/riscv/stack_save_restore.c b/gcc/testsuite/gcc.target/riscv/stack_save_restore.c new file mode 100644 index 0000000..522e706 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/stack_save_restore.c @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32imafc -mabi=ilp32f -msave-restore -O2 -fno-schedule-insns -fno-schedule-insns2 -fno-unroll-loops -fno-peel-loops -fno-lto" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +char my_getchar(); +float getf(); + +/* +**bar: +** call t0,__riscv_save_4 +** addi sp,sp,-2032 +** ... +** li t0,-12288 +** add sp,sp,t0 +** ... +** li t0,12288 +** add sp,sp,t0 +** ... +** addi sp,sp,2032 +** tail __riscv_restore_4 +*/ +int bar() +{ + float volatile farray[3568]; + + float sum = 0; + float f1 = getf(); + float f2 = getf(); + float f3 = getf(); + float f4 = getf(); + + for (int i = 0; i < 3568; i++) + { + farray[i] = my_getchar() * 1.2; + sum += farray[i]; + } + + return sum + f1 + f2 + f3 + f4; +} + -- cgit v1.1 From 2245459c85a3f4cde3d33bf3e4edaff08f3b2404 Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Mon, 17 Apr 2023 18:52:07 -0400 Subject: c++: bound ttp level lowering [PR109531] Here when level lowering the bound ttp TT via the substitution T=C, we're neglecting to canonicalize (and thereby strip of simple typedefs) the substituted template arguments {A} before determining the new canonical type via hash table lookup. This leads to a hash mismatch ICE for the two equivalent types TT and TT> since iterative_hash_template_arg assumes type arguments are already canonicalized. We can fix this by canonicalizing or coercing the substituted arguments directly, but seeing as creation and ordinary substitution of bound ttps both go through lookup_template_class, which in turn performs the desired coercion/canonicalization, it seems preferable to make this code path go through lookup_template_class as well. PR c++/109531 gcc/cp/ChangeLog: * pt.cc (tsubst) : In the level-lowering case just use lookup_template_class to rebuild the bound ttp. gcc/testsuite/ChangeLog: * g++.dg/template/canon-type-20.C: New test. * g++.dg/template/ttp36.C: New test. 
--- gcc/cp/pt.cc | 39 ++++++++++++++------------- gcc/testsuite/g++.dg/template/canon-type-20.C | 18 +++++++++++++ gcc/testsuite/g++.dg/template/ttp36.C | 12 +++++++++ 3 files changed, 50 insertions(+), 19 deletions(-) create mode 100644 gcc/testsuite/g++.dg/template/canon-type-20.C create mode 100644 gcc/testsuite/g++.dg/template/ttp36.C (limited to 'gcc') diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index fcc8e0d..e065ace 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -16232,7 +16232,6 @@ tsubst (tree t, tree args, tsubst_flags_t complain, tree in_decl) { case TEMPLATE_TYPE_PARM: case TEMPLATE_TEMPLATE_PARM: - case BOUND_TEMPLATE_TEMPLATE_PARM: if (cp_type_quals (t)) { r = tsubst (TYPE_MAIN_VARIANT (t), args, complain, in_decl); @@ -16274,24 +16273,6 @@ tsubst (tree t, tree args, tsubst_flags_t complain, tree in_decl) only instantiated during satisfaction. */ PLACEHOLDER_TYPE_CONSTRAINTS_INFO (r) = ci; - if (code == BOUND_TEMPLATE_TEMPLATE_PARM) - { - tree tinfo = TYPE_TEMPLATE_INFO (t); - /* We might need to substitute into the types of non-type - template parameters. */ - tree tmpl = tsubst (TI_TEMPLATE (tinfo), args, - complain, in_decl); - if (tmpl == error_mark_node) - return error_mark_node; - tree argvec = tsubst (TI_ARGS (tinfo), args, - complain, in_decl); - if (argvec == error_mark_node) - return error_mark_node; - - TEMPLATE_TEMPLATE_PARM_TEMPLATE_INFO (r) - = build_template_info (tmpl, argvec); - } - if (TYPE_STRUCTURAL_EQUALITY_P (t)) SET_TYPE_STRUCTURAL_EQUALITY (r); else @@ -16299,6 +16280,26 @@ tsubst (tree t, tree args, tsubst_flags_t complain, tree in_decl) } break; + case BOUND_TEMPLATE_TEMPLATE_PARM: + { + tree tinfo = TYPE_TEMPLATE_INFO (t); + /* We might need to substitute into the types of non-type + template parameters. This also lowers the level of + the ttp appropriately. */ + tree tmpl = tsubst (TI_TEMPLATE (tinfo), args, + complain, in_decl); + if (tmpl == error_mark_node) + return error_mark_node; + tree argvec = tsubst (TI_ARGS (tinfo), args, + complain, in_decl); + if (argvec == error_mark_node) + return error_mark_node; + r = lookup_template_class (tmpl, argvec, in_decl, NULL_TREE, + /*entering_scope=*/false, complain); + r = cp_build_qualified_type (r, cp_type_quals (t), complain); + break; + } + case TEMPLATE_PARM_INDEX: /* OK, now substitute the type of the non-type parameter. We couldn't do it earlier because it might be an auto parameter, diff --git a/gcc/testsuite/g++.dg/template/canon-type-20.C b/gcc/testsuite/g++.dg/template/canon-type-20.C new file mode 100644 index 0000000..211ca10 --- /dev/null +++ b/gcc/testsuite/g++.dg/template/canon-type-20.C @@ -0,0 +1,18 @@ +// PR c++/109531 +// { dg-do compile { target c++11 } } +// { dg-additional-options "--param=hash-table-verification-limit=1000" } + +template +using A = int; + +struct B { using type = int; }; +struct C { using type = A; }; + +template +struct D { + template class TT> + TT f(); +}; + +template struct D; +template struct D; diff --git a/gcc/testsuite/g++.dg/template/ttp36.C b/gcc/testsuite/g++.dg/template/ttp36.C new file mode 100644 index 0000000..c329bb4 --- /dev/null +++ b/gcc/testsuite/g++.dg/template/ttp36.C @@ -0,0 +1,12 @@ +// Verify we propagate cv-quals when level-lowering a bound ttp. 
+ +template +struct B { + template class TT> + void f(TT*); + + template class TT> + void f(const TT*); // { dg-bogus "cannot be overloaded" } +}; + +template struct B; -- cgit v1.1 From 3f0f4e0fa0b1f9c6217bf5018df4d0836cad84c7 Mon Sep 17 00:00:00 2001 From: Fei Gao Date: Mon, 17 Apr 2023 18:12:40 -0600 Subject: RISC-V: make the stack manipulation codes more readable. gcc/ChangeLog: * config/riscv/riscv.cc (riscv_first_stack_step): Make codes more readable. (riscv_expand_epilogue): Likewise. --- gcc/config/riscv/riscv.cc | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'gcc') diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 418b29e..cdb47e8 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -5195,8 +5195,11 @@ riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_siz if (SMALL_OPERAND (remaining_const_size)) return remaining_const_size; + poly_int64 callee_saved_first_step = + remaining_size - frame->frame_pointer_offset; + gcc_assert(callee_saved_first_step.is_constant ()); HOST_WIDE_INT min_first_step = - riscv_stack_align ((remaining_size - frame->frame_pointer_offset).to_constant()); + riscv_stack_align (callee_saved_first_step.to_constant ()); HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8; HOST_WIDE_INT min_second_step = remaining_const_size - max_first_step; gcc_assert (min_first_step <= max_first_step); @@ -5204,7 +5207,7 @@ riscv_first_stack_step (struct riscv_frame_info *frame, poly_int64 remaining_siz /* As an optimization, use the least-significant bits of the total frame size, so that the second adjustment step is just LUI + ADD. */ if (!SMALL_OPERAND (min_second_step) - && remaining_const_size % IMM_REACH < IMM_REACH / 2 + && remaining_const_size % IMM_REACH <= max_first_step && remaining_const_size % IMM_REACH >= min_first_step) return remaining_const_size % IMM_REACH; @@ -5400,14 +5403,14 @@ riscv_adjust_libcall_cfi_epilogue () void riscv_expand_epilogue (int style) { - /* Split the frame into two. STEP1 is the amount of stack we should - deallocate before restoring the registers. STEP2 is the amount we - should deallocate afterwards. + /* Split the frame into 3 steps. STEP1 is the amount of stack we should + deallocate before restoring the registers. STEP2 is the amount we + should deallocate afterwards including the callee saved regs. STEP3 + is the amount deallocated by save-restore libcall. Start off by assuming that no registers need to be restored. */ struct riscv_frame_info *frame = &cfun->machine->frame; unsigned mask = frame->mask; - poly_int64 step1 = frame->total_size; HOST_WIDE_INT step2 = 0; bool use_restore_libcall = ((style == NORMAL_RETURN) && riscv_use_save_libcall (frame)); @@ -5494,7 +5497,7 @@ riscv_expand_epilogue (int style) if (use_restore_libcall) frame->mask = mask; /* Undo the above fib. */ - step1 -= step2 + libcall_size; + poly_int64 step1 = frame->total_size - step2 - libcall_size; /* Set TARGET to BASE + STEP1. */ if (known_gt (step1, 0)) -- cgit v1.1 From 56b288f508179c210feaf5c653ca5042f8a927de Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Tue, 18 Apr 2023 00:17:26 +0000 Subject: Daily bump. 
--- gcc/ChangeLog | 88 +++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/ada/ChangeLog | 4 +++ gcc/cp/ChangeLog | 7 ++++ gcc/testsuite/ChangeLog | 72 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 172 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 33e4b50..ce5cb67 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,91 @@ +2023-04-18 Fei Gao + + * config/riscv/riscv.cc (riscv_first_stack_step): Make codes more + readable. + (riscv_expand_epilogue): Likewise. + +2023-04-17 Fei Gao + + * config/riscv/riscv.cc (riscv_expand_prologue): Consider save-restore in + stack allocation. + (riscv_expand_epilogue): Consider save-restore in stack deallocation. + +2023-04-17 Andrew Pinski + + * tree-ssa-phiopt.cc (gate_hoist_loads): Remove + prototype. + +2023-04-17 Aldy Hernandez + + * gimple-ssa-warn-alloca.cc (pass_walloca::execute): Do not export + global ranges. + +2023-04-17 Fei Gao + + * config/riscv/riscv.cc (riscv_first_stack_step): Add a new function + parameter remaining_size. + (riscv_compute_frame_info): Adapt new riscv_first_stack_step interface. + (riscv_expand_prologue): Likewise. + (riscv_expand_epilogue): Likewise. + +2023-04-17 Feng Wang + + * config/riscv/bitmanip.md (rotrsi3_sext): Support generating + roriw for constant counts. + * rtl.h (reverse_rotate_by_imm_p): Add function declartion + * simplify-rtx.cc (reverse_rotate_by_imm_p): New function. + (simplify_context::simplify_binary_operation_1): Use it. + * expmed.cc (expand_shift_1): Likewise. + +2023-04-17 Martin Jambor + + PR ipa/107769 + PR ipa/109318 + * cgraph.h (symtab_node::find_reference): Add parameter use_type. + * ipa-prop.h (ipa_pass_through_data): New flag refdesc_decremented. + (ipa_zap_jf_refdesc): New function. + (ipa_get_jf_pass_through_refdesc_decremented): Likewise. + (ipa_set_jf_pass_through_refdesc_decremented): Likewise. + * ipa-cp.cc (ipcp_discover_new_direct_edges): Provide a value for + the new parameter of find_reference. + (adjust_references_in_caller): Likewise. Make sure the constant jump + function is not used to decrement a refdec counter again. Only + decrement refdesc counters when the pass_through jump function allows + it. Added a detailed dump when decrementing refdesc counters. + * ipa-prop.cc (ipa_print_node_jump_functions_for_edge): Dump new flag. + (ipa_set_jf_simple_pass_through): Initialize the new flag. + (ipa_set_jf_unary_pass_through): Likewise. + (ipa_set_jf_arith_pass_through): Likewise. + (remove_described_reference): Provide a value for the new parameter of + find_reference. + (update_jump_functions_after_inlining): Zap refdesc of new jfunc if + the previous pass_through had a flag mandating that we do so. + (propagate_controlled_uses): Likewise. Only decrement refdesc + counters when the pass_through jump function allows it. + (ipa_edge_args_sum_t::duplicate): Provide a value for the new + parameter of find_reference. + (ipa_write_jump_function): Assert the new flag does not have to be + streamed. + * symtab.cc (symtab_node::find_reference): Add parameter use_type, use + it in searching. + +2023-04-17 Philipp Tomsich + Di Zhao + + * config/aarch64/aarch64-tuning-flags.def (AARCH64_EXTRA_TUNING_OPTION): + Add AARCH64_EXTRA_TUNE_NO_LDP_COMBINE. + * config/aarch64/aarch64.cc (aarch64_operands_ok_for_ldpstp): + Check for the above tuning option when processing loads. 
+ +2023-04-17 Richard Biener + + PR tree-optimization/109524 + * tree-vrp.cc (remove_unreachable::m_list): Change to a + vector of pairs of block indices. + (remove_unreachable::maybe_register_block): Adjust. + (remove_unreachable::remove_and_update_globals): Likewise. + Deal with removed blocks. + 2023-04-16 Jeff Law PR target/109508 diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 27b1d3f..378aed5 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20230417 +20230418 diff --git a/gcc/ada/ChangeLog b/gcc/ada/ChangeLog index 8423719..c6aa33160 100644 --- a/gcc/ada/ChangeLog +++ b/gcc/ada/ChangeLog @@ -1,3 +1,7 @@ +2023-04-17 Martin Liska + + * gnatvsn.ads: Bump Library_Version to 14. + 2023-04-15 Eric Botcazou PR bootstrap/109510 diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index bb4881d..d6a5b88 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,10 @@ +2023-04-17 Patrick Palka + + PR c++/109531 + * pt.cc (tsubst) : + In the level-lowering case just use lookup_template_class + to rebuild the bound ttp. + 2023-04-15 Jason Merrill PR c++/109357 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 2cda0bf..ac704d3 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,75 @@ +2023-04-17 Patrick Palka + + PR c++/109531 + * g++.dg/template/canon-type-20.C: New test. + * g++.dg/template/ttp36.C: New test. + +2023-04-17 Fei Gao + + * gcc.target/riscv/stack_save_restore.c: New test. + +2023-04-17 Palmer Dabbelt + + * gcc.target/riscv/xtheadfmv-fmv.c: Force the ilp32d ABI. + +2023-04-17 Palmer Dabbelt + + * gcc.target/riscv/rvv/rvv.exp (gcc_mabi): New variable. + +2023-04-17 Palmer Dabbelt + + PR target/106602 + * gcc.target/riscv/pr106602.c: Moved to... + * gcc.target/riscv/pr106602-rv64i.c: ...here. + * gcc.target/riscv/pr106602-rv32i.c: New test. + * gcc.target/riscv/pr106602-rv64i_zba.c: New test. + +2023-04-17 Feng Wang + + * gcc.target/riscv/zbb-rol-ror-04.c: New test. + * gcc.target/riscv/zbb-rol-ror-05.c: New test. + * gcc.target/riscv/zbb-rol-ror-06.c: New test. + * gcc.target/riscv/zbb-rol-ror-07.c: New test. + +2023-04-17 Martin Jambor + + PR ipa/107769 + PR ipa/109318 + * gcc.dg/ipa/pr109318.c: New test. + * gcc.dg/lto/pr107769_0.c: Likewise. + +2023-04-17 Philipp Tomsich + Di Zhao + + * gcc.target/aarch64/ampere1-no_ldp_combine.c: New test. + +2023-04-17 Jakub Jelinek + + * gcc.dg/vect/vect-simd-clone-16f.c: Add -mno-avx512f for non-lp64 x86. + * gcc.dg/vect/vect-simd-clone-17f.c: Likewise. + * gcc.dg/vect/vect-simd-clone-18f.c: Likewise. + +2023-04-17 Richard Biener + + PR tree-optimization/109524 + * g++.dg/pr109524.C: New testcase. + +2023-04-17 Jiufu Guo + + PR testsuite/108809 + * gcc.target/powerpc/builtins-5-p9-runnable.c: Update for BE. + +2023-04-17 Kito Cheng + + * gcc.target/riscv/rvv/base/scalar_move-2.c: Adjust include way + for riscv_vector.h + * gcc.target/riscv/rvv/base/spill-sp-adjust.c: Add missing + -mabi. + +2023-04-17 Pan Li + + * gcc.target/riscv/rvv/base/mask_insn_shortcut.c: New test. + 2023-04-16 Jeff Law PR target/109508 -- cgit v1.1 From 5015cdf3155c80e5fd61f7b6ab8082ee849e3e90 Mon Sep 17 00:00:00 2001 From: Lulu Cheng Date: Thu, 6 Apr 2023 16:02:07 +0800 Subject: LoongArch: Add built-in functions description of LoongArch Base instruction set instructions. gcc/ChangeLog: * doc/extend.texi: Add section for LoongArch Base Built-in functions. 
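The section added below documents the raw __builtin_loongarch_* intrinsics and their larchintrin.h wrappers. A short usage sketch (mine, written against the signatures listed in the new section and assuming a 64-bit LoongArch target):

#include <larchintrin.h>

/* One CRC32 accumulation step over a 32-bit word (crc.w.w.w).  */
int crc_step (int data, int crc)
{
  return __crc_w_w_w (data, crc);
}

/* Read the 64-bit stable counter, then order subsequent accesses
   with a full data barrier.  */
unsigned long read_counter (void)
{
  __drdtime_t t = __rdtime_d ();   /* value plus counter id */
  __dbar (0);
  return t.dvalue;
}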
--- gcc/doc/extend.texi | 129 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) (limited to 'gcc') diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index b2a1643..69c5ade 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -14674,6 +14674,7 @@ instructions, but allow the compiler to schedule those calls. * Blackfin Built-in Functions:: * BPF Built-in Functions:: * FR-V Built-in Functions:: +* LoongArch Base Built-in Functions:: * MIPS DSP Built-in Functions:: * MIPS Paired-Single Support:: * MIPS Loongson Built-in Functions:: @@ -16202,6 +16203,134 @@ Use the @code{nldub} instruction to load the contents of address @var{x} into the data cache. The instruction is issued in slot I1@. @end table +@node LoongArch Base Built-in Functions +@subsection LoongArch Base Built-in Functions + +These built-in functions are available for LoongArch. + +Data Type Description: +@itemize +@item @code{imm0_31}, a compile-time constant in range 0 to 31; +@item @code{imm0_16383}, a compile-time constant in range 0 to 16383; +@item @code{imm0_32767}, a compile-time constant in range 0 to 32767; +@item @code{imm_n2048_2047}, a compile-time constant in range -2048 to 2047; +@end itemize + +The intrinsics provided are listed below: +@smallexample + unsigned int __builtin_loongarch_movfcsr2gr (imm0_31) + void __builtin_loongarch_movgr2fcsr (imm0_31, unsigned int) + void __builtin_loongarch_cacop_d (imm0_31, unsigned long int, imm_n2048_2047) + unsigned int __builtin_loongarch_cpucfg (unsigned int) + void __builtin_loongarch_asrtle_d (long int, long int) + void __builtin_loongarch_asrtgt_d (long int, long int) + long int __builtin_loongarch_lddir_d (long int, imm0_31) + void __builtin_loongarch_ldpte_d (long int, imm0_31) + + int __builtin_loongarch_crc_w_b_w (char, int) + int __builtin_loongarch_crc_w_h_w (short, int) + int __builtin_loongarch_crc_w_w_w (int, int) + int __builtin_loongarch_crc_w_d_w (long int, int) + int __builtin_loongarch_crcc_w_b_w (char, int) + int __builtin_loongarch_crcc_w_h_w (short, int) + int __builtin_loongarch_crcc_w_w_w (int, int) + int __builtin_loongarch_crcc_w_d_w (long int, int) + + unsigned int __builtin_loongarch_csrrd_w (imm0_16383) + unsigned int __builtin_loongarch_csrwr_w (unsigned int, imm0_16383) + unsigned int __builtin_loongarch_csrxchg_w (unsigned int, unsigned int, imm0_16383) + unsigned long int __builtin_loongarch_csrrd_d (imm0_16383) + unsigned long int __builtin_loongarch_csrwr_d (unsigned long int, imm0_16383) + unsigned long int __builtin_loongarch_csrxchg_d (unsigned long int, unsigned long int, imm0_16383) + + unsigned char __builtin_loongarch_iocsrrd_b (unsigned int) + unsigned short __builtin_loongarch_iocsrrd_h (unsigned int) + unsigned int __builtin_loongarch_iocsrrd_w (unsigned int) + unsigned long int __builtin_loongarch_iocsrrd_d (unsigned int) + void __builtin_loongarch_iocsrwr_b (unsigned char, unsigned int) + void __builtin_loongarch_iocsrwr_h (unsigned short, unsigned int) + void __builtin_loongarch_iocsrwr_w (unsigned int, unsigned int) + void __builtin_loongarch_iocsrwr_d (unsigned long int, unsigned int) + + void __builtin_loongarch_dbar (imm0_32767) + void __builtin_loongarch_ibar (imm0_32767) + + void __builtin_loongarch_syscall (imm0_32767) + void __builtin_loongarch_break (imm0_32767) +@end smallexample + +@emph{Note:}Since the control register is divided into 32-bit and 64-bit, +but the access instruction is not distinguished. 
So GCC renames the control +instructions when implementing intrinsics. + +Take the csrrd instruction as an example, built-in functions are implemented as follows: +@smallexample + __builtin_loongarch_csrrd_w // When reading the 32-bit control register use. + __builtin_loongarch_csrrd_d // When reading the 64-bit control register use. +@end smallexample + +For the convenience of use, the built-in functions are encapsulated, +the encapsulated functions and @code{__drdtime_t, __rdtime_t} are +defined in the @code{larchintrin.h}. So if you call the following +function you need to include @code{larchintrin.h}. + +@smallexample + typedef struct drdtime@{ + unsigned long dvalue; + unsigned long dtimeid; + @} __drdtime_t; + + typedef struct rdtime@{ + unsigned int value; + unsigned int timeid; + @} __rdtime_t; +@end smallexample + +@smallexample + __drdtime_t __rdtime_d (void) + __rdtime_t __rdtimel_w (void) + __rdtime_t __rdtimeh_w (void) + unsigned int __movfcsr2gr (imm0_31) + void __movgr2fcsr (imm0_31, unsigned int) + void __cacop_d (imm0_31, unsigned long, imm_n2048_2047) + unsigned int __cpucfg (unsigned int) + void __asrtle_d (long int, long int) + void __asrtgt_d (long int, long int) + long int __lddir_d (long int, imm0_31) + void __ldpte_d (long int, imm0_31) + + int __crc_w_b_w (char, int) + int __crc_w_h_w (short, int) + int __crc_w_w_w (int, int) + int __crc_w_d_w (long int, int) + int __crcc_w_b_w (char, int) + int __crcc_w_h_w (short, int) + int __crcc_w_w_w (int, int) + int __crcc_w_d_w (long int, int) + + unsigned int __csrrd_w (imm0_16383) + unsigned int __csrwr_w (unsigned int, imm0_16383) + unsigned int __csrxchg_w (unsigned int, unsigned int, imm0_16383) + unsigned long __csrrd_d (imm0_16383) + unsigned long __csrwr_d (unsigned long, imm0_16383) + unsigned long __csrxchg_d (unsigned long, unsigned long, imm0_16383) + + unsigned char __iocsrrd_b (unsigned int) + unsigned short __iocsrrd_h (unsigned int) + unsigned int __iocsrrd_w (unsigned int) + unsigned long __iocsrrd_d (unsigned int) + void __iocsrwr_b (unsigned char, unsigned int) + void __iocsrwr_h (unsigned short, unsigned int) + void __iocsrwr_w (unsigned int, unsigned int) + void __iocsrwr_d (unsigned long, unsigned int) + + void __dbar (imm0_32767) + void __ibar (imm0_32767) + + void __syscall (imm0_32767) + void __break (imm0_32767) +@end smallexample + @node MIPS DSP Built-in Functions @subsection MIPS DSP Built-in Functions -- cgit v1.1 From d2ee685f6fa922e97908ed998d14def9e01e1e4e Mon Sep 17 00:00:00 2001 From: Lulu Cheng Date: Thu, 13 Apr 2023 19:24:38 +0800 Subject: LoongArch: Remove the definition of the macro LOGICAL_OP_NON_SHORT_CIRCUIT under the architecture and use the default definition instead. In some cases, setting this macro as the default can reduce the number of conditional branch instructions. gcc/ChangeLog: * config/loongarch/loongarch.h (LOGICAL_OP_NON_SHORT_CIRCUIT): Remove the macro definition. --- gcc/config/loongarch/loongarch.h | 1 - 1 file changed, 1 deletion(-) (limited to 'gcc') diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h index f816787..6b7dbec 100644 --- a/gcc/config/loongarch/loongarch.h +++ b/gcc/config/loongarch/loongarch.h @@ -836,7 +836,6 @@ typedef struct { 1 is the default; other values are interpreted relative to that. */ #define BRANCH_COST(speed_p, predictable_p) loongarch_branch_cost -#define LOGICAL_OP_NON_SHORT_CIRCUIT 0 /* Return the asm template for a conditional branch instruction. 
OPCODE is the opcode's mnemonic and OPERANDS is the asm template for -- cgit v1.1 From 027a94b00759ad19a9b8b99b36819b6f60d5560e Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Thu, 2 Mar 2023 13:12:45 +0100 Subject: Constify invariant fields of vrange and irange. The discriminator in vrange cannot change after construction, similarly the number of allocated ranges in an irange. It's best to make them constant to avoid invalid changes. gcc/ChangeLog: * value-range.h (class vrange): Make m_discriminator const. (class irange): Make m_max_ranges const. Adjust constructors accordingly. (class unsupported_range): Construct vrange appropriately. (class frange): Same. --- gcc/value-range.h | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'gcc') diff --git a/gcc/value-range.h b/gcc/value-range.h index 969b2b6..e9439bb 100644 --- a/gcc/value-range.h +++ b/gcc/value-range.h @@ -104,8 +104,9 @@ public: enum value_range_kind kind () const; // DEPRECATED protected: + vrange (enum value_range_discriminator d) : m_discriminator (d) { } ENUM_BITFIELD(value_range_kind) m_kind : 8; - ENUM_BITFIELD(value_range_discriminator) m_discriminator : 4; + const ENUM_BITFIELD(value_range_discriminator) m_discriminator : 4; }; // An integer range without any storage. @@ -214,7 +215,7 @@ private: bool intersect (const wide_int& lb, const wide_int& ub); unsigned char m_num_ranges; - unsigned char m_max_ranges; + const unsigned char m_max_ranges; tree m_nonzero_mask; tree *m_base; }; @@ -257,8 +258,8 @@ class unsupported_range : public vrange { public: unsupported_range () + : vrange (VR_UNKNOWN) { - m_discriminator = VR_UNKNOWN; set_undefined (); } virtual void set_undefined () final override @@ -873,10 +874,10 @@ gt_pch_nx (int_range *x, gt_pointer_operator op, void *cookie) inline irange::irange (tree *base, unsigned nranges) + : vrange (VR_IRANGE), + m_max_ranges (nranges) { - m_discriminator = VR_IRANGE; m_base = base; - m_max_ranges = nranges; set_undefined (); } @@ -1112,22 +1113,22 @@ vrp_val_min (const_tree type) inline frange::frange () + : vrange (VR_FRANGE) { - m_discriminator = VR_FRANGE; set_undefined (); } inline frange::frange (const frange &src) + : vrange (VR_FRANGE) { - m_discriminator = VR_FRANGE; *this = src; } inline frange::frange (tree type) + : vrange (VR_FRANGE) { - m_discriminator = VR_FRANGE; set_varying (type); } @@ -1137,8 +1138,8 @@ inline frange::frange (tree type, const REAL_VALUE_TYPE &min, const REAL_VALUE_TYPE &max, value_range_kind kind) + : vrange (VR_FRANGE) { - m_discriminator = VR_FRANGE; set (type, min, max, kind); } @@ -1146,8 +1147,8 @@ frange::frange (tree type, inline frange::frange (tree min, tree max, value_range_kind kind) + : vrange (VR_FRANGE) { - m_discriminator = VR_FRANGE; set (min, max, kind); } -- cgit v1.1 From d5904f356ce327a45964eac449fc3bdd4787b24b Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Thu, 23 Feb 2023 09:00:04 +0100 Subject: Add two new methods to Value_Range. This is for upcoming work in this area. gcc/ChangeLog: * value-range.h (Value_Range::Value_Range): New. (Value_Range::contains_p): New. 
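The diff below only declares and defines the two members; inside a GCC pass they would be used roughly as follows (a fragment against GCC-internal APIs, not standalone code, and only a sketch of the intended use):

/* Build the integer range [10, 20] with the new constructor and
   query it with the new contains_p proxy.  */
tree lo = build_int_cst (integer_type_node, 10);
tree hi = build_int_cst (integer_type_node, 20);
Value_Range r (lo, hi);               /* kind defaults to VR_RANGE */
tree mid = build_int_cst (integer_type_node, 15);
if (r.contains_p (mid))
  {
    /* ... 15 lies within [10, 20] ... */
  }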
--- gcc/value-range.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'gcc') diff --git a/gcc/value-range.h b/gcc/value-range.h index e9439bb..f8aa0ca 100644 --- a/gcc/value-range.h +++ b/gcc/value-range.h @@ -505,6 +505,7 @@ public: Value_Range (); Value_Range (const vrange &r); Value_Range (tree type); + Value_Range (tree, tree, value_range_kind kind = VR_RANGE); Value_Range (const Value_Range &); void set_type (tree type); vrange& operator= (const vrange &); @@ -526,6 +527,7 @@ public: void set_undefined () { m_vrange->set_undefined (); } bool union_ (const vrange &r) { return m_vrange->union_ (r); } bool intersect (const vrange &r) { return m_vrange->intersect (r); } + bool contains_p (tree cst) const { return m_vrange->contains_p (cst); } bool singleton_p (tree *result = NULL) const { return m_vrange->singleton_p (result); } bool zero_p () const { return m_vrange->zero_p (); } @@ -564,6 +566,13 @@ Value_Range::Value_Range (tree type) } inline +Value_Range::Value_Range (tree min, tree max, value_range_kind kind) +{ + init (TREE_TYPE (min)); + set (min, max, kind); +} + +inline Value_Range::Value_Range (const Value_Range &r) { m_vrange = r.m_vrange; -- cgit v1.1 From 603fc926fee69ab3c7169af8a9c0918611a75d92 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Sun, 2 Apr 2023 21:37:49 +0800 Subject: LoongArch: Optimize additions with immediates 1. Use addu16i.d for TARGET_64BIT and suitable immediates. 2. Split one addition with immediate into two addu16i.d or addi.{d/w} instructions if possible. This can avoid using a temporary register without increasing the instruction count. Inspired by https://reviews.llvm.org/D143710 and https://reviews.llvm.org/D147222. Bootstrapped and regtested on loongarch64-linux-gnu. Ok for GCC 14? gcc/ChangeLog: * config/loongarch/loongarch-protos.h (loongarch_addu16i_imm12_operand_p): New function prototype. (loongarch_split_plus_constant): Likewise. * config/loongarch/loongarch.cc (loongarch_addu16i_imm12_operand_p): New function. (loongarch_split_plus_constant): Likewise. * config/loongarch/loongarch.h (ADDU16I_OPERAND): New macro. (DUAL_IMM12_OPERAND): Likewise. (DUAL_ADDU16I_OPERAND): Likewise. * config/loongarch/constraints.md (La, Lb, Lc, Ld, Le): New constraint. * config/loongarch/predicates.md (const_dual_imm12_operand): New predicate. (const_addu16i_operand): Likewise. (const_addu16i_imm12_di_operand): Likewise. (const_addu16i_imm12_si_operand): Likewise. (plus_di_operand): Likewise. (plus_si_operand): Likewise. (plus_si_extend_operand): Likewise. * config/loongarch/loongarch.md (add<mode>3): Convert to define_insn_and_split. Use plus_<mode>_operand predicate instead of arith_operand. Add alternatives for La, Lb, Lc, Ld, and Le constraints. (*addsi3_extended): Convert to define_insn_and_split. Use plus_si_extend_operand instead of arith_operand. Add alternatives for the La and Le constraints. gcc/testsuite/ChangeLog: * gcc.target/loongarch/add-const.c: New test. * gcc.target/loongarch/stack-check-cfa-1.c: Adjust for stack frame size change. * gcc.target/loongarch/stack-check-cfa-2.c: Likewise.
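One concrete case from point 2 of the message: a constant in [-4096, 2048) or (2047, 4094] (the new La constraint in the diff below) can be added with two addi instructions and no temporary register. A quick arithmetic check (my own sketch, mirroring the DUAL_IMM12 branch of loongarch_split_plus_constant):

#include <stdio.h>

int main (void)
{
  long v = 4094;                    /* too big for one 12-bit addi */
  long a = v > 0 ? 2047 : -2048;    /* first addi immediate */
  long b = v - a;                    /* 2047: also a valid immediate */
  printf ("addi.d $r4,$r4,%ld\n", a);
  printf ("addi.d $r4,$r4,%ld\n", b);
  return 0;
}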
--- gcc/config/loongarch/constraints.md | 46 ++++++++++++++- gcc/config/loongarch/loongarch-protos.h | 2 + gcc/config/loongarch/loongarch.cc | 44 +++++++++++++++ gcc/config/loongarch/loongarch.h | 19 +++++++ gcc/config/loongarch/loongarch.md | 66 +++++++++++++++++----- gcc/config/loongarch/predicates.md | 36 ++++++++++++ gcc/testsuite/gcc.target/loongarch/add-const.c | 45 +++++++++++++++ .../gcc.target/loongarch/stack-check-cfa-1.c | 2 +- .../gcc.target/loongarch/stack-check-cfa-2.c | 2 +- 9 files changed, 246 insertions(+), 16 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/add-const.c (limited to 'gcc') diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md index cb7fa68..7a38cd0 100644 --- a/gcc/config/loongarch/constraints.md +++ b/gcc/config/loongarch/constraints.md @@ -60,7 +60,22 @@ ;; "I" "A signed 12-bit constant (for arithmetic instructions)." ;; "J" "Integer zero." ;; "K" "An unsigned 12-bit constant (for logic instructions)." -;; "L" <-----unused +;; "L" - +;; "La" +;; "A signed constant in [-4096, 2048) or (2047, 4094]." +;; "Lb" +;; "A signed 32-bit constant and low 16-bit is zero, which can be +;; added onto a register with addu16i.d. It matches nothing if +;; the addu16i.d instruction is not available." +;; "Lc" +;; "A signed 64-bit constant can be expressed as Lb + I, but not a +;; single Lb or I." +;; "Ld" +;; "A signed 64-bit constant can be expressed as Lb + Lb, but not a +;; single Lb." +;; "Le" +;; "A signed 32-bit constant can be expressed as Lb + I, but not a +;; single Lb or I." ;; "M" <-----unused ;; "N" <-----unused ;; "O" <-----unused @@ -170,6 +185,35 @@ (and (match_code "const_int") (match_test "IMM12_OPERAND_UNSIGNED (ival)"))) +(define_constraint "La" + "A signed constant in [-4096, 2048) or (2047, 4094]." + (and (match_code "const_int") + (match_test "DUAL_IMM12_OPERAND (ival)"))) + +(define_constraint "Lb" + "A signed 32-bit constant and low 16-bit is zero, which can be added + onto a register with addu16i.d." + (and (match_code "const_int") + (match_test "ADDU16I_OPERAND (ival)"))) + +(define_constraint "Lc" + "A signed 64-bit constant can be expressed as Lb + I, but not a single Lb + or I." + (and (match_code "const_int") + (match_test "loongarch_addu16i_imm12_operand_p (ival, DImode)"))) + +(define_constraint "Ld" + "A signed 64-bit constant can be expressed as Lb + Lb, but not a single + Lb." + (and (match_code "const_int") + (match_test "DUAL_ADDU16I_OPERAND (ival)"))) + +(define_constraint "Le" + "A signed 32-bit constant can be expressed as Lb + I, but not a single Lb + or I." 
+ (and (match_code "const_int") + (match_test "loongarch_addu16i_imm12_operand_p (ival, SImode)"))) + (define_constraint "Yd" "@internal A constant @code{move_operand} that can be safely loaded using diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h index 35cc77c..83df489 100644 --- a/gcc/config/loongarch/loongarch-protos.h +++ b/gcc/config/loongarch/loongarch-protos.h @@ -83,6 +83,8 @@ extern rtx loongarch_legitimize_call_address (rtx); extern rtx loongarch_subword (rtx, bool); extern bool loongarch_split_move_p (rtx, rtx); extern void loongarch_split_move (rtx, rtx, rtx); +extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode); +extern void loongarch_split_plus_constant (rtx *, machine_mode); extern const char *loongarch_output_move (rtx, rtx); extern bool loongarch_cfun_has_cprestore_slot_p (void); #ifdef RTX_CODE diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 6927bdc..34532d8 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -3754,6 +3754,50 @@ loongarch_split_move (rtx dest, rtx src, rtx insn_) } } +/* Check if adding an integer constant value for a specific mode can be + performed with an addu16i.d instruction and an addi.{w/d} + instruction. */ + +bool +loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT value, machine_mode mode) +{ + /* Not necessary, but avoid unnecessary calculation if !TARGET_64BIT. */ + if (!TARGET_64BIT) + return false; + + if ((value & 0xffff) == 0) + return false; + + if (IMM12_OPERAND (value)) + return false; + + value = (value & ~HWIT_UC_0xFFF) + ((value & 0x800) << 1); + return ADDU16I_OPERAND (trunc_int_for_mode (value, mode)); +} + +/* Split one integer constant op[0] into two (op[1] and op[2]) for constant + plus operation in a specific mode. The splitted constants can be added + onto a register with a single instruction (addi.{d/w} or addu16i.d). */ + +void +loongarch_split_plus_constant (rtx *op, machine_mode mode) +{ + HOST_WIDE_INT v = INTVAL (op[0]), a; + + if (DUAL_IMM12_OPERAND (v)) + a = (v > 0 ? 2047 : -2048); + else if (loongarch_addu16i_imm12_operand_p (v, mode)) + a = (v & ~HWIT_UC_0xFFF) + ((v & 0x800) << 1); + else if (mode == DImode && DUAL_ADDU16I_OPERAND (v)) + a = (v > 0 ? 0x7fff : -0x8000) << 16; + else + gcc_unreachable (); + + op[1] = gen_int_mode (a, mode); + v = v - (unsigned HOST_WIDE_INT) a; + op[2] = gen_int_mode (v, mode); +} + /* Return true if a move from SRC to DEST in INSN should be split. */ static bool diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h index 6b7dbec..277facb 100644 --- a/gcc/config/loongarch/loongarch.h +++ b/gcc/config/loongarch/loongarch.h @@ -612,6 +612,25 @@ enum reg_class #define CONST_LOW_PART(VALUE) ((VALUE) - CONST_HIGH_PART (VALUE)) +/* True if VALUE can be added onto a register with one addu16i.d + instruction. */ + +#define ADDU16I_OPERAND(VALUE) \ + (TARGET_64BIT && (((VALUE) & 0xffff) == 0 \ + && IMM16_OPERAND ((HOST_WIDE_INT) (VALUE) / 65536))) + +/* True if VALUE can be added onto a register with two addi.{d/w} + instructions, but not one addi.{d/w} instruction. */ +#define DUAL_IMM12_OPERAND(VALUE) \ + (IN_RANGE ((VALUE), -4096, 4094) && !IMM12_OPERAND (VALUE)) + +/* True if VALUE can be added onto a register with two addu16i.d + instruction, but not one addu16i.d instruction. 
*/ +#define DUAL_ADDU16I_OPERAND(VALUE) \ + (TARGET_64BIT && (((VALUE) & 0xffff) == 0 \ + && !ADDU16I_OPERAND (VALUE) \ + && IN_RANGE ((VALUE) / 65536, -0x10000, 0xfffe))) + #define IMM12_INT(X) IMM12_OPERAND (INTVAL (X)) #define IMM12_INT_UNSIGNED(X) IMM12_OPERAND_UNSIGNED (INTVAL (X)) #define LU12I_INT(X) LU12I_OPERAND (INTVAL (X)) diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index 3509c3c..628ecc7 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -598,24 +598,64 @@ [(set_attr "type" "fadd") (set_attr "mode" "")]) -(define_insn "add3" - [(set (match_operand:GPR 0 "register_operand" "=r,r") - (plus:GPR (match_operand:GPR 1 "register_operand" "r,r") - (match_operand:GPR 2 "arith_operand" "r,I")))] +(define_insn_and_split "add3" + [(set (match_operand:GPR 0 "register_operand" "=r,r,r,r,r,r,r") + (plus:GPR (match_operand:GPR 1 "register_operand" "r,r,r,r,r,r,r") + (match_operand:GPR 2 "plus__operand" + "r,I,La,Lb,Lc,Ld,Le")))] "" - "add%i2.\t%0,%1,%2"; + "@ + add.\t%0,%1,%2 + addi.\t%0,%1,%2 + # + * operands[2] = GEN_INT (INTVAL (operands[2]) / 65536); \ + return \"addu16i.d\t%0,%1,%2\"; + # + # + #" + "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \ + && !ADDU16I_OPERAND (INTVAL (operands[2]))" + [(set (match_dup 0) (plus:GPR (match_dup 1) (match_dup 3))) + (set (match_dup 0) (plus:GPR (match_dup 0) (match_dup 4)))] + { + loongarch_split_plus_constant (&operands[2], mode); + } [(set_attr "alu_type" "add") - (set_attr "mode" "")]) - -(define_insn "*addsi3_extended" - [(set (match_operand:DI 0 "register_operand" "=r,r") + (set_attr "mode" "") + (set_attr "insn_count" "1,1,2,1,2,2,2") + (set (attr "enabled") + (cond + [(match_test "mode != DImode && which_alternative == 4") + (const_string "no") + (match_test "mode != DImode && which_alternative == 5") + (const_string "no") + (match_test "mode != SImode && which_alternative == 6") + (const_string "no")] + (const_string "yes")))]) + +(define_insn_and_split "*addsi3_extended" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,r") (sign_extend:DI - (plus:SI (match_operand:SI 1 "register_operand" "r,r") - (match_operand:SI 2 "arith_operand" "r,I"))))] + (plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r") + (match_operand:SI 2 "plus_si_extend_operand" + "r,I,La,Le"))))] "TARGET_64BIT" - "add%i2.w\t%0,%1,%2" + "@ + add.w\t%0,%1,%2 + addi.w\t%0,%1,%2 + # + #" + "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2])" + [(set (subreg:SI (match_dup 0) 0) (plus:SI (match_dup 1) (match_dup 3))) + (set (match_dup 0) + (sign_extend:DI (plus:SI (subreg:SI (match_dup 0) 0) + (match_dup 4))))] + { + loongarch_split_plus_constant (&operands[2], SImode); + } [(set_attr "alu_type" "add") - (set_attr "mode" "SI")]) + (set_attr "mode" "SI") + (set_attr "insn_count" "1,1,2,2")]) ;; diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md index 9514028..510973a 100644 --- a/gcc/config/loongarch/predicates.md +++ b/gcc/config/loongarch/predicates.md @@ -39,14 +39,50 @@ (and (match_code "const_int") (match_test "IMM12_OPERAND (INTVAL (op))"))) +(define_predicate "const_dual_imm12_operand" + (and (match_code "const_int") + (match_test "DUAL_IMM12_OPERAND (INTVAL (op))"))) + (define_predicate "const_imm16_operand" (and (match_code "const_int") (match_test "IMM16_OPERAND (INTVAL (op))"))) +(define_predicate "const_addu16i_operand" + (and (match_code "const_int") + (match_test "ADDU16I_OPERAND (INTVAL (op))"))) + +(define_predicate 
"const_addu16i_imm12_di_operand" + (and (match_code "const_int") + (match_test "loongarch_addu16i_imm12_operand_p (INTVAL (op), DImode)"))) + +(define_predicate "const_addu16i_imm12_si_operand" + (and (match_code "const_int") + (match_test "loongarch_addu16i_imm12_operand_p (INTVAL (op), SImode)"))) + +(define_predicate "const_dual_addu16i_operand" + (and (match_code "const_int") + (match_test "DUAL_ADDU16I_OPERAND (INTVAL (op))"))) + (define_predicate "arith_operand" (ior (match_operand 0 "const_arith_operand") (match_operand 0 "register_operand"))) +(define_predicate "plus_di_operand" + (ior (match_operand 0 "arith_operand") + (match_operand 0 "const_dual_imm12_operand") + (match_operand 0 "const_addu16i_operand") + (match_operand 0 "const_addu16i_imm12_di_operand") + (match_operand 0 "const_dual_addu16i_operand"))) + +(define_predicate "plus_si_extend_operand" + (ior (match_operand 0 "arith_operand") + (match_operand 0 "const_dual_imm12_operand") + (match_operand 0 "const_addu16i_imm12_si_operand"))) + +(define_predicate "plus_si_operand" + (ior (match_operand 0 "plus_si_extend_operand") + (match_operand 0 "const_addu16i_operand"))) + (define_predicate "const_immalsl_operand" (and (match_code "const_int") (match_test "IN_RANGE (INTVAL (op), 1, 4)"))) diff --git a/gcc/testsuite/gcc.target/loongarch/add-const.c b/gcc/testsuite/gcc.target/loongarch/add-const.c new file mode 100644 index 0000000..7b6a7cb --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/add-const.c @@ -0,0 +1,45 @@ +/* { dg-do compile } */ +/* { dg-options "-O -mabi=lp64d" } */ + +/* None of these functions should load the const operand into a temp + register. */ + +/* { dg-final { scan-assembler-not "add\\.[dw]" } } */ + +unsigned long f01 (unsigned long x) { return x + 1; } +unsigned long f02 (unsigned long x) { return x - 1; } +unsigned long f03 (unsigned long x) { return x + 2047; } +unsigned long f04 (unsigned long x) { return x + 4094; } +unsigned long f05 (unsigned long x) { return x - 2048; } +unsigned long f06 (unsigned long x) { return x - 4096; } +unsigned long f07 (unsigned long x) { return x + 0x7fff0000; } +unsigned long f08 (unsigned long x) { return x - 0x80000000l; } +unsigned long f09 (unsigned long x) { return x + 0x7fff0000l * 2; } +unsigned long f10 (unsigned long x) { return x - 0x80000000l * 2; } +unsigned long f11 (unsigned long x) { return x + 0x7fff0000 + 0x1; } +unsigned long f12 (unsigned long x) { return x + 0x7fff0000 - 0x1; } +unsigned long f13 (unsigned long x) { return x + 0x7fff0000 + 0x7ff; } +unsigned long f14 (unsigned long x) { return x + 0x7fff0000 - 0x800; } +unsigned long f15 (unsigned long x) { return x - 0x80000000l - 1; } +unsigned long f16 (unsigned long x) { return x - 0x80000000l + 1; } +unsigned long f17 (unsigned long x) { return x - 0x80000000l - 0x800; } +unsigned long f18 (unsigned long x) { return x - 0x80000000l + 0x7ff; } + +unsigned int g01 (unsigned int x) { return x + 1; } +unsigned int g02 (unsigned int x) { return x - 1; } +unsigned int g03 (unsigned int x) { return x + 2047; } +unsigned int g04 (unsigned int x) { return x + 4094; } +unsigned int g05 (unsigned int x) { return x - 2048; } +unsigned int g06 (unsigned int x) { return x - 4096; } +unsigned int g07 (unsigned int x) { return x + 0x7fff0000; } +unsigned int g08 (unsigned int x) { return x - 0x80000000l; } +unsigned int g09 (unsigned int x) { return x + 0x7fff0000l * 2; } +unsigned int g10 (unsigned int x) { return x - 0x80000000l * 2; } +unsigned int g11 (unsigned int x) { return x + 0x7fff0000 + 
0x1; } +unsigned int g12 (unsigned int x) { return x + 0x7fff0000 - 0x1; } +unsigned int g13 (unsigned int x) { return x + 0x7fff0000 + 0x7ff; } +unsigned int g14 (unsigned int x) { return x + 0x7fff0000 - 0x800; } +unsigned int g15 (unsigned int x) { return x - 0x80000000l - 1; } +unsigned int g16 (unsigned int x) { return x - 0x80000000l + 1; } +unsigned int g17 (unsigned int x) { return x - 0x80000000l - 0x800; } +unsigned int g18 (unsigned int x) { return x - 0x80000000l + 0x7ff; } diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c index 3533fe7..cd72154 100644 --- a/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c +++ b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c @@ -6,7 +6,7 @@ #define SIZE 128*1024 #include "stack-check-prologue.h" -/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 131088} 1 } } */ +/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 131072} 1 } } */ /* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 0} 1 } } */ /* Checks that the CFA notes are correct for every sp adjustment. */ diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c index e5e7111..3e5ca05 100644 --- a/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c +++ b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c @@ -6,7 +6,7 @@ #define SIZE 1280*1024 + 512 #include "stack-check-prologue.h" -/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 1311248} 1 } } */ +/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 1311232} 1 } } */ /* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 0} 1 } } */ /* Checks that the CFA notes are correct for every sp adjustment. */ -- cgit v1.1 From 5baf2cccd0345a7ac3d4467343414f8b7ff1724b Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Fri, 10 Feb 2023 12:52:24 +0100 Subject: Abstract out calculation of max HWIs per wide int. I'm about to add one more use of the same snippet of code, for a total of 4 identical calculations in the code base. gcc/ChangeLog: * wide-int.h (WIDE_INT_MAX_HWIS): New. (class fixed_wide_int_storage): Use it. (trailing_wide_ints ::set_precision): Use it. (trailing_wide_ints ::extra_size): Use it. --- gcc/wide-int.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'gcc') diff --git a/gcc/wide-int.h b/gcc/wide-int.h index a450a74..6be343c 100644 --- a/gcc/wide-int.h +++ b/gcc/wide-int.h @@ -264,6 +264,10 @@ along with GCC; see the file COPYING3. If not see /* The number of HWIs needed to store an offset_int. */ #define OFFSET_INT_ELTS (ADDR_MAX_PRECISION / HOST_BITS_PER_WIDE_INT) +/* The max number of HWIs needed to store a wide_int of PRECISION. */ +#define WIDE_INT_MAX_HWIS(PRECISION) \ + ((PRECISION + HOST_BITS_PER_WIDE_INT - 1) / HOST_BITS_PER_WIDE_INT) + /* The type of result produced by a binary operation on types T1 and T2. Defined purely for brevity. 
*/ #define WI_BINARY_RESULT(T1, T2) \ @@ -1214,7 +1218,7 @@ template class GTY(()) fixed_wide_int_storage { private: - HOST_WIDE_INT val[(N + HOST_BITS_PER_WIDE_INT + 1) / HOST_BITS_PER_WIDE_INT]; + HOST_WIDE_INT val[WIDE_INT_MAX_HWIS (N)]; unsigned int len; public: @@ -1475,8 +1479,7 @@ trailing_wide_ints ::set_precision (unsigned int precision, gcc_checking_assert (num_elements <= N); m_num_elements = num_elements; m_precision = precision; - m_max_len = ((precision + HOST_BITS_PER_WIDE_INT - 1) - / HOST_BITS_PER_WIDE_INT); + m_max_len = WIDE_INT_MAX_HWIS (precision); } /* Return a reference to element INDEX. */ @@ -1505,8 +1508,7 @@ inline size_t trailing_wide_ints ::extra_size (unsigned int precision, unsigned int num_elements) { - unsigned int max_len = ((precision + HOST_BITS_PER_WIDE_INT - 1) - / HOST_BITS_PER_WIDE_INT); + unsigned int max_len = WIDE_INT_MAX_HWIS (precision); gcc_checking_assert (num_elements <= N); return (num_elements * max_len - 1) * sizeof (HOST_WIDE_INT); } -- cgit v1.1 From 6e552ec218a04dac046066e2608202ba90d66f11 Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Tue, 18 Apr 2023 07:56:52 +0200 Subject: Abstract out REAL_VALUE_TYPE streaming. In upcoming patches I will contribute code to stream out frange's as well as vrange's. This patch abstracts out the REAL_VALUE_TYPE streaming into their own functions, so that they may be used elsewhere. gcc/ChangeLog: * data-streamer.cc (bp_pack_real_value): New. (bp_unpack_real_value): New. * data-streamer.h (bp_pack_real_value): New. (bp_unpack_real_value): New. * tree-streamer-in.cc (unpack_ts_real_cst_value_fields): Use bp_unpack_real_value. * tree-streamer-out.cc (pack_ts_real_cst_value_fields): Use bp_pack_real_value. --- gcc/data-streamer.cc | 33 +++++++++++++++++++++++++++++++++ gcc/data-streamer.h | 2 ++ gcc/tree-streamer-in.cc | 14 +------------- gcc/tree-streamer-out.cc | 14 ++------------ 4 files changed, 38 insertions(+), 25 deletions(-) (limited to 'gcc') diff --git a/gcc/data-streamer.cc b/gcc/data-streamer.cc index d4b663b..0b9c457 100644 --- a/gcc/data-streamer.cc +++ b/gcc/data-streamer.cc @@ -113,3 +113,36 @@ bp_unpack_var_len_int (struct bitpack_d *bp) } } } + +/* Pack REAL_VALUE_TYPE R into BP. */ + +void +bp_pack_real_value (struct bitpack_d *bp, const REAL_VALUE_TYPE *r) +{ + bp_pack_value (bp, r->cl, 2); + bp_pack_value (bp, r->decimal, 1); + bp_pack_value (bp, r->sign, 1); + bp_pack_value (bp, r->signalling, 1); + bp_pack_value (bp, r->canonical, 1); + bp_pack_value (bp, r->uexp, EXP_BITS); + for (unsigned i = 0; i < SIGSZ; i++) + bp_pack_value (bp, r->sig[i], HOST_BITS_PER_LONG); +} + +/* Unpack REAL_VALUE_TYPE R from BP. */ + +void +bp_unpack_real_value (struct bitpack_d *bp, REAL_VALUE_TYPE *r) +{ + /* Clear all bits of the real value type so that we can later do + bitwise comparisons to see if two values are the same. 
*/ + memset (r, 0, sizeof (*r)); + r->cl = (unsigned) bp_unpack_value (bp, 2); + r->decimal = (unsigned) bp_unpack_value (bp, 1); + r->sign = (unsigned) bp_unpack_value (bp, 1); + r->signalling = (unsigned) bp_unpack_value (bp, 1); + r->canonical = (unsigned) bp_unpack_value (bp, 1); + r->uexp = (unsigned) bp_unpack_value (bp, EXP_BITS); + for (unsigned i = 0; i < SIGSZ; i++) + r->sig[i] = (unsigned long) bp_unpack_value (bp, HOST_BITS_PER_LONG); +} diff --git a/gcc/data-streamer.h b/gcc/data-streamer.h index d8c7e21..19c9d6e 100644 --- a/gcc/data-streamer.h +++ b/gcc/data-streamer.h @@ -46,6 +46,8 @@ struct bitpack_d /* In data-streamer.cc */ void bp_pack_var_len_unsigned (struct bitpack_d *, unsigned HOST_WIDE_INT); void bp_pack_var_len_int (struct bitpack_d *, HOST_WIDE_INT); +void bp_pack_real_value (struct bitpack_d *, const REAL_VALUE_TYPE *); +void bp_unpack_real_value (struct bitpack_d *, REAL_VALUE_TYPE *); unsigned HOST_WIDE_INT bp_unpack_var_len_unsigned (struct bitpack_d *); HOST_WIDE_INT bp_unpack_var_len_int (struct bitpack_d *); diff --git a/gcc/tree-streamer-in.cc b/gcc/tree-streamer-in.cc index d4dc30f..bf4bd5c 100644 --- a/gcc/tree-streamer-in.cc +++ b/gcc/tree-streamer-in.cc @@ -188,21 +188,9 @@ unpack_ts_int_cst_value_fields (struct bitpack_d *bp, tree expr) static void unpack_ts_real_cst_value_fields (struct bitpack_d *bp, tree expr) { - unsigned i; REAL_VALUE_TYPE r; - /* Clear all bits of the real value type so that we can later do - bitwise comparisons to see if two values are the same. */ - memset (&r, 0, sizeof r); - r.cl = (unsigned) bp_unpack_value (bp, 2); - r.decimal = (unsigned) bp_unpack_value (bp, 1); - r.sign = (unsigned) bp_unpack_value (bp, 1); - r.signalling = (unsigned) bp_unpack_value (bp, 1); - r.canonical = (unsigned) bp_unpack_value (bp, 1); - r.uexp = (unsigned) bp_unpack_value (bp, EXP_BITS); - for (i = 0; i < SIGSZ; i++) - r.sig[i] = (unsigned long) bp_unpack_value (bp, HOST_BITS_PER_LONG); - + bp_unpack_real_value (bp, &r); memcpy (TREE_REAL_CST_PTR (expr), &r, sizeof (REAL_VALUE_TYPE)); } diff --git a/gcc/tree-streamer-out.cc b/gcc/tree-streamer-out.cc index d107229..81e6fcb 100644 --- a/gcc/tree-streamer-out.cc +++ b/gcc/tree-streamer-out.cc @@ -166,18 +166,8 @@ pack_ts_int_cst_value_fields (struct bitpack_d *bp, tree expr) static void pack_ts_real_cst_value_fields (struct bitpack_d *bp, tree expr) { - unsigned i; - REAL_VALUE_TYPE r; - - r = TREE_REAL_CST (expr); - bp_pack_value (bp, r.cl, 2); - bp_pack_value (bp, r.decimal, 1); - bp_pack_value (bp, r.sign, 1); - bp_pack_value (bp, r.signalling, 1); - bp_pack_value (bp, r.canonical, 1); - bp_pack_value (bp, r.uexp, EXP_BITS); - for (i = 0; i < SIGSZ; i++) - bp_pack_value (bp, r.sig[i], HOST_BITS_PER_LONG); + REAL_VALUE_TYPE r = TREE_REAL_CST (expr); + bp_pack_real_value (bp, &r); } -- cgit v1.1 From 2349e69125335d4c8c5e43cf3643844519d154c3 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 18 Apr 2023 11:01:47 +0200 Subject: match.pd: Improve fneg/fadd optimization [PR109240] match.pd has mostly for AArch64 an optimization in which it optimizes certain forms of __builtin_shuffle of x + y and x - y vectors into fneg using twice as wide element type so that every other sign is changed, followed by fadd. The following patch extends that optimization, so that it can handle other forms as well, using the same fneg but fsub instead of fadd. 
As the plus is commutative and minus is not and I want to handle vec_perm with plus minus and minus plus order preferrably in one pattern, I had to do the matching operand checks by hand. 2023-04-18 Jakub Jelinek PR tree-optimization/109240 * match.pd (fneg/fadd): Rewrite such that it handles both plus as first vec_perm operand and minus as second using fneg/fadd and minus as first vec_perm operand and plus as second using fneg/fsub. * gcc.target/aarch64/simd/addsub_2.c: New test. * gcc.target/aarch64/sve/addsub_2.c: New test. --- gcc/match.pd | 121 +++++++++++++---------- gcc/testsuite/gcc.target/aarch64/simd/addsub_2.c | 56 +++++++++++ gcc/testsuite/gcc.target/aarch64/sve/addsub_2.c | 52 ++++++++++ 3 files changed, 175 insertions(+), 54 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/simd/addsub_2.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/addsub_2.c (limited to 'gcc') diff --git a/gcc/match.pd b/gcc/match.pd index c5d2c36..b7d7a5d 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -8074,63 +8074,76 @@ and, under IEEE 754 the fneg of the wider type will negate every even entry and when doing an add we get a sub of the even and add of every odd elements. */ -(simplify - (vec_perm (plus:c @0 @1) (minus @0 @1) VECTOR_CST@2) - (if (!VECTOR_INTEGER_TYPE_P (type) - && !FLOAT_WORDS_BIG_ENDIAN) - (with - { - /* Build a vector of integers from the tree mask. */ - vec_perm_builder builder; - } - (if (tree_to_vec_perm_builder (&builder, @2)) - (with - { - /* Create a vec_perm_indices for the integer vector. */ - poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type); - vec_perm_indices sel (builder, 2, nelts); - machine_mode vec_mode = TYPE_MODE (type); - machine_mode wide_mode; - scalar_mode wide_elt_mode; - poly_uint64 wide_nunits; - scalar_mode inner_mode = GET_MODE_INNER (vec_mode); - } - (if (sel.series_p (0, 2, 0, 2) - && sel.series_p (1, 2, nelts + 1, 2) - && GET_MODE_2XWIDER_MODE (inner_mode).exists (&wide_elt_mode) - && multiple_p (GET_MODE_NUNITS (vec_mode), 2, &wide_nunits) - && related_vector_mode (vec_mode, wide_elt_mode, - wide_nunits).exists (&wide_mode)) - (with - { - tree stype - = lang_hooks.types.type_for_mode (GET_MODE_INNER (wide_mode), - TYPE_UNSIGNED (type)); - tree ntype = build_vector_type_for_mode (stype, wide_mode); - - /* The format has to be a non-extended ieee format. */ - const struct real_format *fmt_old = FLOAT_MODE_FORMAT (vec_mode); - const struct real_format *fmt_new = FLOAT_MODE_FORMAT (wide_mode); - } - (if (TYPE_MODE (stype) != BLKmode - && VECTOR_TYPE_P (ntype) - && fmt_old != NULL - && fmt_new != NULL) - (with - { - /* If the target doesn't support v1xx vectors, try using - scalar mode xx instead. */ +(for plusminus (plus minus) + minusplus (minus plus) + (simplify + (vec_perm (plusminus @0 @1) (minusplus @2 @3) VECTOR_CST@4) + (if (!VECTOR_INTEGER_TYPE_P (type) + && !FLOAT_WORDS_BIG_ENDIAN + /* plus is commutative, while minus is not, so :c can't be used. + Do equality comparisons by hand and at the end pick the operands + from the minus. */ + && (operand_equal_p (@0, @2, 0) + ? operand_equal_p (@1, @3, 0) + : operand_equal_p (@0, @3, 0) && operand_equal_p (@1, @2, 0))) + (with + { + /* Build a vector of integers from the tree mask. */ + vec_perm_builder builder; + } + (if (tree_to_vec_perm_builder (&builder, @4)) + (with + { + /* Create a vec_perm_indices for the integer vector. 
*/ + poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type); + vec_perm_indices sel (builder, 2, nelts); + machine_mode vec_mode = TYPE_MODE (type); + machine_mode wide_mode; + scalar_mode wide_elt_mode; + poly_uint64 wide_nunits; + scalar_mode inner_mode = GET_MODE_INNER (vec_mode); + } + (if (sel.series_p (0, 2, 0, 2) + && sel.series_p (1, 2, nelts + 1, 2) + && GET_MODE_2XWIDER_MODE (inner_mode).exists (&wide_elt_mode) + && multiple_p (GET_MODE_NUNITS (vec_mode), 2, &wide_nunits) + && related_vector_mode (vec_mode, wide_elt_mode, + wide_nunits).exists (&wide_mode)) + (with + { + tree stype + = lang_hooks.types.type_for_mode (GET_MODE_INNER (wide_mode), + TYPE_UNSIGNED (type)); + tree ntype = build_vector_type_for_mode (stype, wide_mode); + + /* The format has to be a non-extended ieee format. */ + const struct real_format *fmt_old = FLOAT_MODE_FORMAT (vec_mode); + const struct real_format *fmt_new = FLOAT_MODE_FORMAT (wide_mode); + } + (if (TYPE_MODE (stype) != BLKmode + && VECTOR_TYPE_P (ntype) + && fmt_old != NULL + && fmt_new != NULL) + (with + { + /* If the target doesn't support v1xx vectors, try using + scalar mode xx instead. */ if (known_eq (GET_MODE_NUNITS (wide_mode), 1) && !target_supports_op_p (ntype, NEGATE_EXPR, optab_vector)) ntype = stype; - } - (if (fmt_new->signbit_rw - == fmt_old->signbit_rw + GET_MODE_UNIT_BITSIZE (vec_mode) - && fmt_new->signbit_rw == fmt_new->signbit_ro - && targetm.can_change_mode_class (TYPE_MODE (ntype), TYPE_MODE (type), ALL_REGS) - && ((optimize_vectors_before_lowering_p () && VECTOR_TYPE_P (ntype)) - || target_supports_op_p (ntype, NEGATE_EXPR, optab_vector))) - (plus (view_convert:type (negate (view_convert:ntype @1))) @0))))))))))) + } + (if (fmt_new->signbit_rw + == fmt_old->signbit_rw + GET_MODE_UNIT_BITSIZE (vec_mode) + && fmt_new->signbit_rw == fmt_new->signbit_ro + && targetm.can_change_mode_class (TYPE_MODE (ntype), + TYPE_MODE (type), ALL_REGS) + && ((optimize_vectors_before_lowering_p () + && VECTOR_TYPE_P (ntype)) + || target_supports_op_p (ntype, NEGATE_EXPR, optab_vector))) + (if (plusminus == PLUS_EXPR) + (plus (view_convert:type (negate (view_convert:ntype @3))) @2) + (minus @0 (view_convert:type + (negate (view_convert:ntype @1)))))))))))))))) (simplify (vec_perm @0 @1 VECTOR_CST@2) diff --git a/gcc/testsuite/gcc.target/aarch64/simd/addsub_2.c b/gcc/testsuite/gcc.target/aarch64/simd/addsub_2.c new file mode 100644 index 0000000..87424c9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/addsub_2.c @@ -0,0 +1,56 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target arm_v8_2a_fp16_neon_ok } */ +/* { dg-options "-Ofast" } */ +/* { dg-add-options arm_v8_2a_fp16_neon } */ +/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */ + +#pragma GCC target "+nosve" + +/* +** f1: +** ... +** fneg v[0-9]+.2d, v[0-9]+.2d +** fsub v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s +** ... +*/ +void f1 (float *restrict a, float *restrict b, float *res, int n) +{ + for (int i = 0; i < (n & -4); i+=2) + { + res[i+0] = a[i+0] - b[i+0]; + res[i+1] = a[i+1] + b[i+1]; + } +} + +/* +** d1: +** ... +** fneg v[0-9]+.4s, v[0-9]+.4s +** fsub v[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h +** ... +*/ +void d1 (_Float16 *restrict a, _Float16 *restrict b, _Float16 *res, int n) +{ + for (int i = 0; i < (n & -8); i+=2) + { + res[i+0] = a[i+0] - b[i+0]; + res[i+1] = a[i+1] + b[i+1]; + } +} + +/* +** e1: +** ... +** fsub v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d +** fadd v[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d +** ins v[0-9]+.d\[1\], v[0-9]+.d\[1\] +** ... 
+*/ +void e1 (double *restrict a, double *restrict b, double *res, int n) +{ + for (int i = 0; i < (n & -4); i+=2) + { + res[i+0] = a[i+0] - b[i+0]; + res[i+1] = a[i+1] + b[i+1]; + } +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/addsub_2.c b/gcc/testsuite/gcc.target/aarch64/sve/addsub_2.c new file mode 100644 index 0000000..5b9406a --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/addsub_2.c @@ -0,0 +1,52 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast" } */ +/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */ + +/* +** f1: +** ... +** fneg z[0-9]+.d, p[0-9]+/m, z[0-9]+.d +** fsub z[0-9]+.s, z[0-9]+.s, z[0-9]+.s +** ... +*/ +void f1 (float *restrict a, float *restrict b, float *res, int n) +{ + for (int i = 0; i < (n & -4); i+=2) + { + res[i+0] = a[i+0] - b[i+0]; + res[i+1] = a[i+1] + b[i+1]; + } +} + +/* +** d1: +** ... +** fneg z[0-9]+.s, p[0-9]+/m, z[0-9]+.s +** fsub z[0-9]+.h, z[0-9]+.h, z[0-9]+.h +** ... +*/ +void d1 (_Float16 *restrict a, _Float16 *restrict b, _Float16 *res, int n) +{ + for (int i = 0; i < (n & -8); i+=2) + { + res[i+0] = a[i+0] - b[i+0]; + res[i+1] = a[i+1] + b[i+1]; + } +} + +/* +** e1: +** ... +** fadd z[0-9]+.d, z[0-9]+.d, z[0-9]+.d +** movprfx z[0-9]+.d, p[0-9]+/m, z[0-9]+.d +** fsub z[0-9]+.d, p[0-9]+/m, z[0-9]+.d, z[0-9]+.d +** ... +*/ +void e1 (double *restrict a, double *restrict b, double *res, int n) +{ + for (int i = 0; i < (n & -4); i+=2) + { + res[i+0] = a[i+0] - b[i+0]; + res[i+1] = a[i+1] + b[i+1]; + } +} -- cgit v1.1 From 4204ed2dc74390ab3689d1d6a53001761338baf6 Mon Sep 17 00:00:00 2001 From: Jin Ma Date: Tue, 18 Apr 2023 17:26:49 +0800 Subject: RISC-V: Adjust the parsing order of extensions to be consistent with riscv-spec and binutils. The current order of gcc and binutils parsing extensions is inconsistent. According to latest risc-v spec, the canonical order in which extension names must appear in the name string specified in Table 29.1 is different from before. In the latest table, non-standard extensions must be listed after all standard extensions. To keep consistent, we now change the parsing order. Related llvm patch links: https://reviews.llvm.org/D148315 gcc/ChangeLog: * common/config/riscv/riscv-common.cc (multi_letter_subset_rank): Swap the order of z-extensions and s-extensions. (riscv_subset_list::parse): Likewise. gcc/testsuite/ChangeLog: * gcc.target/riscv/arch-5.c: Likewise. --- gcc/common/config/riscv/riscv-common.cc | 12 ++++++------ gcc/testsuite/gcc.target/riscv/arch-5.c | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'gcc') diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc index 2fc0f8b..309a52d 100644 --- a/gcc/common/config/riscv/riscv-common.cc +++ b/gcc/common/config/riscv/riscv-common.cc @@ -398,10 +398,10 @@ multi_letter_subset_rank (const std::string &subset) char multiletter_class = subset[0]; switch (multiletter_class) { - case 's': + case 'z': high_order = 0; break; - case 'z': + case 's': high_order = 1; break; case 'x': @@ -1121,14 +1121,14 @@ riscv_subset_list::parse (const char *arch, location_t loc) if (p == NULL) goto fail; - /* Parsing supervisor extension. */ - p = subset_list->parse_multiletter_ext (p, "s", "supervisor extension"); + /* Parsing sub-extensions. */ + p = subset_list->parse_multiletter_ext (p, "z", "sub-extension"); if (p == NULL) goto fail; - /* Parsing sub-extensions. */ - p = subset_list->parse_multiletter_ext (p, "z", "sub-extension"); + /* Parsing supervisor extension. 
*/ + p = subset_list->parse_multiletter_ext (p, "s", "supervisor extension"); if (p == NULL) goto fail; diff --git a/gcc/testsuite/gcc.target/riscv/arch-5.c b/gcc/testsuite/gcc.target/riscv/arch-5.c index b945a64..8258552 100644 --- a/gcc/testsuite/gcc.target/riscv/arch-5.c +++ b/gcc/testsuite/gcc.target/riscv/arch-5.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv32isabc_zfoo_xbar -mabi=ilp32" } */ +/* { dg-options "-march=rv32i_zfoo_sabc_xbar -mabi=ilp32" } */ int foo() { } -- cgit v1.1 From 2d70f3213fe4e76722cd55e48f8eb0820c56ec7a Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Tue, 18 Apr 2023 12:06:49 +0100 Subject: aarch64: Add QI -> HI zero-extension for LDAPR This patch is a straightforward extension of the zero-extending LDAPR pattern to represent QI -> HI load-extends. This maps down to a LDAPRB-W instruction. This lets us remove a redundant zero-extend in the new test function. Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ChangeLog: * config/aarch64/atomics.md (*aarch64_atomic_load_rcpc_zext): Use SD_HSDI for destination mode iterator. gcc/testsuite/ChangeLog: * gcc.target/aarch64/ldapr-zext.c: Add test for u8 to u16 extension. --- gcc/config/aarch64/atomics.md | 6 +++--- gcc/testsuite/gcc.target/aarch64/ldapr-zext.c | 8 ++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md index 999f86e..2b6f04e 100644 --- a/gcc/config/aarch64/atomics.md +++ b/gcc/config/aarch64/atomics.md @@ -705,13 +705,13 @@ ) (define_insn "*aarch64_atomic_load_rcpc_zext" - [(set (match_operand:GPI 0 "register_operand" "=r") - (zero_extend:GPI + [(set (match_operand:SD_HSDI 0 "register_operand" "=r") + (zero_extend:SD_HSDI (unspec_volatile:ALLX [(match_operand:ALLX 1 "aarch64_sync_memory_operand" "Q") (match_operand:SI 2 "const_int_operand")] ;; model UNSPECV_LDAP)))] - "TARGET_RCPC && ( > )" + "TARGET_RCPC && ( > )" "ldapr\t%w0, %1" ) diff --git a/gcc/testsuite/gcc.target/aarch64/ldapr-zext.c b/gcc/testsuite/gcc.target/aarch64/ldapr-zext.c index 6f448ee..e9d9058 100644 --- a/gcc/testsuite/gcc.target/aarch64/ldapr-zext.c +++ b/gcc/testsuite/gcc.target/aarch64/ldapr-zext.c @@ -65,3 +65,11 @@ TEST(u8_u32, u8, unsigned) */ TEST(u16_u32, u16, unsigned) +/* +**test_u8_u16: +**... +** ldaprb w0, \[x[0-9]+\] +** ret +*/ +TEST(u8_u16, u8, unsigned short) + -- cgit v1.1 From ebaec582ca1ee33269724bff945a16655e487ad5 Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Tue, 18 Apr 2023 12:15:35 +0100 Subject: aarch64: Give hint for -mcpu options that match -march instead We should redirect users of the erroneous -mcpu=armv8.2-a to use -march instead. There is an equivalent hint for -march used with a CPU name. Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ChangeLog: * config/aarch64/aarch64.cc (aarch64_validate_mcpu): Add hint to use -march if the argument matches that. gcc/testsuite/ChangeLog: * gcc.target/aarch64/spellcheck_11.c: New test. 
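In practice the interaction looks roughly like this; the wording below
is reconstructed from the format strings in the patch and the
expectations in the new test, so treat it as an approximation rather
than verbatim compiler output:

  $ gcc -mcpu=armv8.2-a+dotprod test.c
  error: unknown value 'armv8.2-a+dotprod' for '-mcpu'
  note: valid arguments are: ...
  note: did you mean '-march=armv8.2-a+dotprod'?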
--- gcc/config/aarch64/aarch64.cc | 6 ++++++ gcc/testsuite/gcc.target/aarch64/spellcheck_11.c | 13 +++++++++++++ 2 files changed, 19 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/spellcheck_11.c (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 0f04ab9..adbdaaf 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -18139,6 +18139,12 @@ aarch64_validate_mcpu (const char *str, const struct processor **res, case AARCH_PARSE_INVALID_ARG: error ("unknown value %qs for %<-mcpu%>", str); aarch64_print_hint_for_core (str); + /* A common user error is confusing -march and -mcpu. + If the -mcpu string matches a known architecture then suggest + -march=. */ + parse_res = aarch64_parse_arch (str, res, isa_flags, &invalid_extension); + if (parse_res == AARCH_PARSE_OK) + inform (input_location, "did you mean %<-march=%s%>?", str); break; case AARCH_PARSE_INVALID_FEATURE: error ("invalid feature modifier %qs in %<-mcpu=%s%>", diff --git a/gcc/testsuite/gcc.target/aarch64/spellcheck_11.c b/gcc/testsuite/gcc.target/aarch64/spellcheck_11.c new file mode 100644 index 0000000..a278328 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/spellcheck_11.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-skip-if "" { *-*-* } { "-march=*" } { "" } } */ +/* { dg-skip-if "" { *-*-* } { "-mcpu=*" } { "" } } */ +/* { dg-options "-mcpu=armv8.2-a+dotprod" } */ + +void +foo () +{ +} + +/* { dg-error "unknown value .armv8.2-a\\+dotprod. for .-mcpu." "" { target *-*-* } 0 } */ +/* { dg-message "valid arguments are: \[^\n\r]*" "" { target *-*-* } 0 } */ +/* { dg-message "did you mean .-march=armv8.2-a\\+dotprod.?" "" { target *-*-* } 0 } */ -- cgit v1.1 From cfdc45f73c56ad051a53576a4e88675ced2660d4 Mon Sep 17 00:00:00 2001 From: Andrew Stubbs Date: Fri, 14 Apr 2023 17:05:15 +0100 Subject: amdgcn: HardFP divide Implement FP division using hardware instructions. This replaces both the softfp library calls, and the --fast-math inaccurate divsion we had previously. The GCN architecture does not have a single divide instruction, but it does have a number of support instructions designed to make multiply-by-reciprocal sufficiently accurate for non-fast-math usage. gcc/ChangeLog: * config/gcn/gcn-valu.md (SV_SFDF): New iterator. (SV_FP): New iterator. (scalar_mode, SCALAR_MODE): Add identity mappings for scalar modes. (recip2): Unify the two patterns using SV_FP. (div_scale): New insn. (div_fmas): New insn. (div_fixup): New insn. (div3): Unify the two expanders and rewrite using hardfp. * config/gcn/gcn.cc (gcn_md_reorg): Support "vccwait" attribute. * config/gcn/gcn.md (unspec): Add UNSPEC_DIV_SCALE, UNSPEC_DIV_FMAS, and UNSPEC_DIV_FIXUP. (vccwait): New attribute. gcc/testsuite/ChangeLog: * gcc.target/gcn/fpdiv.c: Remove the -ffast-math requirement. --- gcc/config/gcn/gcn-valu.md | 223 ++++++++++++++++++++--------------- gcc/config/gcn/gcn.cc | 9 ++ gcc/config/gcn/gcn.md | 8 +- gcc/testsuite/gcc.target/gcn/fpdiv.c | 1 - 4 files changed, 144 insertions(+), 97 deletions(-) (limited to 'gcc') diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index e3d6d65..4a40a9d 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -15,6 +15,7 @@ ;; . ;; {{{ Vector iterators +; SV iterators include both scalar and vector modes. 
; Vector modes for specific types (define_mode_iterator V_QI @@ -126,6 +127,15 @@ V32SI V32DI V64SI V64DI]) +(define_mode_iterator SV_SFDF + [SF DF + V2SF V2DF + V4SF V4DF + V8SF V8DF + V16SF V16DF + V32SF V32DF + V64SF V64DF]) + ; All of above (define_mode_iterator V_ALL [V2QI V2HI V2HF V2SI V2SF V2DI V2DF @@ -156,9 +166,19 @@ V16HF V16SF V16DF V32HF V32SF V32DF V64HF V64SF V64DF]) +(define_mode_iterator SV_FP + [HF SF DF + V2HF V2SF V2DF + V4HF V4SF V4DF + V8HF V8SF V8DF + V16HF V16SF V16DF + V32HF V32SF V32DF + V64HF V64SF V64DF]) (define_mode_attr scalar_mode - [(V2QI "qi") (V2HI "hi") (V2SI "si") + [(QI "qi") (HI "hi") (SI "si") + (HF "hf") (SF "sf") (DI "di") (DF "df") + (V2QI "qi") (V2HI "hi") (V2SI "si") (V2HF "hf") (V2SF "sf") (V2DI "di") (V2DF "df") (V4QI "qi") (V4HI "hi") (V4SI "si") (V4HF "hf") (V4SF "sf") (V4DI "di") (V4DF "df") @@ -172,7 +192,9 @@ (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")]) (define_mode_attr SCALAR_MODE - [(V2QI "QI") (V2HI "HI") (V2SI "SI") + [(QI "QI") (HI "HI") (SI "SI") + (HF "HF") (SF "SF") (DI "DI") (DF "DF") + (V2QI "QI") (V2HI "HI") (V2SI "SI") (V2HF "HF") (V2SF "SF") (V2DI "DI") (V2DF "DF") (V4QI "QI") (V4HI "HI") (V4SI "SI") (V4HF "HF") (V4SF "SF") (V4DI "DI") (V4DF "DF") @@ -3188,113 +3210,124 @@ ;; {{{ FP division (define_insn "recip2" - [(set (match_operand:V_FP 0 "register_operand" "= v") - (unspec:V_FP - [(match_operand:V_FP 1 "gcn_alu_operand" "vSvB")] + [(set (match_operand:SV_FP 0 "register_operand" "= v") + (unspec:SV_FP + [(match_operand:SV_FP 1 "gcn_alu_operand" "vSvB")] UNSPEC_RCP))] "" "v_rcp%i0\t%0, %1" [(set_attr "type" "vop1") (set_attr "length" "8")]) -(define_insn "recip2" - [(set (match_operand:FP 0 "register_operand" "= v") - (unspec:FP - [(match_operand:FP 1 "gcn_alu_operand" "vSvB")] - UNSPEC_RCP))] - "" - "v_rcp%i0\t%0, %1" - [(set_attr "type" "vop1") +;; v_div_scale takes a numerator (op2) and denominator (op1) and returns the +;; one that matches op3 adjusted for best results in reciprocal division. +;; It also emits a VCC mask that is intended for input to v_div_fmas. +;; The caller is expected to call this twice, once for each input. The output +;; VCC is the same in both cases, so the caller may discard one. +(define_insn "div_scale" + [(set (match_operand:SV_SFDF 0 "register_operand" "=v") + (unspec:SV_SFDF + [(match_operand:SV_SFDF 1 "gcn_alu_operand" "v") + (match_operand:SV_SFDF 2 "gcn_alu_operand" "v") + (match_operand:SV_SFDF 3 "gcn_alu_operand" "v")] + UNSPEC_DIV_SCALE)) + (set (match_operand:DI 4 "register_operand" "=SvcV") + (unspec:DI + [(match_dup 1) (match_dup 2) (match_dup 3)] + UNSPEC_DIV_SCALE))] + "" + "v_div_scale%i0\t%0, %4, %3, %1, %2" + [(set_attr "type" "vop3b") (set_attr "length" "8")]) -;; Do division via a = b * 1/c -;; The v_rcp_* instructions are not sufficiently accurate on their own, -;; so we use 2 v_fma_* instructions to do one round of Newton-Raphson -;; which the ISA manual says is enough to improve the reciprocal accuracy. -;; -;; FIXME: This does not handle denormals, NaNs, division-by-zero etc. +;; v_div_fmas is "FMA and Scale" that uses the VCC output from v_div_scale +;; to conditionally scale the output of the whole division operation. +;; This is necessary to counter the adjustments made by v_div_scale and +;; replaces the last FMA instruction of the Newton Raphson algorithm. 
+(define_insn "div_fmas" + [(set (match_operand:SV_SFDF 0 "register_operand" "=v") + (unspec:SV_SFDF + [(plus:SV_SFDF + (mult:SV_SFDF + (match_operand:SV_SFDF 1 "gcn_alu_operand" "v") + (match_operand:SV_SFDF 2 "gcn_alu_operand" "v")) + (match_operand:SV_SFDF 3 "gcn_alu_operand" "v")) + (match_operand:DI 4 "register_operand" "cV")] + UNSPEC_DIV_FMAS))] + "" + "v_div_fmas%i0\t%0, %1, %2, %3; %4" + [(set_attr "type" "vop3a") + (set_attr "length" "8") + (set_attr "vccwait" "5")]) + +;; v_div_fixup takes the inputs and outputs of a division operation already +;; completed and cleans up the floating-point sign bit, infinity, underflow, +;; overflow, and NaN status. It will also emit any FP exceptions. +;; op1: quotient, op2: denominator, op3: numerator +(define_insn "div_fixup" + [(set (match_operand:SV_FP 0 "register_operand" "=v") + (unspec:SV_FP + [(match_operand:SV_FP 1 "register_operand" "v") + (match_operand:SV_FP 2 "gcn_alu_operand" "v") + (match_operand:SV_FP 3 "gcn_alu_operand" "v")] + UNSPEC_DIV_FIXUP))] + "" + "v_div_fixup%i0\t%0, %1, %2, %3" + [(set_attr "type" "vop3a") + (set_attr "length" "8")]) (define_expand "div3" - [(match_operand:V_FP 0 "gcn_valu_dst_operand") - (match_operand:V_FP 1 "gcn_valu_src0_operand") - (match_operand:V_FP 2 "gcn_valu_src0_operand")] - "flag_reciprocal_math" + [(match_operand:SV_SFDF 0 "register_operand") + (match_operand:SV_SFDF 1 "gcn_alu_operand") + (match_operand:SV_SFDF 2 "gcn_alu_operand")] + "" { + rtx numerator = operands[1]; + rtx denominator = operands[2]; + + /* Scale the inputs if they are close to the FP limits. + This will be reversed later. */ + rtx vcc = gen_reg_rtx (DImode); + rtx discardedvcc = gen_reg_rtx (DImode); + rtx scaled_numerator = gen_reg_rtx (mode); + rtx scaled_denominator = gen_reg_rtx (mode); + emit_insn (gen_div_scale (scaled_denominator, + denominator, numerator, + denominator, discardedvcc)); + emit_insn (gen_div_scale (scaled_numerator, + denominator, numerator, + numerator, vcc)); + + /* Find the reciprocal of the denominator, and use Newton-Raphson to + improve the accuracy over the basic hardware instruction. 
*/ rtx one = gcn_vec_constant (mode, const_double_from_real_value (dconst1, mode)); rtx initrcp = gen_reg_rtx (mode); - rtx fma = gen_reg_rtx (mode); - rtx rcp; - rtx num = operands[1], denom = operands[2]; - - bool is_rcp = (GET_CODE (num) == CONST_VECTOR - && real_identical - (CONST_DOUBLE_REAL_VALUE - (CONST_VECTOR_ELT (num, 0)), &dconstm1)); - - if (is_rcp) - rcp = operands[0]; - else - rcp = gen_reg_rtx (mode); - - emit_insn (gen_recip2 (initrcp, denom)); - emit_insn (gen_fma4_negop2 (fma, initrcp, denom, one)); - emit_insn (gen_fma4 (rcp, fma, initrcp, initrcp)); - - if (!is_rcp) - { - rtx div_est = gen_reg_rtx (mode); - rtx fma2 = gen_reg_rtx (mode); - rtx fma3 = gen_reg_rtx (mode); - rtx fma4 = gen_reg_rtx (mode); - emit_insn (gen_mul3 (div_est, num, rcp)); - emit_insn (gen_fma4_negop2 (fma2, div_est, denom, num)); - emit_insn (gen_fma4 (fma3, fma2, rcp, div_est)); - emit_insn (gen_fma4_negop2 (fma4, fma3, denom, num)); - emit_insn (gen_fma4 (operands[0], fma4, rcp, fma3)); - } - - DONE; - }) - -(define_expand "div3" - [(match_operand:FP 0 "gcn_valu_dst_operand") - (match_operand:FP 1 "gcn_valu_src0_operand") - (match_operand:FP 2 "gcn_valu_src0_operand")] - "flag_reciprocal_math" - { - rtx one = const_double_from_real_value (dconst1, mode); - rtx initrcp = gen_reg_rtx (mode); - rtx fma = gen_reg_rtx (mode); - rtx rcp; - rtx num = operands[1], denom = operands[2]; - - bool is_rcp = (GET_CODE (operands[1]) == CONST_DOUBLE - && real_identical (CONST_DOUBLE_REAL_VALUE (operands[1]), - &dconstm1)); - - if (is_rcp) - rcp = operands[0]; - else - rcp = gen_reg_rtx (mode); - - emit_insn (gen_recip2 (initrcp, denom)); - emit_insn (gen_fma4_negop2 (fma, initrcp, denom, one)); - emit_insn (gen_fma4 (rcp, fma, initrcp, initrcp)); - - if (!is_rcp) - { - rtx div_est = gen_reg_rtx (mode); - rtx fma2 = gen_reg_rtx (mode); - rtx fma3 = gen_reg_rtx (mode); - rtx fma4 = gen_reg_rtx (mode); - emit_insn (gen_mul3 (div_est, num, rcp)); - emit_insn (gen_fma4_negop2 (fma2, div_est, denom, num)); - emit_insn (gen_fma4 (fma3, fma2, rcp, div_est)); - emit_insn (gen_fma4_negop2 (fma4, fma3, denom, num)); - emit_insn (gen_fma4 (operands[0], fma4, rcp, fma3)); - } - + rtx fma1 = gen_reg_rtx (mode); + rtx rcp = gen_reg_rtx (mode); + emit_insn (gen_recip2 (initrcp, scaled_denominator)); + emit_insn (gen_fma4_negop2 (fma1, initrcp, scaled_denominator, one)); + emit_insn (gen_fma4 (rcp, fma1, initrcp, initrcp)); + + /* Do the division "a/b" via "a*1/b" and use Newton-Raphson to improve + the accuracy. The "div_fmas" instruction reverses any scaling + performed by "div_scale", above. */ + rtx div_est = gen_reg_rtx (mode); + rtx fma2 = gen_reg_rtx (mode); + rtx fma3 = gen_reg_rtx (mode); + rtx fma4 = gen_reg_rtx (mode); + rtx fmas = gen_reg_rtx (mode); + emit_insn (gen_mul3 (div_est, scaled_numerator, rcp)); + emit_insn (gen_fma4_negop2 (fma2, div_est, scaled_denominator, + scaled_numerator)); + emit_insn (gen_fma4 (fma3, fma2, rcp, div_est)); + emit_insn (gen_fma4_negop2 (fma4, fma3, scaled_denominator, + scaled_numerator)); + emit_insn (gen_div_fmas (fmas, fma4, rcp, fma3, vcc)); + + /* Finally, use "div_fixup" to get the details right and find errors. 
*/ + emit_insn (gen_div_fixup (operands[0], fmas, denominator, + numerator)); DONE; }) diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index a7d278c..5608d85 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -5840,6 +5840,7 @@ gcn_md_reorg (void) attr_type itype = get_attr_type (insn); attr_unit iunit = get_attr_unit (insn); attr_delayeduse idelayeduse = get_attr_delayeduse (insn); + int ivccwait = get_attr_vccwait (insn); HARD_REG_SET ireads, iwrites; CLEAR_HARD_REG_SET (ireads); CLEAR_HARD_REG_SET (iwrites); @@ -5917,6 +5918,14 @@ gcn_md_reorg (void) && ((hard_reg_set_intersect_p (prev_insn->reads, iwrites)))) nops_rqd = 1 - prev_insn->age; + + /* Instruction that requires VCC is not written too close before + using it. */ + if (prev_insn->age < ivccwait + && (hard_reg_set_intersect_p + (prev_insn->writes, + reg_class_contents[(int)VCC_CONDITIONAL_REG]))) + nops_rqd = ivccwait - prev_insn->age; } /* Insert the required number of NOPs. */ diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md index c90303c..7065acf 100644 --- a/gcc/config/gcn/gcn.md +++ b/gcc/config/gcn/gcn.md @@ -90,7 +90,8 @@ UNSPEC_RCP UNSPEC_FLBIT_INT UNSPEC_FLOOR UNSPEC_CEIL UNSPEC_SIN UNSPEC_COS UNSPEC_EXP2 UNSPEC_LOG2 - UNSPEC_LDEXP UNSPEC_FREXP_EXP UNSPEC_FREXP_MANT]) + UNSPEC_LDEXP UNSPEC_FREXP_EXP UNSPEC_FREXP_MANT + UNSPEC_DIV_SCALE UNSPEC_DIV_FMAS UNSPEC_DIV_FIXUP]) ;; }}} ;; {{{ Attributes @@ -302,6 +303,11 @@ (define_attr "delayeduse" "yes,no" (const_string "no")) +; Identify instructions that require "Manually Inserted Wait State" if +; a previous instruction writes to VCC. The number gives the number of NOPs. + +(define_attr "vccwait" "" (const_int 0)) + ;; }}} ;; {{{ Iterators useful across the wole machine description diff --git a/gcc/testsuite/gcc.target/gcn/fpdiv.c b/gcc/testsuite/gcc.target/gcn/fpdiv.c index 7125b6f..936d39c 100644 --- a/gcc/testsuite/gcc.target/gcn/fpdiv.c +++ b/gcc/testsuite/gcc.target/gcn/fpdiv.c @@ -1,5 +1,4 @@ /* { dg-do run } */ -/* { dg-options "-ffast-math" } */ #include #include -- cgit v1.1 From fdbaab2dc886f78a1e75512eeee0faa17e77c862 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Tue, 18 Apr 2023 11:49:48 +0200 Subject: tree-optimization/109539 - restrict PHI handling in access diagnostics Access diagnostics visits the SSA def-use chains to diagnose things like dangling pointer uses. When that runs into PHIs it tries to prove all incoming pointers of which one is the currently visited use are related to decide whether to keep looking for the PHI def uses. That turns out to be overly optimistic and thus costly. The following scraps the existing handling for simply requiring that we eventually visit all incoming pointers of the PHI during the def-use chain analysis and only then process uses of the PHI def. Note this handles backedges of natural loops optimistically, diagnosing the first iteration. There's gcc.dg/Wuse-after-free-2.c containing a testcase requiring this. PR tree-optimization/109539 * gimple-ssa-warn-access.cc (pass_waccess::check_pointer_uses): Re-implement pointer relatedness for PHIs. 
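A hypothetical example of the pattern being analyzed (not taken from
the testsuite; whether it actually warns depends on the
-Wuse-after-free level in effect):

#include <cstdlib>

int
f (int *p, bool cond)
{
  free (p);
  int *q = cond ? p : p + 1;  /* Becomes q = PHI <p, p + 1>; both
				 arguments derive from the freed
				 pointer, so once the def-use walk has
				 reached them all, Q joins POINTERS.  */
  return *q;                  /* Diagnosed: use after free.  */
}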
--- gcc/gimple-ssa-warn-access.cc | 56 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 11 deletions(-) (limited to 'gcc') diff --git a/gcc/gimple-ssa-warn-access.cc b/gcc/gimple-ssa-warn-access.cc index d0d2148..48e85e9 100644 --- a/gcc/gimple-ssa-warn-access.cc +++ b/gcc/gimple-ssa-warn-access.cc @@ -4173,8 +4173,9 @@ pass_waccess::check_pointer_uses (gimple *stmt, tree ptr, auto_bitmap visited; - auto_vec pointers; - pointers.safe_push (ptr); + auto_vec pointers; + pointers.quick_push (ptr); + hash_map *phi_map = nullptr; /* Starting with PTR, iterate over POINTERS added by the loop, and either warn for their uses in basic blocks dominated by the STMT @@ -4241,19 +4242,49 @@ pass_waccess::check_pointer_uses (gimple *stmt, tree ptr, tree_code code = gimple_cond_code (cond); equality = code == EQ_EXPR || code == NE_EXPR; } - else if (gimple_code (use_stmt) == GIMPLE_PHI) + else if (gphi *phi = dyn_cast (use_stmt)) { /* Only add a PHI result to POINTERS if all its - operands are related to PTR, otherwise continue. */ - tree lhs = gimple_phi_result (use_stmt); - if (!pointers_related_p (stmt, lhs, ptr, m_ptr_qry)) - continue; - - if (TREE_CODE (lhs) == SSA_NAME) + operands are related to PTR, otherwise continue. The + PHI result is related once we've reached all arguments + through this iteration. That also means any invariant + argument will make the PHI not related. For arguments + flowing over natural loop backedges we are optimistic + (and diagnose the first iteration). */ + tree lhs = gimple_phi_result (phi); + if (!phi_map) + phi_map = new hash_map; + bool existed_p; + int &related = phi_map->get_or_insert (lhs, &existed_p); + if (!existed_p) { - pointers.safe_push (lhs); - continue; + related = gimple_phi_num_args (phi) - 1; + for (unsigned j = 0; j < gimple_phi_num_args (phi); ++j) + { + if ((unsigned) phi_arg_index_from_use (use_p) == j) + continue; + tree arg = gimple_phi_arg_def (phi, j); + edge e = gimple_phi_arg_edge (phi, j); + basic_block arg_bb; + if (dominated_by_p (CDI_DOMINATORS, e->src, e->dest) + /* Make sure we are not forward visiting a + backedge argument. */ + && (TREE_CODE (arg) != SSA_NAME + || (!SSA_NAME_IS_DEFAULT_DEF (arg) + && ((arg_bb + = gimple_bb (SSA_NAME_DEF_STMT (arg))) + != e->dest) + && !dominated_by_p (CDI_DOMINATORS, + e->dest, arg_bb)))) + related--; + } } + else + related--; + + if (related == 0) + pointers.safe_push (lhs); + continue; } /* Warn if USE_STMT is dominated by the deallocation STMT. @@ -4292,6 +4323,9 @@ pass_waccess::check_pointer_uses (gimple *stmt, tree ptr, } } } + + if (phi_map) + delete phi_map; } /* Check call STMT for invalid accesses. */ -- cgit v1.1 From 4d747ea3a4e5b94eda4a46dada0726eaa1b7b5fd Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Thu, 23 Feb 2023 08:48:28 +0100 Subject: Add inchash support for vrange. This patch provides inchash support for vrange. It is along the lines of the streaming support I just posted and will be used for IPA hashing of ranges. gcc/ChangeLog: * inchash.cc (hash::add_real_value): New. * inchash.h (class hash): Add add_real_value. * value-range.cc (add_vrange): New. * value-range.h (inchash::add_vrange): New. 
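A minimal sketch of the intended use (the caller is hypothetical;
inchash::hash::end finalizes the incremental state):

/* Hash a range, e.g. as part of an IPA summary fingerprint.  */
hashval_t
hash_range (const vrange &r)
{
  inchash::hash hstate;
  inchash::add_vrange (r, hstate);
  return hstate.end ();
}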
--- gcc/inchash.cc | 36 ++++++++++++++++++++++++++++++++++++ gcc/inchash.h | 2 ++ gcc/value-range.cc | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ gcc/value-range.h | 5 +++++ 4 files changed, 95 insertions(+) (limited to 'gcc') diff --git a/gcc/inchash.cc b/gcc/inchash.cc index a30662b..7890db0 100644 --- a/gcc/inchash.cc +++ b/gcc/inchash.cc @@ -24,3 +24,39 @@ along with GCC; see the file COPYING3. If not see #endif #include "system.h" #include "coretypes.h" +#include "real.h" +#include "inchash.h" + +namespace inchash +{ + +/* This is here instead of inchash.h to keep us from having to put + real.h in coretypes.h. */ +void +hash::add_real_value (const real_value &v) +{ + add_int (v.cl); + add_int (v.sign); + switch (v.cl) + { + case rvc_zero: + case rvc_inf: + return; + case rvc_normal: + add_int (v.decimal); + add_int (REAL_EXP (&v)); + break; + case rvc_nan: + add_int (v.signalling); + add_int (v.canonical); + if (v.canonical) + return; + break; + default: + gcc_unreachable (); + } + for (unsigned i = 0; i < SIGSZ; ++i) + add_hwi (v.sig[i]); +} + +} // namespace inchash diff --git a/gcc/inchash.h b/gcc/inchash.h index bf76308..41ae153 100644 --- a/gcc/inchash.h +++ b/gcc/inchash.h @@ -88,6 +88,8 @@ class hash add_hwi (x.sext_elt (i)); } + void add_real_value (const class real_value &v); + /* Hash in pointer PTR. */ void add_ptr (const void *ptr) { diff --git a/gcc/value-range.cc b/gcc/value-range.cc index ec826c2..c14a27e 100644 --- a/gcc/value-range.cc +++ b/gcc/value-range.cc @@ -232,6 +232,58 @@ vrange::dump (FILE *file) const pp_flush (&buffer); } +namespace inchash +{ + +void +add_vrange (const vrange &v, inchash::hash &hstate, + unsigned int) +{ + if (v.undefined_p ()) + { + hstate.add_int (VR_UNDEFINED); + return; + } + // Types are ignored throughout to inhibit two ranges being equal + // but having different hash values. This can happen when two + // ranges are equal and their types are different (but + // types_compatible_p is true). + if (is_a (v)) + { + const irange &r = as_a (v); + if (r.varying_p ()) + hstate.add_int (VR_VARYING); + else + hstate.add_int (VR_RANGE); + for (unsigned i = 0; i < r.num_pairs (); ++i) + { + hstate.add_wide_int (r.lower_bound (i)); + hstate.add_wide_int (r.upper_bound (i)); + } + hstate.add_wide_int (r.get_nonzero_bits ()); + return; + } + if (is_a (v)) + { + const frange &r = as_a (v); + if (r.varying_p ()) + hstate.add_int (VR_VARYING); + else + hstate.add_int (VR_RANGE); + + hstate.add_real_value (r.lower_bound ()); + hstate.add_real_value (r.upper_bound ()); + + nan_state nan = r.get_nan_state (); + hstate.add_int (nan.pos_p ()); + hstate.add_int (nan.neg_p ()); + return; + } + gcc_unreachable (); +} + +} //namespace inchash + bool irange::supports_type_p (const_tree type) const { diff --git a/gcc/value-range.h b/gcc/value-range.h index f8aa0ca..5545cce 100644 --- a/gcc/value-range.h +++ b/gcc/value-range.h @@ -109,6 +109,11 @@ protected: const ENUM_BITFIELD(value_range_discriminator) m_discriminator : 4; }; +namespace inchash +{ + extern void add_vrange (const vrange &, hash &, unsigned flags = 0); +} + // An integer range without any storage. class GTY((user)) irange : public vrange -- cgit v1.1 From 10e481b154c5fc63e6ce4b449ce86cecb87a6015 Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Thu, 26 Jan 2023 04:46:54 +0100 Subject: Return true from operator== for two identical ranges containing NAN. The == operator for ranges signifies that two ranges contain the same thing, not that they are ultimately equal. 
So [2,4] == [2,4], even though one may be a 2 and the other may be a 3. Similarly with two VARYING ranges. There is an oversight in frange::operator== where we are returning false for two identical NANs. This is causing us to never cache NANs in sbr_sparse_bitmap::set_bb_range. gcc/ChangeLog: * value-range.cc (frange::operator==): Adjust for NAN. (range_tests_nan): Remove some NAN tests. --- gcc/value-range.cc | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'gcc') diff --git a/gcc/value-range.cc b/gcc/value-range.cc index c14a27e..8b5d0cb 100644 --- a/gcc/value-range.cc +++ b/gcc/value-range.cc @@ -681,9 +681,6 @@ frange::operator== (const frange &src) const if (varying_p ()) return types_compatible_p (m_type, src.m_type); - if (known_isnan () || src.known_isnan ()) - return false; - return (real_identical (&m_min, &src.m_min) && real_identical (&m_max, &src.m_max) && m_pos_nan == src.m_pos_nan @@ -3801,13 +3798,6 @@ range_tests_nan () ASSERT_TRUE (r0.maybe_isnan ()); } - // NAN ranges are not equal to each other. - r0.set_nan (float_type_node); - r1 = r0; - ASSERT_FALSE (r0 == r1); - ASSERT_FALSE (r0 == r0); - ASSERT_TRUE (r0 != r0); - // [5,6] U NAN = [5,6] NAN. r0 = frange_float ("5", "6"); r0.clear_nan (); -- cgit v1.1 From fc8eaf42393d462e0454fa36a41fa313408b87cb Mon Sep 17 00:00:00 2001 From: Jin Ma Date: Tue, 18 Apr 2023 07:28:22 -0600 Subject: Fixed typo. gcc/ada * gcc-interface/utils.cc (unchecked_convert): Fixed typo. --- gcc/ada/gcc-interface/utils.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ada/gcc-interface/utils.cc b/gcc/ada/gcc-interface/utils.cc index 392ec0b..0c4f8b9 100644 --- a/gcc/ada/gcc-interface/utils.cc +++ b/gcc/ada/gcc-interface/utils.cc @@ -5543,7 +5543,7 @@ unchecked_convert (tree type, tree expr, bool notrunc_p) } } - /* Likewise if we are converting from a fixed-szie type to a type with self- + /* Likewise if we are converting from a fixed-size type to a type with self- referential size. We use the max size to do the padding in this case. */ else if (!INDIRECT_REF_P (expr) && TREE_CODE (expr) != STRING_CST -- cgit v1.1 From 8b271f6069da6eee4afdbfcac72888a4551191f9 Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Tue, 18 Apr 2023 14:36:14 +0100 Subject: aarch64: Use intrinsic flags information rather than hardcoding FLAG_AUTO_FP We record the flags to use for the intrinsics in aarch64_simd_intrinsic_data, so use it when initialising them rather than using a hardcoded FLAG_AUTO_FP. The current vreinterpret intrinsics use FLAG_AUTO_FP anyway so this patch is an NFC but this will be needed as we migrate more builtins into the intrinsics infrastructure. Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ChangeLog: * config/aarch64/aarch64-builtins.cc (aarch64_init_simd_intrinsics): Take builtin flags from intrinsic data rather than hardcoded FLAG_AUTO_FP. 
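Reduced to a standalone sketch (all names below are invented for illustration, not the aarch64 backend's real API), the change is from passing one hardcoded flag value for every table entry to passing the per-entry field:

    #include <cstdio>

    struct intrinsic_data { const char *name; unsigned flags; };

    static const intrinsic_data table[] = {
      { "vreinterpret_x", 0x1 },  /* happens to match the old hardcoded value */
      { "vsomething_y",   0x6 },  /* would need different attributes */
    };

    int main ()
    {
      for (const intrinsic_data &d : table)
        /* before: register_intrinsic (d.name, HARDCODED_FLAGS);
           after:  register_intrinsic (d.name, d.flags);  */
        std::printf ("%s -> flags %#x\n", d.name, d.flags);
    }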
--- gcc/config/aarch64/aarch64-builtins.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc index cc6b7c0..94ad364 100644 --- a/gcc/config/aarch64/aarch64-builtins.cc +++ b/gcc/config/aarch64/aarch64-builtins.cc @@ -1356,7 +1356,7 @@ aarch64_init_simd_intrinsics (void) } tree ftype = build_function_type (return_type, args); - tree attrs = aarch64_get_attributes (FLAG_AUTO_FP, d->op_modes[0]); + tree attrs = aarch64_get_attributes (d->flags, d->op_modes[0]); unsigned int code = (d->fcode << AARCH64_BUILTIN_SHIFT | AARCH64_BUILTIN_GENERAL); tree fndecl = simulate_builtin_function_decl (input_location, d->name, -- cgit v1.1 From 278f8f567b5470e87e2e6482ee385d61c7f45a5d Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Mon, 6 Mar 2023 11:06:38 +0100 Subject: RAII auto_mpfr and auto_mpz The following adds two RAII classes, one for mpz_t and one for mpfr_t, making object lifetime management easier. Both formerly required explicit initialization with {mpz,mpfr}_init and release with {mpz,mpfr}_clear. I've converted two example places (where lifetime is trivial). * system.h (class auto_mpz): New. * realmpfr.h (class auto_mpfr): Likewise. * fold-const-call.cc (do_mpfr_arg1): Use auto_mpfr. (do_mpfr_arg2): Likewise. * tree-ssa-loop-niter.cc (bound_difference): Use auto_mpz. --- gcc/fold-const-call.cc | 8 ++------ gcc/realmpfr.h | 20 ++++++++++++++++++++ gcc/system.h | 18 ++++++++++++++++++ gcc/tree-ssa-loop-niter.cc | 10 +--------- 4 files changed, 41 insertions(+), 15 deletions(-) (limited to 'gcc') diff --git a/gcc/fold-const-call.cc b/gcc/fold-const-call.cc index 43819c1..fa0b287 100644 --- a/gcc/fold-const-call.cc +++ b/gcc/fold-const-call.cc @@ -130,14 +130,12 @@ do_mpfr_arg1 (real_value *result, int prec = format->p; mpfr_rnd_t rnd = format->round_towards_zero ? MPFR_RNDZ : MPFR_RNDN; - mpfr_t m; - mpfr_init2 (m, prec); + auto_mpfr m (prec); mpfr_from_real (m, arg, MPFR_RNDN); mpfr_clear_flags (); bool inexact = func (m, m, rnd); bool ok = do_mpfr_ckconv (result, m, inexact, format); - mpfr_clear (m); return ok; } @@ -224,14 +222,12 @@ do_mpfr_arg2 (real_value *result, int prec = format->p; mpfr_rnd_t rnd = format->round_towards_zero ? MPFR_RNDZ : MPFR_RNDN; - mpfr_t m; - mpfr_init2 (m, prec); + auto_mpfr m (prec); mpfr_from_real (m, arg1, MPFR_RNDN); mpfr_clear_flags (); bool inexact = func (m, arg0.to_shwi (), m, rnd); bool ok = do_mpfr_ckconv (result, m, inexact, format); - mpfr_clear (m); return ok; } diff --git a/gcc/realmpfr.h b/gcc/realmpfr.h index 5e032c0..3824e62 100644 --- a/gcc/realmpfr.h +++ b/gcc/realmpfr.h @@ -24,6 +24,26 @@ #include <mpfr.h> #include <mpc.h> +class auto_mpfr +{ +public: + auto_mpfr () { mpfr_init (m_mpfr); } + explicit auto_mpfr (mpfr_prec_t prec) { mpfr_init2 (m_mpfr, prec); } + ~auto_mpfr () { mpfr_clear (m_mpfr); } + + operator mpfr_t& () { return m_mpfr; } + + auto_mpfr (const auto_mpfr &) = delete; + auto_mpfr &operator= (const auto_mpfr &) = delete; + + friend void mpfr_clear (auto_mpfr&) = delete; + friend void mpfr_init (auto_mpfr&) = delete; + friend void mpfr_init2 (auto_mpfr&, mpfr_prec_t) = delete; + +private: + mpfr_t m_mpfr; +}; + /* Convert between MPFR and REAL_VALUE_TYPE. The caller is responsible for initializing and clearing the MPFR parameter.
*/ diff --git a/gcc/system.h b/gcc/system.h index cf45db3..71d8a04 100644 --- a/gcc/system.h +++ b/gcc/system.h @@ -701,6 +701,24 @@ extern int vsnprintf (char *, size_t, const char *, va_list); /* Do not introduce a gmp.h dependency on the build system. */ #ifndef GENERATOR_FILE #include <gmp.h> + +class auto_mpz +{ +public: + auto_mpz () { mpz_init (m_mpz); } + ~auto_mpz () { mpz_clear (m_mpz); } + + operator mpz_t& () { return m_mpz; } + + auto_mpz (const auto_mpz &) = delete; + auto_mpz &operator= (const auto_mpz &) = delete; + + friend void mpz_clear (auto_mpz&) = delete; + friend void mpz_init (auto_mpz&) = delete; + +private: + mpz_t m_mpz; +}; #endif /* Get libiberty declarations. */ diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc index dc4c7a4..dcfba2f 100644 --- a/gcc/tree-ssa-loop-niter.cc +++ b/gcc/tree-ssa-loop-niter.cc @@ -722,7 +722,6 @@ bound_difference (class loop *loop, tree x, tree y, bounds *bnds) tree type = TREE_TYPE (x); tree varx, vary; mpz_t offx, offy; - mpz_t minx, maxx, miny, maxy; int cnt = 0; edge e; basic_block bb; @@ -754,19 +753,12 @@ bound_difference (class loop *loop, tree x, tree y, bounds *bnds) { /* Otherwise, use the value ranges to determine the initial estimates on below and up. */ - mpz_init (minx); - mpz_init (maxx); - mpz_init (miny); - mpz_init (maxy); + auto_mpz minx, maxx, miny, maxy; determine_value_range (loop, type, varx, offx, minx, maxx); determine_value_range (loop, type, vary, offy, miny, maxy); mpz_sub (bnds->below, minx, maxy); mpz_sub (bnds->up, maxx, miny); - mpz_clear (minx); - mpz_clear (maxx); - mpz_clear (miny); - mpz_clear (maxy); } /* If both X and Y are constants, we cannot get any more precise. */ -- cgit v1.1 From 19cb965e9d16e875944a31173b5e79b65e25d0de Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Wed, 12 Apr 2023 14:05:57 +0200 Subject: Declare dconstm0 to go along with dconst0 and friends. Negating dconst0 is getting pretty old, and we will keep adding copies of the same idiom. Fixed by adding a dconstm0 constant to go along with dconst1, dconstm1, etc. gcc/ChangeLog: * emit-rtl.cc (init_emit_once): Initialize dconstm0. * gimple-range-op.cc (class cfn_signbit): Remove dconstm0 declaration. * range-op-float.cc (zero_range): Use dconstm0. (zero_to_inf_range): Same. * real.h (dconstm0): New. * value-range.cc (frange::flush_denormals_to_zero): Use dconstm0. (frange::set_zero): Do not declare dconstm0.
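For background on why a ready-made -0.0 constant is useful at all: negative zero compares equal to positive zero but carries a distinct sign bit, as this standalone C example (not GCC internals) shows:

    #include <stdio.h>
    #include <math.h>

    int main (void)
    {
      double pz = 0.0, nz = -0.0;
      printf ("%d %d %d\n", pz == nz, signbit (pz) != 0, signbit (nz) != 0);
      /* prints: 1 0 1 -- equal as values, distinguishable by sign bit */
      return 0;
    }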
--- gcc/emit-rtl.cc | 4 ++++ gcc/gimple-range-op.cc | 2 -- gcc/range-op-float.cc | 6 +++--- gcc/real.h | 1 + gcc/value-range.cc | 7 +++---- 5 files changed, 11 insertions(+), 9 deletions(-) (limited to 'gcc') diff --git a/gcc/emit-rtl.cc b/gcc/emit-rtl.cc index a11f72f..4036f4b 100644 --- a/gcc/emit-rtl.cc +++ b/gcc/emit-rtl.cc @@ -105,6 +105,7 @@ rtx const_true_rtx; REAL_VALUE_TYPE dconst0; REAL_VALUE_TYPE dconst1; REAL_VALUE_TYPE dconst2; +REAL_VALUE_TYPE dconstm0; REAL_VALUE_TYPE dconstm1; REAL_VALUE_TYPE dconsthalf; REAL_VALUE_TYPE dconstinf; @@ -6206,6 +6207,9 @@ init_emit_once (void) real_from_integer (&dconst1, double_mode, 1, SIGNED); real_from_integer (&dconst2, double_mode, 2, SIGNED); + dconstm0 = dconst0; + dconstm0.sign = 1; + dconstm1 = dconst1; dconstm1.sign = 1; diff --git a/gcc/gimple-range-op.cc b/gcc/gimple-range-op.cc index 4ca32a7..f7409e3 100644 --- a/gcc/gimple-range-op.cc +++ b/gcc/gimple-range-op.cc @@ -360,8 +360,6 @@ public: } if (!lhs.contains_p (build_zero_cst (lhs.type ()))) { - REAL_VALUE_TYPE dconstm0 = dconst0; - dconstm0.sign = 1; r.set (type, frange_val_min (type), dconstm0); r.update_nan (true); return true; diff --git a/gcc/range-op-float.cc b/gcc/range-op-float.cc index e0e91ba..9d18461 100644 --- a/gcc/range-op-float.cc +++ b/gcc/range-op-float.cc @@ -2263,7 +2263,7 @@ zero_range (REAL_VALUE_TYPE &lb, REAL_VALUE_TYPE &ub, int signbit_known) { ub = lb = dconst0; if (signbit_known <= 0) - lb = real_value_negate (&dconst0); + lb = dconstm0; if (signbit_known < 0) ub = lb; } @@ -2297,7 +2297,7 @@ zero_to_inf_range (REAL_VALUE_TYPE &lb, REAL_VALUE_TYPE &ub, int signbit_known) else if (signbit_known < 0) { lb = dconstninf; - ub = real_value_negate (&dconst0); + ub = dconstm0; } else { @@ -2634,7 +2634,7 @@ private: if (real_isneg (&lh_lb) == real_isneg (&lh_ub)) cp[1] = dconst0; else - cp[1] = real_value_negate (&dconst0); + cp[1] = dconstm0; } else cp[1] = cp[0]; diff --git a/gcc/real.h b/gcc/real.h index dd41c65..9e02139 100644 --- a/gcc/real.h +++ b/gcc/real.h @@ -468,6 +468,7 @@ extern void real_ldexp (REAL_VALUE_TYPE *, const REAL_VALUE_TYPE *, int); extern REAL_VALUE_TYPE dconst0; extern REAL_VALUE_TYPE dconst1; extern REAL_VALUE_TYPE dconst2; +extern REAL_VALUE_TYPE dconstm0; extern REAL_VALUE_TYPE dconstm1; extern REAL_VALUE_TYPE dconsthalf; extern REAL_VALUE_TYPE dconstinf; diff --git a/gcc/value-range.cc b/gcc/value-range.cc index 8b5d0cb..3b3102b 100644 --- a/gcc/value-range.cc +++ b/gcc/value-range.cc @@ -322,9 +322,10 @@ frange::flush_denormals_to_zero () // Flush [x, -DENORMAL] to [x, -0.0]. if (real_isdenormal (&m_max, mode) && real_isneg (&m_max)) { - m_max = dconst0; if (HONOR_SIGNED_ZEROS (m_type)) - m_max.sign = 1; + m_max = dconstm0; + else + m_max = dconst0; } // Flush [+DENORMAL, x] to [+0.0, x]. if (real_isdenormal (&m_min, mode) && !real_isneg (&m_min)) @@ -837,8 +838,6 @@ frange::set_zero (tree type) { if (HONOR_SIGNED_ZEROS (type)) { - REAL_VALUE_TYPE dconstm0 = dconst0; - dconstm0.sign = 1; set (type, dconstm0, dconst0); clear_nan (); } -- cgit v1.1 From 18e78844e78d7096c8e073c5b431480a0b8249d0 Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Tue, 18 Apr 2023 15:06:49 +0100 Subject: aarch64: Use standard RTL codes for __rev16 intrinsic expansion I noticed for the expansion of the __rev16* arm_acle.h intrinsics we don't need to use an unspec just because it doesn't match neatly to a bswap code. We have organic combine patterns for it that we can reuse. 
This patch removes the define_insn using UNSPEC_REV (should it have been an UNSPEC_REV16?) and adds an expander to emit the patterns we have for rev16 using standard RTL codes. Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ChangeLog: * config/aarch64/aarch64.md (@aarch64_rev16<mode>): Change to define_expand. (rev16<mode>2): Rename to... (aarch64_rev16<mode>2_alt1): ... This. (rev16<mode>2_alt): Rename to... (*aarch64_rev16<mode>2_alt2): ... This. --- gcc/config/aarch64/aarch64.md | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 022eef8..065cf4b 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -6120,13 +6120,6 @@ [(set_attr "type" "rev")] ) -(define_insn "@aarch64_rev16<mode>" - [(set (match_operand:GPI 0 "register_operand" "=r") - (unspec:GPI [(match_operand:GPI 1 "register_operand" "r")] UNSPEC_REV))] - "" - "rev16\\t%<w>0, %<w>1" - [(set_attr "type" "rev")]) - (define_insn "*aarch64_bfxil" [(set (match_operand:GPI 0 "register_operand" "=r,r") (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "r,0") @@ -6183,7 +6176,7 @@ ;; operations within an IOR/AND RTX, therefore we have two patterns matching ;; each valid permutation. -(define_insn "rev16<mode>2" +(define_insn "aarch64_rev16<mode>2_alt1" [(set (match_operand:GPI 0 "register_operand" "=r") (ior:GPI (and:GPI (ashift:GPI (match_operand:GPI 1 "register_operand" "r") (const_int 8)) @@ -6197,7 +6190,7 @@ [(set_attr "type" "rev")] ) -(define_insn "rev16<mode>2_alt" +(define_insn "*aarch64_rev16<mode>2_alt2" [(set (match_operand:GPI 0 "register_operand" "=r") (ior:GPI (and:GPI (lshiftrt:GPI (match_operand:GPI 1 "register_operand" "r") (const_int 8)) @@ -6220,6 +6213,21 @@ [(set_attr "type" "rev")] ) +;; Expander for __rev16 intrinsics. We have organic RTL patterns for rev16 above. +;; Use this expander to just create the shift constants needed. +(define_expand "@aarch64_rev16<mode>" + [(match_operand:GPI 0 "register_operand") + (match_operand:GPI 1 "register_operand")] + "" + { + rtx left = gen_int_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff00), <MODE>mode); + rtx right = gen_int_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff), <MODE>mode); + emit_insn (gen_aarch64_rev16<mode>2_alt1 (operands[0], operands[1], + right, left)); + DONE; + } +) + ;; ------------------------------------------------------------------- ;; Floating-point intrinsics ;; ------------------------------------------------------------------- -- cgit v1.1 From decd9de45064d9c14fa3b153d84ad42573d1b34d Mon Sep 17 00:00:00 2001 From: Andrew Pinski Date: Sat, 8 Apr 2023 19:03:38 -0700 Subject: PHIOPT: small cleanup in match_simplify_replacement We know that the statement we are moving already has an SSA_NAME on the lhs, so we don't need to check that and can also just call reset_flow_sensitive_info with the name we already got. OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions. gcc/ChangeLog: * tree-ssa-phiopt.cc (match_simplify_replacement): Simplify code that does the movement slightly. --- gcc/tree-ssa-phiopt.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc index 16acd2f..4ddb4d5 100644 --- a/gcc/tree-ssa-phiopt.cc +++ b/gcc/tree-ssa-phiopt.cc @@ -1094,11 +1094,10 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb, tree name = gimple_get_lhs (stmt_to_move); // Mark the name to be renamed if there is one.
- if (name && TREE_CODE (name) == SSA_NAME) - bitmap_set_bit (inserted_exprs, SSA_NAME_VERSION (name)); + bitmap_set_bit (inserted_exprs, SSA_NAME_VERSION (name)); gimple_stmt_iterator gsi1 = gsi_for_stmt (stmt_to_move); gsi_move_before (&gsi1, &gsi); - reset_flow_sensitive_info (gimple_assign_lhs (stmt_to_move)); + reset_flow_sensitive_info (name); } replace_phi_edge_with_variable (cond_bb, e1, phi, result, inserted_exprs); -- cgit v1.1 From 529489825677968e6e60dba41ed8f3d046141727 Mon Sep 17 00:00:00 2001 From: Andrew Pinski Date: Tue, 4 Apr 2023 00:09:27 +0000 Subject: PHIOPT: add folding/simplification detail to the dump While debugging PHI-OPT with match-and-simplify, I found that adding more dumping to the debug dumps made it easier to understand what was going on rather than stepping in the debugger so this adds them. Note I used TDF_FOLDING rather than TDF_DETAILS as these debug messages can be chatty and only needed if you are debugging match and simplify with PHI-OPT and match and simplify uses TDF_FOLDING as its check. OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions. gcc/ChangeLog: * tree-ssa-phiopt.cc (gimple_simplify_phiopt): Dump the expression that is being tried when TDF_FOLDING is true. (phiopt_worker::match_simplify_replacement): Dump the sequence which was created by gimple_simplify_phiopt when TDF_FOLDING is true. --- gcc/tree-ssa-phiopt.cc | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'gcc') diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc index 4ddb4d5..616b577 100644 --- a/gcc/tree-ssa-phiopt.cc +++ b/gcc/tree-ssa-phiopt.cc @@ -916,6 +916,18 @@ gimple_simplify_phiopt (bool early_p, tree type, gimple *comp_stmt, "a != 0". */ tree cond = build2_loc (loc, comp_code, boolean_type_node, cmp0, cmp1); + + if (dump_file && (dump_flags & TDF_FOLDING)) + { + fprintf (dump_file, "\nphiopt match-simplify trying:\n\t"); + print_generic_expr (dump_file, cond); + fprintf (dump_file, " ? "); + print_generic_expr (dump_file, arg0); + fprintf (dump_file, " : "); + print_generic_expr (dump_file, arg1); + fprintf (dump_file, "\n"); + } + gimple_match_op op (gimple_match_cond::UNCOND, COND_EXPR, type, cond, arg0, arg1); @@ -947,6 +959,18 @@ gimple_simplify_phiopt (bool early_p, tree type, gimple *comp_stmt, cond = build2_loc (loc, comp_code, boolean_type_node, cmp0, cmp1); + + if (dump_file && (dump_flags & TDF_FOLDING)) + { + fprintf (dump_file, "\nphiopt match-simplify trying:\n\t"); + print_generic_expr (dump_file, cond); + fprintf (dump_file, " ? 
"); + print_generic_expr (dump_file, arg1); + fprintf (dump_file, " : "); + print_generic_expr (dump_file, arg0); + fprintf (dump_file, "\n"); + } + gimple_match_op op1 (gimple_match_cond::UNCOND, COND_EXPR, type, cond, arg1, arg0); @@ -1078,6 +1102,11 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb, if (name && TREE_CODE (name) == SSA_NAME) bitmap_set_bit (inserted_exprs, SSA_NAME_VERSION (name)); } + if (dump_file && (dump_flags & TDF_FOLDING)) + { + fprintf (dump_file, "Folded into the sequence:\n"); + print_gimple_seq (dump_file, seq, 0, TDF_VOPS|TDF_MEMSYMS); + } gsi_insert_seq_before (&gsi, seq, GSI_CONTINUE_LINKING); } -- cgit v1.1 From 2b53ac39bce7f6696332a8374205182a72ef2cb7 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 10 Mar 2023 12:23:09 +0100 Subject: Shrink points-to analysis dumps when not dumping with -details The following allows to get PTA stats with -stats without blowing up your filesystem by guarding constraint and solution dumping with TDF_DETAILS and the SSA points-to info with TDF_DETAILS or TDF_ALIAS. * tree-ssa-structalias.cc (dump_sa_stats): Split out from... (dump_sa_points_to_info): ... this function. (compute_points_to_sets): Guard large dumps with TDF_DETAILS, and call dump_sa_stats guarded with TDF_STATS. (ipa_pta_execute): Likewise. (compute_may_aliases): Guard dump_alias_info with TDF_DETAILS|TDF_ALIAS. * gcc.dg/ipa/ipa-pta-16.c: Use -details for dump. * gcc.dg/tm/alias-1.c: Likewise. * gcc.dg/tm/alias-2.c: Likewise. * gcc.dg/torture/ipa-pta-1.c: Likewise. * gcc.dg/torture/pr39074-2.c: Likewise. * gcc.dg/torture/pr39074.c: Likewise. * gcc.dg/torture/pta-callused-1.c: Likewise. * gcc.dg/torture/pta-escape-1.c: Likewise. * gcc.dg/torture/pta-ptrarith-1.c: Likewise. * gcc.dg/torture/pta-ptrarith-2.c: Likewise. * gcc.dg/torture/pta-ptrarith-3.c: Likewise. * gcc.dg/torture/pta-structcopy-1.c: Likewise. * gcc.dg/torture/ssa-pta-fn-1.c: Likewise. * gcc.dg/tree-ssa/alias-19.c: Likewise. * gcc.dg/tree-ssa/pta-callused.c: Likewise. * gcc.dg/tree-ssa/pta-fp.c: Likewise. * gcc.dg/tree-ssa/pta-ptrarith-1.c: Likewise. * gcc.dg/tree-ssa/pta-ptrarith-2.c: Likewise. 
--- gcc/testsuite/gcc.dg/ipa/ipa-pta-16.c | 2 +- gcc/testsuite/gcc.dg/tm/alias-1.c | 2 +- gcc/testsuite/gcc.dg/tm/alias-2.c | 2 +- gcc/testsuite/gcc.dg/torture/ipa-pta-1.c | 2 +- gcc/testsuite/gcc.dg/torture/pr39074-2.c | 2 +- gcc/testsuite/gcc.dg/torture/pr39074.c | 2 +- gcc/testsuite/gcc.dg/torture/pta-callused-1.c | 2 +- gcc/testsuite/gcc.dg/torture/pta-escape-1.c | 2 +- gcc/testsuite/gcc.dg/torture/pta-ptrarith-1.c | 2 +- gcc/testsuite/gcc.dg/torture/pta-ptrarith-2.c | 2 +- gcc/testsuite/gcc.dg/torture/pta-ptrarith-3.c | 2 +- gcc/testsuite/gcc.dg/torture/pta-structcopy-1.c | 2 +- gcc/testsuite/gcc.dg/torture/ssa-pta-fn-1.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/alias-19.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/pta-callused.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/pta-fp.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/pta-ptrarith-1.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/pta-ptrarith-2.c | 2 +- gcc/tree-ssa-structalias.cc | 63 ++++++++++++++----------- 19 files changed, 53 insertions(+), 46 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-pta-16.c b/gcc/testsuite/gcc.dg/ipa/ipa-pta-16.c index 83b9cd8..68fa1c9 100644 --- a/gcc/testsuite/gcc.dg/ipa/ipa-pta-16.c +++ b/gcc/testsuite/gcc.dg/ipa/ipa-pta-16.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -fno-tree-sra -fipa-pta -fdump-ipa-pta2" } */ +/* { dg-options "-O2 -fno-tree-sra -fipa-pta -fdump-ipa-pta2-details" } */ struct X { diff --git a/gcc/testsuite/gcc.dg/tm/alias-1.c b/gcc/testsuite/gcc.dg/tm/alias-1.c index 86b3d7a..b90d142 100644 --- a/gcc/testsuite/gcc.dg/tm/alias-1.c +++ b/gcc/testsuite/gcc.dg/tm/alias-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-fgnu-tm -fdump-tree-ealias -O" } */ +/* { dg-options "-fgnu-tm -fdump-tree-ealias-details -O" } */ typedef __UINTPTR_TYPE__ ptrcast; diff --git a/gcc/testsuite/gcc.dg/tm/alias-2.c b/gcc/testsuite/gcc.dg/tm/alias-2.c index dd3db94..1967741 100644 --- a/gcc/testsuite/gcc.dg/tm/alias-2.c +++ b/gcc/testsuite/gcc.dg/tm/alias-2.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-fgnu-tm -fdump-tree-ealias -O" } */ +/* { dg-options "-fgnu-tm -fdump-tree-ealias-details -O" } */ typedef __UINTPTR_TYPE__ ptrcast; diff --git a/gcc/testsuite/gcc.dg/torture/ipa-pta-1.c b/gcc/testsuite/gcc.dg/torture/ipa-pta-1.c index 30156a3..fdac819 100644 --- a/gcc/testsuite/gcc.dg/torture/ipa-pta-1.c +++ b/gcc/testsuite/gcc.dg/torture/ipa-pta-1.c @@ -1,5 +1,5 @@ /* { dg-do compile { target { nonpic || pie_enabled } } } */ -/* { dg-options "-fipa-pta -fdump-ipa-pta2 -fno-ipa-icf" } */ +/* { dg-options "-fipa-pta -fdump-ipa-pta2-details -fno-ipa-icf" } */ /* { dg-skip-if "" { *-*-* } { "-O0" "-fno-fat-lto-objects" } { "" } } */ struct X { char x; char y; }; diff --git a/gcc/testsuite/gcc.dg/torture/pr39074-2.c b/gcc/testsuite/gcc.dg/torture/pr39074-2.c index 7286a4f..16c10e8 100644 --- a/gcc/testsuite/gcc.dg/torture/pr39074-2.c +++ b/gcc/testsuite/gcc.dg/torture/pr39074-2.c @@ -1,6 +1,6 @@ /* { dg-do run } */ /* { dg-require-effective-target stdint_types } */ -/* { dg-options "-fdump-tree-alias" } */ +/* { dg-options "-fdump-tree-alias-details" } */ /* { dg-skip-if "" { *-*-* } { "-O0" "-fno-fat-lto-objects" } { "" } } */ #include diff --git a/gcc/testsuite/gcc.dg/torture/pr39074.c b/gcc/testsuite/gcc.dg/torture/pr39074.c index 40ecdb9..eec4873 100644 --- a/gcc/testsuite/gcc.dg/torture/pr39074.c +++ b/gcc/testsuite/gcc.dg/torture/pr39074.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-fdump-tree-alias" } */ +/* { dg-options "-fdump-tree-alias-details" } 
*/ /* { dg-skip-if "" { *-*-* } { "-O0" "-fno-fat-lto-objects" } { "" } } */ typedef __INTPTR_TYPE__ intptr_t; diff --git a/gcc/testsuite/gcc.dg/torture/pta-callused-1.c b/gcc/testsuite/gcc.dg/torture/pta-callused-1.c index 0ca6ac9..36cf02f 100644 --- a/gcc/testsuite/gcc.dg/torture/pta-callused-1.c +++ b/gcc/testsuite/gcc.dg/torture/pta-callused-1.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-fdump-tree-alias" } */ +/* { dg-options "-fdump-tree-alias-details" } */ /* { dg-skip-if "" { *-*-* } { "-O0" "-fno-fat-lto-objects" } { "" } } */ volatile int i; diff --git a/gcc/testsuite/gcc.dg/torture/pta-escape-1.c b/gcc/testsuite/gcc.dg/torture/pta-escape-1.c index 9172bed..d7e9078 100644 --- a/gcc/testsuite/gcc.dg/torture/pta-escape-1.c +++ b/gcc/testsuite/gcc.dg/torture/pta-escape-1.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-fdump-tree-alias" } */ +/* { dg-options "-fdump-tree-alias-details" } */ /* { dg-skip-if "" { *-*-* } { "-O0" "-fno-fat-lto-objects" } { "" } } */ int *p; diff --git a/gcc/testsuite/gcc.dg/torture/pta-ptrarith-1.c b/gcc/testsuite/gcc.dg/torture/pta-ptrarith-1.c index 85b6806..a1dcf4f 100644 --- a/gcc/testsuite/gcc.dg/torture/pta-ptrarith-1.c +++ b/gcc/testsuite/gcc.dg/torture/pta-ptrarith-1.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-fdump-tree-alias" } */ +/* { dg-options "-fdump-tree-alias-details" } */ /* { dg-skip-if "" { *-*-* } { "-O0" "-fno-fat-lto-objects" } { "" } } */ struct Foo { diff --git a/gcc/testsuite/gcc.dg/torture/pta-ptrarith-2.c b/gcc/testsuite/gcc.dg/torture/pta-ptrarith-2.c index 4f5556a..7cda636 100644 --- a/gcc/testsuite/gcc.dg/torture/pta-ptrarith-2.c +++ b/gcc/testsuite/gcc.dg/torture/pta-ptrarith-2.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-fdump-tree-alias" } */ +/* { dg-options "-fdump-tree-alias-details" } */ /* { dg-skip-if "" { *-*-* } { "-O0" "-fno-fat-lto-objects" } { "" } } */ struct Foo { diff --git a/gcc/testsuite/gcc.dg/torture/pta-ptrarith-3.c b/gcc/testsuite/gcc.dg/torture/pta-ptrarith-3.c index 7082937..7a2b8db 100644 --- a/gcc/testsuite/gcc.dg/torture/pta-ptrarith-3.c +++ b/gcc/testsuite/gcc.dg/torture/pta-ptrarith-3.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-fdump-tree-alias" } */ +/* { dg-options "-fdump-tree-alias-details" } */ /* { dg-skip-if "" { *-*-* } { "-O0" "-fno-fat-lto-objects" } { "" } } */ extern void abort (void); diff --git a/gcc/testsuite/gcc.dg/torture/pta-structcopy-1.c b/gcc/testsuite/gcc.dg/torture/pta-structcopy-1.c index f9cf892..9c4d680 100644 --- a/gcc/testsuite/gcc.dg/torture/pta-structcopy-1.c +++ b/gcc/testsuite/gcc.dg/torture/pta-structcopy-1.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-fdump-tree-ealias -Wno-attributes" } */ +/* { dg-options "-fdump-tree-ealias-details -Wno-attributes" } */ /* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ struct X diff --git a/gcc/testsuite/gcc.dg/torture/ssa-pta-fn-1.c b/gcc/testsuite/gcc.dg/torture/ssa-pta-fn-1.c index de019a7..3746320 100644 --- a/gcc/testsuite/gcc.dg/torture/ssa-pta-fn-1.c +++ b/gcc/testsuite/gcc.dg/torture/ssa-pta-fn-1.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-fdump-tree-alias" } */ +/* { dg-options "-fdump-tree-alias-details" } */ /* { dg-skip-if "" { *-*-* } { "-O0" "-fno-fat-lto-objects" } { "" } } */ extern void abort (void); diff --git a/gcc/testsuite/gcc.dg/tree-ssa/alias-19.c b/gcc/testsuite/gcc.dg/tree-ssa/alias-19.c index 330ec00..219db9b 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/alias-19.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/alias-19.c @@ -1,5 
+1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -fdump-tree-alias" } */ +/* { dg-options "-O2 -fdump-tree-alias-details" } */ const static int a; diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pta-callused.c b/gcc/testsuite/gcc.dg/tree-ssa/pta-callused.c index b9a57d8..c7e96fb 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pta-callused.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pta-callused.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 --param max-fields-for-field-sensitive=2 -fdump-tree-alias" } */ +/* { dg-options "-O2 --param max-fields-for-field-sensitive=2 -fdump-tree-alias-details" } */ struct Foo { int *p, *q; diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pta-fp.c b/gcc/testsuite/gcc.dg/tree-ssa/pta-fp.c index 1ff007e..59652c3 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pta-fp.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pta-fp.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-alias" } */ +/* { dg-options "-O2 -fdump-tree-alias-details" } */ extern double cos (double); extern double sin (double); double f(double a) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pta-ptrarith-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pta-ptrarith-1.c index b56d589..9e4c0fe 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pta-ptrarith-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pta-ptrarith-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fno-tree-forwprop -fno-tree-ccp -fdump-tree-ealias" } */ +/* { dg-options "-O2 -fno-tree-forwprop -fno-tree-ccp -fdump-tree-ealias-details" } */ extern void abort (void); struct X { diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pta-ptrarith-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pta-ptrarith-2.c index be06a75..70b432f 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pta-ptrarith-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pta-ptrarith-2.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fno-tree-forwprop -fno-tree-ccp -fdump-tree-ealias" } */ +/* { dg-options "-O2 -fno-tree-forwprop -fno-tree-ccp -fdump-tree-ealias-details" } */ extern void abort (void); struct X { diff --git a/gcc/tree-ssa-structalias.cc b/gcc/tree-ssa-structalias.cc index c3c5bce..fa3a2e4 100644 --- a/gcc/tree-ssa-structalias.cc +++ b/gcc/tree-ssa-structalias.cc @@ -7137,33 +7137,33 @@ pt_solutions_intersect (struct pt_solution *pt1, struct pt_solution *pt2) return res; } +/* Dump stats information to OUTFILE. */ + +static void +dump_sa_stats (FILE *outfile) +{ + fprintf (outfile, "Points-to Stats:\n"); + fprintf (outfile, "Total vars: %d\n", stats.total_vars); + fprintf (outfile, "Non-pointer vars: %d\n", + stats.nonpointer_vars); + fprintf (outfile, "Statically unified vars: %d\n", + stats.unified_vars_static); + fprintf (outfile, "Dynamically unified vars: %d\n", + stats.unified_vars_dynamic); + fprintf (outfile, "Iterations: %d\n", stats.iterations); + fprintf (outfile, "Number of edges: %d\n", stats.num_edges); + fprintf (outfile, "Number of implicit edges: %d\n", + stats.num_implicit_edges); +} /* Dump points-to information to OUTFILE. 
*/ static void dump_sa_points_to_info (FILE *outfile) { - unsigned int i; - fprintf (outfile, "\nPoints-to sets\n\n"); - if (dump_flags & TDF_STATS) - { - fprintf (outfile, "Stats:\n"); - fprintf (outfile, "Total vars: %d\n", stats.total_vars); - fprintf (outfile, "Non-pointer vars: %d\n", - stats.nonpointer_vars); - fprintf (outfile, "Statically unified vars: %d\n", - stats.unified_vars_static); - fprintf (outfile, "Dynamically unified vars: %d\n", - stats.unified_vars_dynamic); - fprintf (outfile, "Iterations: %d\n", stats.iterations); - fprintf (outfile, "Number of edges: %d\n", stats.num_edges); - fprintf (outfile, "Number of implicit edges: %d\n", - stats.num_implicit_edges); - } - - for (i = 1; i < varmap.length (); i++) + for (unsigned i = 1; i < varmap.length (); i++) { varinfo_t vi = get_varinfo (i); if (!vi->may_have_pointers) @@ -7544,7 +7544,7 @@ compute_points_to_sets (void) } } - if (dump_file) + if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, "Points-to analysis\n\nConstraints:\n\n"); dump_constraints (dump_file, 0); @@ -7617,7 +7617,10 @@ compute_points_to_sets (void) BITMAP_FREE (new_delta); } - if (dump_file) + if (dump_file && (dump_flags & TDF_STATS)) + dump_sa_stats (dump_file); + + if (dump_file && (dump_flags & TDF_DETAILS)) dump_sa_points_to_info (dump_file); /* Compute the points-to set for ESCAPED used for call-clobber analysis. */ @@ -8039,7 +8042,8 @@ compute_may_aliases (void) "because IPA points-to information is available.\n\n"); /* But still dump what we have remaining it. */ - dump_alias_info (dump_file); + if (dump_flags & (TDF_DETAILS|TDF_ALIAS)) + dump_alias_info (dump_file); } return 0; @@ -8051,7 +8055,7 @@ compute_may_aliases (void) compute_points_to_sets (); /* Debugging dumps. */ - if (dump_file) + if (dump_file && (dump_flags & (TDF_DETAILS|TDF_ALIAS))) dump_alias_info (dump_file); /* Compute restrict-based memory disambiguations. */ @@ -8312,7 +8316,7 @@ ipa_pta_execute (void) fprintf (dump_file, "\n"); } - if (dump_file) + if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, "Generating generic constraints\n\n"); dump_constraints (dump_file, from); @@ -8351,7 +8355,7 @@ ipa_pta_execute (void) vi = create_function_info_for (node->decl, alias_get_name (node->decl), false, nonlocal_p); - if (dump_file + if (dump_file && (dump_flags & TDF_DETAILS) && from != constraints.length ()) { fprintf (dump_file, @@ -8392,7 +8396,7 @@ ipa_pta_execute (void) vi->is_ipa_escape_point = true; } - if (dump_file + if (dump_file && (dump_flags & TDF_DETAILS) && from != constraints.length ()) { fprintf (dump_file, @@ -8449,7 +8453,7 @@ ipa_pta_execute (void) } } - if (dump_file) + if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, "\n"); dump_constraints (dump_file, from); @@ -8461,7 +8465,7 @@ ipa_pta_execute (void) /* From the constraints compute the points-to sets. */ solve_constraints (); - if (dump_file) + if (dump_file && (dump_flags & TDF_STATS)) + dump_sa_stats (dump_file); + + if (dump_file && (dump_flags & TDF_DETAILS)) dump_sa_points_to_info (dump_file); /* Now post-process solutions to handle locals from different -- cgit v1.1 From f548ece7abc0a0c81dd049e9f8b480ff2c38e18b Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Tue, 14 Feb 2023 16:36:03 +0100 Subject: middle-end/108786 - add bitmap_clear_first_set_bit This adds bitmap_clear_first_set_bit and uses it where previously bitmap_clear_bit followed bitmap_first_set_bit. The advantage is speeding up the search and avoiding clobbering ->current.
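A standalone illustration of the fused find-and-clear on a single word (the patch does the same over GCC's multi-word bitmap representation):

    #include <stdio.h>
    #include <stdint.h>

    int main (void)
    {
      uint64_t worklist = 0x90;  /* bits 4 and 7 set */
      while (worklist)
        {
          unsigned bit = __builtin_ctzll (worklist); /* first set bit... */
          worklist &= worklist - 1;                  /* ...cleared in the same pass */
          printf ("processing %u\n", bit);
        }
      return 0;
    }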
PR middle-end/108786 * bitmap.h (bitmap_clear_first_set_bit): New. * bitmap.cc (bitmap_first_set_bit_worker): Rename from bitmap_first_set_bit and add optional clearing of the bit. (bitmap_first_set_bit): Wrap bitmap_first_set_bit_worker. (bitmap_clear_first_set_bit): Likewise. * df-core.cc (df_worklist_dataflow_doublequeue): Use bitmap_clear_first_set_bit. * graphite-scop-detection.cc (scop_detection::merge_sese): Likewise. * sanopt.cc (sanitize_asan_mark_unpoison): Likewise. (sanitize_asan_mark_poison): Likewise. * tree-cfgcleanup.cc (cleanup_tree_cfg_noloop): Likewise. * tree-into-ssa.cc (rewrite_blocks): Likewise. * tree-ssa-dce.cc (simple_dce_from_worklist): Likewise. * tree-ssa-sccvn.cc (do_rpo_vn_1): Likewise. --- gcc/bitmap.cc | 41 +++++++++++++++++++++++++++++++++++++---- gcc/bitmap.h | 3 +++ gcc/df-core.cc | 3 +-- gcc/graphite-scop-detection.cc | 3 +-- gcc/sanopt.cc | 6 ++---- gcc/tree-cfgcleanup.cc | 3 +-- gcc/tree-into-ssa.cc | 3 +-- gcc/tree-ssa-dce.cc | 3 +-- gcc/tree-ssa-sccvn.cc | 3 +-- 9 files changed, 48 insertions(+), 20 deletions(-) (limited to 'gcc') diff --git a/gcc/bitmap.cc b/gcc/bitmap.cc index 20de562..d1d0324 100644 --- a/gcc/bitmap.cc +++ b/gcc/bitmap.cc @@ -1217,12 +1217,12 @@ bitmap_single_bit_set_p (const_bitmap a) /* Return the bit number of the first set bit in the bitmap. The - bitmap must be non-empty. */ + bitmap must be non-empty. When CLEAR is true it clears the bit. */ -unsigned -bitmap_first_set_bit (const_bitmap a) +static unsigned +bitmap_first_set_bit_worker (bitmap a, bool clear) { - const bitmap_element *elt = a->first; + bitmap_element *elt = a->first; unsigned bit_no; BITMAP_WORD word; unsigned ix; @@ -1269,6 +1269,21 @@ bitmap_first_set_bit (const_bitmap a) gcc_checking_assert (word & 1); #endif + + if (clear) + { + elt->bits[ix] &= ~((BITMAP_WORD) 1 << (bit_no % BITMAP_WORD_BITS)); + /* If we cleared the entire word, free up the element. */ + if (!elt->bits[ix] + && bitmap_element_zerop (elt)) + { + if (!a->tree_form) + bitmap_list_unlink_element (a, elt); + else + bitmap_tree_unlink_element (a, elt); + } + } + return bit_no; } @@ -1276,6 +1291,24 @@ bitmap_first_set_bit (const_bitmap a) bitmap must be non-empty. */ unsigned +bitmap_first_set_bit (const_bitmap a) +{ + return bitmap_first_set_bit_worker (const_cast <bitmap> (a), false); +} + +/* Return and clear the bit number of the first set bit in the bitmap. The + bitmap must be non-empty. */ + +unsigned +bitmap_clear_first_set_bit (bitmap a) +{ + return bitmap_first_set_bit_worker (a, true); +} + +/* Return the bit number of the first set bit in the bitmap. The + bitmap must be non-empty. */ + +unsigned bitmap_last_set_bit (const_bitmap a) { const bitmap_element *elt; diff --git a/gcc/bitmap.h b/gcc/bitmap.h index 43337d2..5432f38 100644 --- a/gcc/bitmap.h +++ b/gcc/bitmap.h @@ -110,6 +110,7 @@ along with GCC; see the file COPYING3. If not see * clear : bitmap_clear * smallest_member : bitmap_first_set_bit + * pop_smallest : bitmap_clear_first_set_bit * choose_one : (not implemented, but could be in constant time) @@ -133,6 +134,7 @@ along with GCC; see the file COPYING3. If not see amortized time with O(E) worst-case behavior.
* smallest_member + * pop_smallest * largest_member * set_size * member_p @@ -501,6 +503,7 @@ extern void debug (const bitmap_head &ref); extern void debug (const bitmap_head *ptr); extern unsigned bitmap_first_set_bit (const_bitmap); +extern unsigned bitmap_clear_first_set_bit (bitmap); extern unsigned bitmap_last_set_bit (const_bitmap); /* Compute bitmap hash (for purposes of hashing etc.) */ diff --git a/gcc/df-core.cc b/gcc/df-core.cc index 38f69ac..3286ffd 100644 --- a/gcc/df-core.cc +++ b/gcc/df-core.cc @@ -1040,8 +1040,7 @@ df_worklist_dataflow_doublequeue (struct dataflow *dataflow, do { - unsigned index = bitmap_first_set_bit (worklist); - bitmap_clear_bit (worklist, index); + unsigned index = bitmap_clear_first_set_bit (worklist); unsigned bb_index; dcount++; diff --git a/gcc/graphite-scop-detection.cc b/gcc/graphite-scop-detection.cc index f976451..9955199 100644 --- a/gcc/graphite-scop-detection.cc +++ b/gcc/graphite-scop-detection.cc @@ -469,8 +469,7 @@ scop_detection::merge_sese (sese_l first, sese_l second) const its border it acts more like a visited bitmap. */ do { - int index = bitmap_first_set_bit (worklist); - bitmap_clear_bit (worklist, index); + int index = bitmap_clear_first_set_bit (worklist); basic_block bb = BASIC_BLOCK_FOR_FN (cfun, index); edge_iterator ei; edge e; diff --git a/gcc/sanopt.cc b/gcc/sanopt.cc index 8548973..ce8393b 100644 --- a/gcc/sanopt.cc +++ b/gcc/sanopt.cc @@ -1012,8 +1012,7 @@ sanitize_asan_mark_unpoison (void) /* 2) Propagate the information to all reachable blocks. */ while (!bitmap_empty_p (worklist)) { - unsigned i = bitmap_first_set_bit (worklist); - bitmap_clear_bit (worklist, i); + unsigned i = bitmap_clear_first_set_bit (worklist); basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i); gcc_assert (bb); @@ -1109,8 +1108,7 @@ sanitize_asan_mark_poison (void) /* 2) Propagate the information to all definitions blocks. */ while (!bitmap_empty_p (worklist)) { - unsigned i = bitmap_first_set_bit (worklist); - bitmap_clear_bit (worklist, i); + unsigned i = bitmap_clear_first_set_bit (worklist); basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i); gcc_assert (bb); diff --git a/gcc/tree-cfgcleanup.cc b/gcc/tree-cfgcleanup.cc index 64ff16f..42b2531 100644 --- a/gcc/tree-cfgcleanup.cc +++ b/gcc/tree-cfgcleanup.cc @@ -1133,8 +1133,7 @@ cleanup_tree_cfg_noloop (unsigned ssa_update_flags) /* Now process the altered blocks, as long as any are available. */ while (!bitmap_empty_p (cfgcleanup_altered_bbs)) { - unsigned i = bitmap_first_set_bit (cfgcleanup_altered_bbs); - bitmap_clear_bit (cfgcleanup_altered_bbs, i); + unsigned i = bitmap_clear_first_set_bit (cfgcleanup_altered_bbs); if (i < NUM_FIXED_BLOCKS) continue; diff --git a/gcc/tree-into-ssa.cc b/gcc/tree-into-ssa.cc index 2e32299..5cfe7c5 100644 --- a/gcc/tree-into-ssa.cc +++ b/gcc/tree-into-ssa.cc @@ -2348,8 +2348,7 @@ rewrite_blocks (basic_block entry, enum rewrite_mode what) } while (!bitmap_empty_p (worklist)) { - int idx = bitmap_first_set_bit (worklist); - bitmap_clear_bit (worklist, idx); + int idx = bitmap_clear_first_set_bit (worklist); basic_block bb = BASIC_BLOCK_FOR_FN (cfun, idx); bb->flags |= in_region; extra_rgn.safe_push (bb); diff --git a/gcc/tree-ssa-dce.cc b/gcc/tree-ssa-dce.cc index 0ae998f..bda7808 100644 --- a/gcc/tree-ssa-dce.cc +++ b/gcc/tree-ssa-dce.cc @@ -2102,8 +2102,7 @@ simple_dce_from_worklist (bitmap worklist) while (! bitmap_empty_p (worklist)) { /* Pop item. 
- unsigned i = bitmap_first_set_bit (worklist); - bitmap_clear_bit (worklist, i); + unsigned i = bitmap_clear_first_set_bit (worklist); tree def = ssa_name (i); /* Removed by somebody else or still in use. */ diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc index 9692911..7fa2a15 100644 --- a/gcc/tree-ssa-sccvn.cc +++ b/gcc/tree-ssa-sccvn.cc @@ -8491,8 +8491,7 @@ do_rpo_vn_1 (function *fn, edge entry, bitmap exit_bbs, bitmap_set_bit (worklist, 0); while (!bitmap_empty_p (worklist)) { - int idx = bitmap_first_set_bit (worklist); - bitmap_clear_bit (worklist, idx); + int idx = bitmap_clear_first_set_bit (worklist); basic_block bb = BASIC_BLOCK_FOR_FN (fn, rpo[idx]); gcc_assert ((bb->flags & BB_EXECUTABLE) && !rpo_state[idx].visited); -- cgit v1.1 From 14c1a8dfa3eaea283ab467c2aa3a62a25d3b49cb Mon Sep 17 00:00:00 2001 From: Kito Cheng Date: Tue, 18 Apr 2023 18:07:06 +0800 Subject: Docs: Add doc for RISC-V vector intrinsics Document which version of the RISC-V vector intrinsics has been implemented in GCC. gcc/ChangeLog: * doc/extend.texi (Target Builtins): Add RISC-V Vector Intrinsics. (RISC-V Vector Intrinsics): Document which version of the RISC-V vector intrinsics GCC implements, and reference the specification. --- gcc/doc/extend.texi | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'gcc') diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 69c5ade..84b44cb 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -14689,6 +14689,7 @@ instructions, but allow the compiler to schedule those calls. * PowerPC Matrix-Multiply Assist Built-in Functions:: * PRU Built-in Functions:: * RISC-V Built-in Functions:: +* RISC-V Vector Intrinsics:: * RX Built-in Functions:: * S/390 System z Built-in Functions:: * SH Built-in Functions:: @@ -21248,6 +21249,14 @@ Xgnuzihintpausestate extension, which redefines the @code{pause} instruction to change architectural state. @enddefbuiltin +@node RISC-V Vector Intrinsics +@subsection RISC-V Vector Intrinsics + +GCC supports vector intrinsics as specified in version 0.11 of the RISC-V +vector intrinsic specification, which is available at the following link: +@uref{https://github.com/riscv-non-isa/rvv-intrinsic-doc/tree/v0.11.x}. +All of these functions are declared in the include file @file{riscv_vector.h}. + @node RX Built-in Functions @subsection RX Built-in Functions GCC supports some of the RX instructions which cannot be expressed in -- cgit v1.1 From b80317116547c85c176a7e41bdb67376cee6f0ce Mon Sep 17 00:00:00 2001 From: "Victor L. Do Nascimento" Date: Tue, 18 Apr 2023 17:11:58 +0100 Subject: constraint: fix relaxed memory and repeated constraint handling The function `constrain_operands' lacked the logic to consider relaxed memory constraints when "traditional" memory constraints were not satisfied, creating potential issues as observed during the reload compilation pass. In addition, it was observed that while `constrain_operands' chooses to disregard constraints when more than one alternative is provided, e.g. "m,r" using CONSTRAINT__UNKNOWN, it has no checks in place to determine whether the multiple constraints in a given string are in fact repetitions of the same constraint and should thus be treated as a single constraint, as ought to be the case for something like "m,m". Both of these issues are dealt with here, thus ensuring that we get appropriate pattern matching. gcc/ * lra-constraints.cc (constraint_unique): New. (process_address_1): Apply constraint_unique test. * recog.cc (constrain_operands): Allow relaxed memory constraints.
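A simplified standalone analogue of the new constraint_unique test -- plain string tokens instead of constraint_num values and no modifier skipping, so it captures only the core idea of "all comma-separated alternatives identical":

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    static bool
    all_alternatives_identical (const char *s)
    {
      char first[16] = "";  /* assumes short tokens; enough for a sketch */
      while (*s)
        {
          size_t n = strcspn (s, ",");
          if (first[0] == '\0')
            snprintf (first, sizeof first, "%.*s", (int) n, s);
          else if (n != strlen (first) || strncmp (s, first, n) != 0)
            return false;
          s += n + (s[n] == ',');
        }
      return first[0] != '\0';
    }

    int main (void)
    {
      /* "m,m" collapses to one constraint; "m,r" does not.  */
      printf ("%d %d\n", all_alternatives_identical ("m,m"),
              all_alternatives_identical ("m,r"));  /* prints: 1 0 */
      return 0;
    }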
--- gcc/lra-constraints.cc | 39 ++++++++++++++++++++++++++++++++++++--- gcc/recog.cc | 3 ++- 2 files changed, 38 insertions(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc index ff4e8f0..b231cb6 100644 --- a/gcc/lra-constraints.cc +++ b/gcc/lra-constraints.cc @@ -3450,6 +3450,41 @@ skip_constraint_modifiers (const char *str) } } +/* Takes a string of 0 or more comma-separated constraints. When more + than one constraint is present, evaluate whether they all correspond + to a single, repeated constraint (e.g. "r,r") or whether we have + more than one distinct constraints (e.g. "r,m"). */ +static bool +constraint_unique (const char *cstr) +{ + enum constraint_num ca, cb; + ca = CONSTRAINT__UNKNOWN; + for (;;) + { + cstr = skip_constraint_modifiers (cstr); + if (*cstr == '\0' || *cstr == ',') + cb = CONSTRAINT_X; + else + { + cb = lookup_constraint (cstr); + if (cb == CONSTRAINT__UNKNOWN) + return false; + cstr += CONSTRAINT_LEN (cstr[0], cstr); + } + /* Handle the first iteration of the loop. */ + if (ca == CONSTRAINT__UNKNOWN) + ca = cb; + /* Handle the general case of comparing ca with subsequent + constraints. */ + else if (ca != cb) + return false; + if (*cstr == '\0') + return true; + if (*cstr == ',') + cstr += 1; + } +} + /* Major function to make reloads for an address in operand NOP or check its correctness (If CHECK_ONLY_P is true). The supported cases are: @@ -3509,9 +3544,7 @@ process_address_1 (int nop, bool check_only_p, operand has one address constraint, probably all others constraints are address ones. */ if (constraint[0] != '\0' && get_constraint_type (cn) != CT_ADDRESS - && *skip_constraint_modifiers (constraint - + CONSTRAINT_LEN (constraint[0], - constraint)) != '\0') + && !constraint_unique (constraint)) cn = CONSTRAINT__UNKNOWN; if (insn_extra_address_constraint (cn) /* When we find an asm operand with an address constraint that diff --git a/gcc/recog.cc b/gcc/recog.cc index 200cf42..3ddeab5 100644 --- a/gcc/recog.cc +++ b/gcc/recog.cc @@ -3234,7 +3234,8 @@ constrain_operands (int strict, alternative_mask alternatives) else if (constraint_satisfied_p (op, cn)) win = 1; - else if (insn_extra_memory_constraint (cn) + else if ((insn_extra_memory_constraint (cn) + || insn_extra_relaxed_memory_constraint (cn)) /* Every memory operand can be reloaded to fit. */ && ((strict < 0 && MEM_P (op)) /* Before reload, accept what reload can turn -- cgit v1.1 From 1e29f9063b0e771be5be922250665c4fed3dc46e Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Thu, 23 Feb 2023 09:10:16 +0100 Subject: Add GTY support for vrange. IPA currently puts *some* irange's in GC memory. When I contribute support for generic ranges in IPA, we'll need to change this to vrange. This patch adds GTY support for both vrange and frange. gcc/ChangeLog: * value-range.cc (gt_ggc_mx): New. (gt_pch_nx): New. * value-range.h (class vrange): Add GTY marker. (class frange): Same. (gt_ggc_mx): Remove. (gt_pch_nx): Remove. 
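The shape of the added dispatch -- a user-marked base class whose walker routes to the derived type via the discriminator -- reduced to a standalone sketch with invented names and no actual garbage collector:

    #include <cassert>
    #include <cstdio>

    enum kind { K_INT, K_FLOAT };

    struct base_range { kind k; };
    struct int_range : base_range { int lo, hi; };
    struct float_range : base_range { double lo, hi; };

    static void mark (int_range *r)   { std::printf ("int [%d, %d]\n", r->lo, r->hi); }
    static void mark (float_range *r) { std::printf ("float [%g, %g]\n", r->lo, r->hi); }

    /* The gt_ggc_mx (vrange *) analogue: dispatch on the discriminator.  */
    static void mark (base_range *b)
    {
      if (b->k == K_INT)
        return mark (static_cast<int_range *> (b));
      if (b->k == K_FLOAT)
        return mark (static_cast<float_range *> (b));
      assert (false);
    }

    int main ()
    {
      int_range r;
      r.k = K_INT; r.lo = 1; r.hi = 5;
      base_range *b = &r;
      mark (b);  /* prints: int [1, 5] */
    }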
--- gcc/value-range.cc | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ gcc/value-range.h | 51 +++++++++----------------------- 2 files changed, 99 insertions(+), 37 deletions(-) (limited to 'gcc') diff --git a/gcc/value-range.cc b/gcc/value-range.cc index 3b3102b..17f4e1b 100644 --- a/gcc/value-range.cc +++ b/gcc/value-range.cc @@ -3252,6 +3252,91 @@ vrp_operand_equal_p (const_tree val1, const_tree val2) return true; } +void +gt_ggc_mx (irange *x) +{ + for (unsigned i = 0; i < x->m_num_ranges; ++i) + { + gt_ggc_mx (x->m_base[i * 2]); + gt_ggc_mx (x->m_base[i * 2 + 1]); + } + if (x->m_nonzero_mask) + gt_ggc_mx (x->m_nonzero_mask); +} + +void +gt_pch_nx (irange *x) +{ + for (unsigned i = 0; i < x->m_num_ranges; ++i) + { + gt_pch_nx (x->m_base[i * 2]); + gt_pch_nx (x->m_base[i * 2 + 1]); + } + if (x->m_nonzero_mask) + gt_pch_nx (x->m_nonzero_mask); +} + +void +gt_pch_nx (irange *x, gt_pointer_operator op, void *cookie) +{ + for (unsigned i = 0; i < x->m_num_ranges; ++i) + { + op (&x->m_base[i * 2], NULL, cookie); + op (&x->m_base[i * 2 + 1], NULL, cookie); + } + if (x->m_nonzero_mask) + op (&x->m_nonzero_mask, NULL, cookie); +} + +void +gt_ggc_mx (frange *x) +{ + gt_ggc_mx (x->m_type); +} + +void +gt_pch_nx (frange *x) +{ + gt_pch_nx (x->m_type); +} + +void +gt_pch_nx (frange *x, gt_pointer_operator op, void *cookie) +{ + op (&x->m_type, NULL, cookie); +} + +void +gt_ggc_mx (vrange *x) +{ + if (is_a <irange> (*x)) + return gt_ggc_mx ((irange *) x); + if (is_a <frange> (*x)) + return gt_ggc_mx ((frange *) x); + gcc_unreachable (); +} + +void +gt_pch_nx (vrange *x) +{ + if (is_a <irange> (*x)) + return gt_pch_nx ((irange *) x); + if (is_a <frange> (*x)) + return gt_pch_nx ((frange *) x); + gcc_unreachable (); +} + +void +gt_pch_nx (vrange *x, gt_pointer_operator op, void *cookie) +{ + if (is_a <irange> (*x)) + gt_pch_nx ((irange *) x, op, cookie); + else if (is_a <frange> (*x)) + gt_pch_nx ((frange *) x, op, cookie); + else + gcc_unreachable (); +} + // ?? These stubs are for ipa-prop.cc which use a value_range in a // hash_traits. hash-traits.h defines an extern of gt_ggc_mx (T &) // instead of picking up the gt_ggc_mx (T *) version. diff --git a/gcc/value-range.h b/gcc/value-range.h index 5545cce..0eeea79 100644 --- a/gcc/value-range.h +++ b/gcc/value-range.h @@ -72,7 +72,7 @@ enum value_range_discriminator // if (f.supports_type_p (type)) ... // } -class vrange +class GTY((user)) vrange { template <typename T> friend bool is_a (vrange &); friend class Value_Range;
-class frange : public vrange +class GTY((user)) frange : public vrange { friend class frange_storage_slot; friend class vrange_printer; + friend void gt_ggc_mx (frange *); + friend void gt_pch_nx (frange *); + friend void gt_pch_nx (frange *, gt_pointer_operator, void *); public: frange (); frange (const frange &); @@ -827,41 +830,15 @@ range_includes_zero_p (const irange *vr) return vr->may_contain_p (build_zero_cst (vr->type ())); } -inline void -gt_ggc_mx (irange *x) -{ - for (unsigned i = 0; i < x->m_num_ranges; ++i) - { - gt_ggc_mx (x->m_base[i * 2]); - gt_ggc_mx (x->m_base[i * 2 + 1]); - } - if (x->m_nonzero_mask) - gt_ggc_mx (x->m_nonzero_mask); -} - -inline void -gt_pch_nx (irange *x) -{ - for (unsigned i = 0; i < x->m_num_ranges; ++i) - { - gt_pch_nx (x->m_base[i * 2]); - gt_pch_nx (x->m_base[i * 2 + 1]); - } - if (x->m_nonzero_mask) - gt_pch_nx (x->m_nonzero_mask); -} - -inline void -gt_pch_nx (irange *x, gt_pointer_operator op, void *cookie) -{ - for (unsigned i = 0; i < x->m_num_ranges; ++i) - { - op (&x->m_base[i * 2], NULL, cookie); - op (&x->m_base[i * 2 + 1], NULL, cookie); - } - if (x->m_nonzero_mask) - op (&x->m_nonzero_mask, NULL, cookie); -} +extern void gt_ggc_mx (vrange *); +extern void gt_pch_nx (vrange *); +extern void gt_pch_nx (vrange *, gt_pointer_operator, void *); +extern void gt_ggc_mx (irange *); +extern void gt_pch_nx (irange *); +extern void gt_pch_nx (irange *, gt_pointer_operator, void *); +extern void gt_ggc_mx (frange *); +extern void gt_pch_nx (frange *); +extern void gt_pch_nx (frange *, gt_pointer_operator, void *); template <typename T> inline void -- cgit v1.1 From 95b99e47f4f2df2d0c5680f45e3ec0a3170218ad Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 18 Apr 2023 17:50:37 +0200 Subject: i386: Improve permutations with INSERTPS instruction [PR94908] INSERTPS can select any element from src and insert into any place of the dest. For SSE4.1 targets, the compiler can generate e.g. insertps $64, %xmm0, %xmm1 to insert element 1 from %xmm1 to element 0 of %xmm0. gcc/ChangeLog: PR target/94908 * config/i386/i386-builtin.def (__builtin_ia32_insertps128): Use CODE_FOR_sse4_1_insertps_v4sf. * config/i386/i386-expand.cc (expand_vec_perm_insertps): New. (expand_vec_perm_1): Call expand_vec_perm_insertps. * config/i386/i386.md ("unspec"): Declare UNSPEC_INSERTPS here. * config/i386/mmx.md (mmxscalarmode): New mode attribute. (@sse4_1_insertps_<mode>): New insn pattern. * config/i386/sse.md (@sse4_1_insertps_<mode>): Macroize insn pattern from sse4_1_insertps using VI4F_128 mode iterator. gcc/testsuite/ChangeLog: PR target/94908 * gcc.target/i386/pr94908.c: New test. * gcc.target/i386/sse4_1-insertps-5.c: New test. * gcc.target/i386/vperm-v4sf-2-sse4.c: New test.
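The immediate layout can be exercised directly with the public SSE4.1 intrinsic (this uses <smmintrin.h>, not GCC internals; compile with -msse4.1). Bits 7:6 select the source element and bits 5:4 the destination slot, matching GEN_INT (cnt_s << 6 | cnt_d << 4) in the new code:

    #include <smmintrin.h>
    #include <stdio.h>

    int main (void)
    {
      __m128 dst = _mm_set_ps (3.f, 2.f, 1.f, 0.f);  /* lanes {0,1,2,3} */
      __m128 src = _mm_set_ps (7.f, 6.f, 5.f, 4.f);  /* lanes {4,5,6,7} */
      /* Insert element 1 of SRC (5.0) into element 0 of DST: imm = 1<<6 = 64,
         the same $64 as in the commit message's example.  */
      __m128 r = _mm_insert_ps (dst, src, (1 << 6) | (0 << 4));
      float out[4];
      _mm_storeu_ps (out, r);
      printf ("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  /* 5 1 2 3 */
      return 0;
    }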
--- gcc/config/i386/i386-builtin.def | 2 +- gcc/config/i386/i386-expand.cc | 76 +++++++++++++++++++++++ gcc/config/i386/i386.md | 1 + gcc/config/i386/mmx.md | 40 ++++++++++++ gcc/config/i386/sse.md | 17 ++--- gcc/testsuite/gcc.target/i386/pr94908.c | 14 +++++ gcc/testsuite/gcc.target/i386/sse4_1-insertps-5.c | 19 ++++++ gcc/testsuite/gcc.target/i386/vperm-v4sf-2-sse4.c | 4 ++ 8 files changed, 164 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr94908.c create mode 100644 gcc/testsuite/gcc.target/i386/sse4_1-insertps-5.c create mode 100644 gcc/testsuite/gcc.target/i386/vperm-v4sf-2-sse4.c (limited to 'gcc') diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 6dae697..f7cf105 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -942,7 +942,7 @@ BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blen BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_nothing, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF) BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) -BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) +BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_insertps_v4sf, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT) BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_nothing, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI) BDESC (OPTION_MASK_ISA_SSE4_1, 0, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 0d817fc..9fa549c 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -18986,6 +18986,78 @@ expand_vec_perm_movs (struct expand_vec_perm_d *d) } /* A subroutine of ix86_expand_vec_perm_const_1. Try to implement D + using insertps. 
*/ +static bool +expand_vec_perm_insertps (struct expand_vec_perm_d *d) +{ + machine_mode vmode = d->vmode; + unsigned i, cnt_s, nelt = d->nelt; + int cnt_d = -1; + rtx src, dst; + + if (d->one_operand_p) + return false; + + if (!(TARGET_SSE4_1 + && (vmode == V4SFmode || vmode == V4SImode + || (TARGET_MMX_WITH_SSE + && (vmode == V2SFmode || vmode == V2SImode))))) + return false; + + for (i = 0; i < nelt; ++i) + { + if (d->perm[i] == i) + continue; + if (cnt_d != -1) + { + cnt_d = -1; + break; + } + cnt_d = i; + } + + if (cnt_d == -1) + { + for (i = 0; i < nelt; ++i) + { + if (d->perm[i] == i + nelt) + continue; + if (cnt_d != -1) + return false; + cnt_d = i; + } + + if (cnt_d == -1) + return false; + } + + if (d->testing_p) + return true; + + gcc_assert (cnt_d != -1); + + cnt_s = d->perm[cnt_d]; + if (cnt_s < nelt) + { + src = d->op0; + dst = d->op1; + } + else + { + cnt_s -= nelt; + src = d->op1; + dst = d->op0; + } + gcc_assert (cnt_s < nelt); + + rtx x = gen_sse4_1_insertps (vmode, d->target, dst, src, + GEN_INT (cnt_s << 6 | cnt_d << 4)); + emit_insn (x); + + return true; +} + +/* A subroutine of ix86_expand_vec_perm_const_1. Try to implement D in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */ static bool @@ -19918,6 +19990,10 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d) if (expand_vec_perm_movs (d)) return true; + /* Try the SSE4.1 insertps instruction. */ + if (expand_vec_perm_insertps (d)) + return true; + /* Try the fully general two operand permute. */ if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt, d->testing_p)) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index ed689b0..1419ea4 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -120,6 +120,7 @@ UNSPEC_MASKMOV UNSPEC_MOVCC_MASK UNSPEC_MOVMSK + UNSPEC_INSERTPS UNSPEC_BLENDV UNSPEC_PSHUFB UNSPEC_XOP_PERMUTE diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index 18dae03..872ddbc 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -106,6 +106,10 @@ (define_mode_attr mmxintvecmodelower [(V2SF "v2si") (V2SI "v2si") (V4HI "v4hi") (V8QI "v8qi")]) +;; Mapping of vector modes back to the scalar modes +(define_mode_attr mmxscalarmode + [(V2SI "SI") (V2SF "SF")]) + (define_mode_attr Yv_Yw [(V8QI "Yw") (V4HI "Yw") (V2SI "Yv") (V1DI "Yv") (V2SF "Yv")]) @@ -1154,6 +1158,42 @@ DONE; }) +(define_insn "@sse4_1_insertps_<mode>" + [(set (match_operand:V2FI 0 "register_operand" "=Yr,*x,v") + (unspec:V2FI + [(match_operand:V2FI 2 "nonimmediate_operand" "Yrm,*xm,vm") + (match_operand:V2FI 1 "register_operand" "0,0,v") + (match_operand:SI 3 "const_0_to_255_operand")] + UNSPEC_INSERTPS))] + "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE" +{ + if (MEM_P (operands[2])) + { + unsigned count_s = INTVAL (operands[3]) >> 6; + if (count_s) + operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f); + operands[2] = adjust_address_nv (operands[2], + <mmxscalarmode>mode, count_s * 4); + } + switch (which_alternative) + { + case 0: + case 1: + return "insertps\t{%3, %2, %0|%0, %2, %3}"; + case 2: + return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + default: + gcc_unreachable (); + } +} + [(set_attr "isa" "noavx,noavx,avx") + (set_attr "type" "sselog") + (set_attr "prefix_data16" "1,1,*") + (set_attr "prefix_extra" "1") + (set_attr "length_immediate" "1") + (set_attr "prefix" "orig,orig,maybe_evex") + (set_attr "mode" "V4SF")]) + (define_insn "*mmx_blendps" [(set (match_operand:V2SF 0 "register_operand" "=Yr,*x,x") (vec_merge:V2SF diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 513960e..5dca8dd 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -38,7 +38,6 @@ UNSPEC_INSERTQ ;; For SSE4.1 support - UNSPEC_INSERTPS UNSPEC_DP UNSPEC_MOVNTDQA UNSPEC_MPSADBW @@ -10959,12 +10958,13 @@ DONE; }) -(define_insn "sse4_1_insertps" - [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v") - (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm") - (match_operand:V4SF 1 "register_operand" "0,0,v") - (match_operand:SI 3 "const_0_to_255_operand")] - UNSPEC_INSERTPS))] +(define_insn "@sse4_1_insertps_" + [(set (match_operand:VI4F_128 0 "register_operand" "=Yr,*x,v") + (unspec:VI4F_128 + [(match_operand:VI4F_128 2 "nonimmediate_operand" "Yrm,*xm,vm") + (match_operand:VI4F_128 1 "register_operand" "0,0,v") + (match_operand:SI 3 "const_0_to_255_operand")] + UNSPEC_INSERTPS))] "TARGET_SSE4_1" { if (MEM_P (operands[2])) @@ -10972,7 +10972,8 @@ unsigned count_s = INTVAL (operands[3]) >> 6; if (count_s) operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f); - operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4); + operands[2] = adjust_address_nv (operands[2], + mode, count_s * 4); } switch (which_alternative) { diff --git a/gcc/testsuite/gcc.target/i386/pr94908.c b/gcc/testsuite/gcc.target/i386/pr94908.c new file mode 100644 index 0000000..11a5f90 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr94908.c @@ -0,0 +1,14 @@ +/* PR target/94908 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4.1" } */ + +typedef float v4sf __attribute__((vector_size(16))); + +v4sf g(); + +v4sf f(v4sf a, v4sf b) +{ + return (v4sf){g()[1], a[1], a[2], a[3]}; +} + +/* { dg-final { scan-assembler "\[ \t\]v?insertps" } } */ diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-insertps-5.c b/gcc/testsuite/gcc.target/i386/sse4_1-insertps-5.c new file mode 100644 index 0000000..d9c4cfc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-insertps-5.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse4.1" } */ + +typedef unsigned int v4si __attribute__((vector_size(16))); +typedef float v4sf __attribute__((vector_size(16))); + +v4si foo_1(v4si x, v4si y) { return (v4si){x[0],y[3],x[2],x[3]}; } +v4si foo_2(v4si x, v4si y) { return (v4si){y[0],x[2],y[2],y[3]}; } +v4si foo_3(v4si x, v4si y) { return (v4si){x[3],y[1],y[2],y[3]}; } + +v4sf bar_1(v4sf x, v4sf y) { return (v4sf){y[0],x[3],y[2],y[3]}; } +v4sf bar_2(v4sf x, v4sf y) { return (v4sf){x[0],y[2],x[2],x[3]}; } +v4sf bar_3(v4sf x, v4sf y) { return (v4sf){y[3],x[1],x[2],x[3]}; } + +/* { dg-final { scan-assembler-times "\tv?insertps\t" 6 } } */ +/* { dg-final { scan-assembler-not "pshufd" } } */ +/* { dg-final { scan-assembler-not "pblendw" } } */ +/* { dg-final { scan-assembler-not "shufps" } } */ +/* { dg-final { scan-assembler-not "blendps" } } */ diff --git a/gcc/testsuite/gcc.target/i386/vperm-v4sf-2-sse4.c b/gcc/testsuite/gcc.target/i386/vperm-v4sf-2-sse4.c new file mode 100644 index 0000000..ed5963e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vperm-v4sf-2-sse4.c @@ -0,0 +1,4 @@ +/* { dg-do run } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-options "-O -msse4" } */ +#include "vperm-v4sf-2.c" -- cgit v1.1 From 9874ceed091a0ce17b23d8d77b5bf90b8902a3c0 Mon Sep 17 00:00:00 2001 From: Sinan Lin Date: Tue, 18 Apr 2023 12:24:52 -0600 Subject: Add TARGET_ZBKB to the condition of bswapsi2, bswapdi2 and rotr3 patterns gcc/ * config/riscv/bitmanip.md (rotr3 expander): Enable for ZBKB. (bswapdi2, bswapsi2): Similarly. 
--- gcc/config/riscv/bitmanip.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'gcc') diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index 062968d..388ef66 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -297,7 +297,7 @@ [(set (match_operand:GPR 0 "register_operand") (rotatert:GPR (match_operand:GPR 1 "register_operand") (match_operand:QI 2 "arith_operand")))] - "TARGET_ZBB || TARGET_XTHEADBB" + "TARGET_ZBB || TARGET_XTHEADBB || TARGET_ZBKB" { if (TARGET_XTHEADBB && !immediate_operand (operands[2], VOIDmode)) FAIL; @@ -362,12 +362,12 @@ (define_expand "bswapdi2" [(set (match_operand:DI 0 "register_operand") (bswap:DI (match_operand:DI 1 "register_operand")))] - "TARGET_64BIT && (TARGET_ZBB || TARGET_XTHEADBB)") + "TARGET_64BIT && (TARGET_ZBB || TARGET_XTHEADBB || TARGET_ZBKB)") (define_expand "bswapsi2" [(set (match_operand:SI 0 "register_operand") (bswap:SI (match_operand:SI 1 "register_operand")))] - "(!TARGET_64BIT && TARGET_ZBB) || TARGET_XTHEADBB") + "(!TARGET_64BIT && (TARGET_ZBB || TARGET_ZBKB)) || TARGET_XTHEADBB") (define_insn "*bswap2" [(set (match_operand:X 0 "register_operand" "=r") -- cgit v1.1 From c9d7fbdf3e752be40c34855741e7c27121315744 Mon Sep 17 00:00:00 2001 From: Kevin Lee Date: Tue, 18 Apr 2023 12:42:17 -0600 Subject: vect: Verify that GET_MODE_UNITS is greater than one for vect_grouped_store_supported gcc/ChangeLog: * tree-vect-data-refs.cc (vect_grouped_store_supported): Add new condition. --- gcc/tree-vect-data-refs.cc | 2 ++ 1 file changed, 2 insertions(+) (limited to 'gcc') diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index 8daf7bd..c03ffb3 100644 --- a/gcc/tree-vect-data-refs.cc +++ b/gcc/tree-vect-data-refs.cc @@ -5399,6 +5399,8 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count) poly_uint64 nelt = GET_MODE_NUNITS (mode); /* The encoding has 2 interleaved stepped patterns. */ + if(!multiple_p (nelt, 2)) + return false; vec_perm_builder sel (nelt, 2, 3); sel.quick_grow (6); for (i = 0; i < 3; i++) -- cgit v1.1 From 403779a7d659418079760ab9e0facefcc59f89ad Mon Sep 17 00:00:00 2001 From: Sam James Date: Tue, 18 Apr 2023 13:27:38 -0600 Subject: gcc: Drop obsolete INCLUDE_PTHREAD_H gcc/ChangeLog: * system.h: Drop unused INCLUDE_PTHREAD_H. --- gcc/system.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'gcc') diff --git a/gcc/system.h b/gcc/system.h index 71d8a04..65d514d 100644 --- a/gcc/system.h +++ b/gcc/system.h @@ -779,10 +779,6 @@ private: #endif #endif -#ifdef INCLUDE_PTHREAD_H -#include -#endif - #ifdef INCLUDE_ISL #ifdef HAVE_isl #include -- cgit v1.1 From 6c11d30799ff3160729315d07c3df641c3ca9870 Mon Sep 17 00:00:00 2001 From: Andrew Pinski Date: Fri, 31 Mar 2023 00:00:20 +0000 Subject: PHIOPT: Move tree_ssa_cs_elim into pass_cselim::execute. This moves around the code for tree_ssa_cs_elim slightly improving code readability and removing declarations that are no longer needed. OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions. gcc/ChangeLog: * tree-ssa-phiopt.cc (tree_ssa_phiopt_worker): Remove declaration. (make_pass_phiopt): Make execute out of line. (tree_ssa_cs_elim): Move code into ... (pass_cselim::execute): here. 
--- gcc/tree-ssa-phiopt.cc | 118 ++++++++++++++++++++++++------------------------- 1 file changed, 57 insertions(+), 61 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc index 616b577..945507b 100644 --- a/gcc/tree-ssa-phiopt.cc +++ b/gcc/tree-ssa-phiopt.cc @@ -55,7 +55,6 @@ along with GCC; see the file COPYING3. If not see #include "tree-ssa-propagate.h" #include "tree-ssa-dce.h" -static unsigned int tree_ssa_phiopt_worker (bool, bool, bool); static bool two_value_replacement (basic_block, basic_block, edge, gphi *, tree, tree); static bool match_simplify_replacement (basic_block, basic_block, @@ -78,62 +77,6 @@ static hash_set * get_non_trapping (); static void hoist_adjacent_loads (basic_block, basic_block, basic_block, basic_block); -/* This pass tries to transform conditional stores into unconditional - ones, enabling further simplifications with the simpler then and else - blocks. In particular it replaces this: - - bb0: - if (cond) goto bb2; else goto bb1; - bb1: - *p = RHS; - bb2: - - with - - bb0: - if (cond) goto bb1; else goto bb2; - bb1: - condtmp' = *p; - bb2: - condtmp = PHI - *p = condtmp; - - This transformation can only be done under several constraints, - documented below. It also replaces: - - bb0: - if (cond) goto bb2; else goto bb1; - bb1: - *p = RHS1; - goto bb3; - bb2: - *p = RHS2; - bb3: - - with - - bb0: - if (cond) goto bb3; else goto bb1; - bb1: - bb3: - condtmp = PHI - *p = condtmp; */ - -static unsigned int -tree_ssa_cs_elim (void) -{ - unsigned todo; - /* ??? We are not interested in loop related info, but the following - will create it, ICEing as we didn't init loops with pre-headers. - An interfacing issue of find_data_references_in_bb. */ - loop_optimizer_init (LOOPS_NORMAL); - scev_initialize (); - todo = tree_ssa_phiopt_worker (true, false, false); - scev_finalize (); - loop_optimizer_finalize (); - return todo; -} - /* Return the singleton PHI in the SEQ of PHIs for edges E0 and E1. */ static gphi * @@ -4278,6 +4221,47 @@ make_pass_phiopt (gcc::context *ctxt) return new pass_phiopt (ctxt); } +/* This pass tries to transform conditional stores into unconditional + ones, enabling further simplifications with the simpler then and else + blocks. In particular it replaces this: + + bb0: + if (cond) goto bb2; else goto bb1; + bb1: + *p = RHS; + bb2: + + with + + bb0: + if (cond) goto bb1; else goto bb2; + bb1: + condtmp' = *p; + bb2: + condtmp = PHI + *p = condtmp; + + This transformation can only be done under several constraints, + documented below. It also replaces: + + bb0: + if (cond) goto bb2; else goto bb1; + bb1: + *p = RHS1; + goto bb3; + bb2: + *p = RHS2; + bb3: + + with + + bb0: + if (cond) goto bb3; else goto bb1; + bb1: + bb3: + condtmp = PHI + *p = condtmp; */ + namespace { const pass_data pass_data_cselim = @@ -4302,10 +4286,7 @@ public: /* opt_pass methods: */ bool gate (function *) final override { return flag_tree_cselim; } - unsigned int execute (function *) final override - { - return tree_ssa_cs_elim (); - } + unsigned int execute (function *) final override; }; // class pass_cselim @@ -4316,3 +4297,18 @@ make_pass_cselim (gcc::context *ctxt) { return new pass_cselim (ctxt); } + +unsigned int +pass_cselim::execute (function *) +{ + unsigned todo; + /* ??? We are not interested in loop related info, but the following + will create it, ICEing as we didn't init loops with pre-headers. + An interfacing issue of find_data_references_in_bb. 
*/ + loop_optimizer_init (LOOPS_NORMAL); + scev_initialize (); + todo = tree_ssa_phiopt_worker (true, false, false); + scev_finalize (); + loop_optimizer_finalize (); + return todo; +} -- cgit v1.1 From 2f7e7bfa3c6327793cdcdcb5c770b93cecd49bd0 Mon Sep 17 00:00:00 2001 From: Mark Harmstone Date: Tue, 18 Apr 2023 13:55:35 -0600 Subject: Add -gcodeview option gcc/ * common.opt (gcodeview): Add new option. * gcc.cc (driver_handle_option); Handle OPT_gcodeview. * opts.cc (command_handle_option): Similarly. * doc/invoke.texi: Add documentation for -gcodeview. --- gcc/common.opt | 4 ++++ gcc/doc/invoke.texi | 7 +++++++ gcc/gcc.cc | 4 ++++ gcc/opts.cc | 3 +++ 4 files changed, 18 insertions(+) (limited to 'gcc') diff --git a/gcc/common.opt b/gcc/common.opt index 862c474..a28ca13 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -3318,6 +3318,10 @@ gas-locview-support Common Driver Var(dwarf2out_as_locview_support) Assume assembler support for view in (DWARF2+) .loc directives. +gcodeview +Common Driver JoinedOrMissing +Generate debug information in CodeView format. + gcoff Common Driver WarnRemoved Does nothing. Preserved for backward compatibility. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index a38547f..aae6bc7 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -509,6 +509,7 @@ Objective-C and Objective-C++ Dialects}. -gstrict-dwarf -gno-strict-dwarf -gas-loc-support -gno-as-loc-support -gas-locview-support -gno-as-locview-support +-gcodeview @gol -gcolumn-info -gno-column-info -gdwarf32 -gdwarf64 -gstatement-frontiers -gno-statement-frontiers -gvariable-location-views -gno-variable-location-views @@ -11328,6 +11329,12 @@ at file-scope or global-scope only. Produce debugging information in Alpha/VMS debug format (if that is supported). This is the format used by DEBUG on Alpha/VMS systems. +@item -gcodeview +@opindex gcodeview +Produce debugging information in CodeView debug format (if that is +supported). This is the format used by Microsoft Visual C++ on +Windows. + @item -g@var{level} @itemx -ggdb@var{level} @itemx -gvms@var{level} diff --git a/gcc/gcc.cc b/gcc/gcc.cc index 16bb07f..39a44fa 100644 --- a/gcc/gcc.cc +++ b/gcc/gcc.cc @@ -4572,6 +4572,10 @@ driver_handle_option (struct gcc_options *opts, do_save = false; break; + case OPT_gcodeview: + add_infile ("--pdb=", "*"); + break; + default: /* Various driver options need no special processing at this point, having been handled in a prescan above or being diff --git a/gcc/opts.cc b/gcc/opts.cc index fb2e538..86b94d6 100644 --- a/gcc/opts.cc +++ b/gcc/opts.cc @@ -3134,6 +3134,9 @@ common_handle_option (struct gcc_options *opts, loc); break; + case OPT_gcodeview: + break; + case OPT_gbtf: set_debug_level (BTF_DEBUG, false, arg, opts, opts_set, loc); /* set the debug level to level 2, but if already at level 3, -- cgit v1.1 From 675b1a7f113adb1d737adaf78b4fd90be7a0ed1a Mon Sep 17 00:00:00 2001 From: Takayuki 'January June' Suwa Date: Tue, 18 Apr 2023 14:11:09 -0600 Subject: ifcvt.cc: Prevent excessive if-conversion for conditional moves gcc/ * ifcvt.cc (cond_move_process_if_block): Consider the result of targetm.noce_conversion_profitable_p() when replacing the original sequence with the converted one. 
--- gcc/ifcvt.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc index e4168d9..868eda9 100644 --- a/gcc/ifcvt.cc +++ b/gcc/ifcvt.cc @@ -4353,7 +4353,7 @@ cond_move_process_if_block (struct noce_if_info *if_info) goto done; } seq = end_ifcvt_sequence (if_info); - if (!seq) + if (!seq || !targetm.noce_conversion_profitable_p (seq, if_info)) goto done; loc_insn = first_active_insn (then_bb); -- cgit v1.1 From 3eeb4801d6f45f6250fc77a6d3ab4e0115f8cfdd Mon Sep 17 00:00:00 2001 From: Jason Merrill Date: Tue, 18 Apr 2023 16:28:24 -0400 Subject: doc: remove stray @gol @gol was removed in r13-6778, new doc additions can't use it. gcc/ChangeLog: * doc/invoke.texi: Remove stray @gol. --- gcc/doc/invoke.texi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index aae6bc7..57fb170 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -509,7 +509,7 @@ Objective-C and Objective-C++ Dialects}. -gstrict-dwarf -gno-strict-dwarf -gas-loc-support -gno-as-loc-support -gas-locview-support -gno-as-locview-support --gcodeview @gol +-gcodeview -gcolumn-info -gno-column-info -gdwarf32 -gdwarf64 -gstatement-frontiers -gno-statement-frontiers -gvariable-location-views -gno-variable-location-views -- cgit v1.1 From d5cd3eada416a89e6478b1b1c874115574eb19a8 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Wed, 19 Apr 2023 00:17:36 +0000 Subject: Daily bump. --- gcc/ChangeLog | 276 ++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/ada/ChangeLog | 4 + gcc/testsuite/ChangeLog | 58 ++++++++++ 4 files changed, 339 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ce5cb67..dac0d00 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,279 @@ +2023-04-18 Jason Merrill + + * doc/invoke.texi: Remove stray @gol. + +2023-04-18 Takayuki 'January June' Suwa + + * ifcvt.cc (cond_move_process_if_block): Consider the result of + targetm.noce_conversion_profitable_p() when replacing the original + sequence with the converted one. + +2023-04-18 Mark Harmstone + + * common.opt (gcodeview): Add new option. + * gcc.cc (driver_handle_option); Handle OPT_gcodeview. + * opts.cc (command_handle_option): Similarly. + * doc/invoke.texi: Add documentation for -gcodeview. + +2023-04-18 Andrew Pinski + + * tree-ssa-phiopt.cc (tree_ssa_phiopt_worker): Remove declaration. + (make_pass_phiopt): Make execute out of line. + (tree_ssa_cs_elim): Move code into ... + (pass_cselim::execute): here. + +2023-04-18 Sam James + + * system.h: Drop unused INCLUDE_PTHREAD_H. + +2023-04-18 Kevin Lee + + * tree-vect-data-refs.cc (vect_grouped_store_supported): Add new + condition. + +2023-04-18 Sinan Lin + + * config/riscv/bitmanip.md (rotr3 expander): Enable for ZBKB. + (bswapdi2, bswapsi2): Similarly. + +2023-04-18 Uros Bizjak + + PR target/94908 + * config/i386/i386-builtin.def (__builtin_ia32_insertps128): + Use CODE_FOR_sse4_1_insertps_v4sf. + * config/i386/i386-expand.cc (expand_vec_perm_insertps): New. + (expand_vec_perm_1): Call expand_vec_per_insertps. + * config/i386/i386.md ("unspec"): Declare UNSPEC_INSERTPS here. + * config/i386/mmx.md (mmxscalarmode): New mode attribute. + (@sse4_1_insertps_): New insn pattern. + * config/i386/sse.md (@sse4_1_insertps_): Macroize insn + pattern from sse4_1_insertps using VI4F_128 mode iterator. + +2023-04-18 Aldy Hernandez + + * value-range.cc (gt_ggc_mx): New. + (gt_pch_nx): New. 
+ * value-range.h (class vrange): Add GTY marker. + (class frange): Same. + (gt_ggc_mx): Remove. + (gt_pch_nx): Remove. + +2023-04-18 Victor L. Do Nascimento + + * lra-constraints.cc (constraint_unique): New. + (process_address_1): Apply constraint_unique test. + * recog.cc (constrain_operands): Allow relaxed memory + constaints. + +2023-04-18 Kito Cheng + + * doc/extend.texi (Target Builtins): Add RISC-V Vector + Intrinsics. + (RISC-V Vector Intrinsics): Document GCC implemented which + version of RISC-V vector intrinsics and its reference. + +2023-04-18 Richard Biener + + PR middle-end/108786 + * bitmap.h (bitmap_clear_first_set_bit): New. + * bitmap.cc (bitmap_first_set_bit_worker): Rename from + bitmap_first_set_bit and add optional clearing of the bit. + (bitmap_first_set_bit): Wrap bitmap_first_set_bit_worker. + (bitmap_clear_first_set_bit): Likewise. + * df-core.cc (df_worklist_dataflow_doublequeue): Use + bitmap_clear_first_set_bit. + * graphite-scop-detection.cc (scop_detection::merge_sese): + Likewise. + * sanopt.cc (sanitize_asan_mark_unpoison): Likewise. + (sanitize_asan_mark_poison): Likewise. + * tree-cfgcleanup.cc (cleanup_tree_cfg_noloop): Likewise. + * tree-into-ssa.cc (rewrite_blocks): Likewise. + * tree-ssa-dce.cc (simple_dce_from_worklist): Likewise. + * tree-ssa-sccvn.cc (do_rpo_vn_1): Likewise. + +2023-04-18 Richard Biener + + * tree-ssa-structalias.cc (dump_sa_stats): Split out from... + (dump_sa_points_to_info): ... this function. + (compute_points_to_sets): Guard large dumps with TDF_DETAILS, + and call dump_sa_stats guarded with TDF_STATS. + (ipa_pta_execute): Likewise. + (compute_may_aliases): Guard dump_alias_info with + TDF_DETAILS|TDF_ALIAS. + +2023-04-18 Andrew Pinski + + * tree-ssa-phiopt.cc (gimple_simplify_phiopt): Dump + the expression that is being tried when TDF_FOLDING + is true. + (phiopt_worker::match_simplify_replacement): Dump + the sequence which was created by gimple_simplify_phiopt + when TDF_FOLDING is true. + +2023-04-18 Andrew Pinski + + * tree-ssa-phiopt.cc (match_simplify_replacement): + Simplify code that does the movement slightly. + +2023-04-18 Kyrylo Tkachov + + * config/aarch64/aarch64.md (@aarch64_rev16): Change to + define_expand. + (rev162): Rename to... + (aarch64_rev162_alt1): ... This. + (rev162_alt): Rename to... + (*aarch64_rev162_alt2): ... This. + +2023-04-18 Aldy Hernandez + + * emit-rtl.cc (init_emit_once): Initialize dconstm0. + * gimple-range-op.cc (class cfn_signbit): Remove dconstm0 + declaration. + * range-op-float.cc (zero_range): Use dconstm0. + (zero_to_inf_range): Same. + * real.h (dconstm0): New. + * value-range.cc (frange::flush_denormals_to_zero): Use dconstm0. + (frange::set_zero): Do not declare dconstm0. + +2023-04-18 Richard Biener + + * system.h (class auto_mpz): New, + * realmpfr.h (class auto_mpfr): Likewise. + * fold-const-call.cc (do_mpfr_arg1): Use auto_mpfr. + (do_mpfr_arg2): Likewise. + * tree-ssa-loop-niter.cc (bound_difference): Use auto_mpz; + +2023-04-18 Kyrylo Tkachov + + * config/aarch64/aarch64-builtins.cc (aarch64_init_simd_intrinsics): Take + builtin flags from intrinsic data rather than hardcoded FLAG_AUTO_FP. + +2023-04-18 Aldy Hernandez + + * value-range.cc (frange::operator==): Adjust for NAN. + (range_tests_nan): Remove some NAN tests. + +2023-04-18 Aldy Hernandez + + * inchash.cc (hash::add_real_value): New. + * inchash.h (class hash): Add add_real_value. + * value-range.cc (add_vrange): New. + * value-range.h (inchash::add_vrange): New. 
+ +2023-04-18 Richard Biener + + PR tree-optimization/109539 + * gimple-ssa-warn-access.cc (pass_waccess::check_pointer_uses): + Re-implement pointer relatedness for PHIs. + +2023-04-18 Andrew Stubbs + + * config/gcn/gcn-valu.md (SV_SFDF): New iterator. + (SV_FP): New iterator. + (scalar_mode, SCALAR_MODE): Add identity mappings for scalar modes. + (recip2): Unify the two patterns using SV_FP. + (div_scale): New insn. + (div_fmas): New insn. + (div_fixup): New insn. + (div3): Unify the two expanders and rewrite using hardfp. + * config/gcn/gcn.cc (gcn_md_reorg): Support "vccwait" attribute. + * config/gcn/gcn.md (unspec): Add UNSPEC_DIV_SCALE, UNSPEC_DIV_FMAS, + and UNSPEC_DIV_FIXUP. + (vccwait): New attribute. + +2023-04-18 Kyrylo Tkachov + + * config/aarch64/aarch64.cc (aarch64_validate_mcpu): Add hint to use -march + if the argument matches that. + +2023-04-18 Kyrylo Tkachov + + * config/aarch64/atomics.md + (*aarch64_atomic_load_rcpc_zext): + Use SD_HSDI for destination mode iterator. + +2023-04-18 Jin Ma + + * common/config/riscv/riscv-common.cc (multi_letter_subset_rank): Swap the order + of z-extensions and s-extensions. + (riscv_subset_list::parse): Likewise. + +2023-04-18 Jakub Jelinek + + PR tree-optimization/109240 + * match.pd (fneg/fadd): Rewrite such that it handles both plus as + first vec_perm operand and minus as second using fneg/fadd and + minus as first vec_perm operand and plus as second using fneg/fsub. + +2023-04-18 Aldy Hernandez + + * data-streamer.cc (bp_pack_real_value): New. + (bp_unpack_real_value): New. + * data-streamer.h (bp_pack_real_value): New. + (bp_unpack_real_value): New. + * tree-streamer-in.cc (unpack_ts_real_cst_value_fields): Use + bp_unpack_real_value. + * tree-streamer-out.cc (pack_ts_real_cst_value_fields): Use + bp_pack_real_value. + +2023-04-18 Aldy Hernandez + + * wide-int.h (WIDE_INT_MAX_HWIS): New. + (class fixed_wide_int_storage): Use it. + (trailing_wide_ints ::set_precision): Use it. + (trailing_wide_ints ::extra_size): Use it. + +2023-04-18 Xi Ruoyao + + * config/loongarch/loongarch-protos.h + (loongarch_addu16i_imm12_operand_p): New function prototype. + (loongarch_split_plus_constant): Likewise. + * config/loongarch/loongarch.cc + (loongarch_addu16i_imm12_operand_p): New function. + (loongarch_split_plus_constant): Likewise. + * config/loongarch/loongarch.h (ADDU16I_OPERAND): New macro. + (DUAL_IMM12_OPERAND): Likewise. + (DUAL_ADDU16I_OPERAND): Likewise. + * config/loongarch/constraints.md (La, Lb, Lc, Ld, Le): New + constraint. + * config/loongarch/predicates.md (const_dual_imm12_operand): New + predicate. + (const_addu16i_operand): Likewise. + (const_addu16i_imm12_di_operand): Likewise. + (const_addu16i_imm12_si_operand): Likewise. + (plus_di_operand): Likewise. + (plus_si_operand): Likewise. + (plus_si_extend_operand): Likewise. + * config/loongarch/loongarch.md (add3): Convert to + define_insn_and_split. Use plus__operand predicate + instead of arith_operand. Add alternatives for La, Lb, Lc, Ld, + and Le constraints. + (*addsi3_extended): Convert to define_insn_and_split. Use + plus_si_extend_operand instead of arith_operand. Add + alternatives for La and Le alternatives. + +2023-04-18 Aldy Hernandez + + * value-range.h (Value_Range::Value_Range): New. + (Value_Range::contains_p): New. + +2023-04-18 Aldy Hernandez + + * value-range.h (class vrange): Make m_discriminator const. + (class irange): Make m_max_ranges const. Adjust constructors + accordingly. + (class unsupported_range): Construct vrange appropriately. 
+ (class frange): Same. + +2023-04-18 Lulu Cheng + + * config/loongarch/loongarch.h (LOGICAL_OP_NON_SHORT_CIRCUIT): Remove the macro + definition. + +2023-04-18 Lulu Cheng + + * doc/extend.texi: Add section for LoongArch Base Built-in functions. + 2023-04-18 Fei Gao * config/riscv/riscv.cc (riscv_first_stack_step): Make codes more diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 378aed5..59726e5 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20230418 +20230419 diff --git a/gcc/ada/ChangeLog b/gcc/ada/ChangeLog index c6aa33160..5d891dd 100644 --- a/gcc/ada/ChangeLog +++ b/gcc/ada/ChangeLog @@ -1,3 +1,7 @@ +2023-04-18 Jin Ma + + * gcc-interface/utils.cc (unchecked_convert): Fixed typo. + 2023-04-17 Martin Liska * gnatvsn.ads: Bump Library_Version to 14. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ac704d3..84c6c5a 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,61 @@ +2023-04-18 Uros Bizjak + + PR target/94908 + * gcc.target/i386/pr94908.c: New test. + * gcc.target/i386/sse4_1-insertps-5.c: New test. + * gcc.target/i386/vperm-v4sf-2-sse4.c: New test. + +2023-04-18 Richard Biener + + * gcc.dg/ipa/ipa-pta-16.c: Use -details for dump. + * gcc.dg/tm/alias-1.c: Likewise. + * gcc.dg/tm/alias-2.c: Likewise. + * gcc.dg/torture/ipa-pta-1.c: Likewise. + * gcc.dg/torture/pr39074-2.c: Likewise. + * gcc.dg/torture/pr39074.c: Likewise. + * gcc.dg/torture/pta-callused-1.c: Likewise. + * gcc.dg/torture/pta-escape-1.c: Likewise. + * gcc.dg/torture/pta-ptrarith-1.c: Likewise. + * gcc.dg/torture/pta-ptrarith-2.c: Likewise. + * gcc.dg/torture/pta-ptrarith-3.c: Likewise. + * gcc.dg/torture/pta-structcopy-1.c: Likewise. + * gcc.dg/torture/ssa-pta-fn-1.c: Likewise. + * gcc.dg/tree-ssa/alias-19.c: Likewise. + * gcc.dg/tree-ssa/pta-callused.c: Likewise. + * gcc.dg/tree-ssa/pta-fp.c: Likewise. + * gcc.dg/tree-ssa/pta-ptrarith-1.c: Likewise. + * gcc.dg/tree-ssa/pta-ptrarith-2.c: Likewise. + +2023-04-18 Andrew Stubbs + + * gcc.target/gcn/fpdiv.c: Remove the -ffast-math requirement. + +2023-04-18 Kyrylo Tkachov + + * gcc.target/aarch64/spellcheck_11.c: New test. + +2023-04-18 Kyrylo Tkachov + + * gcc.target/aarch64/ldapr-zext.c: Add test for u8 to u16 + extension. + +2023-04-18 Jin Ma + + * gcc.target/riscv/arch-5.c: Likewise. + +2023-04-18 Jakub Jelinek + + PR tree-optimization/109240 + * gcc.target/aarch64/simd/addsub_2.c: New test. + * gcc.target/aarch64/sve/addsub_2.c: New test. + +2023-04-18 Xi Ruoyao + + * gcc.target/loongarch/add-const.c: New test. + * gcc.target/loongarch/stack-check-cfa-1.c: Adjust for stack + frame size change. + * gcc.target/loongarch/stack-check-cfa-2.c: Likewise. + 2023-04-17 Patrick Palka PR c++/109531 -- cgit v1.1 From 727be65ec40e119a7c864bfaa5d6a73547863c06 Mon Sep 17 00:00:00 2001 From: liuhongt Date: Mon, 3 Apr 2023 10:54:55 +0800 Subject: Check hard_regno_mode_ok before setting lowest memory move cost for the mode with different reg classes. There's a potential performance issue when backend returns some unreasonable value for the mode which can be never be allocate with reg class. gcc/ChangeLog: PR rtl-optimization/109351 * ira.cc (setup_class_subset_and_memory_move_costs): Check hard_regno_mode_ok before setting lowest memory move cost for the mode with different reg classes. 
--- gcc/ira.cc | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'gcc') diff --git a/gcc/ira.cc b/gcc/ira.cc index 6c7f490..02dea5d 100644 --- a/gcc/ira.cc +++ b/gcc/ira.cc @@ -588,6 +588,10 @@ setup_class_subset_and_memory_move_costs (void) /* Costs for NO_REGS are used in cost calculation on the 1st pass when the preferred register classes are not known yet. In this case we take the best scenario. */ + if (!targetm.hard_regno_mode_ok (ira_class_hard_regs[cl][0], + (machine_mode) mode)) + continue; + if (ira_memory_move_cost[mode][NO_REGS][0] > ira_memory_move_cost[mode][cl][0]) ira_max_memory_move_cost[mode][NO_REGS][0] -- cgit v1.1 From cbddd574a78529b9176eb28253c20a335daefbb4 Mon Sep 17 00:00:00 2001 From: Christophe Lyon Date: Mon, 17 Apr 2023 19:23:54 +0200 Subject: install.texi: Document --enable-decimal-float for AArch64 When I committed the patches to enable support for DFP on AArch64, I forgot to update the installation documentation. This patch adds AArch64 as needed (same as i386/x86_64). 2023-04-17 Christophe Lyon gcc/ * doc/install.texi (enable-decimal-float): Add AArch64. --- gcc/doc/install.texi | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'gcc') diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi index 15aef13..b13bc12 100644 --- a/gcc/doc/install.texi +++ b/gcc/doc/install.texi @@ -2178,13 +2178,14 @@ forward to maintain the port. @itemx --enable-decimal-float=dpd @itemx --disable-decimal-float Enable (or disable) support for the C decimal floating point extension -that is in the IEEE 754-2008 standard. This is enabled by default only -on PowerPC, i386, and x86_64 GNU/Linux systems. Other systems may also -support it, but require the user to specifically enable it. You can -optionally control which decimal floating point format is used (either -@samp{bid} or @samp{dpd}). The @samp{bid} (binary integer decimal) -format is default on i386 and x86_64 systems, and the @samp{dpd} -(densely packed decimal) format is default on PowerPC systems. +that is in the IEEE 754-2008 standard. This is enabled by default +only on AArch64, PowerPC, i386, and x86_64 GNU/Linux systems. Other +systems may also support it, but require the user to specifically +enable it. You can optionally control which decimal floating point +format is used (either @samp{bid} or @samp{dpd}). The @samp{bid} +(binary integer decimal) format is default on AArch64, i386 and x86_64 +systems, and the @samp{dpd} (densely packed decimal) format is default +on PowerPC systems. @item --enable-fixed-point @itemx --disable-fixed-point -- cgit v1.1 From 794ffdb0fb6312ce07af0bfc797bef9f4cff4c61 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Wed, 19 Apr 2023 10:01:04 +0200 Subject: testsuite: Fix up pr109524.C for -std=c++23 [PR109524] This testcase was reduced such that it isn't valid C++23, so with my usual testing with GXX_TESTSUITE_STDS=98,11,14,17,20,2b it fails: FAIL: g++.dg/pr109524.C -std=gnu++2b (test for excess errors) .../gcc/testsuite/g++.dg/pr109524.C: In function 'nn hh(nn)': .../gcc/testsuite/g++.dg/pr109524.C:35:12: error: cannot bind non-const lvalue reference of type 'nn&' to an rvalue of type 'nn' .../gcc/testsuite/g++.dg/pr109524.C:17:6: note: initializing argument 1 of 'nn::nn(nn&)' The following patch fixes that and I've verified it doesn't change anything on what the test was testing, it still ICEs in r13-7198 and passes in r13-7203, now in all language modes (except for 98 where it is intentionally UNSUPPORTED). 
2023-04-19 Jakub Jelinek PR tree-optimization/109524 * g++.dg/pr109524.C (nn::nn): Change argument type from nn & to const nn &. --- gcc/testsuite/g++.dg/pr109524.C | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/g++.dg/pr109524.C b/gcc/testsuite/g++.dg/pr109524.C index a560839..644c757 100644 --- a/gcc/testsuite/g++.dg/pr109524.C +++ b/gcc/testsuite/g++.dg/pr109524.C @@ -14,7 +14,7 @@ struct _Guard { }; struct nn { int * _M_dataplus; - nn(nn &) + nn(const nn &) { f(); _Guard __guard; -- cgit v1.1 From 8f81100115f68b37fb2723e987c14a3185d1f47d Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 22 Mar 2023 10:05:19 +0100 Subject: rtl-optimization/109237 - speedup bb_is_just_return For the testcase bb_is_just_return is on top of the profile, changing it to walk BB insns backwards puts it off the profile. That's because in the forward walk you have to process possibly many debug insns but in a backward walk you very likely run into control insns first. PR rtl-optimization/109237 * cfgcleanup.cc (bb_is_just_return): Walk insns backwards. --- gcc/cfgcleanup.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/cfgcleanup.cc b/gcc/cfgcleanup.cc index 194e0e5..78f59e9 100644 --- a/gcc/cfgcleanup.cc +++ b/gcc/cfgcleanup.cc @@ -2608,14 +2608,14 @@ bb_is_just_return (basic_block bb, rtx_insn **ret, rtx_insn **use) if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun)) return false; - FOR_BB_INSNS (bb, insn) + FOR_BB_INSNS_REVERSE (bb, insn) if (NONDEBUG_INSN_P (insn)) { rtx pat = PATTERN (insn); if (!*ret && ANY_RETURN_P (pat)) *ret = insn; - else if (!*ret && !*use && GET_CODE (pat) == USE + else if (*ret && !*use && GET_CODE (pat) == USE && REG_P (XEXP (pat, 0)) && REG_FUNCTION_VALUE_P (XEXP (pat, 0))) *use = insn; -- cgit v1.1 From 675ac8822b51a39f6a66a44858d7c31ece8700f2 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 22 Mar 2023 09:29:49 +0100 Subject: rtl-optimization/109237 - quadraticness in delete_trivially_dead_insns The following addresses quadraticness in processing debug insns in delete_trivially_dead_insns and insn_live_p by using TREE_VISITED on the INSN_VAR_LOCATION_DECL to indicate a later debug bind with the same decl and no intervening real insn or debug marker. That gets rid of the NEXT_INSN walk in insn_live_p in favor of first clearing TREE_VISITED in the first loop over insn and the book-keeping of decls we set the bit since we need to clear them when visiting a real or debug marker insn. That improves the time spent in delete_trivially_dead_insns from 10.6s to 2.2s for the testcase. PR rtl-optimization/109237 * cse.cc (insn_live_p): Remove NEXT_INSN walk, instead check TREE_VISITED on INSN_VAR_LOCATION_DECL. (delete_trivially_dead_insns): Maintain TREE_VISITED on active debug bind INSN_VAR_LOCATION_DECL. --- gcc/cse.cc | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) (limited to 'gcc') diff --git a/gcc/cse.cc b/gcc/cse.cc index 8fbda4e..204047b 100644 --- a/gcc/cse.cc +++ b/gcc/cse.cc @@ -6906,22 +6906,12 @@ insn_live_p (rtx_insn *insn, int *counts) } else if (DEBUG_INSN_P (insn)) { - rtx_insn *next; - if (DEBUG_MARKER_INSN_P (insn)) return true; - for (next = NEXT_INSN (insn); next; next = NEXT_INSN (next)) - if (NOTE_P (next)) - continue; - else if (!DEBUG_INSN_P (next)) - return true; - /* If we find an inspection point, such as a debug begin stmt, - we want to keep the earlier debug insn. 
*/ - else if (DEBUG_MARKER_INSN_P (next)) - return true; - else if (INSN_VAR_LOCATION_DECL (insn) == INSN_VAR_LOCATION_DECL (next)) - return false; + if (DEBUG_BIND_INSN_P (insn) + && TREE_VISITED (INSN_VAR_LOCATION_DECL (insn))) + return false; return true; } @@ -7007,8 +6997,11 @@ delete_trivially_dead_insns (rtx_insn *insns, int nreg) counts = XCNEWVEC (int, nreg * 3); for (insn = insns; insn; insn = NEXT_INSN (insn)) if (DEBUG_BIND_INSN_P (insn)) - count_reg_usage (INSN_VAR_LOCATION_LOC (insn), counts + nreg, - NULL_RTX, 1); + { + count_reg_usage (INSN_VAR_LOCATION_LOC (insn), counts + nreg, + NULL_RTX, 1); + TREE_VISITED (INSN_VAR_LOCATION_DECL (insn)) = 0; + } else if (INSN_P (insn)) { count_reg_usage (insn, counts, NULL_RTX, 1); @@ -7048,6 +7041,7 @@ delete_trivially_dead_insns (rtx_insn *insns, int nreg) the setter. Then go through DEBUG_INSNs and if a DEBUG_EXPR has been created for the unused register, replace it with the DEBUG_EXPR, otherwise reset the DEBUG_INSN. */ + auto_vec later_debug_set_vars; for (insn = get_last_insn (); insn; insn = prev) { int live_insn = 0; @@ -7110,6 +7104,21 @@ delete_trivially_dead_insns (rtx_insn *insns, int nreg) } cse_cfg_altered |= delete_insn_and_edges (insn); } + else + { + if (!DEBUG_INSN_P (insn) || DEBUG_MARKER_INSN_P (insn)) + { + for (tree var : later_debug_set_vars) + TREE_VISITED (var) = 0; + later_debug_set_vars.truncate (0); + } + else if (DEBUG_BIND_INSN_P (insn) + && !TREE_VISITED (INSN_VAR_LOCATION_DECL (insn))) + { + later_debug_set_vars.safe_push (INSN_VAR_LOCATION_DECL (insn)); + TREE_VISITED (INSN_VAR_LOCATION_DECL (insn)) = 1; + } + } } if (MAY_HAVE_DEBUG_BIND_INSNS) -- cgit v1.1 From 136330bf637b50a4f10ace017a4316541386b9c0 Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Wed, 19 Apr 2023 09:34:40 +0100 Subject: aarch64: PR target/108840 Simplify register shift RTX costs and eliminate shift amount masking In this PR we fail to eliminate explicit &31 operations for variable shifts such as in: void bar (int x[3], int y) { x[0] <<= (y & 31); x[1] <<= (y & 31); x[2] <<= (y & 31); } This is rejected by RTX costs that end up giving too high a cost for: (set (reg:SI 96) (ashift:SI (reg:SI 98) (subreg:QI (and:SI (reg:SI 99) (const_int 31 [0x1f])) 0))) There is code to handle the AND-31 case in rtx costs, but it gets confused by the subreg. It's easy enough to fix by looking inside the subreg when costing the expression. While doing that I noticed that the ASHIFT case and the other shift-like cases are almost identical and we should just merge them. This code will only be used for valid insns anyway, so the code after this patch should do the Right Thing (TM) for all such shift cases. With this patch there are no more "and wn, wn, 31" instructions left in the testcase. Bootstrapped and tested on aarch64-none-linux-gnu. PR target/108840 gcc/ChangeLog: * config/aarch64/aarch64.cc (aarch64_rtx_costs): Merge ASHIFT and ROTATE, ROTATERT, LSHIFTRT, ASHIFTRT cases. Handle subregs in op1. gcc/testsuite/ChangeLog: * gcc.target/aarch64/pr108840.c: New test. 
--- gcc/config/aarch64/aarch64.cc | 63 +++++------------------------ gcc/testsuite/gcc.target/aarch64/pr108840.c | 38 +++++++++++++++++ 2 files changed, 49 insertions(+), 52 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/pr108840.c (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index adbdaaf..0d7470c 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -14678,6 +14678,10 @@ cost_plus: } return false; + case ROTATE: + case ROTATERT: + case LSHIFTRT: + case ASHIFTRT: case ASHIFT: op0 = XEXP (x, 0); op1 = XEXP (x, 1); @@ -14693,8 +14697,8 @@ cost_plus: } else { - /* LSL (immediate), UBMF, UBFIZ and friends. These are all - aliases. */ + /* LSL (immediate), ASR (immediate), UBMF, UBFIZ and friends. + These are all aliases. */ *cost += extra_cost->alu.shift; } } @@ -14718,9 +14722,13 @@ cost_plus: else { if (speed) - /* LSLV. */ + /* LSLV, ASRV. */ *cost += extra_cost->alu.shift_reg; + /* The register shift amount may be in a shorter mode expressed + as a lowpart SUBREG. For costing purposes just look inside. */ + if (SUBREG_P (op1) && subreg_lowpart_p (op1)) + op1 = SUBREG_REG (op1); if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0)) && CONST_INT_P (XEXP (op1, 1)) && known_eq (INTVAL (XEXP (op1, 1)), @@ -14735,55 +14743,6 @@ cost_plus: return false; /* All arguments need to be in registers. */ } - case ROTATE: - case ROTATERT: - case LSHIFTRT: - case ASHIFTRT: - op0 = XEXP (x, 0); - op1 = XEXP (x, 1); - - if (CONST_INT_P (op1)) - { - /* ASR (immediate) and friends. */ - if (speed) - { - if (VECTOR_MODE_P (mode)) - *cost += extra_cost->vect.alu; - else - *cost += extra_cost->alu.shift; - } - - *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed); - return true; - } - else - { - if (VECTOR_MODE_P (mode)) - { - if (speed) - /* Vector shift (register). */ - *cost += extra_cost->vect.alu; - } - else - { - if (speed) - /* ASR (register) and friends. */ - *cost += extra_cost->alu.shift_reg; - - if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0)) - && CONST_INT_P (XEXP (op1, 1)) - && known_eq (INTVAL (XEXP (op1, 1)), - GET_MODE_BITSIZE (mode) - 1)) - { - *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed); - /* We already demanded XEXP (op1, 0) to be REG_P, so - don't recurse into it. */ - return true; - } - } - return false; /* All arguments need to be in registers. */ - } - case SYMBOL_REF: if (aarch64_cmodel == AARCH64_CMODEL_LARGE diff --git a/gcc/testsuite/gcc.target/aarch64/pr108840.c b/gcc/testsuite/gcc.target/aarch64/pr108840.c new file mode 100644 index 0000000..804c1cd --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr108840.c @@ -0,0 +1,38 @@ +/* PR target/108840. Check that the explicit &31 is eliminated. */ +/* { dg-do compile } */ +/* { dg-options "-O" } */ + +int +foo (int x, int y) +{ + return x << (y & 31); +} + +void +bar (int x[3], int y) +{ + x[0] <<= (y & 31); + x[1] <<= (y & 31); + x[2] <<= (y & 31); +} + +void +baz (int x[3], int y) +{ + y &= 31; + x[0] <<= y; + x[1] <<= y; + x[2] <<= y; +} + +void corge (int, int, int); + +void +qux (int x, int y, int z, int n) +{ + n &= 31; + corge (x << n, y << n, z >> n); +} + +/* { dg-final { scan-assembler-not {and\tw[0-9]+, w[0-9]+, 31} } } */ + -- cgit v1.1 From 2c7bf8036dfe2f603f1c135dabf6415d8d28051b Mon Sep 17 00:00:00 2001 From: Prathamesh Kulkarni Date: Wed, 19 Apr 2023 14:08:40 +0530 Subject: [aarch64] Use wzr/xzr for assigning 0 to vector element. 
gcc/ChangeLog: * config/aarch64/aarch64-simd.md (aarch64_simd_vec_set_zero): New pattern. gcc/testsuite/ChangeLog: * gcc.target/aarch64/vec-set-zero.c: New test. --- gcc/config/aarch64/aarch64-simd.md | 14 +++++++++ gcc/testsuite/gcc.target/aarch64/vec-set-zero.c | 40 +++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/vec-set-zero.c (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index b63c1fe..de2b738 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1083,6 +1083,20 @@ [(set_attr "type" "neon_ins, neon_from_gp, neon_load1_one_lane")] ) +(define_insn "aarch64_simd_vec_set_zero" + [(set (match_operand:VALL_F16 0 "register_operand" "=w") + (vec_merge:VALL_F16 + (match_operand:VALL_F16 1 "aarch64_simd_imm_zero" "") + (match_operand:VALL_F16 3 "register_operand" "0") + (match_operand:SI 2 "immediate_operand" "i")))] + "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0" + { + int elt = ENDIAN_LANE_N (, exact_log2 (INTVAL (operands[2]))); + operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt); + return "ins\\t%0.[%p2], zr"; + } +) + (define_insn "@aarch64_simd_vec_copy_lane" [(set (match_operand:VALL_F16 0 "register_operand" "=w") (vec_merge:VALL_F16 diff --git a/gcc/testsuite/gcc.target/aarch64/vec-set-zero.c b/gcc/testsuite/gcc.target/aarch64/vec-set-zero.c new file mode 100644 index 0000000..b34b902c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vec-set-zero.c @@ -0,0 +1,40 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +#include "arm_neon.h" + +#define FOO(type) \ +type f_##type(type v) \ +{ \ + v[1] = 0; \ + return v; \ +} + +FOO(int8x8_t) +FOO(int16x4_t) +FOO(int32x2_t) + +FOO(int8x16_t) +FOO(int16x8_t) +FOO(int32x4_t) +FOO(int64x2_t) + +FOO(float16x4_t) +FOO(float32x2_t) + +FOO(float16x8_t) +FOO(float32x4_t) +FOO(float64x2_t) + +/* { dg-final { scan-assembler-times {ins\tv[0-9]+\.b\[1\], wzr} 2 { target aarch64_little_endian } } } */ +/* { dg-final { scan-assembler-times {ins\tv[0-9]+\.h\[1\], wzr} 4 { target aarch64_little_endian } } } */ +/* { dg-final { scan-assembler-times {ins\tv[0-9]+\.s\[1\], wzr} 4 { target aarch64_little_endian } } } */ +/* { dg-final { scan-assembler-times {ins\tv[0-9]+\.d\[1\], xzr} 2 { target aarch64_little_endian } } } */ + +/* { dg-final { scan-assembler-times {ins\tv[0-9]+\.b\[6\], wzr} 1 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {ins\tv[0-9]+\.b\[14\], wzr} 1 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {ins\tv[0-9]+\.h\[2\], wzr} 2 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {ins\tv[0-9]+\.h\[6\], wzr} 2 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {ins\tv[0-9]+\.s\[0\], wzr} 2 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {ins\tv[0-9]+\.s\[2\], wzr} 2 { target aarch64_big_endian } } } */ +/* { dg-final { scan-assembler-times {ins\tv[0-9]+\.d\[0\], xzr} 2 { target aarch64_big_endian } } } */ -- cgit v1.1 From 76f44fbfea1f11e53d4b7e83f0debd029c94a1b3 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Wed, 19 Apr 2023 11:13:11 +0200 Subject: dse: Use SUBREG_REG for copy_to_mode_reg in DSE replace_read for WORD_REGISTER_OPERATIONS targets [PR109040] While we've agreed this is not the right fix for the PR109040 bug, the patch clearly improves generated code (at least on the testcase from the PR), so I'd like to propose this as 
optimization heuristics improvement for GCC 14. 2023-04-19 Jakub Jelinek PR target/109040 * dse.cc (replace_read): If read_reg is a SUBREG of a word mode REG, for WORD_REGISTER_OPERATIONS copy SUBREG_REG of it into a new REG rather than the SUBREG. --- gcc/dse.cc | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/dse.cc b/gcc/dse.cc index a2db8d1c..802b949 100644 --- a/gcc/dse.cc +++ b/gcc/dse.cc @@ -2012,7 +2012,19 @@ replace_read (store_info *store_info, insn_info_t store_insn, } /* Force the value into a new register so that it won't be clobbered between the store and the load. */ - read_reg = copy_to_mode_reg (read_mode, read_reg); + if (WORD_REGISTER_OPERATIONS + && GET_CODE (read_reg) == SUBREG + && REG_P (SUBREG_REG (read_reg)) + && GET_MODE (SUBREG_REG (read_reg)) == word_mode) + { + /* For WORD_REGISTER_OPERATIONS with subreg of word_mode register + force SUBREG_REG into a new register rather than the SUBREG. */ + rtx r = copy_to_mode_reg (word_mode, SUBREG_REG (read_reg)); + read_reg = shallow_copy_rtx (read_reg); + SUBREG_REG (read_reg) = r; + } + else + read_reg = copy_to_mode_reg (read_mode, read_reg); insns = get_insns (); end_sequence (); -- cgit v1.1 From ade0a1ee5c6707b950ba284adcfed0514866c12d Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Wed, 19 Apr 2023 11:14:23 +0200 Subject: tree-vect-patterns: Improve __builtin_{clz,ctz,ffs}ll vectorization [PR109011] For __builtin_popcountll tree-vect-patterns.cc has vect_recog_popcount_pattern, which improves the vectorized code. Without that the vectorization is always multi-type vectorization in the loop (at least int and long long types) where we emit two .POPCOUNT calls with long long arguments and int return value and then widen to long long, so effectively after vectorization do the V?DImode -> V?DImode popcount twice, then pack the result into V?SImode and immediately unpack. The following patch extends that handling to __builtin_{clz,ctz,ffs}ll builtins as well (as long as there is an optab for them; more to come laster). x86 can do __builtin_popcountll with -mavx512vpopcntdq, __builtin_clzll with -mavx512cd, ppc can do __builtin_popcountll and __builtin_clzll with -mpower8-vector and __builtin_ctzll with -mpower9-vector, s390 can do __builtin_{popcount,clz,ctz}ll with -march=z13 -mzarch (i.e. VX). 2023-04-19 Jakub Jelinek PR tree-optimization/109011 * tree-vect-patterns.cc (vect_recog_popcount_pattern): Rename to ... (vect_recog_popcount_clz_ctz_ffs_pattern): ... this. Handle also CLZ, CTZ and FFS. Remove vargs variable, use gimple_build_call_internal rather than gimple_build_call_internal_vec. (vect_vect_recog_func_ptrs): Adjust popcount entry. * gcc.dg/vect/pr109011-1.c: New test. 
--- gcc/testsuite/gcc.dg/vect/pr109011-1.c | 48 +++++++++++ gcc/tree-vect-patterns.cc | 148 +++++++++++++++++++++++++++------ 2 files changed, 171 insertions(+), 25 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr109011-1.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/vect/pr109011-1.c b/gcc/testsuite/gcc.dg/vect/pr109011-1.c new file mode 100644 index 0000000..707a82a --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr109011-1.c @@ -0,0 +1,48 @@ +/* PR tree-optimization/109011 */ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-unroll-loops --param=vect-epilogues-nomask=0 -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx512cd" { target { { i?86-*-* x86_64-*-* } && avx512cd } } } */ +/* { dg-additional-options "-mavx512vpopcntdq" { target { { i?86-*-* x86_64-*-* } && avx512vpopcntdq } } } */ +/* { dg-additional-options "-mpower8-vector" { target powerpc_p8vector_ok } } */ +/* { dg-additional-options "-mpower9-vector" { target powerpc_p9vector_ok } } */ +/* { dg-additional-options "-march=z13 -mzarch" { target s390_vx } } */ + +void +foo (long long *p, long long *q) +{ +#pragma omp simd + for (int i = 0; i < 2048; ++i) + p[i] = __builtin_popcountll (q[i]); +} + +/* { dg-final { scan-tree-dump-times " = \.POPCOUNT \\\(" 1 "optimized" { target { { i?86-*-* x86_64-*-* } && avx512vpopcntdq } } } } */ +/* { dg-final { scan-tree-dump-times " = \.POPCOUNT \\\(" 1 "optimized" { target { powerpc_p8vector_ok || s390_vx } } } } */ + +void +bar (long long *p, long long *q) +{ +#pragma omp simd + for (int i = 0; i < 2048; ++i) + p[i] = __builtin_clzll (q[i]); +} + +/* { dg-final { scan-tree-dump-times " = \.CLZ \\\(" 1 "optimized" { target { { i?86-*-* x86_64-*-* } && avx512cd } } } } */ +/* { dg-final { scan-tree-dump-times " = \.CLZ \\\(" 1 "optimized" { target { powerpc_p8vector_ok || s390_vx } } } } */ + +void +baz (long long *p, long long *q) +{ +#pragma omp simd + for (int i = 0; i < 2048; ++i) + p[i] = __builtin_ctzll (q[i]); +} + +/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(" 1 "optimized" { target { powerpc_p9vector_ok || s390_vx } } } } */ + +void +qux (long long *p, long long *q) +{ +#pragma omp simd + for (int i = 0; i < 2048; ++i) + p[i] = __builtin_ffsll (q[i]); +} diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index 8802141..633998e 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -1501,7 +1501,7 @@ vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info, "vect_recog_widen_minus_pattern"); } -/* Function vect_recog_popcount_pattern +/* Function vect_recog_popcount_clz_ctz_ffs_pattern Try to find the following pattern: @@ -1530,16 +1530,20 @@ vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info, * Return value: A new stmt that will be used to replace the sequence of stmts that constitute the pattern. In this case it will be: B = .POPCOUNT (A); + + Similarly for clz, ctz and ffs. */ static gimple * -vect_recog_popcount_pattern (vec_info *vinfo, - stmt_vec_info stmt_vinfo, tree *type_out) +vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo, + stmt_vec_info stmt_vinfo, + tree *type_out) { gassign *last_stmt = dyn_cast (stmt_vinfo->stmt); - gimple *popcount_stmt, *pattern_stmt; + gimple *call_stmt, *pattern_stmt; tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var; - auto_vec vargs; + internal_fn ifn = IFN_LAST; + int addend = 0; /* Find B = (TYPE1) temp_out. 
*/ if (!last_stmt) @@ -1557,51 +1561,137 @@ vect_recog_popcount_pattern (vec_info *vinfo, if (TREE_CODE (rhs_oprnd) != SSA_NAME || !has_single_use (rhs_oprnd)) return NULL; - popcount_stmt = SSA_NAME_DEF_STMT (rhs_oprnd); + call_stmt = SSA_NAME_DEF_STMT (rhs_oprnd); /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */ - if (!is_gimple_call (popcount_stmt)) + if (!is_gimple_call (call_stmt)) return NULL; - switch (gimple_call_combined_fn (popcount_stmt)) + switch (gimple_call_combined_fn (call_stmt)) { + int val; CASE_CFN_POPCOUNT: + ifn = IFN_POPCOUNT; + break; + CASE_CFN_CLZ: + ifn = IFN_CLZ; + /* Punt if call result is unsigned and defined value at zero + is negative, as the negative value doesn't extend correctly. */ + if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd)) + && gimple_call_internal_p (call_stmt) + && CLZ_DEFINED_VALUE_AT_ZERO + (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2 + && val < 0) + return NULL; + break; + CASE_CFN_CTZ: + ifn = IFN_CTZ; + /* Punt if call result is unsigned and defined value at zero + is negative, as the negative value doesn't extend correctly. */ + if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd)) + && gimple_call_internal_p (call_stmt) + && CTZ_DEFINED_VALUE_AT_ZERO + (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val) == 2 + && val < 0) + return NULL; + break; + CASE_CFN_FFS: + ifn = IFN_FFS; break; default: return NULL; } - if (gimple_call_num_args (popcount_stmt) != 1) + if (gimple_call_num_args (call_stmt) != 1) return NULL; - rhs_oprnd = gimple_call_arg (popcount_stmt, 0); + rhs_oprnd = gimple_call_arg (call_stmt, 0); vect_unpromoted_value unprom_diff; - rhs_origin = vect_look_through_possible_promotion (vinfo, rhs_oprnd, - &unprom_diff); + rhs_origin + = vect_look_through_possible_promotion (vinfo, rhs_oprnd, &unprom_diff); if (!rhs_origin) return NULL; - /* Input and output of .POPCOUNT should be same-precision integer. - Also A should be unsigned or same precision as temp_in, - otherwise there would be sign_extend from A to temp_in. */ - if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type) - || (!TYPE_UNSIGNED (unprom_diff.type) - && (TYPE_PRECISION (unprom_diff.type) - != TYPE_PRECISION (TREE_TYPE (rhs_oprnd))))) + /* Input and output of .POPCOUNT should be same-precision integer. */ + if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type)) return NULL; - vargs.safe_push (unprom_diff.op); - vect_pattern_detected ("vec_regcog_popcount_pattern", popcount_stmt); + /* Also A should be unsigned or same precision as temp_in, otherwise + different builtins/internal functions have different behaviors. */ + if (TYPE_PRECISION (unprom_diff.type) + != TYPE_PRECISION (TREE_TYPE (rhs_oprnd))) + switch (ifn) + { + case IFN_POPCOUNT: + /* For popcount require zero extension, which doesn't add any + further bits to the count. */ + if (!TYPE_UNSIGNED (unprom_diff.type)) + return NULL; + break; + case IFN_CLZ: + /* clzll (x) == clz (x) + 32 for unsigned x != 0, so ok + if it is undefined at zero or if it matches also for the + defined value there. 
*/ + if (!TYPE_UNSIGNED (unprom_diff.type)) + return NULL; + if (!type_has_mode_precision_p (lhs_type) + || !type_has_mode_precision_p (TREE_TYPE (rhs_oprnd))) + return NULL; + addend = (TYPE_PRECISION (TREE_TYPE (rhs_oprnd)) + - TYPE_PRECISION (lhs_type)); + if (gimple_call_internal_p (call_stmt)) + { + int val1, val2; + int d1 + = CLZ_DEFINED_VALUE_AT_ZERO + (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val1); + int d2 + = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type), + val2); + if (d1 != 2) + break; + if (d2 != 2 || val1 != val2 + addend) + return NULL; + } + break; + case IFN_CTZ: + /* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok + if it is undefined at zero or if it matches also for the + defined value there. */ + if (gimple_call_internal_p (call_stmt)) + { + int val1, val2; + int d1 + = CTZ_DEFINED_VALUE_AT_ZERO + (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd)), val1); + int d2 + = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type), + val2); + if (d1 != 2) + break; + if (d2 != 2 || val1 != val2) + return NULL; + } + break; + case IFN_FFS: + /* ffsll (x) == ffs (x) for unsigned or signed x. */ + break; + default: + gcc_unreachable (); + } + + vect_pattern_detected ("vec_recog_popcount_clz_ctz_ffs_pattern", + call_stmt); vec_type = get_vectype_for_scalar_type (vinfo, lhs_type); - /* Do it only if the backend has popcount2 pattern. */ + /* Do it only if the backend has popcount2 etc. pattern. */ if (!vec_type - || !direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type, + || !direct_internal_fn_supported_p (ifn, vec_type, OPTIMIZE_FOR_SPEED)) return NULL; /* Create B = .POPCOUNT (A). */ new_var = vect_recog_temp_ssa_var (lhs_type, NULL); - pattern_stmt = gimple_build_call_internal_vec (IFN_POPCOUNT, vargs); + pattern_stmt = gimple_build_call_internal (ifn, 1, unprom_diff.op); gimple_call_set_lhs (pattern_stmt, new_var); gimple_set_location (pattern_stmt, gimple_location (last_stmt)); *type_out = vec_type; @@ -1609,6 +1699,14 @@ vect_recog_popcount_pattern (vec_info *vinfo, if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "created pattern stmt: %G", pattern_stmt); + + if (addend) + { + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type); + tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL); + pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var, + build_int_cst (lhs_type, addend)); + } return pattern_stmt; } @@ -6051,7 +6149,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = { { vect_recog_sad_pattern, "sad" }, { vect_recog_widen_sum_pattern, "widen_sum" }, { vect_recog_pow_pattern, "pow" }, - { vect_recog_popcount_pattern, "popcount" }, + { vect_recog_popcount_clz_ctz_ffs_pattern, "popcount_clz_ctz_ffs" }, { vect_recog_widen_shift_pattern, "widen_shift" }, { vect_recog_rotate_pattern, "rotate" }, { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" }, -- cgit v1.1 From 9bc407c787771baad6c69cee3e392f15a5b9163d Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Wed, 19 Apr 2023 10:32:07 +0100 Subject: aarch64: Delete __builtin_aarch64_neg* builtins and their use I don't think we need to keep the __builtin_aarch64_neg* builtins around. They are only used once in the vnegh_f16 intrinsic in arm_fp16.h and I AFAICT it was added this way only for the sake of orthogonality in https://gcc.gnu.org/g:d7f33f07d88984cbe769047e3d07fc21067fbba9 We already use normal "-" negation in the other vneg* intrinsics, so do so here as well. Bootstrapped and tested on aarch64-none-linux-gnu. 
gcc/ChangeLog: * config/aarch64/aarch64-simd-builtins.def (neg): Delete builtins definition. * config/aarch64/arm_fp16.h (vnegh_f16): Reimplement using normal negation. --- gcc/config/aarch64/aarch64-simd-builtins.def | 3 --- gcc/config/aarch64/arm_fp16.h | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index ea5fd33..1beaa08 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -848,9 +848,6 @@ BUILTIN_VHSDF_HSDF (BINOP_USS, cmle, 0, FP) BUILTIN_VHSDF_HSDF (BINOP_USS, cmlt, 0, FP) - /* Implemented by neg2. */ - BUILTIN_VHSDF_HSDF (UNOP, neg, 2, ALL) - /* Implemented by aarch64_fac. */ BUILTIN_VHSDF_HSDF (BINOP_USS, faclt, 0, FP) BUILTIN_VHSDF_HSDF (BINOP_USS, facle, 0, FP) diff --git a/gcc/config/aarch64/arm_fp16.h b/gcc/config/aarch64/arm_fp16.h index a8fa4db..350f8cc 100644 --- a/gcc/config/aarch64/arm_fp16.h +++ b/gcc/config/aarch64/arm_fp16.h @@ -334,7 +334,7 @@ __extension__ extern __inline float16_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vnegh_f16 (float16_t __a) { - return __builtin_aarch64_neghf (__a); + return -__a; } __extension__ extern __inline float16_t -- cgit v1.1 From 2c800ed8d59cffce678ef08a6d172465c17f015d Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 16 Dec 2022 13:48:58 +0100 Subject: Simplify gimple_assign_load The following simplifies and outlines gimple_assign_load. In particular it is not necessary to get at the base of the possibly loaded expression but just handle the case of a single handled component wrapping a non-memory operand. * gimple.h (gimple_assign_load): Outline... * gimple.cc (gimple_assign_load): ... here. Avoid get_base_address and instead just strip the outermost handled component, treating a remaining handled component as load. --- gcc/gimple.cc | 20 ++++++++++++++++++++ gcc/gimple.h | 18 +----------------- 2 files changed, 21 insertions(+), 17 deletions(-) (limited to 'gcc') diff --git a/gcc/gimple.cc b/gcc/gimple.cc index 5e4eda4..e0ba42a 100644 --- a/gcc/gimple.cc +++ b/gcc/gimple.cc @@ -1788,6 +1788,26 @@ gimple_assign_unary_nop_p (gimple *gs) == TYPE_MODE (TREE_TYPE (gimple_assign_rhs1 (gs))))); } +/* Return true if GS is an assignment that loads from its rhs1. */ + +bool +gimple_assign_load_p (const gimple *gs) +{ + tree rhs; + if (!gimple_assign_single_p (gs)) + return false; + rhs = gimple_assign_rhs1 (gs); + if (TREE_CODE (rhs) == WITH_SIZE_EXPR) + return true; + if (handled_component_p (rhs)) + rhs = TREE_OPERAND (rhs, 0); + return (handled_component_p (rhs) + || DECL_P (rhs) + || TREE_CODE (rhs) == MEM_REF + || TREE_CODE (rhs) == TARGET_MEM_REF); +} + + /* Set BB to be the basic block holding G. */ void diff --git a/gcc/gimple.h b/gcc/gimple.h index 081d18e..daf5524 100644 --- a/gcc/gimple.h +++ b/gcc/gimple.h @@ -1629,6 +1629,7 @@ tree gimple_call_nonnull_arg (gcall *); bool gimple_assign_copy_p (gimple *); bool gimple_assign_ssa_name_copy_p (gimple *); bool gimple_assign_unary_nop_p (gimple *); +bool gimple_assign_load_p (const gimple *); void gimple_set_bb (gimple *, basic_block); void gimple_assign_set_rhs_from_tree (gimple_stmt_iterator *, tree); void gimple_assign_set_rhs_with_ops (gimple_stmt_iterator *, enum tree_code, @@ -2952,23 +2953,6 @@ gimple_store_p (const gimple *gs) return lhs && !is_gimple_reg (lhs); } -/* Return true if GS is an assignment that loads from its rhs1. 
*/ - -inline bool -gimple_assign_load_p (const gimple *gs) -{ - tree rhs; - if (!gimple_assign_single_p (gs)) - return false; - rhs = gimple_assign_rhs1 (gs); - if (TREE_CODE (rhs) == WITH_SIZE_EXPR) - return true; - rhs = get_base_address (rhs); - return (DECL_P (rhs) - || TREE_CODE (rhs) == MEM_REF || TREE_CODE (rhs) == TARGET_MEM_REF); -} - - /* Return true if S is a type-cast assignment. */ inline bool -- cgit v1.1 From 01e79e21bbb2d10ecac784d383cefb88d2e20692 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 9 Mar 2023 10:56:57 +0100 Subject: Avoid unnecessary epilogues from tree_unroll_loop The following fixes the condition determining whether we need an epilogue. * tree-ssa-loop-manip.cc (determine_exit_conditions): Fix no epilogue condition. --- gcc/tree-ssa-loop-manip.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/tree-ssa-loop-manip.cc b/gcc/tree-ssa-loop-manip.cc index 09acc1c..4ef27ba 100644 --- a/gcc/tree-ssa-loop-manip.cc +++ b/gcc/tree-ssa-loop-manip.cc @@ -1010,7 +1010,7 @@ determine_exit_conditions (class loop *loop, class tree_niter_desc *desc, /* Convert the latch count to an iteration count. */ tree niter = fold_build2 (PLUS_EXPR, type, desc->niter, build_one_cst (type)); - if (multiple_of_p (type, niter, bigstep)) + if (multiple_of_p (type, niter, build_int_cst (type, factor))) return; } -- cgit v1.1 From 81c6501445fcddad653363f815cd04ca6fdb488e Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Wed, 29 Mar 2023 01:36:09 +0800 Subject: LoongArch: Improve GAR store for va_list LoongArch backend used to save all GARs for a function with variable arguments. But sometimes a function only accepts variable arguments for a purpose like C++ function overloading. For example, POSIX defines open() as: int open(const char *path, int oflag, ...); But only two forms are actually used: int open(const char *pathname, int flags); int open(const char *pathname, int flags, mode_t mode); So it's obviously a waste to save all 8 GARs in open(). We can use the cfun->va_list_gpr_size field set by the stdarg pass to only save the GARs necessary to be saved. If the va_list escapes (for example, in fprintf() we pass it to vfprintf()), stdarg would set cfun->va_list_gpr_size to 255 so we don't need a special case. With this patch, only one GAR ($a2/$r6) is saved in open(). Ideally even this stack store should be omitted too, but doing so is not trivial and AFAIK there are no compilers (for any target) performing the "ideal" optimization here, see https://godbolt.org/z/n1YqWq9c9. Bootstrapped and regtested on loongarch64-linux-gnu. Ok for trunk (GCC 14 or now)? gcc/ChangeLog: * config/loongarch/loongarch.cc (loongarch_setup_incoming_varargs): Don't save more GARs than cfun->va_list_gpr_size / UNITS_PER_WORD. gcc/testsuite/ChangeLog: * gcc.target/loongarch/va_arg.c: New test. --- gcc/config/loongarch/loongarch.cc | 4 +++- gcc/testsuite/gcc.target/loongarch/va_arg.c | 24 ++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/va_arg.c (limited to 'gcc') diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 34532d8..dfb731f 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -764,7 +764,9 @@ loongarch_setup_incoming_varargs (cumulative_args_t cum, loongarch_function_arg_advance (pack_cumulative_args (&local_cum), arg); /* Found out how many registers we need to save. 
*/ - gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs; + gp_saved = cfun->va_list_gpr_size / UNITS_PER_WORD; + if (gp_saved > (int) (MAX_ARGS_IN_REGISTERS - local_cum.num_gprs)) + gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs; if (!no_rtl && gp_saved > 0) { diff --git a/gcc/testsuite/gcc.target/loongarch/va_arg.c b/gcc/testsuite/gcc.target/loongarch/va_arg.c new file mode 100644 index 0000000..980c96d --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/va_arg.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +/* Technically we shouldn't save any register for this function: it should be + compiled as if it accepts 3 named arguments. But AFAIK no compilers can + achieve this "perfect" optimization now, so just ensure we are using the + knowledge provided by stdarg pass and we won't save GARs impossible to be + accessed with __builtin_va_arg () when the va_list does not escape. */ + +/* { dg-final { scan-assembler-not "st.*r7" } } */ + +int +test (int a0, ...) +{ + void *arg; + int a1, a2; + + __builtin_va_start (arg, a0); + a1 = __builtin_va_arg (arg, int); + a2 = __builtin_va_arg (arg, int); + __builtin_va_end (arg); + + return a0 + a1 + a2; +} -- cgit v1.1 From 6d7e0bcfa49e4ddc84dabe520bba8a023bc52692 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Wed, 12 Apr 2023 11:45:48 +0000 Subject: LoongArch: Improve cpymemsi expansion [PR109465] We'd been generating really bad block move sequences, which kernel developers recently complained about when they tried __builtin_memcpy. To improve it: 1. Take advantage of -mno-strict-align. When it is set, set the mode size to UNITS_PER_WORD regardless of the alignment. 2. Halve the mode size when (block size) % (mode size) != 0, instead of falling back to ld.bu/st.b at once. 3. Limit the length of the block move sequence considering the number of instructions, not the size of the block. When -mstrict-align is set and the block is not aligned, the old size limit for the straight-line implementation (64 bytes) was definitely too large (we don't have 64 registers anyway). Change since v1: add a comment about the calculation of num_reg. gcc/ChangeLog: PR target/109465 * config/loongarch/loongarch-protos.h (loongarch_expand_block_move): Add a parameter as alignment RTX. * config/loongarch/loongarch.h: (LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER): Remove. (LARCH_MAX_MOVE_BYTES_STRAIGHT): Remove. (LARCH_MAX_MOVE_OPS_PER_LOOP_ITER): Define. (LARCH_MAX_MOVE_OPS_STRAIGHT): Define. (MOVE_RATIO): Use LARCH_MAX_MOVE_OPS_PER_LOOP_ITER instead of LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER. * config/loongarch/loongarch.cc (loongarch_expand_block_move): Take the alignment from the parameter, but set it to UNITS_PER_WORD if !TARGET_STRICT_ALIGN. Limit the length of the straight-line implementation with LARCH_MAX_MOVE_OPS_STRAIGHT instead of LARCH_MAX_MOVE_BYTES_STRAIGHT. (loongarch_block_move_straight): When there are left-over bytes, halve the mode size instead of falling back to byte mode at once. (loongarch_block_move_loop): Limit the length of the loop body with LARCH_MAX_MOVE_OPS_PER_LOOP_ITER instead of LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER. * config/loongarch/loongarch.md (cpymemsi): Pass the alignment to loongarch_expand_block_move. gcc/testsuite/ChangeLog: PR target/109465 * gcc.target/loongarch/pr109465-1.c: New test. * gcc.target/loongarch/pr109465-2.c: New test. * gcc.target/loongarch/pr109465-3.c: New test.
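(Illustrative sketch, not part of the patch; decompose_moves and emit_pair are hypothetical stand-ins for the emission logic.) The decomposition described in points 1 and 2 starts at the word size and keeps halving the chunk size for whatever remains, so a 15-byte copy on a 64-bit target becomes one 8-, one 4-, one 2- and one 1-byte load/store pair:

  #include <stdio.h>

  static void
  emit_pair (long offs, long delta)
  {
    /* Stand-in for emitting one DELTA-byte load/store pair.  */
    printf ("%ld-byte copy at offset %ld\n", delta, offs);
  }

  static void
  decompose_moves (long length, long word_size)
  {
    long offs = 0;
    /* Largest chunks first, then halve for the remainder.  */
    for (long delta = word_size; delta != 0; delta /= 2)
      for (; offs + delta <= length; offs += delta)
        emit_pair (offs, delta);   /* ld.d/st.d when delta == 8, etc. */
  }

  int
  main (void)
  {
    decompose_moves (15, 8);   /* 8 + 4 + 2 + 1, as pr109465-1.c expects */
    return 0;
  }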
--- gcc/config/loongarch/loongarch-protos.h | 2 +- gcc/config/loongarch/loongarch.cc | 95 ++++++++++++++----------- gcc/config/loongarch/loongarch.h | 10 ++- gcc/config/loongarch/loongarch.md | 3 +- gcc/testsuite/gcc.target/loongarch/pr109465-1.c | 9 +++ gcc/testsuite/gcc.target/loongarch/pr109465-2.c | 9 +++ gcc/testsuite/gcc.target/loongarch/pr109465-3.c | 12 ++++ 7 files changed, 91 insertions(+), 49 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/pr109465-1.c create mode 100644 gcc/testsuite/gcc.target/loongarch/pr109465-2.c create mode 100644 gcc/testsuite/gcc.target/loongarch/pr109465-3.c (limited to 'gcc') diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h index 83df489..b71b188 100644 --- a/gcc/config/loongarch/loongarch-protos.h +++ b/gcc/config/loongarch/loongarch-protos.h @@ -95,7 +95,7 @@ extern void loongarch_expand_conditional_trap (rtx); #endif extern void loongarch_set_return_address (rtx, rtx); extern bool loongarch_move_by_pieces_p (unsigned HOST_WIDE_INT, unsigned int); -extern bool loongarch_expand_block_move (rtx, rtx, rtx); +extern bool loongarch_expand_block_move (rtx, rtx, rtx, rtx); extern bool loongarch_do_optimize_block_move_p (void); extern bool loongarch_expand_ext_as_unaligned_load (rtx, rtx, HOST_WIDE_INT, diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index dfb731f..d808cb3 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -4459,41 +4459,46 @@ loongarch_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, Assume that the areas do not overlap. */ static void -loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length) +loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length, + HOST_WIDE_INT delta) { - HOST_WIDE_INT offset, delta; - unsigned HOST_WIDE_INT bits; + HOST_WIDE_INT offs, delta_cur; int i; machine_mode mode; rtx *regs; - bits = MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest))); - - mode = int_mode_for_size (bits, 0).require (); - delta = bits / BITS_PER_UNIT; + /* Calculate how many registers we'll need for the block move. + We'll emit length / delta move operations with delta as the size + first. Then we may still have length % delta bytes not copied. + We handle these remaining bytes by move operations with smaller + (halfed) sizes. For example, if length = 21 and delta = 8, we'll + emit two ld.d/st.d pairs, one ld.w/st.w pair, and one ld.b/st.b + pair. For each load/store pair we use a dedicated register to keep + the pipeline as populated as possible. */ + HOST_WIDE_INT num_reg = length / delta; + for (delta_cur = delta / 2; delta_cur != 0; delta_cur /= 2) + num_reg += !!(length & delta_cur); /* Allocate a buffer for the temporary registers. */ - regs = XALLOCAVEC (rtx, length / delta); + regs = XALLOCAVEC (rtx, num_reg); - /* Load as many BITS-sized chunks as possible. Use a normal load if - the source has enough alignment, otherwise use left/right pairs. 
*/ - for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++) + for (delta_cur = delta, i = 0, offs = 0; offs < length; delta_cur /= 2) { - regs[i] = gen_reg_rtx (mode); - loongarch_emit_move (regs[i], adjust_address (src, mode, offset)); - } + mode = int_mode_for_size (delta_cur * BITS_PER_UNIT, 0).require (); - for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++) - loongarch_emit_move (adjust_address (dest, mode, offset), regs[i]); + for (; offs + delta_cur <= length; offs += delta_cur, i++) + { + regs[i] = gen_reg_rtx (mode); + loongarch_emit_move (regs[i], adjust_address (src, mode, offs)); + } + } - /* Mop up any left-over bytes. */ - if (offset < length) + for (delta_cur = delta, i = 0, offs = 0; offs < length; delta_cur /= 2) { - src = adjust_address (src, BLKmode, offset); - dest = adjust_address (dest, BLKmode, offset); - move_by_pieces (dest, src, length - offset, - MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), - (enum memop_ret) 0); + mode = int_mode_for_size (delta_cur * BITS_PER_UNIT, 0).require (); + + for (; offs + delta_cur <= length; offs += delta_cur, i++) + loongarch_emit_move (adjust_address (dest, mode, offs), regs[i]); } } @@ -4523,10 +4528,11 @@ loongarch_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg, static void loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length, - HOST_WIDE_INT bytes_per_iter) + HOST_WIDE_INT align) { rtx_code_label *label; rtx src_reg, dest_reg, final_src, test; + HOST_WIDE_INT bytes_per_iter = align * LARCH_MAX_MOVE_OPS_PER_LOOP_ITER; HOST_WIDE_INT leftover; leftover = length % bytes_per_iter; @@ -4546,7 +4552,7 @@ loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length, emit_label (label); /* Emit the loop body. */ - loongarch_block_move_straight (dest, src, bytes_per_iter); + loongarch_block_move_straight (dest, src, bytes_per_iter, align); /* Move on to the next block. */ loongarch_emit_move (src_reg, @@ -4563,7 +4569,7 @@ loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length, /* Mop up any left-over bytes. */ if (leftover) - loongarch_block_move_straight (dest, src, leftover); + loongarch_block_move_straight (dest, src, leftover, align); else /* Temporary fix for PR79150. */ emit_insn (gen_nop ()); @@ -4573,25 +4579,32 @@ loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length, memory reference SRC to memory reference DEST. 
*/ bool -loongarch_expand_block_move (rtx dest, rtx src, rtx length) +loongarch_expand_block_move (rtx dest, rtx src, rtx r_length, rtx r_align) { - int max_move_bytes = LARCH_MAX_MOVE_BYTES_STRAIGHT; + if (!CONST_INT_P (r_length)) + return false; + + HOST_WIDE_INT length = INTVAL (r_length); + if (length > loongarch_max_inline_memcpy_size) + return false; + + HOST_WIDE_INT align = INTVAL (r_align); + + if (!TARGET_STRICT_ALIGN || align > UNITS_PER_WORD) + align = UNITS_PER_WORD; - if (CONST_INT_P (length) - && INTVAL (length) <= loongarch_max_inline_memcpy_size) + if (length <= align * LARCH_MAX_MOVE_OPS_STRAIGHT) { - if (INTVAL (length) <= max_move_bytes) - { - loongarch_block_move_straight (dest, src, INTVAL (length)); - return true; - } - else if (optimize) - { - loongarch_block_move_loop (dest, src, INTVAL (length), - LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER); - return true; - } + loongarch_block_move_straight (dest, src, length, align); + return true; + } + + if (optimize) + { + loongarch_block_move_loop (dest, src, length, align); + return true; } + return false; } diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h index 277facb..a9eff6a 100644 --- a/gcc/config/loongarch/loongarch.h +++ b/gcc/config/loongarch/loongarch.h @@ -1062,13 +1062,13 @@ typedef struct { /* The maximum number of bytes that can be copied by one iteration of a cpymemsi loop; see loongarch_block_move_loop. */ -#define LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER (UNITS_PER_WORD * 4) +#define LARCH_MAX_MOVE_OPS_PER_LOOP_ITER 4 /* The maximum number of bytes that can be copied by a straight-line implementation of cpymemsi; see loongarch_block_move_straight. We want to make sure that any loop-based implementation will iterate at least twice. */ -#define LARCH_MAX_MOVE_BYTES_STRAIGHT (LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER * 2) +#define LARCH_MAX_MOVE_OPS_STRAIGHT (LARCH_MAX_MOVE_OPS_PER_LOOP_ITER * 2) /* The base cost of a memcpy call, for MOVE_RATIO and friends. These values were determined experimentally by benchmarking with CSiBE. @@ -1076,7 +1076,7 @@ typedef struct { #define LARCH_CALL_RATIO 8 /* Any loop-based implementation of cpymemsi will have at least - LARCH_MAX_MOVE_BYTES_STRAIGHT / UNITS_PER_WORD memory-to-memory + LARCH_MAX_MOVE_OPS_PER_LOOP_ITER memory-to-memory moves, so allow individual copies of fewer elements. When cpymemsi is not available, use a value approximating @@ -1087,9 +1087,7 @@ typedef struct { value of LARCH_CALL_RATIO to take that into account. */ #define MOVE_RATIO(speed) \ - (HAVE_cpymemsi \ - ? LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER / UNITS_PER_WORD \ - : CLEAR_RATIO (speed) / 2) + (HAVE_cpymemsi ? LARCH_MAX_MOVE_OPS_PER_LOOP_ITER : CLEAR_RATIO (speed) / 2) /* For CLEAR_RATIO, when optimizing for size, give a better estimate of the length of a memset call, but use the default otherwise. 
*/ diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md index 628ecc7..816a943 100644 --- a/gcc/config/loongarch/loongarch.md +++ b/gcc/config/loongarch/loongarch.md @@ -2488,7 +2488,8 @@ "" { if (TARGET_DO_OPTIMIZE_BLOCK_MOVE_P - && loongarch_expand_block_move (operands[0], operands[1], operands[2])) + && loongarch_expand_block_move (operands[0], operands[1], + operands[2], operands[3])) DONE; else FAIL; diff --git a/gcc/testsuite/gcc.target/loongarch/pr109465-1.c b/gcc/testsuite/gcc.target/loongarch/pr109465-1.c new file mode 100644 index 0000000..4cd35d1 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/pr109465-1.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mabi=lp64d -mno-strict-align" } */ +/* { dg-final { scan-assembler-times "st\\.d|stptr\\.d" 1 } } */ +/* { dg-final { scan-assembler-times "st\\.w|stptr\\.w" 1 } } */ +/* { dg-final { scan-assembler-times "st\\.h" 1 } } */ +/* { dg-final { scan-assembler-times "st\\.b" 1 } } */ + +extern char a[], b[]; +void test() { __builtin_memcpy(a, b, 15); } diff --git a/gcc/testsuite/gcc.target/loongarch/pr109465-2.c b/gcc/testsuite/gcc.target/loongarch/pr109465-2.c new file mode 100644 index 0000000..703eb95 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/pr109465-2.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mabi=lp64d -mstrict-align" } */ +/* { dg-final { scan-assembler-times "st\\.d|stptr\\.d" 1 } } */ +/* { dg-final { scan-assembler-times "st\\.w|stptr\\.w" 1 } } */ +/* { dg-final { scan-assembler-times "st\\.h" 1 } } */ +/* { dg-final { scan-assembler-times "st\\.b" 1 } } */ + +extern long a[], b[]; +void test() { __builtin_memcpy(a, b, 15); } diff --git a/gcc/testsuite/gcc.target/loongarch/pr109465-3.c b/gcc/testsuite/gcc.target/loongarch/pr109465-3.c new file mode 100644 index 0000000..d6a8065 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/pr109465-3.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mabi=lp64d -mstrict-align" } */ + +/* Three loop iterations each contains 4 st.b, and 3 st.b after the loop */ +/* { dg-final { scan-assembler-times "st\\.b" 7 } } */ + +/* { dg-final { scan-assembler-not "st\\.h" } } */ +/* { dg-final { scan-assembler-not "st\\.w|stptr\\.w" } } */ +/* { dg-final { scan-assembler-not "st\\.d|stptr\\.d" } } */ + +extern char a[], b[]; +void test() { __builtin_memcpy(a, b, 15); } -- cgit v1.1 From a243ce2a52a6c62bc0d6be0b756a85dd9c1bceb7 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 9 Mar 2023 09:02:07 +0100 Subject: tree-optimization/44794 - avoid excessive RTL unrolling on epilogues The following adjusts tree_[transform_and_]unroll_loop to set an upper bound on the number of iterations on the epilogue loop it creates. For the testcase at hand which involves array prefetching this avoids applying RTL unrolling to them when -funroll-loops is specified. Other users of this API includes predictive commoning and unroll-and-jam. PR tree-optimization/44794 * tree-ssa-loop-manip.cc (tree_transform_and_unroll_loop): If an epilogue loop is required set its iteration upper bound. 
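(Illustrative sketch, not part of the patch; sum is a hypothetical example.) After unrolling by FACTOR, the epilogue loop only picks up the remaining n % FACTOR iterations, so its latch executes at most FACTOR - 1 times; recording that upper bound is what stops the RTL unroller from expanding the epilogue again. For FACTOR == 4 the resulting shape is roughly:

  void
  sum (int *a, const int *b, int n)
  {
    int i = 0;
    for (; i + 4 <= n; i += 4)   /* unrolled main loop */
      {
        a[i] += b[i];
        a[i + 1] += b[i + 1];
        a[i + 2] += b[i + 2];
        a[i + 3] += b[i + 3];
      }
    for (; i < n; i++)           /* epilogue: at most factor - 1 = 3 iterations */
      a[i] += b[i];
  }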
--- gcc/tree-ssa-loop-manip.cc | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'gcc') diff --git a/gcc/tree-ssa-loop-manip.cc b/gcc/tree-ssa-loop-manip.cc index 4ef27ba..a52277a 100644 --- a/gcc/tree-ssa-loop-manip.cc +++ b/gcc/tree-ssa-loop-manip.cc @@ -1297,6 +1297,12 @@ tree_transform_and_unroll_loop (class loop *loop, unsigned factor, } remove_path (exit); + + /* The epilog loop latch executes at most factor - 1 times. + Since the epilog is entered unconditionally it will need to handle + up to factor executions of its body. */ + new_loop->any_upper_bound = 1; + new_loop->nb_iterations_upper_bound = factor - 1; } else new_exit = single_dom_exit (loop); -- cgit v1.1 From 7838574b5aca179e347eb972880ea8376a2cc6b5 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Tue, 14 Mar 2023 14:39:32 +0100 Subject: Avoid non-unified nodes on the topological sorting for PTA solving Since we do not update successor edges when merging nodes we have to deal with this in the users. The following avoids putting those on the topo order vector. * tree-ssa-structalias.cc (topo_visit): Look at the real destination of edges. --- gcc/tree-ssa-structalias.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-ssa-structalias.cc b/gcc/tree-ssa-structalias.cc index fa3a2e4..8976cc9 100644 --- a/gcc/tree-ssa-structalias.cc +++ b/gcc/tree-ssa-structalias.cc @@ -1632,8 +1632,9 @@ topo_visit (constraint_graph_t graph, struct topo_info *ti, if (graph->succs[n]) EXECUTE_IF_SET_IN_BITMAP (graph->succs[n], 0, j, bi) { - if (!bitmap_bit_p (ti->visited, j)) - topo_visit (graph, ti, j); + unsigned k = find (j); + if (!bitmap_bit_p (ti->visited, k)) + topo_visit (graph, ti, k); } ti->topo_order.safe_push (n); -- cgit v1.1 From 9d218c45e318dcec04312f1d4a14c6ff904404e1 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Tue, 14 Mar 2023 14:42:10 +0100 Subject: Remove senseless store in do_sd_constraint * tree-ssa-structalias.cc (do_sd_constraint): Do not write to the LHS varinfo solution member. --- gcc/tree-ssa-structalias.cc | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-ssa-structalias.cc b/gcc/tree-ssa-structalias.cc index 8976cc9..89027ab 100644 --- a/gcc/tree-ssa-structalias.cc +++ b/gcc/tree-ssa-structalias.cc @@ -1724,10 +1724,7 @@ do_sd_constraint (constraint_graph_t graph, constraint_t c, done: /* If the LHS solution changed, mark the var as changed. */ if (flag) - { - get_varinfo (lhs)->solution = sol; - bitmap_set_bit (changed, lhs); - } + bitmap_set_bit (changed, lhs); } /* Process a constraint C that represents *(x + off) = y using DELTA -- cgit v1.1 From 8366e6764e18ca6526d9be87f5bb54ae8339d7f7 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 15 Mar 2023 08:43:06 +0100 Subject: Fix do_sd_constraint escape special casing The following fixes the escape special casing to test the proper variable IDs. * tree-ssa-structalias.cc (do_sd_constraint): Fixup escape special casing. --- gcc/tree-ssa-structalias.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/tree-ssa-structalias.cc b/gcc/tree-ssa-structalias.cc index 89027ab..4f350bf 100644 --- a/gcc/tree-ssa-structalias.cc +++ b/gcc/tree-ssa-structalias.cc @@ -1706,7 +1706,7 @@ do_sd_constraint (constraint_graph_t graph, constraint_t c, flag |= bitmap_ior_into (sol, get_varinfo (t)->solution); /* Merging the solution from ESCAPED needlessly increases the set. Use ESCAPED as representative instead. 
*/ - else if (v->id == escaped_id) + else if (t == find (escaped_id)) flag |= bitmap_set_bit (sol, escaped_id); else if (v->may_have_pointers && add_graph_edge (graph, lhs, t)) -- cgit v1.1 From 6702fdcdf8b8c8ed27954972bb10b2464f4912fb Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 15 Mar 2023 08:46:04 +0100 Subject: Remove special-cased edges when solving copies The following makes sure to remove the copy edges we ignore or need to special-case only once. * tree-ssa-structalias.cc (solve_graph): Remove self-copy edges, remove edges from escaped after special-casing them. --- gcc/tree-ssa-structalias.cc | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-ssa-structalias.cc b/gcc/tree-ssa-structalias.cc index 4f350bf..39c342f 100644 --- a/gcc/tree-ssa-structalias.cc +++ b/gcc/tree-ssa-structalias.cc @@ -2873,19 +2873,22 @@ solve_graph (constraint_graph_t graph) } /* Don't try to propagate to ourselves. */ if (to == i) - continue; - - bitmap tmp = get_varinfo (to)->solution; - bool flag = false; - - /* If we propagate from ESCAPED use ESCAPED as - placeholder. */ + { + to_remove = j; + continue; + } + /* Early node unification can lead to edges from + escaped - remove them. */ if (i == eff_escaped_id) - flag = bitmap_set_bit (tmp, escaped_id); - else - flag = bitmap_ior_into (tmp, pts); + { + to_remove = j; + if (bitmap_set_bit (get_varinfo (to)->solution, + escaped_id)) + bitmap_set_bit (changed, to); + continue; + } - if (flag) + if (bitmap_ior_into (get_varinfo (to)->solution, pts)) bitmap_set_bit (changed, to); } if (to_remove != ~0U) -- cgit v1.1 From 210617b53eee01d0a19117f886f5cf7717aa2319 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 19 Apr 2023 09:45:55 +0200 Subject: Transform more gmp/mpfr uses to use RAII The following picks up the coccinelle generated patch from Bernhard, leaving out the fortran frontend parts and fixing up the rest. In particular both gmp.h and mpfr.h contain macros like #define mpfr_inf_p(_x) ((_x)->_mpfr_exp == __MPFR_EXP_INF) for which I add operator-> overloads to the auto_* classes. * system.h (auto_mpz::operator->()): New. * realmpfr.h (auto_mpfr::operator->()): New. * builtins.cc (do_mpfr_lgamma_r): Use auto_mpfr. * real.cc (real_from_string): Likewise. (dconst_e_ptr): Likewise. (dconst_sqrt2_ptr): Likewise. * tree-ssa-loop-niter.cc (refine_value_range_using_guard): Use auto_mpz. (bound_difference_of_offsetted_base): Likewise. (number_of_iterations_ne): Likewise. (number_of_iterations_lt_to_ne): Likewise. * ubsan.cc: Include realmpfr.h. (ubsan_instrument_float_cast): Use auto_mpfr. --- gcc/builtins.cc | 4 +--- gcc/real.cc | 22 +++++----------------- gcc/realmpfr.h | 1 + gcc/system.h | 1 + gcc/tree-ssa-loop-niter.cc | 29 ++++++++--------------------- gcc/ubsan.cc | 9 ++++----- 6 files changed, 20 insertions(+), 46 deletions(-) (limited to 'gcc') diff --git a/gcc/builtins.cc b/gcc/builtins.cc index 1bfdc59..80b8b89 100644 --- a/gcc/builtins.cc +++ b/gcc/builtins.cc @@ -11084,15 +11084,13 @@ do_mpfr_lgamma_r (tree arg, tree arg_sg, tree type) const int prec = fmt->p; const mpfr_rnd_t rnd = fmt->round_towards_zero? 
MPFR_RNDZ : MPFR_RNDN; int inexact, sg; - mpfr_t m; tree result_lg; - mpfr_init2 (m, prec); + auto_mpfr m (prec); mpfr_from_real (m, ra, MPFR_RNDN); mpfr_clear_flags (); inexact = mpfr_lgamma (m, &sg, m, rnd); result_lg = do_mpfr_ckconv (m, type, inexact); - mpfr_clear (m); if (result_lg) { tree result_sg; diff --git a/gcc/real.cc b/gcc/real.cc index 126695b..cf164e5 100644 --- a/gcc/real.cc +++ b/gcc/real.cc @@ -2131,7 +2131,6 @@ real_from_string (REAL_VALUE_TYPE *r, const char *str) { /* Decimal floating point. */ const char *cstr = str; - mpfr_t m; bool inexact; while (*cstr == '0') @@ -2148,21 +2147,15 @@ real_from_string (REAL_VALUE_TYPE *r, const char *str) goto is_a_zero; /* Nonzero value, possibly overflowing or underflowing. */ - mpfr_init2 (m, SIGNIFICAND_BITS); + auto_mpfr m (SIGNIFICAND_BITS); inexact = mpfr_strtofr (m, str, NULL, 10, MPFR_RNDZ); /* The result should never be a NaN, and because the rounding is toward zero should never be an infinity. */ gcc_assert (!mpfr_nan_p (m) && !mpfr_inf_p (m)); if (mpfr_zero_p (m) || mpfr_get_exp (m) < -MAX_EXP + 4) - { - mpfr_clear (m); - goto underflow; - } + goto underflow; else if (mpfr_get_exp (m) > MAX_EXP - 4) - { - mpfr_clear (m); - goto overflow; - } + goto overflow; else { real_from_mpfr (r, m, NULL_TREE, MPFR_RNDZ); @@ -2173,7 +2166,6 @@ real_from_string (REAL_VALUE_TYPE *r, const char *str) gcc_assert (r->cl == rvc_normal); /* Set a sticky bit if mpfr_strtofr was inexact. */ r->sig[0] |= inexact; - mpfr_clear (m); } } @@ -2474,12 +2466,10 @@ dconst_e_ptr (void) These constants need to be given to at least 160 bits precision. */ if (value.cl == rvc_zero) { - mpfr_t m; - mpfr_init2 (m, SIGNIFICAND_BITS); + auto_mpfr m (SIGNIFICAND_BITS); mpfr_set_ui (m, 1, MPFR_RNDN); mpfr_exp (m, m, MPFR_RNDN); real_from_mpfr (&value, m, NULL_TREE, MPFR_RNDN); - mpfr_clear (m); } return &value; @@ -2517,11 +2507,9 @@ dconst_sqrt2_ptr (void) These constants need to be given to at least 160 bits precision. */ if (value.cl == rvc_zero) { - mpfr_t m; - mpfr_init2 (m, SIGNIFICAND_BITS); + auto_mpfr m (SIGNIFICAND_BITS); mpfr_sqrt_ui (m, 2, MPFR_RNDN); real_from_mpfr (&value, m, NULL_TREE, MPFR_RNDN); - mpfr_clear (m); } return &value; } diff --git a/gcc/realmpfr.h b/gcc/realmpfr.h index 3824e62..a2b1bf6 100644 --- a/gcc/realmpfr.h +++ b/gcc/realmpfr.h @@ -32,6 +32,7 @@ public: ~auto_mpfr () { mpfr_clear (m_mpfr); } operator mpfr_t& () { return m_mpfr; } + mpfr_ptr operator-> () { return m_mpfr; } auto_mpfr (const auto_mpfr &) = delete; auto_mpfr &operator= (const auto_mpfr &) = delete; diff --git a/gcc/system.h b/gcc/system.h index 65d514d..c67bc42 100644 --- a/gcc/system.h +++ b/gcc/system.h @@ -709,6 +709,7 @@ public: ~auto_mpz () { mpz_clear (m_mpz); } operator mpz_t& () { return m_mpz; } + mpz_ptr operator-> () { return m_mpz; } auto_mpz (const auto_mpz &) = delete; auto_mpz &operator= (const auto_mpz &) = delete; diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc index dcfba2f..adf9937 100644 --- a/gcc/tree-ssa-loop-niter.cc +++ b/gcc/tree-ssa-loop-niter.cc @@ -159,17 +159,13 @@ refine_value_range_using_guard (tree type, tree var, if (operand_equal_p (var, c0, 0)) { - mpz_t valc1; - /* Case of comparing VAR with its below/up bounds. 
*/ - mpz_init (valc1); + auto_mpz valc1; wi::to_mpz (wi::to_wide (c1), valc1, TYPE_SIGN (type)); if (mpz_cmp (valc1, below) == 0) cmp = GT_EXPR; if (mpz_cmp (valc1, up) == 0) cmp = LT_EXPR; - - mpz_clear (valc1); } else { @@ -506,7 +502,6 @@ bound_difference_of_offsetted_base (tree type, mpz_t x, mpz_t y, { int rel = mpz_cmp (x, y); bool may_wrap = !nowrap_type_p (type); - mpz_t m; /* If X == Y, then the expressions are always equal. If X > Y, there are the following possibilities: @@ -529,7 +524,7 @@ bound_difference_of_offsetted_base (tree type, mpz_t x, mpz_t y, return; } - mpz_init (m); + auto_mpz m; wi::to_mpz (wi::minus_one (TYPE_PRECISION (type)), m, UNSIGNED); mpz_add_ui (m, m, 1); mpz_sub (bnds->up, x, y); @@ -542,8 +537,6 @@ bound_difference_of_offsetted_base (tree type, mpz_t x, mpz_t y, else mpz_add (bnds->up, bnds->up, m); } - - mpz_clear (m); } /* From condition C0 CMP C1 derives information regarding the @@ -975,7 +968,6 @@ number_of_iterations_ne (class loop *loop, tree type, affine_iv *iv, { tree niter_type = unsigned_type_for (type); tree s, c, d, bits, assumption, tmp, bound; - mpz_t max; niter->control = *iv; niter->bound = final; @@ -1003,12 +995,11 @@ number_of_iterations_ne (class loop *loop, tree type, affine_iv *iv, fold_convert (niter_type, iv->base)); } - mpz_init (max); + auto_mpz max; number_of_iterations_ne_max (max, iv->no_overflow, c, s, bnds, exit_must_be_taken); niter->max = widest_int::from (wi::from_mpz (niter_type, max, false), TYPE_SIGN (niter_type)); - mpz_clear (max); /* Compute no-overflow information for the control iv. This can be proven when below two conditions are satisfied: @@ -1155,9 +1146,8 @@ number_of_iterations_lt_to_ne (tree type, affine_iv *iv0, affine_iv *iv1, tree niter_type = TREE_TYPE (step); tree mod = fold_build2 (FLOOR_MOD_EXPR, niter_type, *delta, step); tree tmod; - mpz_t mmod; tree assumption = boolean_true_node, bound, noloop; - bool ret = false, fv_comp_no_overflow; + bool fv_comp_no_overflow; tree type1 = type; if (POINTER_TYPE_P (type)) type1 = sizetype; @@ -1168,7 +1158,7 @@ number_of_iterations_lt_to_ne (tree type, affine_iv *iv0, affine_iv *iv1, mod = fold_build2 (MINUS_EXPR, niter_type, step, mod); tmod = fold_convert (type1, mod); - mpz_init (mmod); + auto_mpz mmod; wi::to_mpz (wi::to_wide (mod), mmod, UNSIGNED); mpz_neg (mmod, mmod); @@ -1200,7 +1190,7 @@ number_of_iterations_lt_to_ne (tree type, affine_iv *iv0, affine_iv *iv1, assumption = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, bound); if (integer_zerop (assumption)) - goto end; + return false; } if (mpz_cmp (mmod, bnds->below) < 0) noloop = boolean_false_node; @@ -1226,7 +1216,7 @@ number_of_iterations_lt_to_ne (tree type, affine_iv *iv0, affine_iv *iv1, assumption = fold_build2 (GE_EXPR, boolean_type_node, iv0->base, bound); if (integer_zerop (assumption)) - goto end; + return false; } if (mpz_cmp (mmod, bnds->below) < 0) noloop = boolean_false_node; @@ -1254,10 +1244,7 @@ number_of_iterations_lt_to_ne (tree type, affine_iv *iv0, affine_iv *iv1, bounds_add (bnds, wi::to_widest (mod), type); *delta = fold_build2 (PLUS_EXPR, niter_type, *delta, mod); - ret = true; -end: - mpz_clear (mmod); - return ret; + return true; } /* Add assertions to NITER that ensure that the control variable of the loop diff --git a/gcc/ubsan.cc b/gcc/ubsan.cc index 08c1127..e6ffea3 100644 --- a/gcc/ubsan.cc +++ b/gcc/ubsan.cc @@ -49,6 +49,7 @@ along with GCC; see the file COPYING3. 
If not see #include "tree-cfg.h" #include "gimple-fold.h" #include "varasm.h" +#include "realmpfr.h" /* Map from a tree to a VAR_DECL tree. */ @@ -1877,16 +1878,15 @@ ubsan_instrument_float_cast (location_t loc, tree type, tree expr) /* For _Decimal128 up to 34 decimal digits, - sign, dot, e, exponent. */ char buf[64]; - mpfr_t m; int p = REAL_MODE_FORMAT (mode)->p; REAL_VALUE_TYPE maxval, minval; /* Use mpfr_snprintf rounding to compute the smallest representable decimal number greater or equal than 1 << (prec - !uns_p). */ - mpfr_init2 (m, prec + 2); + auto_mpfr m (prec + 2); mpfr_set_ui_2exp (m, 1, prec - !uns_p, MPFR_RNDN); - mpfr_snprintf (buf, sizeof buf, "%.*RUe", p - 1, m); + mpfr_snprintf (buf, sizeof buf, "%.*RUe", p - 1, (mpfr_srcptr) m); decimal_real_from_string (&maxval, buf); max = build_real (expr_type, maxval); @@ -1900,11 +1900,10 @@ ubsan_instrument_float_cast (location_t loc, tree type, tree expr) (-1 << (prec - 1)) - 1. */ mpfr_set_si_2exp (m, -1, prec - 1, MPFR_RNDN); mpfr_sub_ui (m, m, 1, MPFR_RNDN); - mpfr_snprintf (buf, sizeof buf, "%.*RDe", p - 1, m); + mpfr_snprintf (buf, sizeof buf, "%.*RDe", p - 1, (mpfr_srcptr) m); decimal_real_from_string (&minval, buf); min = build_real (expr_type, minval); } - mpfr_clear (m); } else return NULL_TREE; -- cgit v1.1 From 4c9f8cd69435f4f50d8874a1603aafa5c2e7b8ce Mon Sep 17 00:00:00 2001 From: Aldy Hernandez Date: Mon, 6 Mar 2023 13:53:15 +0100 Subject: Fix pointer sharing in Value_Range constructor. gcc/ChangeLog: * value-range.h (Value_Range::Value_Range): Avoid pointer sharing. --- gcc/value-range.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/value-range.h b/gcc/value-range.h index 0eeea79..33ef3b5 100644 --- a/gcc/value-range.h +++ b/gcc/value-range.h @@ -583,7 +583,7 @@ Value_Range::Value_Range (tree min, tree max, value_range_kind kind) inline Value_Range::Value_Range (const Value_Range &r) { - m_vrange = r.m_vrange; + *this = *r.m_vrange; } // Initialize object so it is possible to store temporaries of TYPE -- cgit v1.1 From 258aecd7021b15c23a067323c00f1450dca0a506 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 19 Apr 2023 15:38:31 +0200 Subject: Introduce VIRTUAL_REGISTER_P and VIRTUAL_REGISTER_NUM_P predicates These two predicates are similar to existing HARD_REGISTER_P and HARD_REGISTER_NUM_P predicates and return 1 if the given register corresponds to a virtual register. gcc/ChangeLog: * rtl.h (VIRTUAL_REGISTER_P): New predicate. (VIRTUAL_REGISTER_NUM_P): Ditto. (REGNO_PTR_FRAME_P): Use VIRTUAL_REGISTER_NUM_P predicate. * expr.cc (force_operand): Use VIRTUAL_REGISTER_P predicate. * function.cc (instantiate_decl_rtl): Ditto. * rtlanal.cc (rtx_addr_can_trap_p_1): Ditto. (nonzero_address_p): Ditto. (refers_to_regno_p): Use VIRTUAL_REGISTER_NUM_P predicate. 
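(Illustrative sketch, not part of the patch; in_range is a hypothetical model of the macro.) The new predicates are plain range checks over the register number. If memory serves, GCC's IN_RANGE folds the two comparisons into a single unsigned subtract-and-compare, which is what the sketch below demonstrates:

  #include <assert.h>

  /* Single-compare range test: subtracting LOWER makes any value
     below LOWER wrap around to a huge unsigned number, so one
     unsigned <= covers both bounds.  */
  static inline int
  in_range (unsigned long value, unsigned long lower, unsigned long upper)
  {
    return value - lower <= upper - lower;
  }

  int
  main (void)
  {
    assert (in_range (5, 3, 7));
    assert (!in_range (2, 3, 7));   /* 2 - 3 wraps, so the test fails */
    assert (!in_range (8, 3, 7));
    return 0;
  }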
--- gcc/expr.cc | 3 +-- gcc/function.cc | 3 +-- gcc/rtl.h | 12 +++++++++--- gcc/rtlanal.cc | 8 +++----- 4 files changed, 14 insertions(+), 12 deletions(-) (limited to 'gcc') diff --git a/gcc/expr.cc b/gcc/expr.cc index f8f5cc5..758dda9 100644 --- a/gcc/expr.cc +++ b/gcc/expr.cc @@ -8178,8 +8178,7 @@ force_operand (rtx value, rtx target) if (code == PLUS && CONST_INT_P (op2) && GET_CODE (XEXP (value, 0)) == PLUS && REG_P (XEXP (XEXP (value, 0), 0)) - && REGNO (XEXP (XEXP (value, 0), 0)) >= FIRST_VIRTUAL_REGISTER - && REGNO (XEXP (XEXP (value, 0), 0)) <= LAST_VIRTUAL_REGISTER) + && VIRTUAL_REGISTER_P (XEXP (XEXP (value, 0), 0))) { rtx temp = expand_simple_binop (GET_MODE (value), code, XEXP (XEXP (value, 0), 0), op2, diff --git a/gcc/function.cc b/gcc/function.cc index edf0b2e..f0ae641 100644 --- a/gcc/function.cc +++ b/gcc/function.cc @@ -1838,8 +1838,7 @@ instantiate_decl_rtl (rtx x) addr = XEXP (x, 0); if (CONSTANT_P (addr) || (REG_P (addr) - && (REGNO (addr) < FIRST_VIRTUAL_REGISTER - || REGNO (addr) > LAST_VIRTUAL_REGISTER))) + && !VIRTUAL_REGISTER_P (addr))) return; instantiate_virtual_regs_in_rtx (&XEXP (x, 0)); diff --git a/gcc/rtl.h b/gcc/rtl.h index 60852ae..f634cab 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -1967,11 +1967,18 @@ set_regno_raw (rtx x, unsigned int regno, unsigned int nregs) (RTL_FLAG_CHECK1 ("MEM_POINTER", (RTX), MEM)->frame_related) /* 1 if the given register REG corresponds to a hard register. */ -#define HARD_REGISTER_P(REG) (HARD_REGISTER_NUM_P (REGNO (REG))) +#define HARD_REGISTER_P(REG) HARD_REGISTER_NUM_P (REGNO (REG)) /* 1 if the given register number REG_NO corresponds to a hard register. */ #define HARD_REGISTER_NUM_P(REG_NO) ((REG_NO) < FIRST_PSEUDO_REGISTER) +/* 1 if the given register REG corresponds to a virtual register. */ +#define VIRTUAL_REGISTER_P(REG) VIRTUAL_REGISTER_NUM_P (REGNO (REG)) + +/* 1 if the given register number REG_NO corresponds to a virtual register. */ +#define VIRTUAL_REGISTER_NUM_P(REG_NO) \ + IN_RANGE (REG_NO, FIRST_VIRTUAL_REGISTER, LAST_VIRTUAL_REGISTER) + /* For a CONST_INT rtx, INTVAL extracts the integer. */ #define INTVAL(RTX) XCWINT (RTX, 0, CONST_INT) #define UINTVAL(RTX) ((unsigned HOST_WIDE_INT) INTVAL (RTX)) @@ -4078,8 +4085,7 @@ PUT_MODE (rtx x, machine_mode mode) || (REGNUM) == FRAME_POINTER_REGNUM \ || (REGNUM) == HARD_FRAME_POINTER_REGNUM \ || (REGNUM) == ARG_POINTER_REGNUM \ - || ((REGNUM) >= FIRST_VIRTUAL_REGISTER \ - && (REGNUM) <= LAST_VIRTUAL_POINTER_REGISTER)) + || VIRTUAL_REGISTER_NUM_P (REGNUM)) /* REGNUM never really appearing in the INSN stream. */ #define INVALID_REGNUM (~(unsigned int) 0) diff --git a/gcc/rtlanal.cc b/gcc/rtlanal.cc index e69d2e8..c96a88c 100644 --- a/gcc/rtlanal.cc +++ b/gcc/rtlanal.cc @@ -643,8 +643,7 @@ rtx_addr_can_trap_p_1 (const_rtx x, poly_int64 offset, poly_int64 size, return 1; } /* All of the virtual frame registers are stack references. */ - if (REGNO (x) >= FIRST_VIRTUAL_REGISTER - && REGNO (x) <= LAST_VIRTUAL_REGISTER) + if (VIRTUAL_REGISTER_P (x)) return 0; return 1; @@ -733,8 +732,7 @@ nonzero_address_p (const_rtx x) || (x == arg_pointer_rtx && fixed_regs[ARG_POINTER_REGNUM])) return true; /* All of the virtual frame registers are stack references. 
*/ - if (REGNO (x) >= FIRST_VIRTUAL_REGISTER - && REGNO (x) <= LAST_VIRTUAL_REGISTER) + if (VIRTUAL_REGISTER_P (x)) return true; return false; @@ -1769,7 +1767,7 @@ refers_to_regno_p (unsigned int regno, unsigned int endregno, const_rtx x, || (FRAME_POINTER_REGNUM != ARG_POINTER_REGNUM && x_regno == ARG_POINTER_REGNUM) || x_regno == FRAME_POINTER_REGNUM) - && regno >= FIRST_VIRTUAL_REGISTER && regno <= LAST_VIRTUAL_REGISTER) + && VIRTUAL_REGISTER_NUM_P (regno)) return true; return endregno > x_regno && regno < END_REGNO (x); -- cgit v1.1 From 1da16c1141ebf3948de69eaeb1148778bf0790a9 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Wed, 22 Mar 2023 14:13:02 +0100 Subject: Remove odd code from gimple_can_merge_blocks_p The following removes a special case to not merge a block with only a non-local label. We have a restriction of non-local labels to be the first statement (and label) in a block, but otherwise nothing, if the last stmt of A is a non-local label then it will be still the first statement of the combined A + B. In particular we'd happily merge when there's a stmt after that label. The check originates from the tree-ssa merge. Bootstrapped and tested on x86_64-unknown-linux-gnu with all languages. * tree-cfg.cc (gimple_can_merge_blocks_p): Remove condition rejecting the merge when A contains only a non-local label. --- gcc/tree-cfg.cc | 6 ------ 1 file changed, 6 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc index a9fcc7f..ae53e15 100644 --- a/gcc/tree-cfg.cc +++ b/gcc/tree-cfg.cc @@ -1960,12 +1960,6 @@ gimple_can_merge_blocks_p (basic_block a, basic_block b) if (stmt && stmt_ends_bb_p (stmt)) return false; - /* Do not allow a block with only a non-local label to be merged. */ - if (stmt) - if (glabel *label_stmt = dyn_cast (stmt)) - if (DECL_NONLOCAL (gimple_label_label (label_stmt))) - return false; - /* Examine the labels at the beginning of B. */ for (gimple_stmt_iterator gsi = gsi_start_bb (b); !gsi_end_p (gsi); gsi_next (&gsi)) -- cgit v1.1 From 2cef0d0964d74dd61d3f4df855d521a0ff219a03 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Tue, 14 Mar 2023 14:38:01 +0100 Subject: Split out solve_add_graph_edge Split out a worker with all the special-casings when adding a graph edge during solving. * tree-ssa-structalias.cc (solve_add_graph_edge): New function, split out from ... (do_sd_constraint): ... here. --- gcc/tree-ssa-structalias.cc | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-ssa-structalias.cc b/gcc/tree-ssa-structalias.cc index 39c342f..84c625d 100644 --- a/gcc/tree-ssa-structalias.cc +++ b/gcc/tree-ssa-structalias.cc @@ -1640,6 +1640,29 @@ topo_visit (constraint_graph_t graph, struct topo_info *ti, ti->topo_order.safe_push (n); } +/* Add a copy edge FROM -> TO, optimizing special cases. Returns TRUE + if the solution of TO changed. */ + +static bool +solve_add_graph_edge (constraint_graph_t graph, unsigned int to, + unsigned int from) +{ + /* Adding edges from the special vars is pointless. + They don't have sets that can change. */ + if (get_varinfo (from)->is_special_var) + return bitmap_ior_into (get_varinfo (to)->solution, + get_varinfo (from)->solution); + /* Merging the solution from ESCAPED needlessly increases + the set. Use ESCAPED as representative instead. 
*/ + else if (from == find (escaped_id)) + return bitmap_set_bit (get_varinfo (to)->solution, escaped_id); + else if (get_varinfo (from)->may_have_pointers + && add_graph_edge (graph, to, from)) + return bitmap_ior_into (get_varinfo (to)->solution, + get_varinfo (from)->solution); + return false; +} + /* Process a constraint C that represents x = *(y + off), using DELTA as the starting solution for y. */ @@ -1700,17 +1723,7 @@ do_sd_constraint (constraint_graph_t graph, constraint_t c, { t = find (v->id); - /* Adding edges from the special vars is pointless. - They don't have sets that can change. */ - if (get_varinfo (t)->is_special_var) - flag |= bitmap_ior_into (sol, get_varinfo (t)->solution); - /* Merging the solution from ESCAPED needlessly increases - the set. Use ESCAPED as representative instead. */ - else if (t == find (escaped_id)) - flag |= bitmap_set_bit (sol, escaped_id); - else if (v->may_have_pointers - && add_graph_edge (graph, lhs, t)) - flag |= bitmap_ior_into (sol, get_varinfo (t)->solution); + flag |= solve_add_graph_edge (graph, lhs, t); if (v->is_full_var || v->next == 0) -- cgit v1.1 From 57aecdbc118d4c1900d651cb3ada2c9632a67ad0 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Tue, 14 Mar 2023 14:39:17 +0100 Subject: Use solve_add_graph_edge in more places The following makes sure to use solve_add_graph_edge and honoring special-cases, especially edges from escaped, in the remaining places the solver adds edges. * tree-ssa-structalias.cc (do_ds_constraint): Use solve_add_graph_edge. --- gcc/tree-ssa-structalias.cc | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-ssa-structalias.cc b/gcc/tree-ssa-structalias.cc index 84c625d..47808a3 100644 --- a/gcc/tree-ssa-structalias.cc +++ b/gcc/tree-ssa-structalias.cc @@ -1767,11 +1767,8 @@ do_ds_constraint (constraint_t c, bitmap delta, bitmap *expanded_delta) if (bitmap_bit_p (delta, anything_id)) { unsigned t = find (storedanything_id); - if (add_graph_edge (graph, t, rhs)) - { - if (bitmap_ior_into (get_varinfo (t)->solution, sol)) - bitmap_set_bit (changed, t); - } + if (solve_add_graph_edge (graph, t, rhs)) + bitmap_set_bit (changed, t); return; } @@ -1825,8 +1822,8 @@ do_ds_constraint (constraint_t c, bitmap delta, bitmap *expanded_delta) break; t = find (v->id); - if (add_graph_edge (graph, t, rhs) - && bitmap_ior_into (get_varinfo (t)->solution, sol)) + + if (solve_add_graph_edge (graph, t, rhs)) bitmap_set_bit (changed, t); } -- cgit v1.1 From a30078d5d974c3b2c784c522a84fd12df74767dd Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Wed, 19 Apr 2023 15:43:49 +0100 Subject: aarch64: Factorise widening add/sub high-half expanders with iterators I noticed these define_expand are almost identical modulo some string substitutions. This patch compresses them together with a couple of code iterators. No functional change intended. Bootstrapped and tested on aarch64-none-linux-gnu. gcc/ChangeLog: * config/aarch64/aarch64-simd.md (aarch64_saddw2): Delete. (aarch64_uaddw2): Delete. (aarch64_ssubw2): Delete. (aarch64_usubw2): Delete. (aarch64_w2): New define_expand. 
--- gcc/config/aarch64/aarch64-simd.md | 66 ++++++++++++-------------------------- 1 file changed, 20 insertions(+), 46 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index de2b738..1bed244 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -4713,52 +4713,26 @@ [(set_attr "type" "neon_add_widen")] ) -(define_expand "aarch64_saddw2" - [(match_operand: 0 "register_operand") - (match_operand: 1 "register_operand") - (match_operand:VQW 2 "register_operand")] - "TARGET_SIMD" -{ - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); - emit_insn (gen_aarch64_saddw2_internal (operands[0], operands[1], - operands[2], p)); - DONE; -}) - -(define_expand "aarch64_uaddw2" - [(match_operand: 0 "register_operand") - (match_operand: 1 "register_operand") - (match_operand:VQW 2 "register_operand")] - "TARGET_SIMD" -{ - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); - emit_insn (gen_aarch64_uaddw2_internal (operands[0], operands[1], - operands[2], p)); - DONE; -}) - - -(define_expand "aarch64_ssubw2" - [(match_operand: 0 "register_operand") - (match_operand: 1 "register_operand") - (match_operand:VQW 2 "register_operand")] - "TARGET_SIMD" -{ - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); - emit_insn (gen_aarch64_ssubw2_internal (operands[0], operands[1], - operands[2], p)); - DONE; -}) - -(define_expand "aarch64_usubw2" - [(match_operand: 0 "register_operand") - (match_operand: 1 "register_operand") - (match_operand:VQW 2 "register_operand")] - "TARGET_SIMD" -{ - rtx p = aarch64_simd_vect_par_cnst_half (mode, , true); - emit_insn (gen_aarch64_usubw2_internal (operands[0], operands[1], - operands[2], p)); +(define_expand "aarch64_w2" + [(set (match_operand: 0 "register_operand") + (ADDSUB: + (ANY_EXTEND: + (vec_select: + (match_operand:VQW 2 "register_operand") + (match_dup 3))) + (match_operand: 1 "register_operand")))] + "TARGET_SIMD" +{ + /* We still do an emit_insn rather than relying on the pattern above + because for the MINUS case the operands would need to be swapped + around. */ + operands[3] + = aarch64_simd_vect_par_cnst_half (mode, , true); + emit_insn (gen_aarch64_w2_internal( + operands[0], + operands[1], + operands[2], + operands[3])); DONE; }) -- cgit v1.1 From 0df6d181230f0480547ed08b4e4354db68242724 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 19 Apr 2023 17:00:52 +0200 Subject: i386: Emit compares between high registers and memory Following code: typedef __SIZE_TYPE__ size_t; struct S1s { char pad1; char val; short pad2; }; extern char ts[256]; _Bool foo (struct S1s a, size_t i) { return (ts[i] > a.val); } compiles with -O2 to: movl %edi, %eax movsbl %ah, %edi cmpb %dil, ts(%rsi) setg %al ret the compare could use high register %ah instead of %dil: movl %edi, %eax cmpb ts(%rsi), %ah setl %al ret Use any_extract code iterator to handle signed and unsigned extracts from high register and introduce peephole2 patterns to propagate norex memory opeerand into the compare insn. gcc/ChangeLog: PR target/78904 PR target/78952 * config/i386/i386.md (*cmpqi_ext_1_mem_rex64): New insn pattern. (*cmpqi_ext_1): Use nonimmediate_operand predicate for operand 0. Use any_extract code iterator. (*cmpqi_ext_1 peephole2): New peephole2 pattern. (*cmpqi_ext_2): Use any_extract code iterator. (*cmpqi_ext_3_mem_rex64): New insn pattern. (*cmpqi_ext_1): Use general_operand predicate for operand 1. Use any_extract code iterator. 
(*cmpqi_ext_3 peephole2): New peephole2 pattern. (*cmpqi_ext_4): Use any_extract code iterator. gcc/testsuite/ChangeLog: PR target/78904 PR target/78952 * gcc.target/i386/pr78952-3.c: New test. --- gcc/config/i386/i386.md | 94 +++++++++++++++++++++++++++---- gcc/testsuite/gcc.target/i386/pr78952-3.c | 40 +++++++++++++ 2 files changed, 124 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr78952-3.c (limited to 'gcc') diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 1419ea4..0f95d8e 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1005,6 +1005,9 @@ ;; Mapping of extend operators (define_code_iterator any_extend [sign_extend zero_extend]) +;; Mapping of extract operators +(define_code_iterator any_extract [sign_extract zero_extract]) + ;; Mapping of highpart multiply operators (define_code_iterator any_mul_highpart [smul_highpart umul_highpart]) @@ -1454,12 +1457,27 @@ [(set_attr "type" "icmp") (set_attr "mode" "")]) +(define_insn "*cmpqi_ext_1_mem_rex64" + [(set (reg FLAGS_REG) + (compare + (match_operand:QI 0 "norex_memory_operand" "Bn") + (subreg:QI + (any_extract:SWI248 + (match_operand 1 "int248_register_operand" "Q") + (const_int 8) + (const_int 8)) 0)))] + "TARGET_64BIT && reload_completed + && ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%h1, %0|%0, %h1}" + [(set_attr "type" "icmp") + (set_attr "mode" "QI")]) + (define_insn "*cmpqi_ext_1" [(set (reg FLAGS_REG) (compare - (match_operand:QI 0 "nonimm_x64constmem_operand" "QBc,m") + (match_operand:QI 0 "nonimmediate_operand" "QBc,m") (subreg:QI - (zero_extract:SWI248 + (any_extract:SWI248 (match_operand 1 "int248_register_operand" "Q,Q") (const_int 8) (const_int 8)) 0)))] @@ -1469,11 +1487,33 @@ (set_attr "type" "icmp") (set_attr "mode" "QI")]) +(define_peephole2 + [(set (match_operand:QI 0 "register_operand") + (match_operand:QI 1 "norex_memory_operand")) + (set (match_operand 3 "flags_reg_operand") + (match_operator 4 "compare_operator" + [(match_dup 0) + (subreg:QI + (any_extract:SWI248 + (match_operand 2 "int248_register_operand") + (const_int 8) + (const_int 8)) 0)]))] + "TARGET_64BIT + && peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 3) + (match_op_dup 4 + [(match_dup 1) + (subreg:QI + (any_extract:SWI248 + (match_dup 2) + (const_int 8) + (const_int 8)) 0)]))]) + (define_insn "*cmpqi_ext_2" [(set (reg FLAGS_REG) (compare (subreg:QI - (zero_extract:SWI248 + (any_extract:SWI248 (match_operand 0 "int248_register_operand" "Q") (const_int 8) (const_int 8)) 0) @@ -1494,31 +1534,68 @@ (const_int 8)) 0) (match_operand:QI 1 "const_int_operand")))]) +(define_insn "*cmpqi_ext_3_mem_rex64" + [(set (reg FLAGS_REG) + (compare + (subreg:QI + (any_extract:SWI248 + (match_operand 0 "int248_register_operand" "Q") + (const_int 8) + (const_int 8)) 0) + (match_operand:QI 1 "norex_memory_operand" "Bn")))] + "TARGET_64BIT && reload_completed + && ix86_match_ccmode (insn, CCmode)" + "cmp{b}\t{%1, %h0|%h0, %1}" + [(set_attr "type" "icmp") + (set_attr "mode" "QI")]) + (define_insn "*cmpqi_ext_3" [(set (reg FLAGS_REG) (compare (subreg:QI - (zero_extract:SWI248 + (any_extract:SWI248 (match_operand 0 "int248_register_operand" "Q,Q") (const_int 8) (const_int 8)) 0) - (match_operand:QI 1 "general_x64constmem_operand" "QnBc,m")))] + (match_operand:QI 1 "general_operand" "QnBc,m")))] "ix86_match_ccmode (insn, CCmode)" "cmp{b}\t{%1, %h0|%h0, %1}" [(set_attr "isa" "*,nox64") (set_attr "type" "icmp") (set_attr "mode" "QI")]) +(define_peephole2 + [(set (match_operand:QI 0 
"register_operand") + (match_operand:QI 1 "norex_memory_operand")) + (set (match_operand 3 "flags_reg_operand") + (match_operator 4 "compare_operator" + [(subreg:QI + (any_extract:SWI248 + (match_operand 2 "int248_register_operand") + (const_int 8) + (const_int 8)) 0) + (match_dup 0)]))] + "TARGET_64BIT + && peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 3) + (match_op_dup 4 + [(subreg:QI + (any_extract:SWI248 + (match_dup 2) + (const_int 8) + (const_int 8)) 0) + (match_dup 1)]))]) + (define_insn "*cmpqi_ext_4" [(set (reg FLAGS_REG) (compare (subreg:QI - (zero_extract:SWI248 + (any_extract:SWI248 (match_operand 0 "int248_register_operand" "Q") (const_int 8) (const_int 8)) 0) (subreg:QI - (zero_extract:SWI248 + (any_extract:SWI248 (match_operand 1 "int248_register_operand" "Q") (const_int 8) (const_int 8)) 0)))] @@ -3374,9 +3451,6 @@ operands[4] = gen_int_mode (tmp, mode); }) - -(define_code_iterator any_extract [sign_extract zero_extract]) - (define_insn "*insvqi_2" [(set (zero_extract:SWI248 (match_operand 0 "int248_register_operand" "+Q") diff --git a/gcc/testsuite/gcc.target/i386/pr78952-3.c b/gcc/testsuite/gcc.target/i386/pr78952-3.c new file mode 100644 index 0000000..ab00c55 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr78952-3.c @@ -0,0 +1,40 @@ +/* PR target/78952 */ +/* { dg-do compile } */ +/* { dg-require-effective-target nonpic } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-additional-options "-mregparm=1" { target ia32 } } */ +/* { dg-final { scan-assembler-not "mov\[sz]bl" } } */ + +typedef __SIZE_TYPE__ size_t; + +struct S1s +{ + char pad1; + char val; + short pad2; +}; + +extern char ts[256]; + +_Bool foo (struct S1s a, size_t i) +{ + return (ts[i] > a.val); +} + +/* { dg-final { scan-assembler "cmpb\[ \\t]+ts\[^\n]*%.h" } } */ + +struct S1u +{ + unsigned char pad1; + unsigned char val; + unsigned short pad2; +}; + +extern unsigned char tu[256]; + +_Bool bar (struct S1u a, size_t i) +{ + return (tu[i] > a.val); +} + +/* { dg-final { scan-assembler "cmpb\[ \\t]+tu\[^\n]*%.h" } } */ -- cgit v1.1 From 978e8f02e8edebaf21ce32768cce603f650459e4 Mon Sep 17 00:00:00 2001 From: Pan Li Date: Wed, 19 Apr 2023 17:18:20 +0800 Subject: RISC-V: Align IOR optimization MODE_CLASS condition to AND. This patch aligned the MODE_CLASS condition of the IOR to the AND. Then more MODE_CLASS besides SCALAR_INT can able to perform the optimization A | (~A) -> -1 similar to AND operator. For example as below sample code. vbool32_t test_shortcut_for_riscv_vmorn_case_5(vbool32_t v1, size_t vl) { return __riscv_vmorn_mm_b32(v1, v1, vl); } Before this patch: vsetvli a5,zero,e8,mf4,ta,ma vlm.v v24,0(a1) vsetvli zero,a2,e8,mf4,ta,ma vmorn.mm v24,v24,v24 vsetvli a5,zero,e8,mf4,ta,ma vsm.v v24,0(a0) ret After this patch: vsetvli zero,a2,e8,mf4,ta,ma vmset.m v24 vsetvli a5,zero,e8,mf4,ta,ma vsm.v v24,0(a0) ret Or in RTL's perspective, from: (ior:VNx2BI (reg/v:VNx2BI 137 [ v1 ]) (not:VNx2BI (reg/v:VNx2BI 137 [ v1 ]))) to: (const_vector:VNx2BI repeat [ (const_int 1 [0x1]) ]) The similar optimization like VMANDN has enabled already. There should be no difference execpt the operator when compare the VMORN and VMANDN for such kind of optimization. The patch aligns the IOR MODE_CLASS condition of the simplification to the AND operator. gcc/ChangeLog: * simplify-rtx.cc (simplify_context::simplify_binary_operation_1): Align IOR (A | (~A) -> -1) optimization MODE_CLASS condition to AND. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/mask_insn_shortcut.c: Update check condition. 
* gcc.target/riscv/simplify_ior_optimization.c: New test. Signed-off-by: Pan Li --- gcc/simplify-rtx.cc | 4 +- .../gcc.target/riscv/rvv/base/mask_insn_shortcut.c | 3 +- .../gcc.target/riscv/simplify_ior_optimization.c | 50 ++++++++++++++++++++ 3 files changed, 53 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/simplify_ior_optimization.c (limited to 'gcc') diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc index c57ff33..d4aeebc 100644 --- a/gcc/simplify-rtx.cc +++ b/gcc/simplify-rtx.cc @@ -3370,8 +3370,8 @@ simplify_context::simplify_binary_operation_1 (rtx_code code, if (((GET_CODE (op0) == NOT && rtx_equal_p (XEXP (op0, 0), op1)) || (GET_CODE (op1) == NOT && rtx_equal_p (XEXP (op1, 0), op0))) && ! side_effects_p (op0) - && SCALAR_INT_MODE_P (mode)) - return constm1_rtx; + && GET_MODE_CLASS (mode) != MODE_CC) + return CONSTM1_RTX (mode); /* (ior A C) is C if all bits of A that might be nonzero are on in C. */ if (CONST_INT_P (op1) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/mask_insn_shortcut.c b/gcc/testsuite/gcc.target/riscv/rvv/base/mask_insn_shortcut.c index 83cc4a1..57d0241 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/mask_insn_shortcut.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/mask_insn_shortcut.c @@ -233,9 +233,8 @@ vbool64_t test_shortcut_for_riscv_vmxnor_case_6(vbool64_t v1, size_t vl) { /* { dg-final { scan-assembler-not {vmxor\.mm\s+v[0-9]+,\s*v[0-9]+} } } */ /* { dg-final { scan-assembler-not {vmor\.mm\s+v[0-9]+,\s*v[0-9]+} } } */ /* { dg-final { scan-assembler-not {vmnor\.mm\s+v[0-9]+,\s*v[0-9]+} } } */ -/* { dg-final { scan-assembler-times {vmorn\.mm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 7 } } */ /* { dg-final { scan-assembler-not {vmxnor\.mm\s+v[0-9]+,\s*v[0-9]+} } } */ /* { dg-final { scan-assembler-times {vmclr\.m\s+v[0-9]+} 14 } } */ -/* { dg-final { scan-assembler-times {vmset\.m\s+v[0-9]+} 7 } } */ +/* { dg-final { scan-assembler-times {vmset\.m\s+v[0-9]+} 14 } } */ /* { dg-final { scan-assembler-times {vmmv\.m\s+v[0-9]+,\s*v[0-9]+} 14 } } */ /* { dg-final { scan-assembler-times {vmnot\.m\s+v[0-9]+,\s*v[0-9]+} 14 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/simplify_ior_optimization.c b/gcc/testsuite/gcc.target/riscv/simplify_ior_optimization.c new file mode 100644 index 0000000..ec3bd0b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/simplify_ior_optimization.c @@ -0,0 +1,50 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc -mabi=lp64 -O2" } */ + +#include <stdint.h> + +uint8_t test_simplify_ior_scalar_case_0 (uint8_t a) +{ + return a | ~a; +} + +uint16_t test_simplify_ior_scalar_case_1 (uint16_t a) +{ + return a | ~a; +} + +uint32_t test_simplify_ior_scalar_case_2 (uint32_t a) +{ + return a | ~a; +} + +uint64_t test_simplify_ior_scalar_case_3 (uint64_t a) +{ + return a | ~a; +} + +int8_t test_simplify_ior_scalar_case_4 (int8_t a) +{ + return a | ~a; +} + +int16_t test_simplify_ior_scalar_case_5 (int16_t a) +{ + return a | ~a; +} + +int32_t test_simplify_ior_scalar_case_6 (int32_t a) +{ + return a | ~a; +} + +int64_t test_simplify_ior_scalar_case_7 (int64_t a) +{ + return a | ~a; +} + +/* { dg-final { scan-assembler-times {li\s+a[0-9]+,\s*-1} 6 } } */ +/* { dg-final { scan-assembler-times {li\s+a[0-9]+,\s*255} 1 } } */ +/* { dg-final { scan-assembler-times {li\s+a[0-9]+,\s*65536} 1 } } */ +/* { dg-final { scan-assembler-not {or\s+a[0-9]+} } } */ +/* { dg-final { scan-assembler-not {not\s+a[0-9]+} } } */ -- cgit v1.1
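As an aside, a minimal illustration of the relaxed condition above (not part of the patch): CONSTM1_RTX (mode) builds an all-ones constant of whatever mode it is given, so once the guard is "GET_MODE_CLASS (mode) != MODE_CC" instead of SCALAR_INT_MODE_P, the A | (~A) fold covers vector and vector-mask modes as well as the scalar integers that constm1_rtx used to handle. The scalar case is easy to reproduce on any target; the file and function names below are made up for the example.

    /* ior_allones_sketch.c -- illustrative only.  At -O2 the RTL
       (ior:SI (reg a) (not:SI (reg a))) is simplified to (const_int -1),
       so no 'or' or 'not' instruction survives in the output.  */
    #include <stdint.h>

    uint32_t
    ior_allones (uint32_t a)
    {
      return a | ~a;
    }

On RISC-V this should compile to just li a0,-1 followed by ret, which is exactly the shape the new simplify_ior_optimization.c test scans for.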
From 9fdea28d6ace8c8e5ac64a55685d310ba8dfa3cf Mon Sep 17 00:00:00 2001 From: Juzhe-Zhong Date: Wed, 19 Apr 2023 20:33:46 +0800 Subject: RISC-V: Support 128 bit vector chunk

RISC-V provides different VLEN configurations through different ISA extensions such as `zve32x`, `zve64x` and `v`: zve32x only guarantees that the minimal VLEN is 32 bits, zve64x guarantees that the minimal VLEN is 64 bits, and v guarantees that the minimal VLEN is 128 bits.

Current status (without this patch):

Zve32x: the mode for one vector register is VNx1SImode, and VNx1DImode is an invalid mode
- one vector register could hold 1 + 1x SImode where x is 0~n, so it might hold just one SI

Zve64x: the mode for one vector register is VNx1DImode or VNx2SImode
- one vector register could hold 1 + 1x DImode where x is 0~n, so it might hold just one DI.
- one vector register could hold 2 + 2x SImode where x is 0~n, so it might hold just two SI.

However, the `v` extension guarantees that the minimal VLEN is 128 bits. We introduce another type/mode mapping for this configuration:

v: the mode for one vector register is VNx2DImode or VNx4SImode
- one vector register could hold 2 + 2x DImode where x is 0~n, so it will hold at least two DI
- one vector register could hold 4 + 4x SImode where x is 0~n, so it will hold at least four SI

This patch models the modes more precisely for RVV and helps the middle-end optimizations that assume the number of elements is a multiple of two.

gcc/ChangeLog: * config/riscv/riscv-modes.def (FLOAT_MODE): Add chunk 128 support. (VECTOR_BOOL_MODE): Ditto. (ADJUST_NUNITS): Ditto. (ADJUST_ALIGNMENT): Ditto. (ADJUST_BYTESIZE): Ditto. (ADJUST_PRECISION): Ditto. (RVV_MODES): Ditto. (VECTOR_MODE_WITH_PREFIX): Ditto. * config/riscv/riscv-v.cc (ENTRY): Ditto. (get_vlmul): Ditto. (get_ratio): Ditto. * config/riscv/riscv-vector-builtins.cc (DEF_RVV_TYPE): Ditto. * config/riscv/riscv-vector-builtins.def (DEF_RVV_TYPE): Ditto. (vbool64_t): Ditto. (vbool32_t): Ditto. (vbool16_t): Ditto. (vbool8_t): Ditto. (vbool4_t): Ditto. (vbool2_t): Ditto. (vbool1_t): Ditto. (vint8mf8_t): Ditto. (vuint8mf8_t): Ditto. (vint8mf4_t): Ditto. (vuint8mf4_t): Ditto. (vint8mf2_t): Ditto. (vuint8mf2_t): Ditto. (vint8m1_t): Ditto. (vuint8m1_t): Ditto. (vint8m2_t): Ditto. (vuint8m2_t): Ditto. (vint8m4_t): Ditto. (vuint8m4_t): Ditto. (vint8m8_t): Ditto. (vuint8m8_t): Ditto. (vint16mf4_t): Ditto. (vuint16mf4_t): Ditto. (vint16mf2_t): Ditto. (vuint16mf2_t): Ditto. (vint16m1_t): Ditto. (vuint16m1_t): Ditto. (vint16m2_t): Ditto. (vuint16m2_t): Ditto. (vint16m4_t): Ditto. (vuint16m4_t): Ditto. (vint16m8_t): Ditto. (vuint16m8_t): Ditto. (vint32mf2_t): Ditto. (vuint32mf2_t): Ditto. (vint32m1_t): Ditto. (vuint32m1_t): Ditto. (vint32m2_t): Ditto. (vuint32m2_t): Ditto. (vint32m4_t): Ditto. (vuint32m4_t): Ditto. (vint32m8_t): Ditto. (vuint32m8_t): Ditto. (vint64m1_t): Ditto. (vuint64m1_t): Ditto. (vint64m2_t): Ditto. (vuint64m2_t): Ditto. (vint64m4_t): Ditto. (vuint64m4_t): Ditto. (vint64m8_t): Ditto. (vuint64m8_t): Ditto. (vfloat32mf2_t): Ditto. (vfloat32m1_t): Ditto. (vfloat32m2_t): Ditto. (vfloat32m4_t): Ditto. (vfloat32m8_t): Ditto. (vfloat64m1_t): Ditto. (vfloat64m2_t): Ditto. (vfloat64m4_t): Ditto. (vfloat64m8_t): Ditto. * config/riscv/riscv-vector-switch.def (ENTRY): Ditto. * config/riscv/riscv.cc (riscv_legitimize_poly_move): Ditto. (riscv_convert_vector_bits): Ditto. * config/riscv/riscv.md: * config/riscv/vector-iterators.md: * config/riscv/vector.md (@pred_indexed_store): Ditto. (@pred_indexed_store): Ditto. (@pred_indexed_store): Ditto. (@pred_indexed_store): Ditto. (@pred_indexed_store): Ditto. (@pred_reduc_): Ditto. (@pred_widen_reduc_plus): Ditto.
(@pred_reduc_plus): Ditto. (@pred_widen_reduc_plus): Ditto. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/pr108185-4.c: Adapt testcase. * gcc.target/riscv/rvv/base/spill-1.c: Ditto. * gcc.target/riscv/rvv/base/spill-11.c: Ditto. * gcc.target/riscv/rvv/base/spill-2.c: Ditto. * gcc.target/riscv/rvv/base/spill-3.c: Ditto. * gcc.target/riscv/rvv/base/spill-5.c: Ditto. * gcc.target/riscv/rvv/base/spill-9.c: Ditto. --- gcc/config/riscv/riscv-modes.def | 89 ++-- gcc/config/riscv/riscv-v.cc | 17 +- gcc/config/riscv/riscv-vector-builtins.cc | 11 +- gcc/config/riscv/riscv-vector-builtins.def | 172 ++++--- gcc/config/riscv/riscv-vector-switch.def | 105 ++-- gcc/config/riscv/riscv.cc | 12 +- gcc/config/riscv/riscv.md | 14 +- gcc/config/riscv/vector-iterators.md | 571 ++++++++++++--------- gcc/config/riscv/vector.md | 233 +++++++-- .../gcc.target/riscv/rvv/base/pr108185-4.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/spill-11.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c | 2 +- gcc/testsuite/gcc.target/riscv/rvv/base/spill-9.c | 2 +- 16 files changed, 783 insertions(+), 455 deletions(-) (limited to 'gcc') diff --git a/gcc/config/riscv/riscv-modes.def b/gcc/config/riscv/riscv-modes.def index 4cf7cf8..b166960 100644 --- a/gcc/config/riscv/riscv-modes.def +++ b/gcc/config/riscv/riscv-modes.def @@ -27,15 +27,16 @@ FLOAT_MODE (TF, 16, ieee_quad_format); /* Encode the ratio of SEW/LMUL into the mask types. There are the following * mask types. */ -/* | Mode | MIN_VLEN = 32 | MIN_VLEN = 64 | - | | SEW/LMUL | SEW/LMUL | - | VNx1BI | 32 | 64 | - | VNx2BI | 16 | 32 | - | VNx4BI | 8 | 16 | - | VNx8BI | 4 | 8 | - | VNx16BI | 2 | 4 | - | VNx32BI | 1 | 2 | - | VNx64BI | N/A | 1 | */ +/* | Mode | MIN_VLEN = 32 | MIN_VLEN = 64 | MIN_VLEN = 128 | + | | SEW/LMUL | SEW/LMUL | SEW/LMUL | + | VNx1BI | 32 | 64 | 128 | + | VNx2BI | 16 | 32 | 64 | + | VNx4BI | 8 | 16 | 32 | + | VNx8BI | 4 | 8 | 16 | + | VNx16BI | 2 | 4 | 8 | + | VNx32BI | 1 | 2 | 4 | + | VNx64BI | N/A | 1 | 2 | + | VNx128BI | N/A | N/A | 1 | */ /* For RVV modes, each boolean value occupies 1-bit. 
4th argument is specify the minmial possible size of the vector mode, @@ -47,6 +48,7 @@ VECTOR_BOOL_MODE (VNx8BI, 8, BI, 1); VECTOR_BOOL_MODE (VNx16BI, 16, BI, 2); VECTOR_BOOL_MODE (VNx32BI, 32, BI, 4); VECTOR_BOOL_MODE (VNx64BI, 64, BI, 8); +VECTOR_BOOL_MODE (VNx128BI, 128, BI, 16); ADJUST_NUNITS (VNx1BI, riscv_v_adjust_nunits (VNx1BImode, 1)); ADJUST_NUNITS (VNx2BI, riscv_v_adjust_nunits (VNx2BImode, 2)); @@ -55,6 +57,7 @@ ADJUST_NUNITS (VNx8BI, riscv_v_adjust_nunits (VNx8BImode, 8)); ADJUST_NUNITS (VNx16BI, riscv_v_adjust_nunits (VNx16BImode, 16)); ADJUST_NUNITS (VNx32BI, riscv_v_adjust_nunits (VNx32BImode, 32)); ADJUST_NUNITS (VNx64BI, riscv_v_adjust_nunits (VNx64BImode, 64)); +ADJUST_NUNITS (VNx128BI, riscv_v_adjust_nunits (VNx128BImode, 128)); ADJUST_ALIGNMENT (VNx1BI, 1); ADJUST_ALIGNMENT (VNx2BI, 1); @@ -63,6 +66,7 @@ ADJUST_ALIGNMENT (VNx8BI, 1); ADJUST_ALIGNMENT (VNx16BI, 1); ADJUST_ALIGNMENT (VNx32BI, 1); ADJUST_ALIGNMENT (VNx64BI, 1); +ADJUST_ALIGNMENT (VNx128BI, 1); ADJUST_BYTESIZE (VNx1BI, riscv_v_adjust_bytesize (VNx1BImode, 1)); ADJUST_BYTESIZE (VNx2BI, riscv_v_adjust_bytesize (VNx2BImode, 1)); @@ -71,6 +75,7 @@ ADJUST_BYTESIZE (VNx8BI, riscv_v_adjust_bytesize (VNx8BImode, 1)); ADJUST_BYTESIZE (VNx16BI, riscv_v_adjust_bytesize (VNx16BImode, 2)); ADJUST_BYTESIZE (VNx32BI, riscv_v_adjust_bytesize (VNx32BImode, 4)); ADJUST_BYTESIZE (VNx64BI, riscv_v_adjust_bytesize (VNx64BImode, 8)); +ADJUST_BYTESIZE (VNx128BI, riscv_v_adjust_bytesize (VNx128BImode, 16)); ADJUST_PRECISION (VNx1BI, riscv_v_adjust_precision (VNx1BImode, 1)); ADJUST_PRECISION (VNx2BI, riscv_v_adjust_precision (VNx2BImode, 2)); @@ -79,38 +84,47 @@ ADJUST_PRECISION (VNx8BI, riscv_v_adjust_precision (VNx8BImode, 8)); ADJUST_PRECISION (VNx16BI, riscv_v_adjust_precision (VNx16BImode, 16)); ADJUST_PRECISION (VNx32BI, riscv_v_adjust_precision (VNx32BImode, 32)); ADJUST_PRECISION (VNx64BI, riscv_v_adjust_precision (VNx64BImode, 64)); +ADJUST_PRECISION (VNx128BI, riscv_v_adjust_precision (VNx128BImode, 128)); /* - | Mode | MIN_VLEN=32 | MIN_VLEN=32 | MIN_VLEN=64 | MIN_VLEN=64 | - | | LMUL | SEW/LMUL | LMUL | SEW/LMUL | - | VNx1QI | MF4 | 32 | MF8 | 64 | - | VNx2QI | MF2 | 16 | MF4 | 32 | - | VNx4QI | M1 | 8 | MF2 | 16 | - | VNx8QI | M2 | 4 | M1 | 8 | - | VNx16QI | M4 | 2 | M2 | 4 | - | VNx32QI | M8 | 1 | M4 | 2 | - | VNx64QI | N/A | N/A | M8 | 1 | - | VNx1(HI|HF) | MF2 | 32 | MF4 | 64 | - | VNx2(HI|HF) | M1 | 16 | MF2 | 32 | - | VNx4(HI|HF) | M2 | 8 | M1 | 16 | - | VNx8(HI|HF) | M4 | 4 | M2 | 8 | - | VNx16(HI|HF)| M8 | 2 | M4 | 4 | - | VNx32(HI|HF)| N/A | N/A | M8 | 2 | - | VNx1(SI|SF) | M1 | 32 | MF2 | 64 | - | VNx2(SI|SF) | M2 | 16 | M1 | 32 | - | VNx4(SI|SF) | M4 | 8 | M2 | 16 | - | VNx8(SI|SF) | M8 | 4 | M4 | 8 | - | VNx16(SI|SF)| N/A | N/A | M8 | 4 | - | VNx1(DI|DF) | N/A | N/A | M1 | 64 | - | VNx2(DI|DF) | N/A | N/A | M2 | 32 | - | VNx4(DI|DF) | N/A | N/A | M4 | 16 | - | VNx8(DI|DF) | N/A | N/A | M8 | 8 | + | Mode | MIN_VLEN=32 | MIN_VLEN=32 | MIN_VLEN=64 | MIN_VLEN=64 | MIN_VLEN=128 | MIN_VLEN=128 | + | | LMUL | SEW/LMUL | LMUL | SEW/LMUL | LMUL | SEW/LMUL | + | VNx1QI | MF4 | 32 | MF8 | 64 | N/A | N/A | + | VNx2QI | MF2 | 16 | MF4 | 32 | MF8 | 64 | + | VNx4QI | M1 | 8 | MF2 | 16 | MF4 | 32 | + | VNx8QI | M2 | 4 | M1 | 8 | MF2 | 16 | + | VNx16QI | M4 | 2 | M2 | 4 | M1 | 8 | + | VNx32QI | M8 | 1 | M4 | 2 | M2 | 4 | + | VNx64QI | N/A | N/A | M8 | 1 | M4 | 2 | + | VNx128QI | N/A | N/A | N/A | N/A | M8 | 1 | + | VNx1(HI|HF) | MF2 | 32 | MF4 | 64 | N/A | N/A | + | VNx2(HI|HF) | M1 | 16 | MF2 | 32 | MF4 | 64 | + | 
VNx4(HI|HF) | M2 | 8 | M1 | 16 | MF2 | 32 | + | VNx8(HI|HF) | M4 | 4 | M2 | 8 | M1 | 16 | + | VNx16(HI|HF)| M8 | 2 | M4 | 4 | M2 | 8 | + | VNx32(HI|HF)| N/A | N/A | M8 | 2 | M4 | 4 | + | VNx64(HI|HF)| N/A | N/A | N/A | N/A | M8 | 2 | + | VNx1(SI|SF) | M1 | 32 | MF2 | 64 | MF2 | 64 | + | VNx2(SI|SF) | M2 | 16 | M1 | 32 | M1 | 32 | + | VNx4(SI|SF) | M4 | 8 | M2 | 16 | M2 | 16 | + | VNx8(SI|SF) | M8 | 4 | M4 | 8 | M4 | 8 | + | VNx16(SI|SF)| N/A | N/A | M8 | 4 | M8 | 4 | + | VNx1(DI|DF) | N/A | N/A | M1 | 64 | N/A | N/A | + | VNx2(DI|DF) | N/A | N/A | M2 | 32 | M1 | 64 | + | VNx4(DI|DF) | N/A | N/A | M4 | 16 | M2 | 32 | + | VNx8(DI|DF) | N/A | N/A | M8 | 8 | M4 | 16 | + | VNx16(DI|DF)| N/A | N/A | N/A | N/A | M8 | 8 | */ /* Define RVV modes whose sizes are multiples of 64-bit chunks. */ #define RVV_MODES(NVECS, VB, VH, VS, VD) \ - VECTOR_MODES_WITH_PREFIX (VNx, INT, 8 * NVECS, 0); \ - VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 8 * NVECS, 0); \ + VECTOR_MODE_WITH_PREFIX (VNx, INT, QI, 8 * NVECS, 0); \ + VECTOR_MODE_WITH_PREFIX (VNx, INT, HI, 4 * NVECS, 0); \ + VECTOR_MODE_WITH_PREFIX (VNx, FLOAT, HF, 4 * NVECS, 0); \ + VECTOR_MODE_WITH_PREFIX (VNx, INT, SI, 2 * NVECS, 0); \ + VECTOR_MODE_WITH_PREFIX (VNx, FLOAT, SF, 2 * NVECS, 0); \ + VECTOR_MODE_WITH_PREFIX (VNx, INT, DI, NVECS, 0); \ + VECTOR_MODE_WITH_PREFIX (VNx, FLOAT, DF, NVECS, 0); \ \ ADJUST_NUNITS (VB##QI, riscv_v_adjust_nunits (VB##QI##mode, NVECS * 8)); \ ADJUST_NUNITS (VH##HI, riscv_v_adjust_nunits (VH##HI##mode, NVECS * 4)); \ @@ -128,14 +142,11 @@ ADJUST_PRECISION (VNx64BI, riscv_v_adjust_precision (VNx64BImode, 64)); ADJUST_ALIGNMENT (VS##SF, 4); \ ADJUST_ALIGNMENT (VD##DF, 8); -/* 'VECTOR_MODES_WITH_PREFIX' does not allow ncomponents < 2. - So we use 'VECTOR_MODE_WITH_PREFIX' to define VNx1DImode and VNx1DFmode. 
*/ -VECTOR_MODE_WITH_PREFIX (VNx, INT, DI, 1, 0); -VECTOR_MODE_WITH_PREFIX (VNx, FLOAT, DF, 1, 0); RVV_MODES (1, VNx8, VNx4, VNx2, VNx1) RVV_MODES (2, VNx16, VNx8, VNx4, VNx2) RVV_MODES (4, VNx32, VNx16, VNx8, VNx4) RVV_MODES (8, VNx64, VNx32, VNx16, VNx8) +RVV_MODES (16, VNx128, VNx64, VNx32, VNx16) VECTOR_MODES_WITH_PREFIX (VNx, INT, 4, 0); VECTOR_MODES_WITH_PREFIX (VNx, FLOAT, 4, 0); diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 392f5d0..99c414c 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -340,14 +340,19 @@ struct mode_vtype_group uint8_t ratio_for_min_vlen32[NUM_MACHINE_MODES]; enum vlmul_type vlmul_for_min_vlen64[NUM_MACHINE_MODES]; uint8_t ratio_for_min_vlen64[NUM_MACHINE_MODES]; + enum vlmul_type vlmul_for_for_vlen128[NUM_MACHINE_MODES]; + uint8_t ratio_for_for_vlen128[NUM_MACHINE_MODES]; mode_vtype_group () { #define ENTRY(MODE, REQUIREMENT, VLMUL_FOR_MIN_VLEN32, RATIO_FOR_MIN_VLEN32, \ - VLMUL_FOR_MIN_VLEN64, RATIO_FOR_MIN_VLEN64) \ + VLMUL_FOR_MIN_VLEN64, RATIO_FOR_MIN_VLEN64, \ + VLMUL_FOR_FOR_VLEN128, RATIO_FOR_FOR_VLEN128) \ vlmul_for_min_vlen32[MODE##mode] = VLMUL_FOR_MIN_VLEN32; \ ratio_for_min_vlen32[MODE##mode] = RATIO_FOR_MIN_VLEN32; \ vlmul_for_min_vlen64[MODE##mode] = VLMUL_FOR_MIN_VLEN64; \ - ratio_for_min_vlen64[MODE##mode] = RATIO_FOR_MIN_VLEN64; + ratio_for_min_vlen64[MODE##mode] = RATIO_FOR_MIN_VLEN64; \ + vlmul_for_for_vlen128[MODE##mode] = VLMUL_FOR_FOR_VLEN128; \ + ratio_for_for_vlen128[MODE##mode] = RATIO_FOR_FOR_VLEN128; #include "riscv-vector-switch.def" } }; @@ -358,7 +363,9 @@ static mode_vtype_group mode_vtype_infos; enum vlmul_type get_vlmul (machine_mode mode) { - if (TARGET_MIN_VLEN == 32) + if (TARGET_MIN_VLEN >= 128) + return mode_vtype_infos.vlmul_for_for_vlen128[mode]; + else if (TARGET_MIN_VLEN == 32) return mode_vtype_infos.vlmul_for_min_vlen32[mode]; else return mode_vtype_infos.vlmul_for_min_vlen64[mode]; @@ -368,7 +375,9 @@ get_vlmul (machine_mode mode) unsigned int get_ratio (machine_mode mode) { - if (TARGET_MIN_VLEN == 32) + if (TARGET_MIN_VLEN >= 128) + return mode_vtype_infos.ratio_for_for_vlen128[mode]; + else if (TARGET_MIN_VLEN == 32) return mode_vtype_infos.ratio_for_min_vlen32[mode]; else return mode_vtype_infos.ratio_for_min_vlen64[mode]; diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc index 01cea23..434bd8e 100644 --- a/gcc/config/riscv/riscv-vector-builtins.cc +++ b/gcc/config/riscv/riscv-vector-builtins.cc @@ -107,7 +107,8 @@ const char *const operand_suffixes[NUM_OP_TYPES] = { /* Static information about type suffix for each RVV type. */ const rvv_builtin_suffixes type_suffixes[NUM_VECTOR_TYPES + 1] = { -#define DEF_RVV_TYPE(NAME, NCHARS, ABI_NAME, SCALAR_TYPE, VECTOR_MODE, \ +#define DEF_RVV_TYPE(NAME, NCHARS, ABI_NAME, SCALAR_TYPE, \ + VECTOR_MODE_MIN_VLEN_128, VECTOR_MODE_MIN_VLEN_64, \ VECTOR_MODE_MIN_VLEN_32, VECTOR_SUFFIX, SCALAR_SUFFIX, \ VSETVL_SUFFIX) \ {#VECTOR_SUFFIX, #SCALAR_SUFFIX, #VSETVL_SUFFIX}, @@ -2350,10 +2351,12 @@ register_builtin_types () tree int64_type_node = get_typenode_from_name (INT64_TYPE); machine_mode mode; -#define DEF_RVV_TYPE(NAME, NCHARS, ABI_NAME, SCALAR_TYPE, VECTOR_MODE, \ +#define DEF_RVV_TYPE(NAME, NCHARS, ABI_NAME, SCALAR_TYPE, \ + VECTOR_MODE_MIN_VLEN_128, VECTOR_MODE_MIN_VLEN_64, \ VECTOR_MODE_MIN_VLEN_32, ARGS...) \ - mode = TARGET_MIN_VLEN > 32 ? VECTOR_MODE##mode \ - : VECTOR_MODE_MIN_VLEN_32##mode; \ + mode = TARGET_MIN_VLEN >= 128 ? 
VECTOR_MODE_MIN_VLEN_128##mode \ + : TARGET_MIN_VLEN >= 64 ? VECTOR_MODE_MIN_VLEN_64##mode \ + : VECTOR_MODE_MIN_VLEN_32##mode; \ register_builtin_type (VECTOR_TYPE_##NAME, SCALAR_TYPE##_type_node, mode); #include "riscv-vector-builtins.def" } diff --git a/gcc/config/riscv/riscv-vector-builtins.def b/gcc/config/riscv/riscv-vector-builtins.def index 563ad35..64c09b5 100644 --- a/gcc/config/riscv/riscv-vector-builtins.def +++ b/gcc/config/riscv/riscv-vector-builtins.def @@ -42,7 +42,8 @@ along with GCC; see the file COPYING3. If not see */ #ifndef DEF_RVV_TYPE -#define DEF_RVV_TYPE(NAME, NCHARS, ABI_NAME, SCALAR_TYPE, VECTOR_MODE, \ +#define DEF_RVV_TYPE(NAME, NCHARS, ABI_NAME, SCALAR_TYPE, \ + VECTOR_MODE_MIN_VLEN_128, VECTOR_MODE_MIN_VLEN_64, \ VECTOR_MODE_MIN_VLEN_32, VECTOR_SUFFIX, SCALAR_SUFFIX, \ VSETVL_SUFFIX) #endif @@ -79,212 +80,247 @@ along with GCC; see the file COPYING3. If not see #endif /* SEW/LMUL = 64: - Only enable when TARGET_MIN_VLEN > 32 and machine mode = VNx1BImode. */ -DEF_RVV_TYPE (vbool64_t, 14, __rvv_bool64_t, boolean, VNx1BI, VOID, _b64, , ) + Only enable when TARGET_MIN_VLEN > 32. + Machine mode = VNx1BImode when TARGET_MIN_VLEN < 128. + Machine mode = VNx2BImode when TARGET_MIN_VLEN >= 128. */ +DEF_RVV_TYPE (vbool64_t, 14, __rvv_bool64_t, boolean, VNx2BI, VNx1BI, VOID, _b64, , ) /* SEW/LMUL = 32: Machine mode = VNx2BImode when TARGET_MIN_VLEN > 32. Machine mode = VNx1BImode when TARGET_MIN_VLEN = 32. */ -DEF_RVV_TYPE (vbool32_t, 14, __rvv_bool32_t, boolean, VNx2BI, VNx1BI, _b32, , ) +DEF_RVV_TYPE (vbool32_t, 14, __rvv_bool32_t, boolean, VNx4BI, VNx2BI, VNx1BI, _b32, , ) /* SEW/LMUL = 16: + Machine mode = VNx8BImode when TARGET_MIN_VLEN >= 128. Machine mode = VNx2BImode when TARGET_MIN_VLEN = 32. Machine mode = VNx4BImode when TARGET_MIN_VLEN > 32. */ -DEF_RVV_TYPE (vbool16_t, 14, __rvv_bool16_t, boolean, VNx4BI, VNx2BI, _b16, , ) +DEF_RVV_TYPE (vbool16_t, 14, __rvv_bool16_t, boolean, VNx8BI, VNx4BI, VNx2BI, _b16, , ) /* SEW/LMUL = 8: + Machine mode = VNx16BImode when TARGET_MIN_VLEN >= 128. Machine mode = VNx8BImode when TARGET_MIN_VLEN > 32. Machine mode = VNx4BImode when TARGET_MIN_VLEN = 32. */ -DEF_RVV_TYPE (vbool8_t, 13, __rvv_bool8_t, boolean, VNx8BI, VNx4BI, _b8, , ) +DEF_RVV_TYPE (vbool8_t, 13, __rvv_bool8_t, boolean, VNx16BI, VNx8BI, VNx4BI, _b8, , ) /* SEW/LMUL = 4: + Machine mode = VNx32BImode when TARGET_MIN_VLEN >= 128. Machine mode = VNx16BImode when TARGET_MIN_VLEN > 32. Machine mode = VNx8BImode when TARGET_MIN_VLEN = 32. */ -DEF_RVV_TYPE (vbool4_t, 13, __rvv_bool4_t, boolean, VNx16BI, VNx8BI, _b4, , ) +DEF_RVV_TYPE (vbool4_t, 13, __rvv_bool4_t, boolean, VNx32BI, VNx16BI, VNx8BI, _b4, , ) /* SEW/LMUL = 2: + Machine mode = VNx64BImode when TARGET_MIN_VLEN >= 128. Machine mode = VNx32BImode when TARGET_MIN_VLEN > 32. Machine mode = VNx16BImode when TARGET_MIN_VLEN = 32. */ -DEF_RVV_TYPE (vbool2_t, 13, __rvv_bool2_t, boolean, VNx32BI, VNx16BI, _b2, , ) +DEF_RVV_TYPE (vbool2_t, 13, __rvv_bool2_t, boolean, VNx64BI, VNx32BI, VNx16BI, _b2, , ) /* SEW/LMUL = 1: + Machine mode = VNx128BImode when TARGET_MIN_VLEN >= 128. Machine mode = VNx64BImode when TARGET_MIN_VLEN > 32. Machine mode = VNx32BImode when TARGET_MIN_VLEN = 32. */ -DEF_RVV_TYPE (vbool1_t, 13, __rvv_bool1_t, boolean, VNx64BI, VNx32BI, _b1, , ) +DEF_RVV_TYPE (vbool1_t, 13, __rvv_bool1_t, boolean, VNx128BI, VNx64BI, VNx32BI, _b1, , ) /* LMUL = 1/8: - Only enble when TARGET_MIN_VLEN > 32 and machine mode = VNx1QImode. 
*/ -DEF_RVV_TYPE (vint8mf8_t, 15, __rvv_int8mf8_t, int8, VNx1QI, VOID, _i8mf8, _i8, + Only enble when TARGET_MIN_VLEN > 32. + Machine mode = VNx1QImode when TARGET_MIN_VLEN < 128. + Machine mode = VNx2QImode when TARGET_MIN_VLEN >= 128. */ +DEF_RVV_TYPE (vint8mf8_t, 15, __rvv_int8mf8_t, int8, VNx2QI, VNx1QI, VOID, _i8mf8, _i8, _e8mf8) -DEF_RVV_TYPE (vuint8mf8_t, 16, __rvv_uint8mf8_t, uint8, VNx1QI, VOID, _u8mf8, +DEF_RVV_TYPE (vuint8mf8_t, 16, __rvv_uint8mf8_t, uint8, VNx2QI, VNx1QI, VOID, _u8mf8, _u8, _e8mf8) /* LMUL = 1/4: + Machine mode = VNx4QImode when TARGET_MIN_VLEN >= 128. Machine mode = VNx2QImode when TARGET_MIN_VLEN > 32. Machine mode = VNx1QImode when TARGET_MIN_VLEN = 32. */ -DEF_RVV_TYPE (vint8mf4_t, 15, __rvv_int8mf4_t, int8, VNx2QI, VNx1QI, _i8mf4, +DEF_RVV_TYPE (vint8mf4_t, 15, __rvv_int8mf4_t, int8, VNx4QI, VNx2QI, VNx1QI, _i8mf4, _i8, _e8mf4) -DEF_RVV_TYPE (vuint8mf4_t, 16, __rvv_uint8mf4_t, uint8, VNx2QI, VNx1QI, _u8mf4, +DEF_RVV_TYPE (vuint8mf4_t, 16, __rvv_uint8mf4_t, uint8, VNx4QI, VNx2QI, VNx1QI, _u8mf4, _u8, _e8mf4) /* LMUL = 1/2: + Machine mode = VNx8QImode when TARGET_MIN_VLEN >= 128. Machine mode = VNx4QImode when TARGET_MIN_VLEN > 32. Machine mode = VNx2QImode when TARGET_MIN_VLEN = 32. */ -DEF_RVV_TYPE (vint8mf2_t, 15, __rvv_int8mf2_t, int8, VNx4QI, VNx2QI, _i8mf2, +DEF_RVV_TYPE (vint8mf2_t, 15, __rvv_int8mf2_t, int8, VNx8QI, VNx4QI, VNx2QI, _i8mf2, _i8, _e8mf2) -DEF_RVV_TYPE (vuint8mf2_t, 16, __rvv_uint8mf2_t, uint8, VNx4QI, VNx2QI, _u8mf2, +DEF_RVV_TYPE (vuint8mf2_t, 16, __rvv_uint8mf2_t, uint8, VNx8QI, VNx4QI, VNx2QI, _u8mf2, _u8, _e8mf2) /* LMUL = 1: + Machine mode = VNx16QImode when TARGET_MIN_VLEN >= 128. Machine mode = VNx8QImode when TARGET_MIN_VLEN > 32. Machine mode = VNx4QImode when TARGET_MIN_VLEN = 32. */ -DEF_RVV_TYPE (vint8m1_t, 14, __rvv_int8m1_t, int8, VNx8QI, VNx4QI, _i8m1, _i8, +DEF_RVV_TYPE (vint8m1_t, 14, __rvv_int8m1_t, int8, VNx16QI, VNx8QI, VNx4QI, _i8m1, _i8, _e8m1) -DEF_RVV_TYPE (vuint8m1_t, 15, __rvv_uint8m1_t, uint8, VNx8QI, VNx4QI, _u8m1, +DEF_RVV_TYPE (vuint8m1_t, 15, __rvv_uint8m1_t, uint8, VNx16QI, VNx8QI, VNx4QI, _u8m1, _u8, _e8m1) /* LMUL = 2: + Machine mode = VNx32QImode when TARGET_MIN_VLEN >= 128. Machine mode = VNx16QImode when TARGET_MIN_VLEN > 32. Machine mode = VNx8QImode when TARGET_MIN_VLEN = 32. */ -DEF_RVV_TYPE (vint8m2_t, 14, __rvv_int8m2_t, int8, VNx16QI, VNx8QI, _i8m2, _i8, +DEF_RVV_TYPE (vint8m2_t, 14, __rvv_int8m2_t, int8, VNx32QI, VNx16QI, VNx8QI, _i8m2, _i8, _e8m2) -DEF_RVV_TYPE (vuint8m2_t, 15, __rvv_uint8m2_t, uint8, VNx16QI, VNx8QI, _u8m2, +DEF_RVV_TYPE (vuint8m2_t, 15, __rvv_uint8m2_t, uint8, VNx32QI, VNx16QI, VNx8QI, _u8m2, _u8, _e8m2) /* LMUL = 4: + Machine mode = VNx64QImode when TARGET_MIN_VLEN >= 128. Machine mode = VNx32QImode when TARGET_MIN_VLEN > 32. Machine mode = VNx16QImode when TARGET_MIN_VLEN = 32. */ -DEF_RVV_TYPE (vint8m4_t, 14, __rvv_int8m4_t, int8, VNx32QI, VNx16QI, _i8m4, _i8, +DEF_RVV_TYPE (vint8m4_t, 14, __rvv_int8m4_t, int8, VNx64QI, VNx32QI, VNx16QI, _i8m4, _i8, _e8m4) -DEF_RVV_TYPE (vuint8m4_t, 15, __rvv_uint8m4_t, uint8, VNx32QI, VNx16QI, _u8m4, +DEF_RVV_TYPE (vuint8m4_t, 15, __rvv_uint8m4_t, uint8, VNx64QI, VNx32QI, VNx16QI, _u8m4, _u8, _e8m4) /* LMUL = 8: + Machine mode = VNx128QImode when TARGET_MIN_VLEN >= 128. Machine mode = VNx64QImode when TARGET_MIN_VLEN > 32. Machine mode = VNx32QImode when TARGET_MIN_VLEN = 32. 
*/ -DEF_RVV_TYPE (vint8m8_t, 14, __rvv_int8m8_t, int8, VNx64QI, VNx32QI, _i8m8, _i8, +DEF_RVV_TYPE (vint8m8_t, 14, __rvv_int8m8_t, int8, VNx128QI, VNx64QI, VNx32QI, _i8m8, _i8, _e8m8) -DEF_RVV_TYPE (vuint8m8_t, 15, __rvv_uint8m8_t, uint8, VNx64QI, VNx32QI, _u8m8, +DEF_RVV_TYPE (vuint8m8_t, 15, __rvv_uint8m8_t, uint8, VNx128QI, VNx64QI, VNx32QI, _u8m8, _u8, _e8m8) /* LMUL = 1/4: - Only enble when TARGET_MIN_VLEN > 32 and machine mode = VNx1HImode. */ -DEF_RVV_TYPE (vint16mf4_t, 16, __rvv_int16mf4_t, int16, VNx1HI, VOID, _i16mf4, + Only enble when TARGET_MIN_VLEN > 32. + Machine mode = VNx1HImode when TARGET_MIN_VLEN < 128. + Machine mode = VNx2HImode when TARGET_MIN_VLEN >= 128. */ +DEF_RVV_TYPE (vint16mf4_t, 16, __rvv_int16mf4_t, int16, VNx2HI, VNx1HI, VOID, _i16mf4, _i16, _e16mf4) -DEF_RVV_TYPE (vuint16mf4_t, 17, __rvv_uint16mf4_t, uint16, VNx1HI, VOID, +DEF_RVV_TYPE (vuint16mf4_t, 17, __rvv_uint16mf4_t, uint16, VNx2HI, VNx1HI, VOID, _u16mf4, _u16, _e16mf4) /* LMUL = 1/2: + Machine mode = VNx4HImode when TARGET_MIN_VLEN >= 128. Machine mode = VNx2HImode when TARGET_MIN_VLEN > 32. Machine mode = VNx1HImode when TARGET_MIN_VLEN = 32. */ -DEF_RVV_TYPE (vint16mf2_t, 16, __rvv_int16mf2_t, int16, VNx2HI, VNx1HI, _i16mf2, +DEF_RVV_TYPE (vint16mf2_t, 16, __rvv_int16mf2_t, int16, VNx4HI, VNx2HI, VNx1HI, _i16mf2, _i16, _e16mf2) -DEF_RVV_TYPE (vuint16mf2_t, 17, __rvv_uint16mf2_t, uint16, VNx2HI, VNx1HI, +DEF_RVV_TYPE (vuint16mf2_t, 17, __rvv_uint16mf2_t, uint16, VNx4HI, VNx2HI, VNx1HI, _u16mf2, _u16, _e16mf2) /* LMUL = 1: + Machine mode = VNx8HImode when TARGET_MIN_VLEN >= 128. Machine mode = VNx4HImode when TARGET_MIN_VLEN > 32. Machine mode = VNx2HImode when TARGET_MIN_VLEN = 32. */ -DEF_RVV_TYPE (vint16m1_t, 15, __rvv_int16m1_t, int16, VNx4HI, VNx2HI, _i16m1, +DEF_RVV_TYPE (vint16m1_t, 15, __rvv_int16m1_t, int16, VNx8HI, VNx4HI, VNx2HI, _i16m1, _i16, _e16m1) -DEF_RVV_TYPE (vuint16m1_t, 16, __rvv_uint16m1_t, uint16, VNx4HI, VNx2HI, _u16m1, +DEF_RVV_TYPE (vuint16m1_t, 16, __rvv_uint16m1_t, uint16, VNx8HI, VNx4HI, VNx2HI, _u16m1, _u16, _e16m1) /* LMUL = 2: + Machine mode = VNx16HImode when TARGET_MIN_VLEN >= 128. Machine mode = VNx8HImode when TARGET_MIN_VLEN > 32. Machine mode = VNx4HImode when TARGET_MIN_VLEN = 32. */ -DEF_RVV_TYPE (vint16m2_t, 15, __rvv_int16m2_t, int16, VNx8HI, VNx4HI, _i16m2, +DEF_RVV_TYPE (vint16m2_t, 15, __rvv_int16m2_t, int16, VNx16HI, VNx8HI, VNx4HI, _i16m2, _i16, _e16m2) -DEF_RVV_TYPE (vuint16m2_t, 16, __rvv_uint16m2_t, uint16, VNx8HI, VNx4HI, _u16m2, +DEF_RVV_TYPE (vuint16m2_t, 16, __rvv_uint16m2_t, uint16, VNx16HI, VNx8HI, VNx4HI, _u16m2, _u16, _e16m2) /* LMUL = 4: + Machine mode = VNx32HImode when TARGET_MIN_VLEN >= 128. Machine mode = VNx16HImode when TARGET_MIN_VLEN > 32. Machine mode = VNx8HImode when TARGET_MIN_VLEN = 32. */ -DEF_RVV_TYPE (vint16m4_t, 15, __rvv_int16m4_t, int16, VNx16HI, VNx8HI, _i16m4, +DEF_RVV_TYPE (vint16m4_t, 15, __rvv_int16m4_t, int16, VNx32HI, VNx16HI, VNx8HI, _i16m4, _i16, _e16m4) -DEF_RVV_TYPE (vuint16m4_t, 16, __rvv_uint16m4_t, uint16, VNx16HI, VNx8HI, +DEF_RVV_TYPE (vuint16m4_t, 16, __rvv_uint16m4_t, uint16, VNx32HI, VNx16HI, VNx8HI, _u16m4, _u16, _e16m4) /* LMUL = 8: + Machine mode = VNx64HImode when TARGET_MIN_VLEN >= 128. Machine mode = VNx32HImode when TARGET_MIN_VLEN > 32. Machine mode = VNx16HImode when TARGET_MIN_VLEN = 32. 
*/ -DEF_RVV_TYPE (vint16m8_t, 15, __rvv_int16m8_t, int16, VNx32HI, VNx16HI, _i16m8, +DEF_RVV_TYPE (vint16m8_t, 15, __rvv_int16m8_t, int16, VNx64HI, VNx32HI, VNx16HI, _i16m8, _i16, _e16m8) -DEF_RVV_TYPE (vuint16m8_t, 16, __rvv_uint16m8_t, uint16, VNx32HI, VNx16HI, +DEF_RVV_TYPE (vuint16m8_t, 16, __rvv_uint16m8_t, uint16, VNx64HI, VNx32HI, VNx16HI, _u16m8, _u16, _e16m8) /* LMUL = 1/2: - Only enble when TARGET_MIN_VLEN > 32 and machine mode = VNx1SImode. */ -DEF_RVV_TYPE (vint32mf2_t, 16, __rvv_int32mf2_t, int32, VNx1SI, VOID, _i32mf2, + Only enble when TARGET_MIN_VLEN > 32. + Machine mode = VNx1SImode when TARGET_MIN_VLEN < 128. + Machine mode = VNx2SImode when TARGET_MIN_VLEN >= 128. */ +DEF_RVV_TYPE (vint32mf2_t, 16, __rvv_int32mf2_t, int32, VNx2SI, VNx1SI, VOID, _i32mf2, _i32, _e32mf2) -DEF_RVV_TYPE (vuint32mf2_t, 17, __rvv_uint32mf2_t, uint32, VNx1SI, VOID, +DEF_RVV_TYPE (vuint32mf2_t, 17, __rvv_uint32mf2_t, uint32, VNx2SI, VNx1SI, VOID, _u32mf2, _u32, _e32mf2) /* LMUL = 1: + Machine mode = VNx4SImode when TARGET_MIN_VLEN >= 128. Machine mode = VNx2SImode when TARGET_MIN_VLEN > 32. Machine mode = VNx1SImode when TARGET_MIN_VLEN = 32. */ -DEF_RVV_TYPE (vint32m1_t, 15, __rvv_int32m1_t, int32, VNx2SI, VNx1SI, _i32m1, +DEF_RVV_TYPE (vint32m1_t, 15, __rvv_int32m1_t, int32, VNx4SI, VNx2SI, VNx1SI, _i32m1, _i32, _e32m1) -DEF_RVV_TYPE (vuint32m1_t, 16, __rvv_uint32m1_t, uint32, VNx2SI, VNx1SI, _u32m1, +DEF_RVV_TYPE (vuint32m1_t, 16, __rvv_uint32m1_t, uint32, VNx4SI, VNx2SI, VNx1SI, _u32m1, _u32, _e32m1) /* LMUL = 2: + Machine mode = VNx8SImode when TARGET_MIN_VLEN >= 128. Machine mode = VNx4SImode when TARGET_MIN_VLEN > 32. Machine mode = VNx2SImode when TARGET_MIN_VLEN = 32. */ -DEF_RVV_TYPE (vint32m2_t, 15, __rvv_int32m2_t, int32, VNx4SI, VNx2SI, _i32m2, +DEF_RVV_TYPE (vint32m2_t, 15, __rvv_int32m2_t, int32, VNx8SI, VNx4SI, VNx2SI, _i32m2, _i32, _e32m2) -DEF_RVV_TYPE (vuint32m2_t, 16, __rvv_uint32m2_t, uint32, VNx4SI, VNx2SI, _u32m2, +DEF_RVV_TYPE (vuint32m2_t, 16, __rvv_uint32m2_t, uint32, VNx8SI, VNx4SI, VNx2SI, _u32m2, _u32, _e32m2) /* LMUL = 4: + Machine mode = VNx16SImode when TARGET_MIN_VLEN >= 128. Machine mode = VNx8SImode when TARGET_MIN_VLEN > 32. Machine mode = VNx4SImode when TARGET_MIN_VLEN = 32. */ -DEF_RVV_TYPE (vint32m4_t, 15, __rvv_int32m4_t, int32, VNx8SI, VNx4SI, _i32m4, +DEF_RVV_TYPE (vint32m4_t, 15, __rvv_int32m4_t, int32, VNx16SI, VNx8SI, VNx4SI, _i32m4, _i32, _e32m4) -DEF_RVV_TYPE (vuint32m4_t, 16, __rvv_uint32m4_t, uint32, VNx8SI, VNx4SI, _u32m4, +DEF_RVV_TYPE (vuint32m4_t, 16, __rvv_uint32m4_t, uint32, VNx16SI, VNx8SI, VNx4SI, _u32m4, _u32, _e32m4) /* LMUL = 8: + Machine mode = VNx32SImode when TARGET_MIN_VLEN >= 128. Machine mode = VNx16SImode when TARGET_MIN_VLEN > 32. Machine mode = VNx8SImode when TARGET_MIN_VLEN = 32. */ -DEF_RVV_TYPE (vint32m8_t, 15, __rvv_int32m8_t, int32, VNx16SI, VNx8SI, _i32m8, +DEF_RVV_TYPE (vint32m8_t, 15, __rvv_int32m8_t, int32, VNx32SI, VNx16SI, VNx8SI, _i32m8, _i32, _e32m8) -DEF_RVV_TYPE (vuint32m8_t, 16, __rvv_uint32m8_t, uint32, VNx16SI, VNx8SI, +DEF_RVV_TYPE (vuint32m8_t, 16, __rvv_uint32m8_t, uint32, VNx32SI, VNx16SI, VNx8SI, _u32m8, _u32, _e32m8) /* SEW = 64: - Enable when TARGET_MIN_VLEN > 32. */ -DEF_RVV_TYPE (vint64m1_t, 15, __rvv_int64m1_t, int64, VNx1DI, VOID, _i64m1, + Disable when !TARGET_VECTOR_ELEN_64. 
*/ +DEF_RVV_TYPE (vint64m1_t, 15, __rvv_int64m1_t, int64, VNx2DI, VNx1DI, VOID, _i64m1, _i64, _e64m1) -DEF_RVV_TYPE (vuint64m1_t, 16, __rvv_uint64m1_t, uint64, VNx1DI, VOID, _u64m1, +DEF_RVV_TYPE (vuint64m1_t, 16, __rvv_uint64m1_t, uint64, VNx2DI, VNx1DI, VOID, _u64m1, _u64, _e64m1) -DEF_RVV_TYPE (vint64m2_t, 15, __rvv_int64m2_t, int64, VNx2DI, VOID, _i64m2, +DEF_RVV_TYPE (vint64m2_t, 15, __rvv_int64m2_t, int64, VNx4DI, VNx2DI, VOID, _i64m2, _i64, _e64m2) -DEF_RVV_TYPE (vuint64m2_t, 16, __rvv_uint64m2_t, uint64, VNx2DI, VOID, _u64m2, +DEF_RVV_TYPE (vuint64m2_t, 16, __rvv_uint64m2_t, uint64, VNx4DI, VNx2DI, VOID, _u64m2, _u64, _e64m2) -DEF_RVV_TYPE (vint64m4_t, 15, __rvv_int64m4_t, int64, VNx4DI, VOID, _i64m4, +DEF_RVV_TYPE (vint64m4_t, 15, __rvv_int64m4_t, int64, VNx8DI, VNx4DI, VOID, _i64m4, _i64, _e64m4) -DEF_RVV_TYPE (vuint64m4_t, 16, __rvv_uint64m4_t, uint64, VNx4DI, VOID, _u64m4, +DEF_RVV_TYPE (vuint64m4_t, 16, __rvv_uint64m4_t, uint64, VNx8DI, VNx4DI, VOID, _u64m4, _u64, _e64m4) -DEF_RVV_TYPE (vint64m8_t, 15, __rvv_int64m8_t, int64, VNx8DI, VOID, _i64m8, +DEF_RVV_TYPE (vint64m8_t, 15, __rvv_int64m8_t, int64, VNx16DI, VNx8DI, VOID, _i64m8, _i64, _e64m8) -DEF_RVV_TYPE (vuint64m8_t, 16, __rvv_uint64m8_t, uint64, VNx8DI, VOID, _u64m8, +DEF_RVV_TYPE (vuint64m8_t, 16, __rvv_uint64m8_t, uint64, VNx16DI, VNx8DI, VOID, _u64m8, _u64, _e64m8) +/* Disable all when !TARGET_VECTOR_ELEN_FP_32. */ /* LMUL = 1/2: - Only enble when TARGET_MIN_VLEN > 32 and machine mode = VNx1SFmode. */ -DEF_RVV_TYPE (vfloat32mf2_t, 18, __rvv_float32mf2_t, float, VNx1SF, VOID, + Only enble when TARGET_MIN_VLEN > 32. + Machine mode = VNx1SFmode when TARGET_MIN_VLEN < 128. + Machine mode = VNx2SFmode when TARGET_MIN_VLEN >= 128. */ +DEF_RVV_TYPE (vfloat32mf2_t, 18, __rvv_float32mf2_t, float, VNx2SF, VNx1SF, VOID, _f32mf2, _f32, _e32mf2) /* LMUL = 1: + Machine mode = VNx4SFmode when TARGET_MIN_VLEN >= 128. Machine mode = VNx2SFmode when TARGET_MIN_VLEN > 32. Machine mode = VNx1SFmode when TARGET_MIN_VLEN = 32. */ -DEF_RVV_TYPE (vfloat32m1_t, 17, __rvv_float32m1_t, float, VNx2SF, VNx1SF, +DEF_RVV_TYPE (vfloat32m1_t, 17, __rvv_float32m1_t, float, VNx4SF, VNx2SF, VNx1SF, _f32m1, _f32, _e32m1) /* LMUL = 2: + Machine mode = VNx8SFmode when TARGET_MIN_VLEN >= 128. Machine mode = VNx4SFmode when TARGET_MIN_VLEN > 32. Machine mode = VNx2SFmode when TARGET_MIN_VLEN = 32. */ -DEF_RVV_TYPE (vfloat32m2_t, 17, __rvv_float32m2_t, float, VNx4SF, VNx2SF, +DEF_RVV_TYPE (vfloat32m2_t, 17, __rvv_float32m2_t, float, VNx8SF, VNx4SF, VNx2SF, _f32m2, _f32, _e32m2) /* LMUL = 4: + Machine mode = VNx16SFmode when TARGET_MIN_VLEN >= 128. Machine mode = VNx8SFmode when TARGET_MIN_VLEN > 32. Machine mode = VNx4SFmode when TARGET_MIN_VLEN = 32. */ -DEF_RVV_TYPE (vfloat32m4_t, 17, __rvv_float32m4_t, float, VNx8SF, VNx4SF, +DEF_RVV_TYPE (vfloat32m4_t, 17, __rvv_float32m4_t, float, VNx16SF, VNx8SF, VNx4SF, _f32m4, _f32, _e32m4) /* LMUL = 8: + Machine mode = VNx32SFmode when TARGET_MIN_VLEN >= 128. Machine mode = VNx16SFmode when TARGET_MIN_VLEN > 32. Machine mode = VNx8SFmode when TARGET_MIN_VLEN = 32. */ -DEF_RVV_TYPE (vfloat32m8_t, 17, __rvv_float32m8_t, float, VNx16SF, VNx8SF, +DEF_RVV_TYPE (vfloat32m8_t, 17, __rvv_float32m8_t, float, VNx32SF, VNx16SF, VNx8SF, _f32m8, _f32, _e32m8) /* SEW = 64: - Enable when TARGET_VECTOR_FP64. */ -DEF_RVV_TYPE (vfloat64m1_t, 17, __rvv_float64m1_t, double, VNx1DF, VOID, _f64m1, + Disable when !TARGET_VECTOR_ELEN_FP_64. 
*/ +DEF_RVV_TYPE (vfloat64m1_t, 17, __rvv_float64m1_t, double, VNx2DF, VNx1DF, VOID, _f64m1, _f64, _e64m1) -DEF_RVV_TYPE (vfloat64m2_t, 17, __rvv_float64m2_t, double, VNx2DF, VOID, _f64m2, +DEF_RVV_TYPE (vfloat64m2_t, 17, __rvv_float64m2_t, double, VNx4DF, VNx2DF, VOID, _f64m2, _f64, _e64m2) -DEF_RVV_TYPE (vfloat64m4_t, 17, __rvv_float64m4_t, double, VNx4DF, VOID, _f64m4, +DEF_RVV_TYPE (vfloat64m4_t, 17, __rvv_float64m4_t, double, VNx8DF, VNx4DF, VOID, _f64m4, _f64, _e64m4) -DEF_RVV_TYPE (vfloat64m8_t, 17, __rvv_float64m8_t, double, VNx8DF, VOID, _f64m8, +DEF_RVV_TYPE (vfloat64m8_t, 17, __rvv_float64m8_t, double, VNx16DF, VNx8DF, VOID, _f64m8, _f64, _e64m8) DEF_RVV_OP_TYPE (vv) diff --git a/gcc/config/riscv/riscv-vector-switch.def b/gcc/config/riscv/riscv-vector-switch.def index 3b94454..8aae22d 100644 --- a/gcc/config/riscv/riscv-vector-switch.def +++ b/gcc/config/riscv/riscv-vector-switch.def @@ -81,69 +81,80 @@ TODO: FP16 vector needs support of 'zvfh', we don't support it yet. */ For example: 'MODE' = VNx64BImode needs TARGET_MIN_VLEN > 32. */ #ifndef ENTRY #define ENTRY(MODE, REQUIREMENT, VLMUL_FOR_MIN_VLEN32, RATIO_FOR_MIN_VLEN32, \ - VLMUL_FOR_MIN_VLEN64, RATIO_FOR_MIN_VLEN64) + VLMUL_FOR_MIN_VLEN64, RATIO_FOR_MIN_VLEN64, \ + VLMUL_FOR_MIN_VLEN128, RATIO_FOR_MIN_VLEN128) #endif /* Mask modes. Disable VNx64BImode when TARGET_MIN_VLEN == 32. */ -ENTRY (VNx64BI, TARGET_MIN_VLEN > 32, LMUL_RESERVED, 0, LMUL_8, 1) -ENTRY (VNx32BI, true, LMUL_8, 1, LMUL_4, 2) -ENTRY (VNx16BI, true, LMUL_4, 2, LMUL_2, 4) -ENTRY (VNx8BI, true, LMUL_2, 4, LMUL_1, 8) -ENTRY (VNx4BI, true, LMUL_1, 8, LMUL_F2, 16) -ENTRY (VNx2BI, true, LMUL_F2, 16, LMUL_F4, 32) -ENTRY (VNx1BI, true, LMUL_F4, 32, LMUL_F8, 64) +ENTRY (VNx128BI, TARGET_MIN_VLEN >= 128, LMUL_RESERVED, 0, LMUL_RESERVED, 0, LMUL_8, 1) +ENTRY (VNx64BI, TARGET_MIN_VLEN > 32, LMUL_RESERVED, 0, LMUL_8, 1, LMUL_4, 2) +ENTRY (VNx32BI, true, LMUL_8, 1, LMUL_4, 2, LMUL_2, 4) +ENTRY (VNx16BI, true, LMUL_4, 2, LMUL_2, 4, LMUL_1, 8) +ENTRY (VNx8BI, true, LMUL_2, 4, LMUL_1, 8, LMUL_F2, 16) +ENTRY (VNx4BI, true, LMUL_1, 8, LMUL_F2, 16, LMUL_F4, 32) +ENTRY (VNx2BI, true, LMUL_F2, 16, LMUL_F4, 32, LMUL_F8, 64) +ENTRY (VNx1BI, TARGET_MIN_VLEN < 128, LMUL_F4, 32, LMUL_F8, 64, LMUL_RESERVED, 0) /* SEW = 8. Disable VNx64QImode when TARGET_MIN_VLEN == 32. */ -ENTRY (VNx64QI, TARGET_MIN_VLEN > 32, LMUL_RESERVED, 0, LMUL_8, 1) -ENTRY (VNx32QI, true, LMUL_8, 1, LMUL_4, 2) -ENTRY (VNx16QI, true, LMUL_4, 2, LMUL_2, 4) -ENTRY (VNx8QI, true, LMUL_2, 4, LMUL_1, 8) -ENTRY (VNx4QI, true, LMUL_1, 8, LMUL_F2, 16) -ENTRY (VNx2QI, true, LMUL_F2, 16, LMUL_F4, 32) -ENTRY (VNx1QI, true, LMUL_F4, 32, LMUL_F8, 64) +ENTRY (VNx128QI, TARGET_MIN_VLEN >= 128, LMUL_RESERVED, 0, LMUL_RESERVED, 0, LMUL_8, 1) +ENTRY (VNx64QI, TARGET_MIN_VLEN > 32, LMUL_RESERVED, 0, LMUL_8, 1, LMUL_4, 2) +ENTRY (VNx32QI, true, LMUL_8, 1, LMUL_4, 2, LMUL_2, 4) +ENTRY (VNx16QI, true, LMUL_4, 2, LMUL_2, 4, LMUL_1, 8) +ENTRY (VNx8QI, true, LMUL_2, 4, LMUL_1, 8, LMUL_F2, 16) +ENTRY (VNx4QI, true, LMUL_1, 8, LMUL_F2, 16, LMUL_F4, 32) +ENTRY (VNx2QI, true, LMUL_F2, 16, LMUL_F4, 32, LMUL_F8, 64) +ENTRY (VNx1QI, TARGET_MIN_VLEN < 128, LMUL_F4, 32, LMUL_F8, 64, LMUL_RESERVED, 0) /* SEW = 16. Disable VNx32HImode when TARGET_MIN_VLEN == 32. 
*/ -ENTRY (VNx32HI, TARGET_MIN_VLEN > 32, LMUL_RESERVED, 0, LMUL_8, 2) -ENTRY (VNx16HI, true, LMUL_8, 2, LMUL_4, 4) -ENTRY (VNx8HI, true, LMUL_4, 4, LMUL_2, 8) -ENTRY (VNx4HI, true, LMUL_2, 8, LMUL_1, 16) -ENTRY (VNx2HI, true, LMUL_1, 16, LMUL_F2, 32) -ENTRY (VNx1HI, true, LMUL_F2, 32, LMUL_F4, 64) +ENTRY (VNx64HI, TARGET_MIN_VLEN >= 128, LMUL_RESERVED, 0, LMUL_RESERVED, 0, LMUL_8, 2) +ENTRY (VNx32HI, TARGET_MIN_VLEN > 32, LMUL_RESERVED, 0, LMUL_8, 2, LMUL_4, 4) +ENTRY (VNx16HI, true, LMUL_8, 2, LMUL_4, 4, LMUL_2, 8) +ENTRY (VNx8HI, true, LMUL_4, 4, LMUL_2, 8, LMUL_1, 16) +ENTRY (VNx4HI, true, LMUL_2, 8, LMUL_1, 16, LMUL_F2, 32) +ENTRY (VNx2HI, true, LMUL_1, 16, LMUL_F2, 32, LMUL_F4, 64) +ENTRY (VNx1HI, TARGET_MIN_VLEN < 128, LMUL_F2, 32, LMUL_F4, 64, LMUL_RESERVED, 0) /* TODO:Disable all FP16 vector, enable them when 'zvfh' is supported. */ -ENTRY (VNx32HF, false, LMUL_RESERVED, 0, LMUL_8, 2) -ENTRY (VNx16HF, false, LMUL_8, 2, LMUL_4, 4) -ENTRY (VNx8HF, false, LMUL_4, 4, LMUL_2, 8) -ENTRY (VNx4HF, false, LMUL_2, 8, LMUL_1, 16) -ENTRY (VNx2HF, false, LMUL_1, 16, LMUL_F2, 32) -ENTRY (VNx1HF, false, LMUL_F2, 32, LMUL_F4, 64) +ENTRY (VNx64HF, false, LMUL_RESERVED, 0, LMUL_RESERVED, 0, LMUL_8, 2) +ENTRY (VNx32HF, false, LMUL_RESERVED, 0, LMUL_8, 2, LMUL_4, 4) +ENTRY (VNx16HF, false, LMUL_8, 2, LMUL_4, 4, LMUL_2, 8) +ENTRY (VNx8HF, false, LMUL_4, 4, LMUL_2, 8, LMUL_1, 16) +ENTRY (VNx4HF, false, LMUL_2, 8, LMUL_1, 16, LMUL_F2, 32) +ENTRY (VNx2HF, false, LMUL_1, 16, LMUL_F2, 32, LMUL_F4, 64) +ENTRY (VNx1HF, false, LMUL_F2, 32, LMUL_F4, 64, LMUL_RESERVED, 0) /* SEW = 32. Disable VNx16SImode when TARGET_MIN_VLEN == 32. For single-precision floating-point, we need TARGET_VECTOR_ELEN_FP_32 to be true. */ -ENTRY (VNx16SI, TARGET_MIN_VLEN > 32, LMUL_RESERVED, 0, LMUL_8, 4) -ENTRY (VNx8SI, true, LMUL_8, 4, LMUL_4, 8) -ENTRY (VNx4SI, true, LMUL_4, 8, LMUL_2, 16) -ENTRY (VNx2SI, true, LMUL_2, 16, LMUL_1, 32) -ENTRY (VNx1SI, true, LMUL_1, 32, LMUL_F2, 64) - -ENTRY (VNx16SF, TARGET_VECTOR_ELEN_FP_32, LMUL_RESERVED, 0, LMUL_8, 4) -ENTRY (VNx8SF, TARGET_VECTOR_ELEN_FP_32, LMUL_8, 4, LMUL_4, 8) -ENTRY (VNx4SF, TARGET_VECTOR_ELEN_FP_32, LMUL_4, 8, LMUL_2, 16) -ENTRY (VNx2SF, TARGET_VECTOR_ELEN_FP_32, LMUL_2, 16, LMUL_1, 32) -ENTRY (VNx1SF, TARGET_VECTOR_ELEN_FP_32, LMUL_1, 32, LMUL_F2, 64) +ENTRY (VNx32SI, TARGET_MIN_VLEN >= 128, LMUL_RESERVED, 0, LMUL_RESERVED, 0, LMUL_8, 4) +ENTRY (VNx16SI, TARGET_MIN_VLEN > 32, LMUL_RESERVED, 0, LMUL_8, 4, LMUL_4, 8) +ENTRY (VNx8SI, true, LMUL_8, 4, LMUL_4, 8, LMUL_2, 16) +ENTRY (VNx4SI, true, LMUL_4, 8, LMUL_2, 16, LMUL_1, 32) +ENTRY (VNx2SI, true, LMUL_2, 16, LMUL_1, 32, LMUL_F2, 64) +ENTRY (VNx1SI, TARGET_MIN_VLEN < 128, LMUL_1, 32, LMUL_F2, 64, LMUL_RESERVED, 0) + +ENTRY (VNx32SF, TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128, LMUL_RESERVED, 0, LMUL_RESERVED, 0, LMUL_8, 4) +ENTRY (VNx16SF, TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32, LMUL_RESERVED, 0, + LMUL_8, 4, LMUL_4, 8) +ENTRY (VNx8SF, TARGET_VECTOR_ELEN_FP_32, LMUL_8, 4, LMUL_4, 8, LMUL_2, 16) +ENTRY (VNx4SF, TARGET_VECTOR_ELEN_FP_32, LMUL_4, 8, LMUL_2, 16, LMUL_1, 32) +ENTRY (VNx2SF, TARGET_VECTOR_ELEN_FP_32, LMUL_2, 16, LMUL_1, 32, LMUL_F2, 64) +ENTRY (VNx1SF, TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128, LMUL_1, 32, LMUL_F2, 64, LMUL_RESERVED, 0) /* SEW = 64. Enable when TARGET_VECTOR_ELEN_64 is true. For double-precision floating-point, we need TARGET_VECTOR_ELEN_FP_64 to be true. 
*/ -ENTRY (VNx8DI, TARGET_VECTOR_ELEN_64, LMUL_RESERVED, 0, LMUL_8, 8) -ENTRY (VNx4DI, TARGET_VECTOR_ELEN_64, LMUL_RESERVED, 0, LMUL_4, 16) -ENTRY (VNx2DI, TARGET_VECTOR_ELEN_64, LMUL_RESERVED, 0, LMUL_2, 32) -ENTRY (VNx1DI, TARGET_VECTOR_ELEN_64, LMUL_RESERVED, 0, LMUL_1, 64) - -ENTRY (VNx8DF, TARGET_VECTOR_ELEN_FP_64, LMUL_RESERVED, 0, LMUL_8, 8) -ENTRY (VNx4DF, TARGET_VECTOR_ELEN_FP_64, LMUL_RESERVED, 0, LMUL_4, 16) -ENTRY (VNx2DF, TARGET_VECTOR_ELEN_FP_64, LMUL_RESERVED, 0, LMUL_2, 32) -ENTRY (VNx1DF, TARGET_VECTOR_ELEN_FP_64, LMUL_RESERVED, 0, LMUL_1, 64) +ENTRY (VNx16DI, TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128, LMUL_RESERVED, 0, LMUL_RESERVED, 0, LMUL_8, 8) +ENTRY (VNx8DI, TARGET_VECTOR_ELEN_64, LMUL_RESERVED, 0, LMUL_8, 8, LMUL_4, 16) +ENTRY (VNx4DI, TARGET_VECTOR_ELEN_64, LMUL_RESERVED, 0, LMUL_4, 16, LMUL_2, 32) +ENTRY (VNx2DI, TARGET_VECTOR_ELEN_64, LMUL_RESERVED, 0, LMUL_2, 32, LMUL_1, 64) +ENTRY (VNx1DI, TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128, LMUL_RESERVED, 0, LMUL_1, 64, LMUL_RESERVED, 0) + +ENTRY (VNx16DF, TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128, LMUL_RESERVED, 0, LMUL_RESERVED, 0, LMUL_8, 8) +ENTRY (VNx8DF, TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN > 32, LMUL_RESERVED, 0, + LMUL_8, 8, LMUL_4, 16) +ENTRY (VNx4DF, TARGET_VECTOR_ELEN_FP_64, LMUL_RESERVED, 0, LMUL_4, 16, LMUL_2, 32) +ENTRY (VNx2DF, TARGET_VECTOR_ELEN_FP_64, LMUL_RESERVED, 0, LMUL_2, 32, LMUL_1, 64) +ENTRY (VNx1DF, TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128, LMUL_RESERVED, 0, LMUL_1, 64, LMUL_RESERVED, 0) #undef ENTRY diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index cdb47e8..5d25508 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -1974,6 +1974,8 @@ riscv_legitimize_poly_move (machine_mode mode, rtx dest, rtx tmp, rtx src) div_factor = 4; else if ((factor % (vlenb / 8)) == 0) div_factor = 8; + else if ((factor % (vlenb / 16)) == 0) + div_factor = 16; else gcc_unreachable (); @@ -6192,7 +6194,15 @@ riscv_init_machine_status (void) static poly_uint16 riscv_convert_vector_bits (void) { - if (TARGET_MIN_VLEN > 32) + if (TARGET_MIN_VLEN >= 128) + { + /* We have Full 'V' extension for application processors. It's specified + by -march=rv64gcv/rv32gcv, The 'V' extension depends upon the Zvl128b + and Zve64d extensions. Thus the number of bytes in a vector is 16 + 16 + * x1 which is riscv_vector_chunks * 16 = poly_int (16, 16). */ + riscv_bytes_per_vector_chunk = 16; + } + else if (TARGET_MIN_VLEN > 32) { /* When targetting minimum VLEN > 32, we should use 64-bit chunk size. Otherwise we can not include SEW = 64bits. 
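To make the chunk arithmetic in the riscv.cc comment above concrete (an illustrative sketch; the struct below is a stand-in for GCC's poly-int machinery, not its real API): with Zvl128b as the baseline, riscv_bytes_per_vector_chunk is 16 and the size of one vector register becomes the degree-1 polynomial 16 + 16 * x1 bytes, where the indeterminate x1 counts how many extra 128-bit blocks the hardware provides beyond the guaranteed minimum.

    /* poly_chunk_sketch.c -- illustrative only.  */
    #include <stdio.h>

    /* Stand-in for a degree-1 poly_int: value = c0 + c1 * x1,
       where x1 is a runtime indeterminate.  */
    struct poly1 { long c0, c1; };

    static long
    poly1_eval (struct poly1 p, long x1)
    {
      return p.c0 + p.c1 * x1;
    }

    int
    main (void)
    {
      struct poly1 bytes_per_vector = { 16, 16 };  /* poly_int (16, 16) */

      /* x1 = 0 is the guaranteed minimum VLEN of 128 bits; each extra
         unit of x1 is another 128 bits of hardware vector length.  */
      for (long x1 = 0; x1 <= 2; x1++)
        printf ("VLEN = %ld bits -> %ld bytes per vector register\n",
                128 + 128 * x1, poly1_eval (bytes_per_vector, x1));
      return 0;
    }

Keeping the size symbolic this way lets the compiler generate code that is correct for any hardware VLEN of at least 128 bits while still knowing, for the 'v' extension, that every mode's element count is a multiple of two.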
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index bc384d9..1fb29da 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -163,13 +163,13 @@ ;; Main data type used by the insn (define_attr "mode" "unknown,none,QI,HI,SI,DI,TI,HF,SF,DF,TF, - VNx1BI,VNx2BI,VNx4BI,VNx8BI,VNx16BI,VNx32BI,VNx64BI, - VNx1QI,VNx2QI,VNx4QI,VNx8QI,VNx16QI,VNx32QI,VNx64QI, - VNx1HI,VNx2HI,VNx4HI,VNx8HI,VNx16HI,VNx32HI, - VNx1SI,VNx2SI,VNx4SI,VNx8SI,VNx16SI, - VNx1DI,VNx2DI,VNx4DI,VNx8DI, - VNx1SF,VNx2SF,VNx4SF,VNx8SF,VNx16SF, - VNx1DF,VNx2DF,VNx4DF,VNx8DF" + VNx1BI,VNx2BI,VNx4BI,VNx8BI,VNx16BI,VNx32BI,VNx64BI,VNx128BI, + VNx1QI,VNx2QI,VNx4QI,VNx8QI,VNx16QI,VNx32QI,VNx64QI,VNx128QI, + VNx1HI,VNx2HI,VNx4HI,VNx8HI,VNx16HI,VNx32HI,VNx64HI, + VNx1SI,VNx2SI,VNx4SI,VNx8SI,VNx16SI,VNx32SI, + VNx1DI,VNx2DI,VNx4DI,VNx8DI,VNx16DI, + VNx1SF,VNx2SF,VNx4SF,VNx8SF,VNx16SF,VNx32SF, + VNx1DF,VNx2DF,VNx4DF,VNx8DF,VNx16DF" (const_string "unknown")) ;; True if the main data type is twice the size of a word. diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index 70ad85b..3c65752 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -83,157 +83,181 @@ ]) (define_mode_iterator V [ - VNx1QI VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32") - VNx1HI VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") - VNx1SI VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") - (VNx1DI "TARGET_VECTOR_ELEN_64") (VNx2DI "TARGET_VECTOR_ELEN_64") - (VNx4DI "TARGET_VECTOR_ELEN_64") (VNx8DI "TARGET_VECTOR_ELEN_64") - (VNx1SF "TARGET_VECTOR_ELEN_FP_32") + (VNx1QI "TARGET_MIN_VLEN < 128") VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32") (VNx128QI "TARGET_MIN_VLEN >= 128") + (VNx1HI "TARGET_MIN_VLEN < 128") VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") (VNx64HI "TARGET_MIN_VLEN >= 128") + (VNx1SI "TARGET_MIN_VLEN < 128") VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") (VNx32SI "TARGET_MIN_VLEN >= 128") + (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128") (VNx2DI "TARGET_VECTOR_ELEN_64") + (VNx4DI "TARGET_VECTOR_ELEN_64") (VNx8DI "TARGET_VECTOR_ELEN_64") (VNx16DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128") + (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128") (VNx2SF "TARGET_VECTOR_ELEN_FP_32") (VNx4SF "TARGET_VECTOR_ELEN_FP_32") (VNx8SF "TARGET_VECTOR_ELEN_FP_32") (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") - (VNx1DF "TARGET_VECTOR_ELEN_FP_64") + (VNx32SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128") + (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128") (VNx2DF "TARGET_VECTOR_ELEN_FP_64") (VNx4DF "TARGET_VECTOR_ELEN_FP_64") (VNx8DF "TARGET_VECTOR_ELEN_FP_64") + (VNx16DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128") ]) (define_mode_iterator VEEWEXT2 [ - VNx1HI VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") - VNx1SI VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") - (VNx1DI "TARGET_VECTOR_ELEN_64") (VNx2DI "TARGET_VECTOR_ELEN_64") - (VNx4DI "TARGET_VECTOR_ELEN_64") (VNx8DI "TARGET_VECTOR_ELEN_64") - (VNx1SF "TARGET_VECTOR_ELEN_FP_32") + (VNx1HI "TARGET_MIN_VLEN < 128") VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") (VNx64HI "TARGET_MIN_VLEN >= 128") + (VNx1SI "TARGET_MIN_VLEN < 128") VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") (VNx32SI "TARGET_MIN_VLEN >= 128") + (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128") (VNx2DI "TARGET_VECTOR_ELEN_64") + (VNx4DI 
"TARGET_VECTOR_ELEN_64") (VNx8DI "TARGET_VECTOR_ELEN_64") (VNx16DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128") + (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128") (VNx2SF "TARGET_VECTOR_ELEN_FP_32") (VNx4SF "TARGET_VECTOR_ELEN_FP_32") (VNx8SF "TARGET_VECTOR_ELEN_FP_32") (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") - (VNx1DF "TARGET_VECTOR_ELEN_FP_64") + (VNx32SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128") + (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128") (VNx2DF "TARGET_VECTOR_ELEN_FP_64") (VNx4DF "TARGET_VECTOR_ELEN_FP_64") (VNx8DF "TARGET_VECTOR_ELEN_FP_64") + (VNx16DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128") ]) (define_mode_iterator VEEWEXT4 [ - VNx1SI VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") - (VNx1DI "TARGET_VECTOR_ELEN_64") (VNx2DI "TARGET_VECTOR_ELEN_64") - (VNx4DI "TARGET_VECTOR_ELEN_64") (VNx8DI "TARGET_VECTOR_ELEN_64") - (VNx1SF "TARGET_VECTOR_ELEN_FP_32") + (VNx1SI "TARGET_MIN_VLEN < 128") VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") (VNx32SI "TARGET_MIN_VLEN >= 128") + (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128") (VNx2DI "TARGET_VECTOR_ELEN_64") + (VNx4DI "TARGET_VECTOR_ELEN_64") (VNx8DI "TARGET_VECTOR_ELEN_64") (VNx16DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128") + (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128") (VNx2SF "TARGET_VECTOR_ELEN_FP_32") (VNx4SF "TARGET_VECTOR_ELEN_FP_32") (VNx8SF "TARGET_VECTOR_ELEN_FP_32") (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") - (VNx1DF "TARGET_VECTOR_ELEN_FP_64") + (VNx32SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128") + (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128") (VNx2DF "TARGET_VECTOR_ELEN_FP_64") (VNx4DF "TARGET_VECTOR_ELEN_FP_64") (VNx8DF "TARGET_VECTOR_ELEN_FP_64") + (VNx16DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128") ]) (define_mode_iterator VEEWEXT8 [ (VNx1DI "TARGET_VECTOR_ELEN_64") (VNx2DI "TARGET_VECTOR_ELEN_64") - (VNx4DI "TARGET_VECTOR_ELEN_64") (VNx8DI "TARGET_VECTOR_ELEN_64") - (VNx1DF "TARGET_VECTOR_ELEN_FP_64") + (VNx4DI "TARGET_VECTOR_ELEN_64") (VNx8DI "TARGET_VECTOR_ELEN_64") (VNx16DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128") + (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128") (VNx2DF "TARGET_VECTOR_ELEN_FP_64") (VNx4DF "TARGET_VECTOR_ELEN_FP_64") (VNx8DF "TARGET_VECTOR_ELEN_FP_64") + (VNx16DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128") ]) (define_mode_iterator VEEWTRUNC2 [ - VNx1QI VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI - VNx1HI VNx2HI VNx4HI VNx8HI VNx16HI - VNx1SI VNx2SI VNx4SI VNx8SI - (VNx1SF "TARGET_VECTOR_ELEN_FP_32") + (VNx1QI "TARGET_MIN_VLEN < 128") VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN >= 128") + (VNx1HI "TARGET_MIN_VLEN < 128") VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN >= 128") + (VNx1SI "TARGET_MIN_VLEN < 128") VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN >= 128") + (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128") (VNx2SF "TARGET_VECTOR_ELEN_FP_32") (VNx4SF "TARGET_VECTOR_ELEN_FP_32") (VNx8SF "TARGET_VECTOR_ELEN_FP_32") + (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128") ]) (define_mode_iterator VEEWTRUNC4 [ - VNx1QI VNx2QI VNx4QI VNx8QI VNx16QI - VNx1HI VNx2HI VNx4HI VNx8HI + (VNx1QI "TARGET_MIN_VLEN < 128") VNx2QI VNx4QI VNx8QI VNx16QI (VNx32QI "TARGET_MIN_VLEN >= 128") + (VNx1HI "TARGET_MIN_VLEN < 128") VNx2HI VNx4HI VNx8HI (VNx16HI "TARGET_MIN_VLEN >= 128") ]) (define_mode_iterator VEEWTRUNC8 [ - VNx1QI VNx2QI VNx4QI VNx8QI + (VNx1QI 
"TARGET_MIN_VLEN < 128") VNx2QI VNx4QI VNx8QI (VNx16QI "TARGET_MIN_VLEN >= 128") ]) (define_mode_iterator VLMULEXT2 [ - VNx1QI VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI - VNx1HI VNx2HI VNx4HI VNx8HI VNx16HI - VNx1SI VNx2SI VNx4SI VNx8SI - (VNx1DI "TARGET_VECTOR_ELEN_64") (VNx2DI "TARGET_VECTOR_ELEN_64") - (VNx4DI "TARGET_VECTOR_ELEN_64") - (VNx1SF "TARGET_VECTOR_ELEN_FP_32") + (VNx1QI "TARGET_MIN_VLEN < 128") VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN >= 128") + (VNx1HI "TARGET_MIN_VLEN < 128") VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN >= 128") + (VNx1SI "TARGET_MIN_VLEN < 128") VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN >= 128") + (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128") (VNx2DI "TARGET_VECTOR_ELEN_64") + (VNx4DI "TARGET_VECTOR_ELEN_64") (VNx8DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128") + (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128") (VNx2SF "TARGET_VECTOR_ELEN_FP_32") (VNx4SF "TARGET_VECTOR_ELEN_FP_32") (VNx8SF "TARGET_VECTOR_ELEN_FP_32") + (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128") (VNx1DF "TARGET_VECTOR_ELEN_FP_64") (VNx2DF "TARGET_VECTOR_ELEN_FP_64") (VNx4DF "TARGET_VECTOR_ELEN_FP_64") + (VNx8DF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128") ]) (define_mode_iterator VLMULEXT4 [ - VNx1QI VNx2QI VNx4QI VNx8QI VNx16QI - VNx1HI VNx2HI VNx4HI VNx8HI - VNx1SI VNx2SI VNx4SI - (VNx1DI "TARGET_VECTOR_ELEN_64") (VNx2DI "TARGET_VECTOR_ELEN_64") - (VNx1SF "TARGET_VECTOR_ELEN_FP_32") + (VNx1QI "TARGET_MIN_VLEN < 128") VNx2QI VNx4QI VNx8QI VNx16QI (VNx32QI "TARGET_MIN_VLEN >= 128") + (VNx1HI "TARGET_MIN_VLEN < 128") VNx2HI VNx4HI VNx8HI (VNx16HI "TARGET_MIN_VLEN >= 128") + (VNx1SI "TARGET_MIN_VLEN < 128") VNx2SI VNx4SI (VNx8SI "TARGET_MIN_VLEN >= 128") + (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128") (VNx2DI "TARGET_VECTOR_ELEN_64") + (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128") (VNx2SF "TARGET_VECTOR_ELEN_FP_32") (VNx4SF "TARGET_VECTOR_ELEN_FP_32") - (VNx1DF "TARGET_VECTOR_ELEN_FP_64") + (VNx8SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128") + (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128") (VNx2DF "TARGET_VECTOR_ELEN_FP_64") + (VNx4DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128") ]) (define_mode_iterator VLMULEXT8 [ - VNx1QI VNx2QI VNx4QI VNx8QI - VNx1HI VNx2HI VNx4HI - VNx1SI VNx2SI - (VNx1DI "TARGET_VECTOR_ELEN_64") - (VNx1SF "TARGET_VECTOR_ELEN_FP_32") + (VNx1QI "TARGET_MIN_VLEN < 128") VNx2QI VNx4QI VNx8QI (VNx16QI "TARGET_MIN_VLEN >= 128") + (VNx1HI "TARGET_MIN_VLEN < 128") VNx2HI VNx4HI (VNx8HI "TARGET_MIN_VLEN >= 128") + (VNx1SI "TARGET_MIN_VLEN < 128") VNx2SI (VNx4SI "TARGET_MIN_VLEN >= 128") + (VNx1DI "TARGET_VECTOR_ELEN_64") (VNx2DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128") + (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128") (VNx2SF "TARGET_VECTOR_ELEN_FP_32") + (VNx4SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128") (VNx1DF "TARGET_VECTOR_ELEN_FP_64") + (VNx2DF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128") ]) (define_mode_iterator VLMULEXT16 [ - VNx1QI VNx2QI VNx4QI - VNx1HI VNx2HI - VNx1SI - (VNx1SF "TARGET_VECTOR_ELEN_FP_32") + (VNx1QI "TARGET_MIN_VLEN < 128") VNx2QI VNx4QI (VNx8QI "TARGET_MIN_VLEN >= 128") + (VNx1HI "TARGET_MIN_VLEN < 128") VNx2HI (VNx4HI "TARGET_MIN_VLEN >= 128") + (VNx1SI "TARGET_MIN_VLEN < 128") (VNx2SI "TARGET_MIN_VLEN >= 128") + (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128") + (VNx2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128") ]) (define_mode_iterator 
VLMULEXT32 [ - VNx1QI VNx2QI - VNx1HI + (VNx1QI "TARGET_MIN_VLEN < 128") VNx2QI (VNx4QI "TARGET_MIN_VLEN >= 128") + (VNx1HI "TARGET_MIN_VLEN < 128") ]) (define_mode_iterator VLMULEXT64 [ - VNx1QI + (VNx1QI "TARGET_MIN_VLEN < 128") (VNx2QI "TARGET_MIN_VLEN >= 128") ]) (define_mode_iterator VEI16 [ - VNx1QI VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI - VNx1HI VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") - VNx1SI VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") - (VNx1DI "TARGET_VECTOR_ELEN_64") (VNx2DI "TARGET_VECTOR_ELEN_64") - (VNx4DI "TARGET_VECTOR_ELEN_64") (VNx8DI "TARGET_VECTOR_ELEN_64") - (VNx1SF "TARGET_VECTOR_ELEN_FP_32") + (VNx1QI "TARGET_MIN_VLEN < 128") VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN >= 128") + (VNx1HI "TARGET_MIN_VLEN < 128") VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") (VNx64HI "TARGET_MIN_VLEN >= 128") + (VNx1SI "TARGET_MIN_VLEN < 128") VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") (VNx32SI "TARGET_MIN_VLEN >= 128") + (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128") (VNx2DI "TARGET_VECTOR_ELEN_64") + (VNx4DI "TARGET_VECTOR_ELEN_64") (VNx8DI "TARGET_VECTOR_ELEN_64") (VNx16DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128") + (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128") (VNx2SF "TARGET_VECTOR_ELEN_FP_32") (VNx4SF "TARGET_VECTOR_ELEN_FP_32") (VNx8SF "TARGET_VECTOR_ELEN_FP_32") (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") - (VNx1DF "TARGET_VECTOR_ELEN_FP_64") + (VNx32SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128") + (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128") (VNx2DF "TARGET_VECTOR_ELEN_FP_64") (VNx4DF "TARGET_VECTOR_ELEN_FP_64") (VNx8DF "TARGET_VECTOR_ELEN_FP_64") + (VNx16DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128") ]) (define_mode_iterator VI [ - VNx1QI VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32") - VNx1HI VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") - VNx1SI VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") - (VNx1DI "TARGET_VECTOR_ELEN_64") (VNx2DI "TARGET_VECTOR_ELEN_64") - (VNx4DI "TARGET_VECTOR_ELEN_64") (VNx8DI "TARGET_VECTOR_ELEN_64") + (VNx1QI "TARGET_MIN_VLEN < 128") VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32") (VNx128QI "TARGET_MIN_VLEN >= 128") + (VNx1HI "TARGET_MIN_VLEN < 128") VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") (VNx64HI "TARGET_MIN_VLEN >= 128") + (VNx1SI "TARGET_MIN_VLEN < 128") VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") (VNx32SI "TARGET_MIN_VLEN >= 128") + (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128") (VNx2DI "TARGET_VECTOR_ELEN_64") + (VNx4DI "TARGET_VECTOR_ELEN_64") (VNx8DI "TARGET_VECTOR_ELEN_64") (VNx16DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128") +]) + +(define_mode_iterator VI_ZVE64 [ + VNx1QI VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI VNx64QI + VNx1HI VNx2HI VNx4HI VNx8HI VNx16HI VNx32HI + VNx1SI VNx2SI VNx4SI VNx8SI VNx16SI + VNx1DI VNx2DI VNx4DI VNx8DI ]) (define_mode_iterator VI_ZVE32 [ @@ -243,9 +267,15 @@ ]) (define_mode_iterator VWI [ - VNx1QI VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32") - VNx1HI VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") - VNx1SI VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") + (VNx1QI "TARGET_MIN_VLEN < 128") VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32") (VNx128QI "TARGET_MIN_VLEN >= 128") + (VNx1HI "TARGET_MIN_VLEN < 128") VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") 
(VNx64HI "TARGET_MIN_VLEN >= 128") + (VNx1SI "TARGET_MIN_VLEN < 128") VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") (VNx32SI "TARGET_MIN_VLEN >= 128") +]) + +(define_mode_iterator VWI_ZVE64 [ + VNx1QI VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI VNx64QI + VNx1HI VNx2HI VNx4HI VNx8HI VNx16HI VNx32HI + VNx1SI VNx2SI VNx4SI VNx8SI VNx16SI ]) (define_mode_iterator VWI_ZVE32 [ @@ -254,15 +284,22 @@ ]) (define_mode_iterator VF [ - (VNx1SF "TARGET_VECTOR_ELEN_FP_32") + (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128") (VNx2SF "TARGET_VECTOR_ELEN_FP_32") (VNx4SF "TARGET_VECTOR_ELEN_FP_32") (VNx8SF "TARGET_VECTOR_ELEN_FP_32") (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") - (VNx1DF "TARGET_VECTOR_ELEN_FP_64") + (VNx32SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128") + (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128") (VNx2DF "TARGET_VECTOR_ELEN_FP_64") (VNx4DF "TARGET_VECTOR_ELEN_FP_64") (VNx8DF "TARGET_VECTOR_ELEN_FP_64") + (VNx16DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128") +]) + +(define_mode_iterator VF_ZVE64 [ + VNx1SF VNx2SF VNx4SF VNx8SF VNx16SF + VNx1DF VNx2DF VNx4DF VNx8DF ]) (define_mode_iterator VF_ZVE32 [ @@ -273,38 +310,40 @@ ]) (define_mode_iterator VWF [ - VNx1SF VNx2SF VNx4SF VNx8SF (VNx16SF "TARGET_MIN_VLEN > 32") + (VNx1SF "TARGET_MIN_VLEN < 128") VNx2SF VNx4SF VNx8SF (VNx16SF "TARGET_MIN_VLEN > 32") (VNx32SF "TARGET_MIN_VLEN >= 128") +]) + +(define_mode_iterator VWF_ZVE64 [ + VNx1SF VNx2SF VNx4SF VNx8SF VNx16SF ]) (define_mode_iterator VFULLI [ - VNx1QI VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32") - VNx1HI VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") - VNx1SI VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") - (VNx1DI "TARGET_FULL_V") (VNx2DI "TARGET_FULL_V") - (VNx4DI "TARGET_FULL_V") (VNx8DI "TARGET_FULL_V") + (VNx1QI "!TARGET_FULL_V") VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32") (VNx128QI "TARGET_FULL_V") + (VNx1HI "!TARGET_FULL_V") VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") (VNx64HI "TARGET_FULL_V") + (VNx1SI "!TARGET_FULL_V") VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") (VNx32SI "TARGET_FULL_V") + (VNx2DI "TARGET_FULL_V") (VNx4DI "TARGET_FULL_V") (VNx8DI "TARGET_FULL_V") (VNx16DI "TARGET_FULL_V") ]) (define_mode_iterator VI_QHS [ - VNx1QI VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32") - VNx1HI VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") - VNx1SI VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") + (VNx1QI "TARGET_MIN_VLEN < 128") VNx2QI VNx4QI VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32") (VNx128QI "TARGET_MIN_VLEN >= 128") + (VNx1HI "TARGET_MIN_VLEN < 128") VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") (VNx64HI "TARGET_MIN_VLEN >= 128") + (VNx1SI "TARGET_MIN_VLEN < 128") VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") (VNx32SI "TARGET_MIN_VLEN >= 128") ]) (define_mode_iterator VI_D [ - (VNx1DI "TARGET_VECTOR_ELEN_64") (VNx2DI "TARGET_VECTOR_ELEN_64") - (VNx4DI "TARGET_VECTOR_ELEN_64") (VNx8DI "TARGET_VECTOR_ELEN_64") + (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128") (VNx2DI "TARGET_VECTOR_ELEN_64") + (VNx4DI "TARGET_VECTOR_ELEN_64") (VNx8DI "TARGET_VECTOR_ELEN_64") (VNx16DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128") ]) (define_mode_iterator VFULLI_D [ - (VNx1DI "TARGET_FULL_V") (VNx2DI "TARGET_FULL_V") - (VNx4DI "TARGET_FULL_V") (VNx8DI "TARGET_FULL_V") + (VNx2DI "TARGET_FULL_V") (VNx4DI "TARGET_FULL_V") (VNx8DI "TARGET_FULL_V") 
(VNx16DI "TARGET_FULL_V") ]) (define_mode_iterator VNX1_QHSD [ - VNx1QI VNx1HI VNx1SI - (VNx1DI "TARGET_VECTOR_ELEN_64") - (VNx1SF "TARGET_VECTOR_ELEN_FP_32") - (VNx1DF "TARGET_VECTOR_ELEN_FP_64") + (VNx1QI "TARGET_MIN_VLEN < 128") (VNx1HI "TARGET_MIN_VLEN < 128") (VNx1SI "TARGET_MIN_VLEN < 128") + (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128") + (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128") + (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128") ]) (define_mode_iterator VNX2_QHSD [ @@ -331,18 +370,24 @@ (define_mode_iterator VNX16_QHS [ VNx16QI VNx16HI (VNx16SI "TARGET_MIN_VLEN > 32") (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") + (VNx16DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128") (VNx16DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128") ]) -(define_mode_iterator VNX32_QH [ - VNx32QI (VNx32HI "TARGET_MIN_VLEN > 32") +(define_mode_iterator VNX32_QHS [ + VNx32QI (VNx32HI "TARGET_MIN_VLEN > 32") (VNx32SI "TARGET_MIN_VLEN >= 128") (VNx32SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128") ]) -(define_mode_iterator VNX64_Q [ +(define_mode_iterator VNX64_QH [ (VNx64QI "TARGET_MIN_VLEN > 32") + (VNx64HI "TARGET_MIN_VLEN >= 128") +]) + +(define_mode_iterator VNX128_Q [ + (VNx128QI "TARGET_MIN_VLEN >= 128") ]) (define_mode_iterator VNX1_QHSDI [ - VNx1QI VNx1HI VNx1SI + (VNx1QI "TARGET_MIN_VLEN < 128") (VNx1HI "TARGET_MIN_VLEN < 128") (VNx1SI "TARGET_MIN_VLEN < 128") (VNx1DI "TARGET_64BIT && TARGET_MIN_VLEN > 32") ]) @@ -362,298 +407,325 @@ ]) (define_mode_iterator VNX16_QHSI [ - VNx16QI VNx16HI (VNx16SI "TARGET_MIN_VLEN > 32") + VNx16QI VNx16HI (VNx16SI "TARGET_MIN_VLEN > 32") (VNx16DI "TARGET_MIN_VLEN >= 128") ]) -(define_mode_iterator VNX32_QHI [ - VNx32QI (VNx32HI "TARGET_MIN_VLEN > 32") +(define_mode_iterator VNX32_QHSI [ + VNx32QI (VNx32HI "TARGET_MIN_VLEN > 32") (VNx32SI "TARGET_MIN_VLEN >= 128") +]) + +(define_mode_iterator VNX64_QHI [ + VNx64QI (VNx64HI "TARGET_MIN_VLEN >= 128") ]) (define_mode_iterator V_WHOLE [ - (VNx4QI "TARGET_MIN_VLEN == 32") VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32") - (VNx2HI "TARGET_MIN_VLEN == 32") VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") - (VNx1SI "TARGET_MIN_VLEN == 32") VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") - (VNx1DI "TARGET_VECTOR_ELEN_64") (VNx2DI "TARGET_VECTOR_ELEN_64") - (VNx4DI "TARGET_VECTOR_ELEN_64") (VNx8DI "TARGET_VECTOR_ELEN_64") + (VNx4QI "TARGET_MIN_VLEN == 32") VNx8QI VNx16QI VNx32QI (VNx64QI "TARGET_MIN_VLEN > 32") (VNx128QI "TARGET_MIN_VLEN >= 128") + (VNx2HI "TARGET_MIN_VLEN == 32") VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") (VNx64HI "TARGET_MIN_VLEN >= 128") + (VNx1SI "TARGET_MIN_VLEN == 32") VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") (VNx32SI "TARGET_MIN_VLEN >= 128") + (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128") (VNx2DI "TARGET_VECTOR_ELEN_64") + (VNx4DI "TARGET_VECTOR_ELEN_64") (VNx8DI "TARGET_VECTOR_ELEN_64") (VNx16DI "TARGET_MIN_VLEN >= 128") (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN == 32") (VNx2SF "TARGET_VECTOR_ELEN_FP_32") (VNx4SF "TARGET_VECTOR_ELEN_FP_32") (VNx8SF "TARGET_VECTOR_ELEN_FP_32") (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") - (VNx1DF "TARGET_VECTOR_ELEN_FP_64") + (VNx32SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128") + (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128") (VNx2DF "TARGET_VECTOR_ELEN_FP_64") (VNx4DF "TARGET_VECTOR_ELEN_FP_64") (VNx8DF "TARGET_VECTOR_ELEN_FP_64") + (VNx16DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 
128") ]) (define_mode_iterator V_FRACT [ - VNx1QI VNx2QI (VNx4QI "TARGET_MIN_VLEN > 32") - VNx1HI (VNx2HI "TARGET_MIN_VLEN > 32") - (VNx1SI "TARGET_MIN_VLEN > 32") + (VNx1QI "TARGET_MIN_VLEN < 128") VNx2QI (VNx4QI "TARGET_MIN_VLEN > 32") (VNx8QI "TARGET_MIN_VLEN >= 128") + (VNx1HI "TARGET_MIN_VLEN < 128") (VNx2HI "TARGET_MIN_VLEN > 32") (VNx4HI "TARGET_MIN_VLEN >= 128") + (VNx1SI "TARGET_MIN_VLEN > 32 && TARGET_MIN_VLEN < 128") (VNx2SI "TARGET_MIN_VLEN >= 128") (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") + (VNx2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128") ]) (define_mode_iterator VB [ - VNx1BI VNx2BI VNx4BI VNx8BI VNx16BI VNx32BI - (VNx64BI "TARGET_MIN_VLEN > 32") + (VNx1BI "TARGET_MIN_VLEN < 128") VNx2BI VNx4BI VNx8BI VNx16BI VNx32BI + (VNx64BI "TARGET_MIN_VLEN > 32") (VNx128BI "TARGET_MIN_VLEN >= 128") ]) (define_mode_iterator VWEXTI [ - VNx1HI VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") - VNx1SI VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") - (VNx1DI "TARGET_VECTOR_ELEN_64") (VNx2DI "TARGET_VECTOR_ELEN_64") + (VNx1HI "TARGET_MIN_VLEN < 128") VNx2HI VNx4HI VNx8HI VNx16HI (VNx32HI "TARGET_MIN_VLEN > 32") (VNx64HI "TARGET_MIN_VLEN >= 128") + (VNx1SI "TARGET_MIN_VLEN < 128") VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") (VNx32SI "TARGET_MIN_VLEN >= 128") + (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128") (VNx2DI "TARGET_VECTOR_ELEN_64") (VNx4DI "TARGET_VECTOR_ELEN_64") (VNx8DI "TARGET_VECTOR_ELEN_64") + (VNx16DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128") ]) (define_mode_iterator VWEXTF [ - (VNx1DF "TARGET_VECTOR_ELEN_FP_64") + (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128") (VNx2DF "TARGET_VECTOR_ELEN_FP_64") (VNx4DF "TARGET_VECTOR_ELEN_FP_64") (VNx8DF "TARGET_VECTOR_ELEN_FP_64") + (VNx16DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128") ]) (define_mode_iterator VWCONVERTI [ - (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_32") + (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128") (VNx2DI "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_32") (VNx4DI "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_32") (VNx8DI "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_32") + (VNx16DI "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128") ]) (define_mode_iterator VQEXTI [ - VNx1SI VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") - (VNx1DI "TARGET_VECTOR_ELEN_64") (VNx2DI "TARGET_VECTOR_ELEN_64") + (VNx1SI "TARGET_MIN_VLEN < 128") VNx2SI VNx4SI VNx8SI (VNx16SI "TARGET_MIN_VLEN > 32") (VNx32SI "TARGET_MIN_VLEN >= 128") + (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128") (VNx2DI "TARGET_VECTOR_ELEN_64") (VNx4DI "TARGET_VECTOR_ELEN_64") (VNx8DI "TARGET_VECTOR_ELEN_64") + (VNx16DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128") ]) (define_mode_iterator VOEXTI [ - (VNx1DI "TARGET_VECTOR_ELEN_64") (VNx2DI "TARGET_VECTOR_ELEN_64") + (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128") (VNx2DI "TARGET_VECTOR_ELEN_64") (VNx4DI "TARGET_VECTOR_ELEN_64") (VNx8DI "TARGET_VECTOR_ELEN_64") + (VNx16DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128") ]) (define_mode_attr VLMULX2 [ - (VNx1QI "VNx2QI") (VNx2QI "VNx4QI") (VNx4QI "VNx8QI") (VNx8QI "VNx16QI") (VNx16QI "VNx32QI") (VNx32QI "VNx64QI") - (VNx1HI "VNx2HI") (VNx2HI "VNx4HI") (VNx4HI "VNx8HI") (VNx8HI "VNx16HI") (VNx16HI "VNx32HI") - (VNx1SI "VNx2SI") (VNx2SI "VNx4SI") (VNx4SI "VNx8SI") (VNx8SI "VNx16SI") - (VNx1DI "VNx2DI") (VNx2DI "VNx4DI") (VNx4DI "VNx8DI") - (VNx1SF "VNx2SF") 
(VNx2SF "VNx4SF") (VNx4SF "VNx8SF") (VNx8SF "VNx16SF") - (VNx1DF "VNx2DF") (VNx2DF "VNx4DF") (VNx4DF "VNx8DF") + (VNx1QI "VNx2QI") (VNx2QI "VNx4QI") (VNx4QI "VNx8QI") (VNx8QI "VNx16QI") (VNx16QI "VNx32QI") (VNx32QI "VNx64QI") (VNx64QI "VNx128QI") + (VNx1HI "VNx2HI") (VNx2HI "VNx4HI") (VNx4HI "VNx8HI") (VNx8HI "VNx16HI") (VNx16HI "VNx32HI") (VNx32HI "VNx64HI") + (VNx1SI "VNx2SI") (VNx2SI "VNx4SI") (VNx4SI "VNx8SI") (VNx8SI "VNx16SI") (VNx16SI "VNx32SI") + (VNx1DI "VNx2DI") (VNx2DI "VNx4DI") (VNx4DI "VNx8DI") (VNx8DI "VNx16DI") + (VNx1SF "VNx2SF") (VNx2SF "VNx4SF") (VNx4SF "VNx8SF") (VNx8SF "VNx16SF") (VNx16SF "VNx32SF") + (VNx1DF "VNx2DF") (VNx2DF "VNx4DF") (VNx4DF "VNx8DF") (VNx8DF "VNx16DF") ]) (define_mode_attr VLMULX4 [ - (VNx1QI "VNx4QI") (VNx2QI "VNx8QI") (VNx4QI "VNx16QI") (VNx8QI "VNx32QI") (VNx16QI "VNx64QI") - (VNx1HI "VNx4HI") (VNx2HI "VNx8HI") (VNx4HI "VNx16HI") (VNx8HI "VNx32HI") - (VNx1SI "VNx4SI") (VNx2SI "VNx8SI") (VNx4SI "VNx16SI") - (VNx1DI "VNx4DI") (VNx2DI "VNx8DI") - (VNx1SF "VNx4SF") (VNx2SF "VNx8SF") (VNx4SF "VNx16SF") - (VNx1DF "VNx4DF") (VNx2DF "VNx8DF") + (VNx1QI "VNx4QI") (VNx2QI "VNx8QI") (VNx4QI "VNx16QI") (VNx8QI "VNx32QI") (VNx16QI "VNx64QI") (VNx32QI "VNx128QI") + (VNx1HI "VNx4HI") (VNx2HI "VNx8HI") (VNx4HI "VNx16HI") (VNx8HI "VNx32HI") (VNx16HI "VNx64HI") + (VNx1SI "VNx4SI") (VNx2SI "VNx8SI") (VNx4SI "VNx16SI") (VNx8SI "VNx32SI") + (VNx1DI "VNx4DI") (VNx2DI "VNx8DI") (VNx4DI "VNx16DI") + (VNx1SF "VNx4SF") (VNx2SF "VNx8SF") (VNx4SF "VNx16SF") (VNx8SF "VNx32SF") + (VNx1DF "VNx4DF") (VNx2DF "VNx8DF") (VNx4DF "VNx16DF") ]) (define_mode_attr VLMULX8 [ - (VNx1QI "VNx8QI") (VNx2QI "VNx16QI") (VNx4QI "VNx32QI") (VNx8QI "VNx64QI") - (VNx1HI "VNx8HI") (VNx2HI "VNx16HI") (VNx4HI "VNx32HI") - (VNx1SI "VNx8SI") (VNx2SI "VNx16SI") - (VNx1DI "VNx8DI") - (VNx1SF "VNx8SF") (VNx2SF "VNx16SF") - (VNx1DF "VNx8DF") + (VNx1QI "VNx8QI") (VNx2QI "VNx16QI") (VNx4QI "VNx32QI") (VNx8QI "VNx64QI") (VNx16QI "VNx128QI") + (VNx1HI "VNx8HI") (VNx2HI "VNx16HI") (VNx4HI "VNx32HI") (VNx8HI "VNx64HI") + (VNx1SI "VNx8SI") (VNx2SI "VNx16SI") (VNx4SI "VNx32SI") + (VNx1DI "VNx8DI") (VNx2DI "VNx16DI") + (VNx1SF "VNx8SF") (VNx2SF "VNx16SF") (VNx4SF "VNx32SF") + (VNx1DF "VNx8DF") (VNx2DF "VNx16DF") ]) (define_mode_attr VLMULX16 [ - (VNx1QI "VNx16QI") (VNx2QI "VNx32QI") (VNx4QI "VNx64QI") - (VNx1HI "VNx16HI") (VNx2HI "VNx32HI") - (VNx1SI "VNx16SI") - (VNx1SF "VNx16SF") + (VNx1QI "VNx16QI") (VNx2QI "VNx32QI") (VNx4QI "VNx64QI") (VNx8QI "VNx128QI") + (VNx1HI "VNx16HI") (VNx2HI "VNx32HI") (VNx4HI "VNx64HI") + (VNx1SI "VNx16SI") (VNx2SI "VNx32SI") + (VNx1SF "VNx16SF") (VNx2SF "VNx32SF") ]) (define_mode_attr VLMULX32 [ - (VNx1QI "VNx32QI") (VNx2QI "VNx64QI") - (VNx1HI "VNx32HI") + (VNx1QI "VNx32QI") (VNx2QI "VNx64QI") (VNx4QI "VNx128QI") + (VNx1HI "VNx32HI") (VNx2HI "VNx64HI") ]) (define_mode_attr VLMULX64 [ - (VNx1QI "VNx64QI") + (VNx1QI "VNx64QI") (VNx2QI "VNx128QI") ]) (define_mode_attr VINDEX [ (VNx1QI "VNx1QI") (VNx2QI "VNx2QI") (VNx4QI "VNx4QI") (VNx8QI "VNx8QI") - (VNx16QI "VNx16QI") (VNx32QI "VNx32QI") (VNx64QI "VNx64QI") + (VNx16QI "VNx16QI") (VNx32QI "VNx32QI") (VNx64QI "VNx64QI") (VNx128QI "VNx128QI") (VNx1HI "VNx1HI") (VNx2HI "VNx2HI") (VNx4HI "VNx4HI") (VNx8HI "VNx8HI") - (VNx16HI "VNx16HI") (VNx32HI "VNx32HI") + (VNx16HI "VNx16HI") (VNx32HI "VNx32HI") (VNx64HI "VNx64HI") (VNx1SI "VNx1SI") (VNx2SI "VNx2SI") (VNx4SI "VNx4SI") (VNx8SI "VNx8SI") - (VNx16SI "VNx16SI") - (VNx1DI "VNx1DI") (VNx2DI "VNx2DI") (VNx4DI "VNx4DI") (VNx8DI "VNx8DI") + (VNx16SI "VNx16SI") (VNx32SI "VNx32SI") + (VNx1DI 
"VNx1DI") (VNx2DI "VNx2DI") (VNx4DI "VNx4DI") (VNx8DI "VNx8DI") (VNx16DI "VNx16DI") (VNx1SF "VNx1SI") (VNx2SF "VNx2SI") (VNx4SF "VNx4SI") (VNx8SF "VNx8SI") - (VNx16SF "VNx16SI") - (VNx1DF "VNx1DI") (VNx2DF "VNx2DI") (VNx4DF "VNx4DI") (VNx8DF "VNx8DI") + (VNx16SF "VNx16SI") (VNx32SF "VNx32SI") + (VNx1DF "VNx1DI") (VNx2DF "VNx2DI") (VNx4DF "VNx4DI") (VNx8DF "VNx8DI") (VNx16DF "VNx16DI") ]) (define_mode_attr VINDEXEI16 [ (VNx1QI "VNx1HI") (VNx2QI "VNx2HI") (VNx4QI "VNx4HI") (VNx8QI "VNx8HI") - (VNx16QI "VNx16HI") (VNx32QI "VNx32HI") + (VNx16QI "VNx16HI") (VNx32QI "VNx32HI") (VNx64QI "VNx64HI") (VNx1HI "VNx1HI") (VNx2HI "VNx2HI") (VNx4HI "VNx4HI") (VNx8HI "VNx8HI") - (VNx16HI "VNx16HI") (VNx32HI "VNx32HI") + (VNx16HI "VNx16HI") (VNx32HI "VNx32HI") (VNx64HI "VNx64HI") (VNx1SI "VNx1HI") (VNx2SI "VNx2HI") (VNx4SI "VNx4HI") (VNx8SI "VNx8HI") - (VNx16SI "VNx16HI") - (VNx1DI "VNx1HI") (VNx2DI "VNx2HI") (VNx4DI "VNx4HI") (VNx8DI "VNx8HI") + (VNx16SI "VNx16HI") (VNx32SI "VNx32HI") + (VNx1DI "VNx1HI") (VNx2DI "VNx2HI") (VNx4DI "VNx4HI") (VNx8DI "VNx8HI") (VNx16DI "VNx16HI") (VNx1SF "VNx1HI") (VNx2SF "VNx2HI") (VNx4SF "VNx4HI") (VNx8SF "VNx8HI") - (VNx16SF "VNx16HI") - (VNx1DF "VNx1HI") (VNx2DF "VNx2HI") (VNx4DF "VNx4HI") (VNx8DF "VNx8HI") + (VNx16SF "VNx16HI") (VNx32SF "VNx32HI") + (VNx1DF "VNx1HI") (VNx2DF "VNx2HI") (VNx4DF "VNx4HI") (VNx8DF "VNx8HI") (VNx16DF "VNx16HI") ]) (define_mode_attr VM [ - (VNx1QI "VNx1BI") (VNx2QI "VNx2BI") (VNx4QI "VNx4BI") (VNx8QI "VNx8BI") (VNx16QI "VNx16BI") (VNx32QI "VNx32BI") (VNx64QI "VNx64BI") - (VNx1HI "VNx1BI") (VNx2HI "VNx2BI") (VNx4HI "VNx4BI") (VNx8HI "VNx8BI") (VNx16HI "VNx16BI") (VNx32HI "VNx32BI") - (VNx1SI "VNx1BI") (VNx2SI "VNx2BI") (VNx4SI "VNx4BI") (VNx8SI "VNx8BI") (VNx16SI "VNx16BI") - (VNx1DI "VNx1BI") (VNx2DI "VNx2BI") (VNx4DI "VNx4BI") (VNx8DI "VNx8BI") - (VNx1SF "VNx1BI") (VNx2SF "VNx2BI") (VNx4SF "VNx4BI") (VNx8SF "VNx8BI") (VNx16SF "VNx16BI") - (VNx1DF "VNx1BI") (VNx2DF "VNx2BI") (VNx4DF "VNx4BI") (VNx8DF "VNx8BI") + (VNx1QI "VNx1BI") (VNx2QI "VNx2BI") (VNx4QI "VNx4BI") (VNx8QI "VNx8BI") (VNx16QI "VNx16BI") (VNx32QI "VNx32BI") (VNx64QI "VNx64BI") (VNx128QI "VNx128BI") + (VNx1HI "VNx1BI") (VNx2HI "VNx2BI") (VNx4HI "VNx4BI") (VNx8HI "VNx8BI") (VNx16HI "VNx16BI") (VNx32HI "VNx32BI") (VNx64HI "VNx64BI") + (VNx1SI "VNx1BI") (VNx2SI "VNx2BI") (VNx4SI "VNx4BI") (VNx8SI "VNx8BI") (VNx16SI "VNx16BI") (VNx32SI "VNx32BI") + (VNx1DI "VNx1BI") (VNx2DI "VNx2BI") (VNx4DI "VNx4BI") (VNx8DI "VNx8BI") (VNx16DI "VNx16BI") + (VNx1SF "VNx1BI") (VNx2SF "VNx2BI") (VNx4SF "VNx4BI") (VNx8SF "VNx8BI") (VNx16SF "VNx16BI") (VNx32SF "VNx32BI") + (VNx1DF "VNx1BI") (VNx2DF "VNx2BI") (VNx4DF "VNx4BI") (VNx8DF "VNx8BI") (VNx16DF "VNx16BI") ]) (define_mode_attr vm [ - (VNx1QI "vnx1bi") (VNx2QI "vnx2bi") (VNx4QI "vnx4bi") (VNx8QI "vnx8bi") (VNx16QI "vnx16bi") (VNx32QI "vnx32bi") (VNx64QI "vnx64bi") - (VNx1HI "vnx1bi") (VNx2HI "vnx2bi") (VNx4HI "vnx4bi") (VNx8HI "vnx8bi") (VNx16HI "vnx16bi") (VNx32HI "vnx32bi") - (VNx1SI "vnx1bi") (VNx2SI "vnx2bi") (VNx4SI "vnx4bi") (VNx8SI "vnx8bi") (VNx16SI "vnx16bi") - (VNx1DI "vnx1bi") (VNx2DI "vnx2bi") (VNx4DI "vnx4bi") (VNx8DI "vnx8bi") - (VNx1SF "vnx1bi") (VNx2SF "vnx2bi") (VNx4SF "vnx4bi") (VNx8SF "vnx8bi") (VNx16SF "vnx16bi") - (VNx1DF "vnx1bi") (VNx2DF "vnx2bi") (VNx4DF "vnx4bi") (VNx8DF "vnx8bi") + (VNx1QI "vnx1bi") (VNx2QI "vnx2bi") (VNx4QI "vnx4bi") (VNx8QI "vnx8bi") (VNx16QI "vnx16bi") (VNx32QI "vnx32bi") (VNx64QI "vnx64bi") (VNx128QI "vnx128bi") + (VNx1HI "vnx1bi") (VNx2HI "vnx2bi") (VNx4HI "vnx4bi") (VNx8HI "vnx8bi") (VNx16HI 
"vnx16bi") (VNx32HI "vnx32bi") (VNx64HI "vnx64bi") + (VNx1SI "vnx1bi") (VNx2SI "vnx2bi") (VNx4SI "vnx4bi") (VNx8SI "vnx8bi") (VNx16SI "vnx16bi") (VNx32SI "vnx32bi") + (VNx1DI "vnx1bi") (VNx2DI "vnx2bi") (VNx4DI "vnx4bi") (VNx8DI "vnx8bi") (VNx16DI "vnx16bi") + (VNx1SF "vnx1bi") (VNx2SF "vnx2bi") (VNx4SF "vnx4bi") (VNx8SF "vnx8bi") (VNx16SF "vnx16bi") (VNx32SF "vnx32bi") + (VNx1DF "vnx1bi") (VNx2DF "vnx2bi") (VNx4DF "vnx4bi") (VNx8DF "vnx8bi") (VNx16DF "vnx16bi") ]) (define_mode_attr VEL [ - (VNx1QI "QI") (VNx2QI "QI") (VNx4QI "QI") (VNx8QI "QI") (VNx16QI "QI") (VNx32QI "QI") (VNx64QI "QI") - (VNx1HI "HI") (VNx2HI "HI") (VNx4HI "HI") (VNx8HI "HI") (VNx16HI "HI") (VNx32HI "HI") - (VNx1SI "SI") (VNx2SI "SI") (VNx4SI "SI") (VNx8SI "SI") (VNx16SI "SI") - (VNx1DI "DI") (VNx2DI "DI") (VNx4DI "DI") (VNx8DI "DI") - (VNx1SF "SF") (VNx2SF "SF") (VNx4SF "SF") (VNx8SF "SF") (VNx16SF "SF") - (VNx1DF "DF") (VNx2DF "DF") (VNx4DF "DF") (VNx8DF "DF") + (VNx1QI "QI") (VNx2QI "QI") (VNx4QI "QI") (VNx8QI "QI") (VNx16QI "QI") (VNx32QI "QI") (VNx64QI "QI") (VNx128QI "QI") + (VNx1HI "HI") (VNx2HI "HI") (VNx4HI "HI") (VNx8HI "HI") (VNx16HI "HI") (VNx32HI "HI") (VNx64HI "HI") + (VNx1SI "SI") (VNx2SI "SI") (VNx4SI "SI") (VNx8SI "SI") (VNx16SI "SI") (VNx32SI "SI") + (VNx1DI "DI") (VNx2DI "DI") (VNx4DI "DI") (VNx8DI "DI") (VNx16DI "DI") + (VNx1SF "SF") (VNx2SF "SF") (VNx4SF "SF") (VNx8SF "SF") (VNx16SF "SF") (VNx32SF "SF") + (VNx1DF "DF") (VNx2DF "DF") (VNx4DF "DF") (VNx8DF "DF") (VNx16DF "DF") ]) (define_mode_attr VSUBEL [ - (VNx1HI "QI") (VNx2HI "QI") (VNx4HI "QI") (VNx8HI "QI") (VNx16HI "QI") (VNx32HI "QI") - (VNx1SI "HI") (VNx2SI "HI") (VNx4SI "HI") (VNx8SI "HI") (VNx16SI "HI") - (VNx1DI "SI") (VNx2DI "SI") (VNx4DI "SI") (VNx8DI "SI") - (VNx1SF "HF") (VNx2SF "HF") (VNx4SF "HF") (VNx8SF "HF") (VNx16SF "HF") - (VNx1DF "SF") (VNx2DF "SF") (VNx4DF "SF") (VNx8DF "SF") + (VNx1HI "QI") (VNx2HI "QI") (VNx4HI "QI") (VNx8HI "QI") (VNx16HI "QI") (VNx32HI "QI") (VNx64HI "QI") + (VNx1SI "HI") (VNx2SI "HI") (VNx4SI "HI") (VNx8SI "HI") (VNx16SI "HI") (VNx32SI "HI") + (VNx1DI "SI") (VNx2DI "SI") (VNx4DI "SI") (VNx8DI "SI") (VNx16DI "SI") + (VNx1SF "HF") (VNx2SF "HF") (VNx4SF "HF") (VNx8SF "HF") (VNx16SF "HF") (VNx32SF "HF") + (VNx1DF "SF") (VNx2DF "SF") (VNx4DF "SF") (VNx8DF "SF") (VNx16DF "SF") ]) (define_mode_attr sew [ - (VNx1QI "8") (VNx2QI "8") (VNx4QI "8") (VNx8QI "8") (VNx16QI "8") (VNx32QI "8") (VNx64QI "8") - (VNx1HI "16") (VNx2HI "16") (VNx4HI "16") (VNx8HI "16") (VNx16HI "16") (VNx32HI "16") - (VNx1SI "32") (VNx2SI "32") (VNx4SI "32") (VNx8SI "32") (VNx16SI "32") - (VNx1DI "64") (VNx2DI "64") (VNx4DI "64") (VNx8DI "64") - (VNx1SF "32") (VNx2SF "32") (VNx4SF "32") (VNx8SF "32") (VNx16SF "32") - (VNx1DF "64") (VNx2DF "64") (VNx4DF "64") (VNx8DF "64") + (VNx1QI "8") (VNx2QI "8") (VNx4QI "8") (VNx8QI "8") (VNx16QI "8") (VNx32QI "8") (VNx64QI "8") (VNx128QI "8") + (VNx1HI "16") (VNx2HI "16") (VNx4HI "16") (VNx8HI "16") (VNx16HI "16") (VNx32HI "16") (VNx64HI "16") + (VNx1SI "32") (VNx2SI "32") (VNx4SI "32") (VNx8SI "32") (VNx16SI "32") (VNx32SI "32") + (VNx1DI "64") (VNx2DI "64") (VNx4DI "64") (VNx8DI "64") (VNx16DI "64") + (VNx1SF "32") (VNx2SF "32") (VNx4SF "32") (VNx8SF "32") (VNx16SF "32") (VNx32SF "32") + (VNx1DF "64") (VNx2DF "64") (VNx4DF "64") (VNx8DF "64") (VNx16DF "64") ]) (define_mode_attr double_trunc_sew [ - (VNx1HI "8") (VNx2HI "8") (VNx4HI "8") (VNx8HI "8") (VNx16HI "8") (VNx32HI "8") - (VNx1SI "16") (VNx2SI "16") (VNx4SI "16") (VNx8SI "16") (VNx16SI "16") - (VNx1DI "32") (VNx2DI "32") (VNx4DI "32") (VNx8DI 
"32") - (VNx1SF "16") (VNx2SF "16") (VNx4SF "16") (VNx8SF "16") (VNx16SF "16") - (VNx1DF "32") (VNx2DF "32") (VNx4DF "32") (VNx8DF "32") + (VNx1HI "8") (VNx2HI "8") (VNx4HI "8") (VNx8HI "8") (VNx16HI "8") (VNx32HI "8") (VNx64HI "8") + (VNx1SI "16") (VNx2SI "16") (VNx4SI "16") (VNx8SI "16") (VNx16SI "16") (VNx32SI "16") + (VNx1DI "32") (VNx2DI "32") (VNx4DI "32") (VNx8DI "32") (VNx16DI "32") + (VNx1SF "16") (VNx2SF "16") (VNx4SF "16") (VNx8SF "16") (VNx16SF "16") (VNx32SF "16") + (VNx1DF "32") (VNx2DF "32") (VNx4DF "32") (VNx8DF "32") (VNx16DF "32") ]) (define_mode_attr quad_trunc_sew [ - (VNx1SI "8") (VNx2SI "8") (VNx4SI "8") (VNx8SI "8") (VNx16SI "8") - (VNx1DI "16") (VNx2DI "16") (VNx4DI "16") (VNx8DI "16") - (VNx1SF "8") (VNx2SF "8") (VNx4SF "8") (VNx8SF "8") (VNx16SF "8") - (VNx1DF "16") (VNx2DF "16") (VNx4DF "16") (VNx8DF "16") + (VNx1SI "8") (VNx2SI "8") (VNx4SI "8") (VNx8SI "8") (VNx16SI "8") (VNx32SI "8") + (VNx1DI "16") (VNx2DI "16") (VNx4DI "16") (VNx8DI "16") (VNx16DI "16") + (VNx1SF "8") (VNx2SF "8") (VNx4SF "8") (VNx8SF "8") (VNx16SF "8") (VNx32SF "8") + (VNx1DF "16") (VNx2DF "16") (VNx4DF "16") (VNx8DF "16") (VNx16DF "16") ]) (define_mode_attr oct_trunc_sew [ - (VNx1DI "8") (VNx2DI "8") (VNx4DI "8") (VNx8DI "8") - (VNx1DF "8") (VNx2DF "8") (VNx4DF "8") (VNx8DF "8") + (VNx1DI "8") (VNx2DI "8") (VNx4DI "8") (VNx8DI "8") (VNx16DI "8") + (VNx1DF "8") (VNx2DF "8") (VNx4DF "8") (VNx8DF "8") (VNx16DF "8") ]) (define_mode_attr double_ext_sew [ - (VNx1QI "16") (VNx2QI "16") (VNx4QI "16") (VNx8QI "16") (VNx16QI "16") (VNx32QI "16") - (VNx1HI "32") (VNx2HI "32") (VNx4HI "32") (VNx8HI "32") (VNx16HI "32") - (VNx1SI "64") (VNx2SI "64") (VNx4SI "64") (VNx8SI "64") - (VNx1SF "64") (VNx2SF "64") (VNx4SF "64") (VNx8SF "64") + (VNx1QI "16") (VNx2QI "16") (VNx4QI "16") (VNx8QI "16") (VNx16QI "16") (VNx32QI "16") (VNx64QI "16") + (VNx1HI "32") (VNx2HI "32") (VNx4HI "32") (VNx8HI "32") (VNx16HI "32") (VNx32HI "32") + (VNx1SI "64") (VNx2SI "64") (VNx4SI "64") (VNx8SI "64") (VNx16SI "64") + (VNx1SF "64") (VNx2SF "64") (VNx4SF "64") (VNx8SF "64") (VNx16SF "64") ]) (define_mode_attr quad_ext_sew [ - (VNx1QI "32") (VNx2QI "32") (VNx4QI "32") (VNx8QI "32") (VNx16QI "32") - (VNx1HI "64") (VNx2HI "64") (VNx4HI "64") (VNx8HI "64") + (VNx1QI "32") (VNx2QI "32") (VNx4QI "32") (VNx8QI "32") (VNx16QI "32") (VNx32QI "32") + (VNx1HI "64") (VNx2HI "64") (VNx4HI "64") (VNx8HI "64") (VNx16HI "64") ]) (define_mode_attr oct_ext_sew [ - (VNx1QI "64") (VNx2QI "64") (VNx4QI "64") (VNx8QI "64") + (VNx1QI "64") (VNx2QI "64") (VNx4QI "64") (VNx8QI "64") (VNx16QI "64") ]) (define_mode_attr V_DOUBLE_TRUNC [ (VNx1HI "VNx1QI") (VNx2HI "VNx2QI") (VNx4HI "VNx4QI") (VNx8HI "VNx8QI") - (VNx16HI "VNx16QI") (VNx32HI "VNx32QI") + (VNx16HI "VNx16QI") (VNx32HI "VNx32QI") (VNx64HI "VNx64QI") (VNx1SI "VNx1HI") (VNx2SI "VNx2HI") (VNx4SI "VNx4HI") (VNx8SI "VNx8HI") - (VNx16SI "VNx16HI") - (VNx1DI "VNx1SI") (VNx2DI "VNx2SI") (VNx4DI "VNx4SI") (VNx8DI "VNx8SI") - (VNx1DF "VNx1SF") (VNx2DF "VNx2SF") (VNx4DF "VNx4SF") (VNx8DF "VNx8SF") + (VNx16SI "VNx16HI") (VNx32SI "VNx32HI") + (VNx1DI "VNx1SI") (VNx2DI "VNx2SI") (VNx4DI "VNx4SI") (VNx8DI "VNx8SI") (VNx16DI "VNx16SI") + (VNx1DF "VNx1SF") (VNx2DF "VNx2SF") (VNx4DF "VNx4SF") (VNx8DF "VNx8SF") (VNx16DF "VNx16SF") ]) (define_mode_attr V_QUAD_TRUNC [ (VNx1SI "VNx1QI") (VNx2SI "VNx2QI") (VNx4SI "VNx4QI") (VNx8SI "VNx8QI") - (VNx16SI "VNx16QI") + (VNx16SI "VNx16QI") (VNx32SI "VNx32QI") (VNx1DI "VNx1HI") (VNx2DI "VNx2HI") - (VNx4DI "VNx4HI") (VNx8DI "VNx8HI") + (VNx4DI "VNx4HI") (VNx8DI "VNx8HI") 
(VNx16DI "VNx16HI") ]) (define_mode_attr V_OCT_TRUNC [ - (VNx1DI "VNx1QI") (VNx2DI "VNx2QI") (VNx4DI "VNx4QI") (VNx8DI "VNx8QI") + (VNx1DI "VNx1QI") (VNx2DI "VNx2QI") (VNx4DI "VNx4QI") (VNx8DI "VNx8QI") (VNx16DI "VNx16QI") ]) (define_mode_attr VINDEX_DOUBLE_TRUNC [ (VNx1HI "VNx1QI") (VNx2HI "VNx2QI") (VNx4HI "VNx4QI") (VNx8HI "VNx8QI") - (VNx16HI "VNx16QI") (VNx32HI "VNx32QI") + (VNx16HI "VNx16QI") (VNx32HI "VNx32QI") (VNx64HI "VNx64QI") (VNx1SI "VNx1HI") (VNx2SI "VNx2HI") (VNx4SI "VNx4HI") (VNx8SI "VNx8HI") - (VNx16SI "VNx16HI") + (VNx16SI "VNx16HI") (VNx32SI "VNx32HI") (VNx1SF "VNx1HI") (VNx2SF "VNx2HI") (VNx4SF "VNx4HI") (VNx8SF "VNx8HI") - (VNx16SF "VNx16HI") - (VNx1DI "VNx1SI") (VNx2DI "VNx2SI") (VNx4DI "VNx4SI") (VNx8DI "VNx8SI") - (VNx1DF "VNx1SI") (VNx2DF "VNx2SI") (VNx4DF "VNx4SI") (VNx8DF "VNx8SI") + (VNx16SF "VNx16HI") (VNx32SF "VNx32HI") + (VNx1DI "VNx1SI") (VNx2DI "VNx2SI") (VNx4DI "VNx4SI") (VNx8DI "VNx8SI") (VNx16DI "VNx16SI") + (VNx1DF "VNx1SI") (VNx2DF "VNx2SI") (VNx4DF "VNx4SI") (VNx8DF "VNx8SI") (VNx16DF "VNx16SI") ]) (define_mode_attr VINDEX_QUAD_TRUNC [ (VNx1SI "VNx1QI") (VNx2SI "VNx2QI") (VNx4SI "VNx4QI") (VNx8SI "VNx8QI") - (VNx16SI "VNx16QI") + (VNx16SI "VNx16QI") (VNx32SI "VNx32QI") (VNx1DI "VNx1HI") (VNx2DI "VNx2HI") - (VNx4DI "VNx4HI") (VNx8DI "VNx8HI") + (VNx4DI "VNx4HI") (VNx8DI "VNx8HI") (VNx16DI "VNx16HI") (VNx1SF "VNx1QI") (VNx2SF "VNx2QI") (VNx4SF "VNx4QI") (VNx8SF "VNx8QI") - (VNx16SF "VNx16QI") + (VNx16SF "VNx16QI") (VNx32SF "VNx32QI") (VNx1DF "VNx1HI") (VNx2DF "VNx2HI") - (VNx4DF "VNx4HI") (VNx8DF "VNx8HI") + (VNx4DF "VNx4HI") (VNx8DF "VNx8HI") (VNx16DF "VNx16HI") ]) (define_mode_attr VINDEX_OCT_TRUNC [ - (VNx1DI "VNx1QI") (VNx2DI "VNx2QI") (VNx4DI "VNx4QI") (VNx8DI "VNx8QI") - (VNx1DF "VNx1QI") (VNx2DF "VNx2QI") (VNx4DF "VNx4QI") (VNx8DF "VNx8QI") + (VNx1DI "VNx1QI") (VNx2DI "VNx2QI") (VNx4DI "VNx4QI") (VNx8DI "VNx8QI") (VNx16DI "VNx16QI") + (VNx1DF "VNx1QI") (VNx2DF "VNx2QI") (VNx4DF "VNx4QI") (VNx8DF "VNx8QI") (VNx16DF "VNx16QI") ]) (define_mode_attr VINDEX_DOUBLE_EXT [ - (VNx1QI "VNx1HI") (VNx2QI "VNx2HI") (VNx4QI "VNx4HI") (VNx8QI "VNx8HI") (VNx16QI "VNx16HI") (VNx32QI "VNx32HI") - (VNx1HI "VNx1SI") (VNx2HI "VNx2SI") (VNx4HI "VNx4SI") (VNx8HI "VNx8SI") (VNx16HI "VNx16SI") - (VNx1SI "VNx1DI") (VNx2SI "VNx2DI") (VNx4SI "VNx4DI") (VNx8SI "VNx8DI") - (VNx1SF "VNx1DI") (VNx2SF "VNx2DI") (VNx4SF "VNx4DI") (VNx8SF "VNx8DI") + (VNx1QI "VNx1HI") (VNx2QI "VNx2HI") (VNx4QI "VNx4HI") (VNx8QI "VNx8HI") (VNx16QI "VNx16HI") (VNx32QI "VNx32HI") (VNx64QI "VNx64HI") + (VNx1HI "VNx1SI") (VNx2HI "VNx2SI") (VNx4HI "VNx4SI") (VNx8HI "VNx8SI") (VNx16HI "VNx16SI") (VNx32HI "VNx32SI") + (VNx1SI "VNx1DI") (VNx2SI "VNx2DI") (VNx4SI "VNx4DI") (VNx8SI "VNx8DI") (VNx16SI "VNx16DI") + (VNx1SF "VNx1DI") (VNx2SF "VNx2DI") (VNx4SF "VNx4DI") (VNx8SF "VNx8DI") (VNx16SF "VNx16DI") ]) (define_mode_attr VINDEX_QUAD_EXT [ - (VNx1QI "VNx1SI") (VNx2QI "VNx2SI") (VNx4QI "VNx4SI") (VNx8QI "VNx8SI") (VNx16QI "VNx16SI") - (VNx1HI "VNx1DI") (VNx2HI "VNx2DI") (VNx4HI "VNx4DI") (VNx8HI "VNx8DI") + (VNx1QI "VNx1SI") (VNx2QI "VNx2SI") (VNx4QI "VNx4SI") (VNx8QI "VNx8SI") (VNx16QI "VNx16SI") (VNx32QI "VNx32SI") + (VNx1HI "VNx1DI") (VNx2HI "VNx2DI") (VNx4HI "VNx4DI") (VNx8HI "VNx8DI") (VNx16HI "VNx16DI") ]) (define_mode_attr VINDEX_OCT_EXT [ - (VNx1QI "VNx1DI") (VNx2QI "VNx2DI") (VNx4QI "VNx4DI") (VNx8QI "VNx8DI") + (VNx1QI "VNx1DI") (VNx2QI "VNx2DI") (VNx4QI "VNx4DI") (VNx8QI "VNx8DI") (VNx16QI "VNx16DI") ]) (define_mode_attr VCONVERT [ - (VNx1SF "VNx1SI") (VNx2SF "VNx2SI") (VNx4SF "VNx4SI") (VNx8SF 
"VNx8SI") (VNx16SF "VNx16SI") - (VNx1DF "VNx1DI") (VNx2DF "VNx2DI") (VNx4DF "VNx4DI") (VNx8DF "VNx8DI") + (VNx1SF "VNx1SI") (VNx2SF "VNx2SI") (VNx4SF "VNx4SI") (VNx8SF "VNx8SI") (VNx16SF "VNx16SI") (VNx32SF "VNx32SI") + (VNx1DF "VNx1DI") (VNx2DF "VNx2DI") (VNx4DF "VNx4DI") (VNx8DF "VNx8DI") (VNx16DF "VNx16DI") ]) (define_mode_attr VNCONVERT [ - (VNx1SF "VNx1HI") (VNx2SF "VNx2HI") (VNx4SF "VNx4HI") (VNx8SF "VNx8HI") (VNx16SF "VNx16HI") - (VNx1DI "VNx1SF") (VNx2DI "VNx2SF") (VNx4DI "VNx4SF") (VNx8DI "VNx8SF") - (VNx1DF "VNx1SI") (VNx2DF "VNx2SI") (VNx4DF "VNx4SI") (VNx8DF "VNx8SI") + (VNx1SF "VNx1HI") (VNx2SF "VNx2HI") (VNx4SF "VNx4HI") (VNx8SF "VNx8HI") (VNx16SF "VNx16HI") (VNx32SF "VNx32HI") + (VNx1DI "VNx1SF") (VNx2DI "VNx2SF") (VNx4DI "VNx4SF") (VNx8DI "VNx8SF") (VNx16DI "VNx16SF") + (VNx1DF "VNx1SI") (VNx2DF "VNx2SI") (VNx4DF "VNx4SI") (VNx8DF "VNx8SI") (VNx16DF "VNx16SI") ]) (define_mode_attr VLMUL1 [ + (VNx1QI "VNx16QI") (VNx2QI "VNx16QI") (VNx4QI "VNx16QI") + (VNx8QI "VNx16QI") (VNx16QI "VNx16QI") (VNx32QI "VNx16QI") (VNx64QI "VNx16QI") (VNx128QI "VNx16QI") + (VNx1HI "VNx8HI") (VNx2HI "VNx8HI") (VNx4HI "VNx8HI") + (VNx8HI "VNx8HI") (VNx16HI "VNx8HI") (VNx32HI "VNx8HI") (VNx64HI "VNx8HI") + (VNx1SI "VNx4SI") (VNx2SI "VNx4SI") (VNx4SI "VNx4SI") + (VNx8SI "VNx4SI") (VNx16SI "VNx4SI") (VNx32SI "VNx4SI") + (VNx1DI "VNx2DI") (VNx2DI "VNx2DI") + (VNx4DI "VNx2DI") (VNx8DI "VNx2DI") (VNx16DI "VNx2DI") + (VNx1SF "VNx4SF") (VNx2SF "VNx4SF") + (VNx4SF "VNx4SF") (VNx8SF "VNx4SF") (VNx16SF "VNx4SF") (VNx32SF "VNx4SF") + (VNx1DF "VNx2DF") (VNx2DF "VNx2DF") + (VNx4DF "VNx2DF") (VNx8DF "VNx2DF") (VNx16DF "VNx2DF") +]) + +(define_mode_attr VLMUL1_ZVE64 [ (VNx1QI "VNx8QI") (VNx2QI "VNx8QI") (VNx4QI "VNx8QI") (VNx8QI "VNx8QI") (VNx16QI "VNx8QI") (VNx32QI "VNx8QI") (VNx64QI "VNx8QI") (VNx1HI "VNx4HI") (VNx2HI "VNx4HI") (VNx4HI "VNx4HI") @@ -680,6 +752,17 @@ ]) (define_mode_attr VWLMUL1 [ + (VNx1QI "VNx8HI") (VNx2QI "VNx8HI") (VNx4QI "VNx8HI") + (VNx8QI "VNx8HI") (VNx16QI "VNx8HI") (VNx32QI "VNx8HI") (VNx64QI "VNx8HI") (VNx128QI "VNx8HI") + (VNx1HI "VNx4SI") (VNx2HI "VNx4SI") (VNx4HI "VNx4SI") + (VNx8HI "VNx4SI") (VNx16HI "VNx4SI") (VNx32HI "VNx4SI") (VNx64HI "VNx4SI") + (VNx1SI "VNx2DI") (VNx2SI "VNx2DI") (VNx4SI "VNx2DI") + (VNx8SI "VNx2DI") (VNx16SI "VNx2DI") (VNx32SI "VNx2DI") + (VNx1SF "VNx2DF") (VNx2SF "VNx2DF") + (VNx4SF "VNx2DF") (VNx8SF "VNx2DF") (VNx16SF "VNx2DF") (VNx32SF "VNx2DF") +]) + +(define_mode_attr VWLMUL1_ZVE64 [ (VNx1QI "VNx4HI") (VNx2QI "VNx4HI") (VNx4QI "VNx4HI") (VNx8QI "VNx4HI") (VNx16QI "VNx4HI") (VNx32QI "VNx4HI") (VNx64QI "VNx4HI") (VNx1HI "VNx2SI") (VNx2HI "VNx2SI") (VNx4HI "VNx2SI") @@ -698,6 +781,21 @@ ]) (define_mode_attr vlmul1 [ + (VNx1QI "vnx16qi") (VNx2QI "vnx16qi") (VNx4QI "vnx16qi") + (VNx8QI "vnx16qi") (VNx16QI "vnx16qi") (VNx32QI "vnx16qi") (VNx64QI "vnx16qi") (VNx128QI "vnx16qi") + (VNx1HI "vnx8hi") (VNx2HI "vnx8hi") (VNx4HI "vnx8hi") + (VNx8HI "vnx8hi") (VNx16HI "vnx8hi") (VNx32HI "vnx8hi") (VNx64HI "vnx8hi") + (VNx1SI "vnx4si") (VNx2SI "vnx4si") (VNx4SI "vnx4si") + (VNx8SI "vnx4si") (VNx16SI "vnx4si") (VNx32SI "vnx4si") + (VNx1DI "vnx2di") (VNx2DI "vnx2di") + (VNx4DI "vnx2di") (VNx8DI "vnx2di") (VNx16DI "vnx2di") + (VNx1SF "vnx4sf") (VNx2SF "vnx4sf") + (VNx4SF "vnx4sf") (VNx8SF "vnx4sf") (VNx16SF "vnx4sf") (VNx32SF "vnx4sf") + (VNx1DF "vnx2df") (VNx2DF "vnx2df") + (VNx4DF "vnx2df") (VNx8DF "vnx2df") (VNx16DF "vnx2df") +]) + +(define_mode_attr vlmul1_zve64 [ (VNx1QI "vnx8qi") (VNx2QI "vnx8qi") (VNx4QI "vnx8qi") (VNx8QI "vnx8qi") (VNx16QI "vnx8qi") (VNx32QI 
"vnx8qi") (VNx64QI "vnx8qi") (VNx1HI "vnx4hi") (VNx2HI "vnx4hi") (VNx4HI "vnx4hi") @@ -724,12 +822,23 @@ ]) (define_mode_attr vwlmul1 [ + (VNx1QI "vnx8hi") (VNx2QI "vnx8hi") (VNx4QI "vnx8hi") + (VNx8QI "vnx8hi") (VNx16QI "vnx8hi") (VNx32QI "vnx8hi") (VNx64QI "vnx8hi") (VNx128QI "vnx8hi") + (VNx1HI "vnx4si") (VNx2HI "vnx4si") (VNx4HI "vnx4si") + (VNx8HI "vnx4si") (VNx16HI "vnx4si") (VNx32HI "vnx4si") (VNx64HI "vnx4si") + (VNx1SI "vnx2di") (VNx2SI "vnx2di") (VNx4SI "vnx2di") + (VNx8SI "vnx2di") (VNx16SI "vnx2di") (VNx32SI "vnx2di") + (VNx1SF "vnx2df") (VNx2SF "vnx2df") + (VNx4SF "vnx2df") (VNx8SF "vnx2df") (VNx16SF "vnx2df") (VNx32SF "vnx2df") +]) + +(define_mode_attr vwlmul1_zve64 [ (VNx1QI "vnx4hi") (VNx2QI "vnx4hi") (VNx4QI "vnx4hi") (VNx8QI "vnx4hi") (VNx16QI "vnx4hi") (VNx32QI "vnx4hi") (VNx64QI "vnx4hi") (VNx1HI "vnx2si") (VNx2HI "vnx2si") (VNx4HI "vnx2si") - (VNx8HI "vnx2si") (VNx16HI "vnx2si") (VNx32HI "vnx2si") - (VNx1SI "vnx2di") (VNx2SI "vnx2di") (VNx4SI "vnx2di") - (VNx8SI "vnx2di") (VNx16SI "vnx2di") + (VNx8HI "vnx2si") (VNx16HI "vnx2si") (VNx32HI "vnx2SI") + (VNx1SI "vnx1di") (VNx2SI "vnx1di") (VNx4SI "vnx1di") + (VNx8SI "vnx1di") (VNx16SI "vnx1di") (VNx1SF "vnx1df") (VNx2SF "vnx1df") (VNx4SF "vnx1df") (VNx8SF "vnx1df") (VNx16SF "vnx1df") ]) @@ -738,17 +847,17 @@ (VNx1QI "vnx2hi") (VNx2QI "vnx2hi") (VNx4QI "vnx2hi") (VNx8QI "vnx2hi") (VNx16QI "vnx2hi") (VNx32QI "vnx2hi") (VNx1HI "vnx1si") (VNx2HI "vnx1si") (VNx4HI "vnx1si") - (VNx8HI "vnx1si") (VNx16HI "vnx1si") + (VNx8HI "vnx1si") (VNx16HI "vnx1SI") ]) (define_mode_attr VDEMOTE [ (VNx1DI "VNx2SI") (VNx2DI "VNx4SI") - (VNx4DI "VNx8SI") (VNx8DI "VNx16SI") + (VNx4DI "VNx8SI") (VNx8DI "VNx16SI") (VNx16DI "VNx32SI") ]) (define_mode_attr VMDEMOTE [ (VNx1DI "VNx2BI") (VNx2DI "VNx4BI") - (VNx4DI "VNx8BI") (VNx8DI "VNx16BI") + (VNx4DI "VNx8BI") (VNx8DI "VNx16BI") (VNx16DI "VNx32BI") ]) (define_int_iterator WREDUC [UNSPEC_WREDUC_SUM UNSPEC_WREDUC_USUM]) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 0ecca98..0fda11e 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -82,15 +82,16 @@ ;; is no field for ratio in the vsetvl instruction encoding. 
(define_attr "sew" "" (cond [(eq_attr "mode" "VNx1QI,VNx2QI,VNx4QI,VNx8QI,VNx16QI,VNx32QI,VNx64QI,\ - VNx1BI,VNx2BI,VNx4BI,VNx8BI,VNx16BI,VNx32BI,VNx64BI") + VNx1BI,VNx2BI,VNx4BI,VNx8BI,VNx16BI,VNx32BI,VNx64BI,\ + VNx128QI,VNx128BI") (const_int 8) - (eq_attr "mode" "VNx1HI,VNx2HI,VNx4HI,VNx8HI,VNx16HI,VNx32HI") + (eq_attr "mode" "VNx1HI,VNx2HI,VNx4HI,VNx8HI,VNx16HI,VNx32HI,VNx64HI") (const_int 16) - (eq_attr "mode" "VNx1SI,VNx2SI,VNx4SI,VNx8SI,VNx16SI,\ - VNx1SF,VNx2SF,VNx4SF,VNx8SF,VNx16SF") + (eq_attr "mode" "VNx1SI,VNx2SI,VNx4SI,VNx8SI,VNx16SI,VNx32SI,\ + VNx1SF,VNx2SF,VNx4SF,VNx8SF,VNx16SF,VNx32SF") (const_int 32) - (eq_attr "mode" "VNx1DI,VNx2DI,VNx4DI,VNx8DI,\ - VNx1DF,VNx2DF,VNx4DF,VNx8DF") + (eq_attr "mode" "VNx1DI,VNx2DI,VNx4DI,VNx8DI,VNx16DI,\ + VNx1DF,VNx2DF,VNx4DF,VNx8DF,VNx16DF") (const_int 64)] (const_int INVALID_ATTRIBUTE))) @@ -110,6 +111,8 @@ (symbol_ref "riscv_vector::get_vlmul(E_VNx32QImode)") (eq_attr "mode" "VNx64QI,VNx64BI") (symbol_ref "riscv_vector::get_vlmul(E_VNx64QImode)") + (eq_attr "mode" "VNx128QI,VNx128BI") + (symbol_ref "riscv_vector::get_vlmul(E_VNx128QImode)") (eq_attr "mode" "VNx1HI") (symbol_ref "riscv_vector::get_vlmul(E_VNx1HImode)") (eq_attr "mode" "VNx2HI") @@ -122,6 +125,8 @@ (symbol_ref "riscv_vector::get_vlmul(E_VNx16HImode)") (eq_attr "mode" "VNx32HI") (symbol_ref "riscv_vector::get_vlmul(E_VNx32HImode)") + (eq_attr "mode" "VNx64HI") + (symbol_ref "riscv_vector::get_vlmul(E_VNx64HImode)") (eq_attr "mode" "VNx1SI,VNx1SF") (symbol_ref "riscv_vector::get_vlmul(E_VNx1SImode)") (eq_attr "mode" "VNx2SI,VNx2SF") @@ -132,6 +137,8 @@ (symbol_ref "riscv_vector::get_vlmul(E_VNx8SImode)") (eq_attr "mode" "VNx16SI,VNx16SF") (symbol_ref "riscv_vector::get_vlmul(E_VNx16SImode)") + (eq_attr "mode" "VNx32SI,VNx32SF") + (symbol_ref "riscv_vector::get_vlmul(E_VNx32SImode)") (eq_attr "mode" "VNx1DI,VNx1DF") (symbol_ref "riscv_vector::get_vlmul(E_VNx1DImode)") (eq_attr "mode" "VNx2DI,VNx2DF") @@ -139,7 +146,9 @@ (eq_attr "mode" "VNx4DI,VNx4DF") (symbol_ref "riscv_vector::get_vlmul(E_VNx4DImode)") (eq_attr "mode" "VNx8DI,VNx8DF") - (symbol_ref "riscv_vector::get_vlmul(E_VNx8DImode)")] + (symbol_ref "riscv_vector::get_vlmul(E_VNx8DImode)") + (eq_attr "mode" "VNx16DI,VNx16DF") + (symbol_ref "riscv_vector::get_vlmul(E_VNx16DImode)")] (const_int INVALID_ATTRIBUTE))) ;; It is valid for instruction that require sew/lmul ratio. 
@@ -173,6 +182,8 @@ (symbol_ref "riscv_vector::get_ratio(E_VNx32QImode)") (eq_attr "mode" "VNx64QI,VNx64BI") (symbol_ref "riscv_vector::get_ratio(E_VNx64QImode)") + (eq_attr "mode" "VNx128QI,VNx128BI") + (symbol_ref "riscv_vector::get_ratio(E_VNx128QImode)") (eq_attr "mode" "VNx1HI") (symbol_ref "riscv_vector::get_ratio(E_VNx1HImode)") (eq_attr "mode" "VNx2HI") @@ -185,6 +196,8 @@ (symbol_ref "riscv_vector::get_ratio(E_VNx16HImode)") (eq_attr "mode" "VNx32HI") (symbol_ref "riscv_vector::get_ratio(E_VNx32HImode)") + (eq_attr "mode" "VNx64HI") + (symbol_ref "riscv_vector::get_ratio(E_VNx64HImode)") (eq_attr "mode" "VNx1SI,VNx1SF") (symbol_ref "riscv_vector::get_ratio(E_VNx1SImode)") (eq_attr "mode" "VNx2SI,VNx2SF") @@ -195,6 +208,8 @@ (symbol_ref "riscv_vector::get_ratio(E_VNx8SImode)") (eq_attr "mode" "VNx16SI,VNx16SF") (symbol_ref "riscv_vector::get_ratio(E_VNx16SImode)") + (eq_attr "mode" "VNx32SI,VNx32SF") + (symbol_ref "riscv_vector::get_ratio(E_VNx32SImode)") (eq_attr "mode" "VNx1DI,VNx1DF") (symbol_ref "riscv_vector::get_ratio(E_VNx1DImode)") (eq_attr "mode" "VNx2DI,VNx2DF") @@ -202,7 +217,9 @@ (eq_attr "mode" "VNx4DI,VNx4DF") (symbol_ref "riscv_vector::get_ratio(E_VNx4DImode)") (eq_attr "mode" "VNx8DI,VNx8DF") - (symbol_ref "riscv_vector::get_ratio(E_VNx8DImode)")] + (symbol_ref "riscv_vector::get_ratio(E_VNx8DImode)") + (eq_attr "mode" "VNx16DI,VNx16DF") + (symbol_ref "riscv_vector::get_ratio(E_VNx16DImode)")] (const_int INVALID_ATTRIBUTE))) ;; The index of operand[] to get the merge op. @@ -1633,7 +1650,7 @@ [(set_attr "type" "vstx") (set_attr "mode" "")]) -(define_insn "@pred_indexed_store" +(define_insn "@pred_indexed_store" [(set (mem:BLK (scratch)) (unspec:BLK [(unspec: @@ -1643,14 +1660,14 @@ (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (match_operand 1 "pmode_register_operand" " r") - (match_operand:VNX32_QHI 2 "register_operand" " vr") - (match_operand:VNX32_QH 3 "register_operand" " vr")] ORDER))] + (match_operand:VNX32_QHSI 2 "register_operand" " vr") + (match_operand:VNX32_QHS 3 "register_operand" " vr")] ORDER))] "TARGET_VECTOR" - "vsxei.v\t%3,(%1),%2%p0" + "vsxei.v\t%3,(%1),%2%p0" [(set_attr "type" "vstx") - (set_attr "mode" "")]) + (set_attr "mode" "")]) -(define_insn "@pred_indexed_store" +(define_insn "@pred_indexed_store" [(set (mem:BLK (scratch)) (unspec:BLK [(unspec: @@ -1660,12 +1677,29 @@ (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (match_operand 1 "pmode_register_operand" " r") - (match_operand:VNX64_Q 2 "register_operand" " vr") - (match_operand:VNX64_Q 3 "register_operand" " vr")] ORDER))] + (match_operand:VNX64_QHI 2 "register_operand" " vr") + (match_operand:VNX64_QH 3 "register_operand" " vr")] ORDER))] "TARGET_VECTOR" - "vsxei.v\t%3,(%1),%2%p0" + "vsxei.v\t%3,(%1),%2%p0" [(set_attr "type" "vstx") - (set_attr "mode" "")]) + (set_attr "mode" "")]) + +(define_insn "@pred_indexed_store" + [(set (mem:BLK (scratch)) + (unspec:BLK + [(unspec: + [(match_operand: 0 "vector_mask_operand" "vmWc1") + (match_operand 4 "vector_length_operand" " rK") + (match_operand 5 "const_int_operand" " i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (match_operand 1 "pmode_register_operand" " r") + (match_operand:VNX128_Q 2 "register_operand" " vr") + (match_operand:VNX128_Q 3 "register_operand" " vr")] ORDER))] + "TARGET_VECTOR" + "vsxei.v\t%3,(%1),%2%p0" + [(set_attr "type" "vstx") + (set_attr "mode" "")]) ;; ------------------------------------------------------------------------------- ;; ---- Predicated integer binary 
operations @@ -6746,23 +6780,45 @@ ;; For example, The LMUL = 1 corresponding mode of VNx16QImode is VNx4QImode ;; for -march=rv*zve32* wheras VNx8QImode for -march=rv*zve64* (define_insn "@pred_reduc_" - [(set (match_operand: 0 "register_operand" "=vd, vd, vr, vr") + [(set (match_operand: 0 "register_operand" "=vr, vr") (unspec: [(unspec: - [(match_operand: 1 "vector_mask_operand" " vm, vm,Wc1,Wc1") - (match_operand 5 "vector_length_operand" " rK, rK, rK, rK") - (match_operand 6 "const_int_operand" " i, i, i, i") - (match_operand 7 "const_int_operand" " i, i, i, i") + [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK") + (match_operand 6 "const_int_operand" " i, i") + (match_operand 7 "const_int_operand" " i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (any_reduc:VI (vec_duplicate:VI (vec_select: - (match_operand: 4 "register_operand" " vr, vr, vr, vr") + (match_operand: 4 "register_operand" " vr, vr") (parallel [(const_int 0)]))) - (match_operand:VI 3 "register_operand" " vr, vr, vr, vr")) - (match_operand: 2 "vector_merge_operand" " vu, 0, vu, 0")] UNSPEC_REDUC))] - "TARGET_VECTOR && TARGET_MIN_VLEN > 32" + (match_operand:VI 3 "register_operand" " vr, vr")) + (match_operand: 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC))] + "TARGET_VECTOR && TARGET_MIN_VLEN >= 128" + "vred.vs\t%0,%3,%4%p1" + [(set_attr "type" "vired") + (set_attr "mode" "")]) + +(define_insn "@pred_reduc_" + [(set (match_operand: 0 "register_operand" "=vr, vr") + (unspec: + [(unspec: + [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK") + (match_operand 6 "const_int_operand" " i, i") + (match_operand 7 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (any_reduc:VI_ZVE64 + (vec_duplicate:VI_ZVE64 + (vec_select: + (match_operand: 4 "register_operand" " vr, vr") + (parallel [(const_int 0)]))) + (match_operand:VI_ZVE64 3 "register_operand" " vr, vr")) + (match_operand: 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC))] + "TARGET_VECTOR && TARGET_MIN_VLEN == 64" "vred.vs\t%0,%3,%4%p1" [(set_attr "type" "vired") (set_attr "mode" "")]) @@ -6802,11 +6858,30 @@ (match_operand:VWI 3 "register_operand" " vr, vr") (match_operand: 4 "register_operand" " vr, vr") (match_operand: 2 "vector_merge_operand" " vu, 0")] WREDUC))] - "TARGET_VECTOR && TARGET_MIN_VLEN > 32" + "TARGET_VECTOR && TARGET_MIN_VLEN >= 128" "vwredsum.vs\t%0,%3,%4%p1" [(set_attr "type" "viwred") (set_attr "mode" "")]) +(define_insn "@pred_widen_reduc_plus" + [(set (match_operand: 0 "register_operand" "=&vr, &vr") + (unspec: + [(unspec: + [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK") + (match_operand 6 "const_int_operand" " i, i") + (match_operand 7 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (match_operand:VWI_ZVE64 3 "register_operand" " vr, vr") + (match_operand: 4 "register_operand" " vr, vr") + (match_operand: 2 "vector_merge_operand" " vu, 0")] WREDUC))] + "TARGET_VECTOR && TARGET_MIN_VLEN == 64" + "vwredsum.vs\t%0,%3,%4%p1" + [(set_attr "type" "viwred") + (set_attr "mode" "")]) + + (define_insn "@pred_widen_reduc_plus" [(set (match_operand: 0 "register_operand" "=&vr, &vr") (unspec: @@ -6826,23 +6901,45 @@ (set_attr "mode" "")]) (define_insn "@pred_reduc_" - [(set (match_operand: 0 "register_operand" "=vd, vd, vr, vr") + [(set (match_operand: 0 
"register_operand" "=vr, vr") (unspec: [(unspec: - [(match_operand: 1 "vector_mask_operand" " vm, vm,Wc1,Wc1") - (match_operand 5 "vector_length_operand" " rK, rK, rK, rK") - (match_operand 6 "const_int_operand" " i, i, i, i") - (match_operand 7 "const_int_operand" " i, i, i, i") + [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK") + (match_operand 6 "const_int_operand" " i, i") + (match_operand 7 "const_int_operand" " i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (any_freduc:VF (vec_duplicate:VF (vec_select: - (match_operand: 4 "register_operand" " vr, vr, vr, vr") + (match_operand: 4 "register_operand" " vr, vr") + (parallel [(const_int 0)]))) + (match_operand:VF 3 "register_operand" " vr, vr")) + (match_operand: 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC))] + "TARGET_VECTOR && TARGET_MIN_VLEN >= 128" + "vfred.vs\t%0,%3,%4%p1" + [(set_attr "type" "vfredu") + (set_attr "mode" "")]) + +(define_insn "@pred_reduc_" + [(set (match_operand: 0 "register_operand" "=vr, vr") + (unspec: + [(unspec: + [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK") + (match_operand 6 "const_int_operand" " i, i") + (match_operand 7 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (any_freduc:VF_ZVE64 + (vec_duplicate:VF_ZVE64 + (vec_select: + (match_operand: 4 "register_operand" " vr, vr") (parallel [(const_int 0)]))) - (match_operand:VF 3 "register_operand" " vr, vr, vr, vr")) - (match_operand: 2 "vector_merge_operand" " vu, 0, vu, 0")] UNSPEC_REDUC))] - "TARGET_VECTOR && TARGET_MIN_VLEN > 32" + (match_operand:VF_ZVE64 3 "register_operand" " vr, vr")) + (match_operand: 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC))] + "TARGET_VECTOR && TARGET_MIN_VLEN == 64" "vfred.vs\t%0,%3,%4%p1" [(set_attr "type" "vfredu") (set_attr "mode" "")]) @@ -6870,24 +6967,47 @@ (set_attr "mode" "")]) (define_insn "@pred_reduc_plus" - [(set (match_operand: 0 "register_operand" "=vd, vd, vr, vr") + [(set (match_operand: 0 "register_operand" "=vr, vr") (unspec: [(unspec: [(unspec: - [(match_operand: 1 "vector_mask_operand" " vm, vm,Wc1,Wc1") - (match_operand 5 "vector_length_operand" " rK, rK, rK, rK") - (match_operand 6 "const_int_operand" " i, i, i, i") - (match_operand 7 "const_int_operand" " i, i, i, i") + [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK") + (match_operand 6 "const_int_operand" " i, i") + (match_operand 7 "const_int_operand" " i, i") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) (plus:VF (vec_duplicate:VF (vec_select: - (match_operand: 4 "register_operand" " vr, vr, vr, vr") + (match_operand: 4 "register_operand" " vr, vr") (parallel [(const_int 0)]))) - (match_operand:VF 3 "register_operand" " vr, vr, vr, vr")) - (match_operand: 2 "vector_merge_operand" " vu, 0, vu, 0")] UNSPEC_REDUC)] ORDER))] - "TARGET_VECTOR && TARGET_MIN_VLEN > 32" + (match_operand:VF 3 "register_operand" " vr, vr")) + (match_operand: 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC)] ORDER))] + "TARGET_VECTOR && TARGET_MIN_VLEN >= 128" + "vfredsum.vs\t%0,%3,%4%p1" + [(set_attr "type" "vfred") + (set_attr "mode" "")]) + +(define_insn "@pred_reduc_plus" + [(set (match_operand: 0 "register_operand" "=vr, vr") + (unspec: + [(unspec: + [(unspec: + [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK") + (match_operand 6 
"const_int_operand" " i, i") + (match_operand 7 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (plus:VF_ZVE64 + (vec_duplicate:VF_ZVE64 + (vec_select: + (match_operand: 4 "register_operand" " vr, vr") + (parallel [(const_int 0)]))) + (match_operand:VF_ZVE64 3 "register_operand" " vr, vr")) + (match_operand: 2 "vector_merge_operand" " vu, 0")] UNSPEC_REDUC)] ORDER))] + "TARGET_VECTOR && TARGET_MIN_VLEN == 64" "vfredsum.vs\t%0,%3,%4%p1" [(set_attr "type" "vfred") (set_attr "mode" "")]) @@ -6929,7 +7049,26 @@ (match_operand:VWF 3 "register_operand" " vr, vr") (match_operand: 4 "register_operand" " vr, vr") (match_operand: 2 "vector_merge_operand" " vu, 0")] UNSPEC_WREDUC_SUM)] ORDER))] - "TARGET_VECTOR && TARGET_MIN_VLEN > 32" + "TARGET_VECTOR && TARGET_MIN_VLEN >= 128" + "vfwredsum.vs\t%0,%3,%4%p1" + [(set_attr "type" "vfwred") + (set_attr "mode" "")]) + +(define_insn "@pred_widen_reduc_plus" + [(set (match_operand: 0 "register_operand" "=&vr, &vr") + (unspec: + [(unspec: + [(unspec: + [(match_operand: 1 "vector_mask_operand" "vmWc1,vmWc1") + (match_operand 5 "vector_length_operand" " rK, rK") + (match_operand 6 "const_int_operand" " i, i") + (match_operand 7 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (match_operand:VWF_ZVE64 3 "register_operand" " vr, vr") + (match_operand: 4 "register_operand" " vr, vr") + (match_operand: 2 "vector_merge_operand" " vu, 0")] UNSPEC_WREDUC_SUM)] ORDER))] + "TARGET_VECTOR && TARGET_MIN_VLEN == 64" "vfwredsum.vs\t%0,%3,%4%p1" [(set_attr "type" "vfwred") (set_attr "mode" "")]) diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr108185-4.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr108185-4.c index ea3c360..6e4d1cb 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/pr108185-4.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr108185-4.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64gcv -mabi=lp64 -O3" } */ +/* { dg-options "-march=rv64gc_zve64d -mabi=lp64 -O3" } */ #include "riscv_vector.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c index 2f2d858..3b11e55 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-options "-march=rv32gc_zve64d -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */ /* { dg-final { check-function-bodies "**" "" } } */ #include "riscv_vector.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-11.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-11.c index f522349..aa2e5e7 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-11.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-11.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-msave-restore -march=rv32gcv -mabi=ilp32 -msave-restore -fno-schedule-insns -fno-schedule-insns2 -O3" } */ +/* { dg-options "-msave-restore -march=rv32gc_zve64d -mabi=ilp32 -msave-restore -fno-schedule-insns -fno-schedule-insns2 -O3" } */ /* { dg-final { check-function-bodies "**" "" } } */ #include "riscv_vector.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c index 4bcaf4d..567aa56 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c 
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-2.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-options "-march=rv32gc_zve64d -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */ /* { dg-final { check-function-bodies "**" "" } } */ #include "riscv_vector.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c index 82d685e..2c1213b 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-3.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-options "-march=rv32gc_zve64d -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */ /* { dg-final { check-function-bodies "**" "" } } */ #include "riscv_vector.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c index 5b3f75f..a687406 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-5.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv32gcv -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-options "-march=rv32gc_zve64d -mabi=ilp32 -mpreferred-stack-boundary=3 -O3 -fno-schedule-insns -fno-schedule-insns2" } */ /* { dg-final { check-function-bodies "**" "" } } */ #include "riscv_vector.h" diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-9.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-9.c index 7111113..ec67357 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-9.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-9.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -O3" } */ +/* { dg-options "-march=rv32gc_zve64d -mabi=ilp32 -fno-schedule-insns -fno-schedule-insns2 -O3" } */ /* { dg-final { check-function-bodies "**" "" } } */ #include "riscv_vector.h" -- cgit v1.1 From ed32ec26697cc77492d094b31a0d2eebc0535644 Mon Sep 17 00:00:00 2001 From: Jason Merrill Date: Tue, 18 Apr 2023 17:12:17 -0400 Subject: c++: fix 'unsigned __int128_t' semantics [PR108099] My earlier patch for 108099 made us accept this non-standard pattern but messed up the semantics, so that e.g. unsigned __int128_t was not a 128-bit type. PR c++/108099 gcc/cp/ChangeLog: * decl.cc (grokdeclarator): Keep typedef_decl for __int128_t. gcc/testsuite/ChangeLog: * g++.dg/ext/int128-8.C: New test. --- gcc/cp/decl.cc | 6 ++++-- gcc/testsuite/g++.dg/ext/int128-8.C | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/g++.dg/ext/int128-8.C (limited to 'gcc') diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc index 772c059..ab5cb69 100644 --- a/gcc/cp/decl.cc +++ b/gcc/cp/decl.cc @@ -12482,12 +12482,14 @@ grokdeclarator (const cp_declarator *declarator, key, typedef_decl); ok = !flag_pedantic_errors; if (is_typedef_decl (typedef_decl)) - type = DECL_ORIGINAL_TYPE (typedef_decl); + { + type = DECL_ORIGINAL_TYPE (typedef_decl); + typedef_decl = NULL_TREE; + } else /* PR108099: __int128_t comes from c_common_nodes_and_builtins, and is not built as a typedef. 
*/ type = TREE_TYPE (typedef_decl); - typedef_decl = NULL_TREE; } else if (declspecs->decltype_p) error_at (loc, "%qs specified with %", key); diff --git a/gcc/testsuite/g++.dg/ext/int128-8.C b/gcc/testsuite/g++.dg/ext/int128-8.C new file mode 100644 index 0000000..14bbc49 --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/int128-8.C @@ -0,0 +1,24 @@ +// PR c++/108099 +// { dg-do compile { target c++11 } } +// { dg-options "" } + +using u128 = unsigned __int128_t; +using s128 = signed __int128_t; +template struct integral_constant { + static constexpr T value = v; +}; +typedef integral_constant false_type; +typedef integral_constant true_type; +template +struct is_same : false_type {}; +template +struct is_same : true_type {}; +static_assert (is_same <__int128, s128>::value, ""); +static_assert (is_same ::value, ""); +static_assert (is_same <__int128_t, s128>::value, ""); +static_assert (is_same ::value, ""); // { dg-bogus "" "" { xfail *-*-* } } +static_assert (is_same <__uint128_t, u128>::value, ""); // { dg-bogus "" "" { xfail *-*-* } } +static_assert (sizeof (s128) == sizeof (__int128), ""); +static_assert (sizeof (u128) == sizeof (unsigned __int128), ""); +static_assert (s128(-1) < 0, ""); +static_assert (u128(-1) > 0, ""); -- cgit v1.1 From 04a9209dc865dafe3c9615f5c868aa3fd89b96cf Mon Sep 17 00:00:00 2001 From: Andrew Pinski Date: Thu, 13 Apr 2023 00:40:40 +0000 Subject: i386: Add new pattern for zero-extend cmov After a phiopt change, I got a failure of cmov9.c. The RTL IR has zero_extend on the outside of the if_then_else rather than on the side. Both ways are considered canonical as mentioned in PR 66588. This fixes the failure I got and also adds a testcase which fails before even my phiopt patch but will pass with this patch. OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions. gcc/ChangeLog: * config/i386/i386.md (*movsicc_noc_zext_1): New pattern. gcc/testsuite/ChangeLog: * gcc.target/i386/cmov10.c: New test. * gcc.target/i386/cmov11.c: New test. --- gcc/config/i386/i386.md | 16 ++++++++++++++++ gcc/testsuite/gcc.target/i386/cmov10.c | 10 ++++++++++ gcc/testsuite/gcc.target/i386/cmov11.c | 10 ++++++++++ 3 files changed, 36 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/cmov10.c create mode 100644 gcc/testsuite/gcc.target/i386/cmov11.c (limited to 'gcc') diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 0f95d8e..01d5199 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -22033,6 +22033,22 @@ [(set_attr "type" "icmov") (set_attr "mode" "SI")]) +(define_insn "*movsicc_noc_zext_1" + [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r") + (zero_extend:DI + (if_then_else:SI (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:SI 2 "nonimmediate_operand" "rm,0") + (match_operand:SI 3 "nonimmediate_operand" "0,rm"))))] + "TARGET_64BIT + && TARGET_CMOVE && !(MEM_P (operands[2]) && MEM_P (operands[3]))" + "@ + cmov%O2%C1\t{%2, %k0|%k0, %2} + cmov%O2%c1\t{%3, %k0|%k0, %3}" + [(set_attr "type" "icmov") + (set_attr "mode" "SI")]) + + ;; Don't do conditional moves with memory inputs. This splitter helps ;; register starved x86_32 by forcing inputs into registers before reload. (define_split diff --git a/gcc/testsuite/gcc.target/i386/cmov10.c b/gcc/testsuite/gcc.target/i386/cmov10.c new file mode 100644 index 0000000..9ba23b1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cmov10.c @@ -0,0 +1,10 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -dp" } */ +/* { dg-final { scan-assembler-not "zero_extendsidi" } } */ + + +void foo (unsigned long long *d, int a, unsigned int b, unsigned int c) +{ + *d = a ? b : c; +} + diff --git a/gcc/testsuite/gcc.target/i386/cmov11.c b/gcc/testsuite/gcc.target/i386/cmov11.c new file mode 100644 index 0000000..ba8a5e6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cmov11.c @@ -0,0 +1,10 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -dp" } */ +/* { dg-final { scan-assembler-not "zero_extendsidi" } } */ + +unsigned long long foo (int a, unsigned b, unsigned c) +{ + unsigned t = a ? b : c; + return t; +} + -- cgit v1.1 From 6fc8e25cb6b5d720bedd85194b0ad740d75082f4 Mon Sep 17 00:00:00 2001 From: Harald Anlauf Date: Tue, 18 Apr 2023 21:24:20 +0200 Subject: testsuite: fix scan-tree-dump patterns [PR83904,PR100297] Adjust scan-tree-dump patterns so that they do not accidentally match a valid path. gcc/testsuite/ChangeLog: PR testsuite/83904 PR fortran/100297 * gfortran.dg/allocatable_function_1.f90: Use "__builtin_free " instead of the naive "free". * gfortran.dg/reshape_8.f90: Extend pattern from a simple "data". --- gcc/testsuite/gfortran.dg/allocatable_function_1.f90 | 2 +- gcc/testsuite/gfortran.dg/reshape_8.f90 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gfortran.dg/allocatable_function_1.f90 b/gcc/testsuite/gfortran.dg/allocatable_function_1.f90 index f96ebc4..e38953b 100644 --- a/gcc/testsuite/gfortran.dg/allocatable_function_1.f90 +++ b/gcc/testsuite/gfortran.dg/allocatable_function_1.f90 @@ -107,4 +107,4 @@ contains end function bar end program alloc_fun -! { dg-final { scan-tree-dump-times "free" 10 "original" } } +! { dg-final { scan-tree-dump-times "__builtin_free " 10 "original" } } diff --git a/gcc/testsuite/gfortran.dg/reshape_8.f90 b/gcc/testsuite/gfortran.dg/reshape_8.f90 index 01799ac..5681212 100644 --- a/gcc/testsuite/gfortran.dg/reshape_8.f90 +++ b/gcc/testsuite/gfortran.dg/reshape_8.f90 @@ -11,4 +11,4 @@ program test a = reshape([1,2,3,4], [2,0]) print *, a end -! { dg-final { scan-tree-dump-times "data" 4 "original" } } +! { dg-final { scan-tree-dump-not "data..0. =" "original" } } -- cgit v1.1 From 5e284ebbc3082c5a8974d24e3a0977aa48f3cc60 Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Wed, 19 Apr 2023 13:07:46 -0400 Subject: c++: bad ggc_free in try_class_unification [PR109556] Aside from correcting how try_class_unification copies multi-dimensional 'targs', r13-377-g3e948d645bc908 also made it ggc_free this copy as an optimization. But this is wrong since the call to unify within might've captured the args in persistent memory such as the satisfaction cache (as part of constrained auto deduction). PR c++/109556 gcc/cp/ChangeLog: * pt.cc (try_class_unification): Don't ggc_free the copy of 'targs'. gcc/testsuite/ChangeLog: * g++.dg/cpp2a/concepts-placeholder13.C: New test. 
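For context, the construct the new test exercises is a concept-constrained placeholder for a non-type template parameter; deducing such a parameter performs a concept satisfaction check, and the satisfaction cache records the deduced template arguments it was checked against. A minimal sketch of the construct (illustration only; the names Small and Holder are made up):

template<class T>
concept Small = sizeof (T) <= 4;

template<Small auto V>   // deducing V checks Small<decltype(V)>; the result is cached
struct Holder { };

Holder<'x'> h;           // OK: Small<char> is satisfied and recorded

Because the cache may keep referring to those deduced arguments, freeing them eagerly in try_class_unification left dangling references into GC memory, which is why the calls are removed below.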
--- gcc/cp/pt.cc | 5 ----- gcc/testsuite/g++.dg/cpp2a/concepts-placeholder13.C | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-placeholder13.C (limited to 'gcc')
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index e065ace..68a056a 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -23895,11 +23895,6 @@ try_class_unification (tree tparms, tree targs, tree parm, tree arg, err = unify (tparms, targs, CLASSTYPE_TI_ARGS (parm), CLASSTYPE_TI_ARGS (arg), UNIFY_ALLOW_NONE, explain_p);
- if (TMPL_ARGS_HAVE_MULTIPLE_LEVELS (targs))
- for (tree level : tree_vec_range (targs))
- ggc_free (level);
- ggc_free (targs);
-
 return err ? NULL_TREE : arg; }
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-placeholder13.C b/gcc/testsuite/g++.dg/cpp2a/concepts-placeholder13.C new file mode 100644 index 0000000..ac9f845 --- /dev/null +++ b/gcc/testsuite/g++.dg/cpp2a/concepts-placeholder13.C @@ -0,0 +1,18 @@
+// PR c++/109556
+// { dg-do compile { target c++20 } }
+
+template<class T, int N>
+concept C = (N != 0);
+
+template<int N, int M>
+struct A { };
+
+template<int N, C<N> auto M>
+void f(A<N, M>);
+
+int main() {
+ f(A<1, 42>{});
+ f(A<2, 42>{});
+ f(A<1, 43>{});
+ f(A<2, 43>{});
+}
-- cgit v1.1
From 58b7dbf865b146a4e65dbda9be6df78f212c03b6 Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Wed, 19 Apr 2023 15:36:34 -0400 Subject: c++: Define built-in for std::tuple_element [PR100157]
This adds a new built-in to replace the recursive class template instantiations done by traits such as std::tuple_element and std::variant_alternative. The purpose is to select the Nth type from a list of types, e.g. __type_pack_element<1, char, int, float> is int. We implement it as a special kind of TRAIT_TYPE. For a pathological example, tuple_element_t<1000, tuple<2000 types...>>, the compilation time is reduced by more than 90% and the memory used by the compiler is reduced by 97%. In realistic examples the gains will be much smaller, but still relevant. Unlike the other built-in traits, __type_pack_element uses template-id syntax instead of call syntax and is SFINAE-enabled, matching Clang's implementation. And like the other built-in traits, it's not mangleable so we can't use it directly in function signatures. N.B. Clang seems to implement __type_pack_element as a first-class template that can e.g. be used as a template-template argument. For simplicity we implement it in a more ad-hoc way. Co-authored-by: Jonathan Wakely
PR c++/100157
gcc/cp/ChangeLog:
* cp-trait.def (TYPE_PACK_ELEMENT): Define.
* cp-tree.h (finish_trait_type): Add complain parameter.
* cxx-pretty-print.cc (pp_cxx_trait): Handle CPTK_TYPE_PACK_ELEMENT.
* parser.cc (cp_parser_constant_expression): Document default arguments.
(cp_parser_trait): Handle CPTK_TYPE_PACK_ELEMENT. Pass tf_warning_or_error to finish_trait_type.
* pt.cc (tsubst) <case TRAIT_TYPE>: Handle non-type first argument. Pass complain to finish_trait_type.
* semantics.cc (finish_type_pack_element): Define.
(finish_trait_type): Add complain parameter. Handle CPTK_TYPE_PACK_ELEMENT.
* tree.cc (strip_typedefs): Handle non-type first argument. Pass tf_warning_or_error to finish_trait_type.
* typeck.cc (structural_comptypes) <case TRAIT_TYPE>: Use cp_tree_equal instead of same_type_p for the first argument.
libstdc++-v3/ChangeLog:
* include/bits/utility.h (_Nth_type): Conditionally define in terms of __type_pack_element if available.
* testsuite/20_util/tuple/element_access/get_neg.cc: Prune additional errors from the new built-in.
gcc/testsuite/ChangeLog:
* g++.dg/ext/type_pack_element1.C: New test.
* g++.dg/ext/type_pack_element2.C: New test. * g++.dg/ext/type_pack_element3.C: New test. --- gcc/cp/cp-trait.def | 1 + gcc/cp/cp-tree.h | 2 +- gcc/cp/cxx-pretty-print.cc | 21 ++++++++++--- gcc/cp/parser.cc | 44 ++++++++++++++++++++++----- gcc/cp/pt.cc | 8 +++-- gcc/cp/semantics.cc | 39 +++++++++++++++++++++++- gcc/cp/tree.cc | 10 ++++-- gcc/cp/typeck.cc | 2 +- gcc/testsuite/g++.dg/ext/type_pack_element1.C | 19 ++++++++++++ gcc/testsuite/g++.dg/ext/type_pack_element2.C | 14 +++++++++ gcc/testsuite/g++.dg/ext/type_pack_element3.C | 22 ++++++++++++++ 11 files changed, 162 insertions(+), 20 deletions(-) create mode 100644 gcc/testsuite/g++.dg/ext/type_pack_element1.C create mode 100644 gcc/testsuite/g++.dg/ext/type_pack_element2.C create mode 100644 gcc/testsuite/g++.dg/ext/type_pack_element3.C (limited to 'gcc') diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def index bac593c..8b7fece 100644 --- a/gcc/cp/cp-trait.def +++ b/gcc/cp/cp-trait.def @@ -91,6 +91,7 @@ DEFTRAIT_TYPE (REMOVE_CV, "__remove_cv", 1) DEFTRAIT_TYPE (REMOVE_REFERENCE, "__remove_reference", 1) DEFTRAIT_TYPE (REMOVE_CVREF, "__remove_cvref", 1) DEFTRAIT_TYPE (UNDERLYING_TYPE, "__underlying_type", 1) +DEFTRAIT_TYPE (TYPE_PACK_ELEMENT, "__type_pack_element", -1) /* These traits yield a type pack, not a type, and are represented by cp_parser_trait as a special BASES tree instead of a TRAIT_TYPE tree. */ diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index a14eb8d..6c5920d 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -7761,7 +7761,7 @@ extern tree finish_decltype_type (tree, bool, tsubst_flags_t); extern tree fold_builtin_is_corresponding_member (location_t, int, tree *); extern tree fold_builtin_is_pointer_inverconvertible_with_class (location_t, int, tree *); extern tree finish_trait_expr (location_t, enum cp_trait_kind, tree, tree); -extern tree finish_trait_type (enum cp_trait_kind, tree, tree); +extern tree finish_trait_type (enum cp_trait_kind, tree, tree, tsubst_flags_t); extern tree build_lambda_expr (void); extern tree build_lambda_object (tree); extern tree begin_lambda_type (tree); diff --git a/gcc/cp/cxx-pretty-print.cc b/gcc/cp/cxx-pretty-print.cc index 7f4556d..c339198 100644 --- a/gcc/cp/cxx-pretty-print.cc +++ b/gcc/cp/cxx-pretty-print.cc @@ -2625,11 +2625,19 @@ pp_cxx_trait (cxx_pretty_printer *pp, tree t) #undef DEFTRAIT } - pp_cxx_left_paren (pp); - if (TYPE_P (type1)) - pp->type_id (type1); + if (kind == CPTK_TYPE_PACK_ELEMENT) + { + pp_cxx_begin_template_argument_list (pp); + pp->expression (type1); + } else - pp->expression (type1); + { + pp_cxx_left_paren (pp); + if (TYPE_P (type1)) + pp->type_id (type1); + else + pp->expression (type1); + } if (type2) { if (TREE_CODE (type2) != TREE_LIST) @@ -2644,7 +2652,10 @@ pp_cxx_trait (cxx_pretty_printer *pp, tree t) pp->type_id (TREE_VALUE (arg)); } } - pp_cxx_right_paren (pp); + if (kind == CPTK_TYPE_PACK_ELEMENT) + pp_cxx_end_template_argument_list (pp); + else + pp_cxx_right_paren (pp); } // requires-clause: diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc index a6341b9..ee1497b 100644 --- a/gcc/cp/parser.cc +++ b/gcc/cp/parser.cc @@ -10730,9 +10730,9 @@ cp_parser_expression (cp_parser* parser, cp_id_kind * pidk, static cp_expr cp_parser_constant_expression (cp_parser* parser, - int allow_non_constant_p, - bool *non_constant_p, - bool strict_p) + int allow_non_constant_p /* = 0 */, + bool *non_constant_p /* = NULL */, + bool strict_p /* = false */) { bool saved_integral_constant_expression_p; bool saved_allow_non_integral_constant_expression_p; @@ 
-10959,7 +10959,10 @@ cp_parser_trait (cp_parser* parser, enum rid keyword) cp_lexer_consume_token (parser->lexer); matching_parens parens; - parens.require_open (parser); + if (kind == CPTK_TYPE_PACK_ELEMENT) + cp_parser_require (parser, CPP_LESS, RT_LESS); + else + parens.require_open (parser); if (kind == CPTK_IS_DEDUCIBLE) { @@ -10972,6 +10975,12 @@ cp_parser_trait (cp_parser* parser, enum rid keyword) /*optional_p=*/false); type1 = cp_parser_lookup_name_simple (parser, type1, token->location); } + else if (kind == CPTK_TYPE_PACK_ELEMENT) + /* __type_pack_element takes an expression as its first argument and uses + template-id syntax instead of function call syntax (for consistency + with Clang). We special case these properties of __type_pack_element + here and elsewhere. */ + type1 = cp_parser_constant_expression (parser); else { type_id_in_expr_sentinel s (parser); @@ -10981,7 +10990,24 @@ cp_parser_trait (cp_parser* parser, enum rid keyword) if (type1 == error_mark_node) return error_mark_node; - if (binary) + if (kind == CPTK_TYPE_PACK_ELEMENT) + { + cp_parser_require (parser, CPP_COMMA, RT_COMMA); + tree rest = cp_parser_enclosed_template_argument_list (parser); + for (tree elt : tree_vec_range (rest)) + { + if (!TYPE_P (elt)) + { + error_at (cp_expr_loc_or_input_loc (elt), + "trailing argument to %<__type_pack_element%> " + "is not a type"); + return error_mark_node; + } + type2 = tree_cons (NULL_TREE, elt, type2); + } + type2 = nreverse (type2); + } + else if (binary) { cp_parser_require (parser, CPP_COMMA, RT_COMMA); @@ -11012,7 +11038,11 @@ cp_parser_trait (cp_parser* parser, enum rid keyword) } location_t finish_loc = cp_lexer_peek_token (parser->lexer)->location; - parens.require_close (parser); + if (kind == CPTK_TYPE_PACK_ELEMENT) + /* cp_parser_enclosed_template_argument_list above already took care + of parsing the closing '>'. */; + else + parens.require_close (parser); /* Construct a location of the form: __is_trivially_copyable(_Tp) @@ -11030,7 +11060,7 @@ cp_parser_trait (cp_parser* parser, enum rid keyword) return cp_expr (finish_bases (type1, true), trait_loc); default: if (type) - return finish_trait_type (kind, type1, type2); + return finish_trait_type (kind, type1, type2, tf_warning_or_error); else return finish_trait_expr (trait_loc, kind, type1, type2); } diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index 68a056a..f65f2d5 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -16696,9 +16696,13 @@ tsubst (tree t, tree args, tsubst_flags_t complain, tree in_decl) case TRAIT_TYPE: { - tree type1 = tsubst (TRAIT_TYPE_TYPE1 (t), args, complain, in_decl); + tree type1 = TRAIT_TYPE_TYPE1 (t); + if (TYPE_P (type1)) + type1 = tsubst (type1, args, complain, in_decl); + else + type1 = tsubst_copy_and_build (type1, args, complain, in_decl); tree type2 = tsubst (TRAIT_TYPE_TYPE2 (t), args, complain, in_decl); - type = finish_trait_type (TRAIT_TYPE_KIND (t), type1, type2); + type = finish_trait_type (TRAIT_TYPE_KIND (t), type1, type2, complain); return cp_build_qualified_type (type, cp_type_quals (t) | cp_type_quals (type), complain | tf_ignore_bad_quals); diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc index 45e0b0e..a4f30fd 100644 --- a/gcc/cp/semantics.cc +++ b/gcc/cp/semantics.cc @@ -4470,6 +4470,36 @@ finish_underlying_type (tree type) return underlying_type; } +/* Implement the __type_pack_element keyword: Return the type + at index IDX within TYPES. 
*/ + +static tree +finish_type_pack_element (tree idx, tree types, tsubst_flags_t complain) +{ + idx = maybe_constant_value (idx); + if (TREE_CODE (idx) != INTEGER_CST || !INTEGRAL_TYPE_P (TREE_TYPE (idx))) + { + if (complain & tf_error) + error ("%<__type_pack_element%> index is not an integral constant"); + return error_mark_node; + } + HOST_WIDE_INT val = tree_to_shwi (idx); + if (val < 0) + { + if (complain & tf_error) + error ("%<__type_pack_element%> index is negative"); + return error_mark_node; + } + tree result = chain_index (val, types); + if (!result) + { + if (complain & tf_error) + error ("%<__type_pack_element%> index is out of range"); + return error_mark_node; + } + return TREE_VALUE (result); +} + /* Implement the __direct_bases keyword: Return the direct base classes of type. */ @@ -12245,7 +12275,8 @@ finish_trait_expr (location_t loc, cp_trait_kind kind, tree type1, tree type2) /* Process a trait type. */ tree -finish_trait_type (cp_trait_kind kind, tree type1, tree type2) +finish_trait_type (cp_trait_kind kind, tree type1, tree type2, + tsubst_flags_t complain) { if (type1 == error_mark_node || type2 == error_mark_node) @@ -12269,17 +12300,23 @@ finish_trait_type (cp_trait_kind kind, tree type1, tree type2) { case CPTK_UNDERLYING_TYPE: return finish_underlying_type (type1); + case CPTK_REMOVE_CV: return cv_unqualified (type1); + case CPTK_REMOVE_REFERENCE: if (TYPE_REF_P (type1)) type1 = TREE_TYPE (type1); return type1; + case CPTK_REMOVE_CVREF: if (TYPE_REF_P (type1)) type1 = TREE_TYPE (type1); return cv_unqualified (type1); + case CPTK_TYPE_PACK_ELEMENT: + return finish_type_pack_element (type1, type2, complain); + #define DEFTRAIT_EXPR(CODE, NAME, ARITY) \ case CPTK_##CODE: #include "cp-trait.def" diff --git a/gcc/cp/tree.cc b/gcc/cp/tree.cc index 16b8fcb..2c22fac 100644 --- a/gcc/cp/tree.cc +++ b/gcc/cp/tree.cc @@ -1792,14 +1792,18 @@ strip_typedefs (tree t, bool *remove_attributes /* = NULL */, break; case TRAIT_TYPE: { - tree type1 = strip_typedefs (TRAIT_TYPE_TYPE1 (t), - remove_attributes, flags); + tree type1 = TRAIT_TYPE_TYPE1 (t); + if (TYPE_P (type1)) + type1 = strip_typedefs (type1, remove_attributes, flags); + else + type1 = strip_typedefs_expr (type1, remove_attributes, flags); tree type2 = strip_typedefs (TRAIT_TYPE_TYPE2 (t), remove_attributes, flags); if (type1 == TRAIT_TYPE_TYPE1 (t) && type2 == TRAIT_TYPE_TYPE2 (t)) result = NULL_TREE; else - result = finish_trait_type (TRAIT_TYPE_KIND (t), type1, type2); + result = finish_trait_type (TRAIT_TYPE_KIND (t), type1, type2, + tf_warning_or_error); } break; case TYPE_PACK_EXPANSION: diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc index 8b60cbb..53ac925 100644 --- a/gcc/cp/typeck.cc +++ b/gcc/cp/typeck.cc @@ -1632,7 +1632,7 @@ structural_comptypes (tree t1, tree t2, int strict) case TRAIT_TYPE: if (TRAIT_TYPE_KIND (t1) != TRAIT_TYPE_KIND (t2)) return false; - if (!same_type_p (TRAIT_TYPE_TYPE1 (t1), TRAIT_TYPE_TYPE1 (t2)) + if (!cp_tree_equal (TRAIT_TYPE_TYPE1 (t1), TRAIT_TYPE_TYPE1 (t2)) || !cp_tree_equal (TRAIT_TYPE_TYPE2 (t1), TRAIT_TYPE_TYPE2 (t2))) return false; break; diff --git a/gcc/testsuite/g++.dg/ext/type_pack_element1.C b/gcc/testsuite/g++.dg/ext/type_pack_element1.C new file mode 100644 index 0000000..4685855 --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/type_pack_element1.C @@ -0,0 +1,19 @@ +// { dg-do compile { target c++11 } } + +using ty0 = __type_pack_element<0, int>; +using ty0 = __type_pack_element<0, int, char>; +using ty0 = int; + +using ty1 = __type_pack_element<1, int, char>; +using 
ty1 = __type_pack_element<(6 - 5) * 1, int, char>;
+using ty1 = char;
+
+template<int N, class... Ts>
+using __const_type_pack_element_t = const __type_pack_element<N, Ts...>;
+
+using ty2 = __const_type_pack_element_t<2, int, char, long>;
+using ty2 = const long;
+
+template<class T> struct A { };
+using ty3 = __type_pack_element<3, int, int, int, A<int>>;
+using ty3 = A<int>;
diff --git a/gcc/testsuite/g++.dg/ext/type_pack_element2.C b/gcc/testsuite/g++.dg/ext/type_pack_element2.C new file mode 100644 index 0000000..1bf7753 --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/type_pack_element2.C @@ -0,0 +1,14 @@
+// { dg-do compile { target c++11 } }
+
+int p;
+
+using type = __type_pack_element<&p, int>; // { dg-error "not an integral constant" }
+using type = __type_pack_element<1, int>; // { dg-error "out of range" }
+using type = __type_pack_element<2, int, char>; // { dg-error "out of range" }
+using type = __type_pack_element<-1, int>; // { dg-error "negative" }
+
+template<int N, class... Ts>
+using __type_pack_element_t = __type_pack_element<N, Ts...>;
+// { dg-error "out of range" "" { target *-*-* } .-1 }
+
+using type = __type_pack_element_t<3, int, char, long>; // { dg-message "here" }
diff --git a/gcc/testsuite/g++.dg/ext/type_pack_element3.C b/gcc/testsuite/g++.dg/ext/type_pack_element3.C new file mode 100644 index 0000000..269f84f --- /dev/null +++ b/gcc/testsuite/g++.dg/ext/type_pack_element3.C @@ -0,0 +1,22 @@
+// { dg-do compile { target c++11 } }
+
+template<int N, class... Ts, class = __type_pack_element<N, Ts...>>
+constexpr int f(int) { return 1; }
+
+template<int N, class... Ts>
+constexpr int f(...) { return 2; };
+
+int p;
+
+static_assert(f(0) == 1, "");
+static_assert(f(0) == 1, "");
+static_assert(f(0) == 2, "");
+static_assert(f(0) == 2, "");
+
+template struct A;
+template struct A> { };
+template struct A;
+
+template struct B;
+template struct B> { };
+template struct B;
-- cgit v1.1
From ec9b30879890f70266dda7f9c853f48b22a9fd50 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 28 Feb 2023 05:38:12 -0800 Subject: gcc: xtensa: add data alignment properties to dynconfig
gcc/
* config/xtensa/xtensa-dynconfig.cc (xtensa_get_config_v4): New function.
include/
* xtensa-dynconfig.h (xtensa_config_v4): New struct.
(XCHAL_DATA_WIDTH, XCHAL_UNALIGNED_LOAD_EXCEPTION)
(XCHAL_UNALIGNED_STORE_EXCEPTION, XCHAL_UNALIGNED_LOAD_HW)
(XCHAL_UNALIGNED_STORE_HW, XTENSA_CONFIG_V4_ENTRY_LIST): New definitions.
(XTENSA_CONFIG_INSTANCE_LIST): Add xtensa_config_v4 instance.
(XTENSA_CONFIG_ENTRY_LIST): Add XTENSA_CONFIG_V4_ENTRY_LIST.
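In outline, each XCHAL_* definition listed above is expected to read one field of the lazily loaded v4 configuration record, in the same style as the existing v1-v3 dynconfig macros; the accessor itself appears in the diff below. A sketch of the assumed include/xtensa-dynconfig.h side (that header is not part of this hunk, so the exact definitions are an assumption):

/* Assumed shape of the new macros; the field names match the default
   initializer in xtensa_get_config_v4 () below.  */
#define XCHAL_DATA_WIDTH         (xtensa_get_config_v4 ()->xchal_data_width)
#define XCHAL_UNALIGNED_LOAD_HW  (xtensa_get_config_v4 ()->xchal_unaligned_load_hw)
#define XCHAL_UNALIGNED_STORE_HW (xtensa_get_config_v4 ()->xchal_unaligned_store_hw)

The -mstrict-align patch that follows consumes XCHAL_UNALIGNED_LOAD_HW and XCHAL_UNALIGNED_STORE_HW in exactly this form.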
--- gcc/config/xtensa/xtensa-dynconfig.cc | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'gcc') diff --git a/gcc/config/xtensa/xtensa-dynconfig.cc b/gcc/config/xtensa/xtensa-dynconfig.cc index 9aea9f2..12dce4d 100644 --- a/gcc/config/xtensa/xtensa-dynconfig.cc +++ b/gcc/config/xtensa/xtensa-dynconfig.cc @@ -182,6 +182,24 @@ const struct xtensa_config_v3 *xtensa_get_config_v3 (void) return config; } +const struct xtensa_config_v4 *xtensa_get_config_v4 (void) +{ + static const struct xtensa_config_v4 *config; + static const struct xtensa_config_v4 def = { + 16, /* xchal_data_width */ + 1, /* xchal_unaligned_load_exception */ + 1, /* xchal_unaligned_store_exception */ + 0, /* xchal_unaligned_load_hw */ + 0, /* xchal_unaligned_store_hw */ + }; + + if (!config) + config = (const struct xtensa_config_v4 *) xtensa_load_config ("xtensa_config_v4", + &xtensa_config_v4, + &def); + return config; +} + const char * const *xtensa_get_config_strings (void) { static const char * const *config_strings; -- cgit v1.1 From 675b390e6d037c035ed3be9aca03de3b0f0549f4 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 28 Feb 2023 05:46:29 -0800 Subject: gcc: xtensa: add -m[no-]strict-align option gcc/ * config/xtensa/xtensa-opts.h: New header. * config/xtensa/xtensa.h (STRICT_ALIGNMENT): Redefine as xtensa_strict_align. * config/xtensa/xtensa.cc (xtensa_option_override): When -m[no-]strict-align is not specified in the command line set xtensa_strict_align to 0 if the hardware supports both unaligned loads and stores or to 1 otherwise. * config/xtensa/xtensa.opt (mstrict-align): New option. * doc/invoke.texi (Xtensa Options): Document -m[no-]strict-align. --- gcc/config/xtensa/xtensa-opts.h | 28 ++++++++++++++++++++++++++++ gcc/config/xtensa/xtensa.cc | 4 ++++ gcc/config/xtensa/xtensa.h | 2 +- gcc/config/xtensa/xtensa.opt | 7 +++++++ gcc/doc/invoke.texi | 14 +++++++++++++- 5 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 gcc/config/xtensa/xtensa-opts.h (limited to 'gcc') diff --git a/gcc/config/xtensa/xtensa-opts.h b/gcc/config/xtensa/xtensa-opts.h new file mode 100644 index 0000000..f0b8f5b --- /dev/null +++ b/gcc/config/xtensa/xtensa-opts.h @@ -0,0 +1,28 @@ +/* Definitions for option handling for Xtensa. + Copyright (C) 2023 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#ifndef XTENSA_OPTS_H +#define XTENSA_OPTS_H + +/* Undefined state for the -mstrict-alignment option */ +enum xtensa_strict_alignment_setting { + XTENSA_STRICT_ALIGNMENT_UNDEFINED = -1, +}; + +#endif diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc index 7287aa7..9e5d314 100644 --- a/gcc/config/xtensa/xtensa.cc +++ b/gcc/config/xtensa/xtensa.cc @@ -2792,6 +2792,10 @@ xtensa_option_override (void) if (xtensa_windowed_abi == -1) xtensa_windowed_abi = TARGET_WINDOWED_ABI_DEFAULT; + if (xtensa_strict_alignment == XTENSA_STRICT_ALIGNMENT_UNDEFINED) + xtensa_strict_alignment = !XCHAL_UNALIGNED_LOAD_HW + || !XCHAL_UNALIGNED_STORE_HW; + if (! TARGET_THREADPTR) targetm.have_tls = false; diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h index 8ebf37c..34e06af 100644 --- a/gcc/config/xtensa/xtensa.h +++ b/gcc/config/xtensa/xtensa.h @@ -143,7 +143,7 @@ along with GCC; see the file COPYING3. If not see /* Set this nonzero if move instructions will actually fail to work when given unaligned data. */ -#define STRICT_ALIGNMENT 1 +#define STRICT_ALIGNMENT (xtensa_strict_alignment) /* Promote integer modes smaller than a word to SImode. Set UNSIGNEDP for QImode, because there is no 8-bit load from memory with sign diff --git a/gcc/config/xtensa/xtensa.opt b/gcc/config/xtensa/xtensa.opt index 3a129a4..f16b53b 100644 --- a/gcc/config/xtensa/xtensa.opt +++ b/gcc/config/xtensa/xtensa.opt @@ -18,6 +18,9 @@ ; along with GCC; see the file COPYING3. If not see ; . +HeaderInclude +config/xtensa/xtensa-opts.h + mconst16 Target Mask(CONST16) Use CONST16 instruction to load constants. @@ -64,3 +67,7 @@ Use call0 ABI. mabi=windowed Target RejectNegative Var(xtensa_windowed_abi, 1) Use windowed registers ABI. + +mstrict-align +Target Var(xtensa_strict_alignment) Init(XTENSA_STRICT_ALIGNMENT_UNDEFINED) +Do not use unaligned memory references. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 57fb170..54dcccb 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1474,7 +1474,8 @@ See RS/6000 and PowerPC Options. -mtarget-align -mno-target-align -mlongcalls -mno-longcalls -mabi=@var{abi-type} --mextra-l32r-costs=@var{cycles}} +-mextra-l32r-costs=@var{cycles} +-mstrict-align -mno-strict-align} @emph{zSeries Options} See S/390 and zSeries Options. @@ -34401,6 +34402,17 @@ instructions, in clock cycles. This affects, when optimizing for speed, whether loading a constant from literal pool using @code{L32R} or synthesizing the constant from a small one with a couple of arithmetic instructions. The default value is 0. + +@opindex mstrict-align +@opindex mno-strict-align +@item -mstrict-align +@itemx -mno-strict-align +Avoid or allow generating memory accesses that may not be aligned on a natural +object boundary as described in the architecture specification. +The default is @option{-mno-strict-align} for cores that support both +unaligned loads and stores in hardware and @option{-mstrict-align} for all +other cores. + @end table @node zSeries Options -- cgit v1.1 From cf0d9dbc091af3ea28432dd7fff08a6e6c6659d8 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Thu, 20 Apr 2023 00:17:12 +0000 Subject: Daily bump. 
--- gcc/ChangeLog | 314 ++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/cp/ChangeLog | 33 +++++ gcc/testsuite/ChangeLog | 84 +++++++++++++ 4 files changed, 432 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index dac0d00..58c9a91 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,317 @@ +2023-04-19 Max Filippov + + * config/xtensa/xtensa-opts.h: New header. + * config/xtensa/xtensa.h (STRICT_ALIGNMENT): Redefine as + xtensa_strict_align. + * config/xtensa/xtensa.cc (xtensa_option_override): When + -m[no-]strict-align is not specified in the command line set + xtensa_strict_align to 0 if the hardware supports both unaligned + loads and stores or to 1 otherwise. + * config/xtensa/xtensa.opt (mstrict-align): New option. + * doc/invoke.texi (Xtensa Options): Document -m[no-]strict-align. + +2023-04-19 Max Filippov + + * config/xtensa/xtensa-dynconfig.cc (xtensa_get_config_v4): New + function. + +2023-04-19 Andrew Pinski + + * config/i386/i386.md (*movsicc_noc_zext_1): New pattern. + +2023-04-19 Juzhe-Zhong + + * config/riscv/riscv-modes.def (FLOAT_MODE): Add chunk 128 support. + (VECTOR_BOOL_MODE): Ditto. + (ADJUST_NUNITS): Ditto. + (ADJUST_ALIGNMENT): Ditto. + (ADJUST_BYTESIZE): Ditto. + (ADJUST_PRECISION): Ditto. + (RVV_MODES): Ditto. + (VECTOR_MODE_WITH_PREFIX): Ditto. + * config/riscv/riscv-v.cc (ENTRY): Ditto. + (get_vlmul): Ditto. + (get_ratio): Ditto. + * config/riscv/riscv-vector-builtins.cc (DEF_RVV_TYPE): Ditto. + * config/riscv/riscv-vector-builtins.def (DEF_RVV_TYPE): Ditto. + (vbool64_t): Ditto. + (vbool32_t): Ditto. + (vbool16_t): Ditto. + (vbool8_t): Ditto. + (vbool4_t): Ditto. + (vbool2_t): Ditto. + (vbool1_t): Ditto. + (vint8mf8_t): Ditto. + (vuint8mf8_t): Ditto. + (vint8mf4_t): Ditto. + (vuint8mf4_t): Ditto. + (vint8mf2_t): Ditto. + (vuint8mf2_t): Ditto. + (vint8m1_t): Ditto. + (vuint8m1_t): Ditto. + (vint8m2_t): Ditto. + (vuint8m2_t): Ditto. + (vint8m4_t): Ditto. + (vuint8m4_t): Ditto. + (vint8m8_t): Ditto. + (vuint8m8_t): Ditto. + (vint16mf4_t): Ditto. + (vuint16mf4_t): Ditto. + (vint16mf2_t): Ditto. + (vuint16mf2_t): Ditto. + (vint16m1_t): Ditto. + (vuint16m1_t): Ditto. + (vint16m2_t): Ditto. + (vuint16m2_t): Ditto. + (vint16m4_t): Ditto. + (vuint16m4_t): Ditto. + (vint16m8_t): Ditto. + (vuint16m8_t): Ditto. + (vint32mf2_t): Ditto. + (vuint32mf2_t): Ditto. + (vint32m1_t): Ditto. + (vuint32m1_t): Ditto. + (vint32m2_t): Ditto. + (vuint32m2_t): Ditto. + (vint32m4_t): Ditto. + (vuint32m4_t): Ditto. + (vint32m8_t): Ditto. + (vuint32m8_t): Ditto. + (vint64m1_t): Ditto. + (vuint64m1_t): Ditto. + (vint64m2_t): Ditto. + (vuint64m2_t): Ditto. + (vint64m4_t): Ditto. + (vuint64m4_t): Ditto. + (vint64m8_t): Ditto. + (vuint64m8_t): Ditto. + (vfloat32mf2_t): Ditto. + (vfloat32m1_t): Ditto. + (vfloat32m2_t): Ditto. + (vfloat32m4_t): Ditto. + (vfloat32m8_t): Ditto. + (vfloat64m1_t): Ditto. + (vfloat64m2_t): Ditto. + (vfloat64m4_t): Ditto. + (vfloat64m8_t): Ditto. + * config/riscv/riscv-vector-switch.def (ENTRY): Ditto. + * config/riscv/riscv.cc (riscv_legitimize_poly_move): Ditto. + (riscv_convert_vector_bits): Ditto. + * config/riscv/riscv.md: + * config/riscv/vector-iterators.md: + * config/riscv/vector.md + (@pred_indexed_store): Ditto. + (@pred_indexed_store): Ditto. + (@pred_indexed_store): Ditto. + (@pred_indexed_store): Ditto. + (@pred_indexed_store): Ditto. + (@pred_reduc_): Ditto. + (@pred_widen_reduc_plus): Ditto. + (@pred_reduc_plus): Ditto. + (@pred_widen_reduc_plus): Ditto. 
+ +2023-04-19 Pan Li + + * simplify-rtx.cc (simplify_context::simplify_binary_operation_1): + Align IOR (A | (~A) -> -1) optimization MODE_CLASS condition to AND. + +2023-04-19 Uros Bizjak + + PR target/78904 + PR target/78952 + * config/i386/i386.md (*cmpqi_ext_1_mem_rex64): New insn pattern. + (*cmpqi_ext_1): Use nonimmediate_operand predicate + for operand 0. Use any_extract code iterator. + (*cmpqi_ext_1 peephole2): New peephole2 pattern. + (*cmpqi_ext_2): Use any_extract code iterator. + (*cmpqi_ext_3_mem_rex64): New insn pattern. + (*cmpqi_ext_1): Use general_operand predicate + for operand 1. Use any_extract code iterator. + (*cmpqi_ext_3 peephole2): New peephole2 pattern. + (*cmpqi_ext_4): Use any_extract code iterator. + +2023-04-19 Kyrylo Tkachov + + * config/aarch64/aarch64-simd.md (aarch64_saddw2): Delete. + (aarch64_uaddw2): Delete. + (aarch64_ssubw2): Delete. + (aarch64_usubw2): Delete. + (aarch64_w2): New define_expand. + +2023-04-19 Richard Biener + + * tree-ssa-structalias.cc (do_ds_constraint): Use + solve_add_graph_edge. + +2023-04-19 Richard Biener + + * tree-ssa-structalias.cc (solve_add_graph_edge): New function, + split out from ... + (do_sd_constraint): ... here. + +2023-04-19 Richard Biener + + * tree-cfg.cc (gimple_can_merge_blocks_p): Remove condition + rejecting the merge when A contains only a non-local label. + +2023-04-19 Uros Bizjak + + * rtl.h (VIRTUAL_REGISTER_P): New predicate. + (VIRTUAL_REGISTER_NUM_P): Ditto. + (REGNO_PTR_FRAME_P): Use VIRTUAL_REGISTER_NUM_P predicate. + * expr.cc (force_operand): Use VIRTUAL_REGISTER_P predicate. + * function.cc (instantiate_decl_rtl): Ditto. + * rtlanal.cc (rtx_addr_can_trap_p_1): Ditto. + (nonzero_address_p): Ditto. + (refers_to_regno_p): Use VIRTUAL_REGISTER_NUM_P predicate. + +2023-04-19 Aldy Hernandez + + * value-range.h (Value_Range::Value_Range): Avoid pointer sharing. + +2023-04-19 Richard Biener + + * system.h (auto_mpz::operator->()): New. + * realmpfr.h (auto_mpfr::operator->()): New. + * builtins.cc (do_mpfr_lgamma_r): Use auto_mpfr. + * real.cc (real_from_string): Likewise. + (dconst_e_ptr): Likewise. + (dconst_sqrt2_ptr): Likewise. + * tree-ssa-loop-niter.cc (refine_value_range_using_guard): + Use auto_mpz. + (bound_difference_of_offsetted_base): Likewise. + (number_of_iterations_ne): Likewise. + (number_of_iterations_lt_to_ne): Likewise. + * ubsan.cc: Include realmpfr.h. + (ubsan_instrument_float_cast): Use auto_mpfr. + +2023-04-19 Richard Biener + + * tree-ssa-structalias.cc (solve_graph): Remove self-copy + edges, remove edges from escaped after special-casing them. + +2023-04-19 Richard Biener + + * tree-ssa-structalias.cc (do_sd_constraint): Fixup escape + special casing. + +2023-04-19 Richard Biener + + * tree-ssa-structalias.cc (do_sd_constraint): Do not write + to the LHS varinfo solution member. + +2023-04-19 Richard Biener + + * tree-ssa-structalias.cc (topo_visit): Look at the real + destination of edges. + +2023-04-19 Richard Biener + + PR tree-optimization/44794 + * tree-ssa-loop-manip.cc (tree_transform_and_unroll_loop): + If an epilogue loop is required set its iteration upper bound. + +2023-04-19 Xi Ruoyao + + PR target/109465 + * config/loongarch/loongarch-protos.h + (loongarch_expand_block_move): Add a parameter as alignment RTX. + * config/loongarch/loongarch.h: + (LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER): Remove. + (LARCH_MAX_MOVE_BYTES_STRAIGHT): Remove. + (LARCH_MAX_MOVE_OPS_PER_LOOP_ITER): Define. + (LARCH_MAX_MOVE_OPS_STRAIGHT): Define. 
+ (MOVE_RATIO): Use LARCH_MAX_MOVE_OPS_PER_LOOP_ITER instead of + LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER. + * config/loongarch/loongarch.cc (loongarch_expand_block_move): + Take the alignment from the parameter, but set it to + UNITS_PER_WORD if !TARGET_STRICT_ALIGN. Limit the length of + straight-line implementation with LARCH_MAX_MOVE_OPS_STRAIGHT + instead of LARCH_MAX_MOVE_BYTES_STRAIGHT. + (loongarch_block_move_straight): When there are left-over bytes, + half the mode size instead of falling back to byte mode at once. + (loongarch_block_move_loop): Limit the length of loop body with + LARCH_MAX_MOVE_OPS_PER_LOOP_ITER instead of + LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER. + * config/loongarch/loongarch.md (cpymemsi): Pass the alignment + to loongarch_expand_block_move. + +2023-04-19 Xi Ruoyao + + * config/loongarch/loongarch.cc + (loongarch_setup_incoming_varargs): Don't save more GARs than + cfun->va_list_gpr_size / UNITS_PER_WORD. + +2023-04-19 Richard Biener + + * tree-ssa-loop-manip.cc (determine_exit_conditions): Fix + no epilogue condition. + +2023-04-19 Richard Biener + + * gimple.h (gimple_assign_load): Outline... + * gimple.cc (gimple_assign_load): ... here. Avoid + get_base_address and instead just strip the outermost + handled component, treating a remaining handled component + as load. + +2023-04-19 Kyrylo Tkachov + + * config/aarch64/aarch64-simd-builtins.def (neg): Delete builtins + definition. + * config/aarch64/arm_fp16.h (vnegh_f16): Reimplement using normal negation. + +2023-04-19 Jakub Jelinek + + PR tree-optimization/109011 + * tree-vect-patterns.cc (vect_recog_popcount_pattern): Rename to ... + (vect_recog_popcount_clz_ctz_ffs_pattern): ... this. Handle also + CLZ, CTZ and FFS. Remove vargs variable, use + gimple_build_call_internal rather than gimple_build_call_internal_vec. + (vect_vect_recog_func_ptrs): Adjust popcount entry. + +2023-04-19 Jakub Jelinek + + PR target/109040 + * dse.cc (replace_read): If read_reg is a SUBREG of a word mode + REG, for WORD_REGISTER_OPERATIONS copy SUBREG_REG of it into + a new REG rather than the SUBREG. + +2023-04-19 Prathamesh Kulkarni + + * config/aarch64/aarch64-simd.md (aarch64_simd_vec_set_zero): + New pattern. + +2023-04-19 Kyrylo Tkachov + + PR target/108840 + * config/aarch64/aarch64.cc (aarch64_rtx_costs): Merge ASHIFT and + ROTATE, ROTATERT, LSHIFTRT, ASHIFTRT cases. Handle subregs in op1. + +2023-04-19 Richard Biener + + PR rtl-optimization/109237 + * cse.cc (insn_live_p): Remove NEXT_INSN walk, instead check + TREE_VISITED on INSN_VAR_LOCATION_DECL. + (delete_trivially_dead_insns): Maintain TREE_VISITED on + active debug bind INSN_VAR_LOCATION_DECL. + +2023-04-19 Richard Biener + + PR rtl-optimization/109237 + * cfgcleanup.cc (bb_is_just_return): Walk insns backwards. + +2023-04-19 Christophe Lyon + + * doc/install.texi (enable-decimal-float): Add AArch64. + +2023-04-19 liuhongt + + PR rtl-optimization/109351 + * ira.cc (setup_class_subset_and_memory_move_costs): Check + hard_regno_mode_ok before setting lowest memory move cost for + the mode with different reg classes. + 2023-04-18 Jason Merrill * doc/invoke.texi: Remove stray @gol. diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 59726e5..758629b 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20230419 +20230420 diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index d6a5b88..ee08b14 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,36 @@ +2023-04-19 Patrick Palka + Jonathan Wakely + + PR c++/100157 + * cp-trait.def (TYPE_PACK_ELEMENT): Define. 
+ * cp-tree.h (finish_trait_type): Add complain parameter.
+ * cxx-pretty-print.cc (pp_cxx_trait): Handle
+ CPTK_TYPE_PACK_ELEMENT.
+ * parser.cc (cp_parser_constant_expression): Document default
+ arguments.
+ (cp_parser_trait): Handle CPTK_TYPE_PACK_ELEMENT. Pass
+ tf_warning_or_error to finish_trait_type.
+ * pt.cc (tsubst) <case TRAIT_TYPE>: Handle non-type first
+ argument. Pass complain to finish_trait_type.
+ * semantics.cc (finish_type_pack_element): Define.
+ (finish_trait_type): Add complain parameter. Handle
+ CPTK_TYPE_PACK_ELEMENT.
+ * tree.cc (strip_typedefs): Handle non-type first argument.
+ Pass tf_warning_or_error to finish_trait_type.
+ * typeck.cc (structural_comptypes) <case TRAIT_TYPE>: Use
+ cp_tree_equal instead of same_type_p for the first argument.
+
+2023-04-19 Patrick Palka
+
+ PR c++/109556
+ * pt.cc (try_class_unification): Don't ggc_free the copy of
+ 'targs'.
+
+2023-04-19 Jason Merrill
+
+ PR c++/108099
+ * decl.cc (grokdeclarator): Keep typedef_decl for __int128_t.
+
 2023-04-17 Patrick Palka PR c++/109531
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 84c6c5a..4fa4c51 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,87 @@
+2023-04-19 Patrick Palka
+ Jonathan Wakely
+
+ PR c++/100157
+ * g++.dg/ext/type_pack_element1.C: New test.
+ * g++.dg/ext/type_pack_element2.C: New test.
+ * g++.dg/ext/type_pack_element3.C: New test.
+
+2023-04-19 Patrick Palka
+
+ PR c++/109556
+ * g++.dg/cpp2a/concepts-placeholder13.C: New test.
+
+2023-04-19 Harald Anlauf
+
+ PR testsuite/83904
+ PR fortran/100297
+ * gfortran.dg/allocatable_function_1.f90: Use "__builtin_free "
+ instead of the naive "free".
+ * gfortran.dg/reshape_8.f90: Extend pattern from a simple "data".
+
+2023-04-19 Andrew Pinski
+
+ * gcc.target/i386/cmov10.c: New test.
+ * gcc.target/i386/cmov11.c: New test.
+
+2023-04-19 Jason Merrill
+
+ PR c++/108099
+ * g++.dg/ext/int128-8.C: New test.
+
+2023-04-19 Juzhe-Zhong
+
+ * gcc.target/riscv/rvv/base/pr108185-4.c: Adapt testcase.
+ * gcc.target/riscv/rvv/base/spill-1.c: Ditto.
+ * gcc.target/riscv/rvv/base/spill-11.c: Ditto.
+ * gcc.target/riscv/rvv/base/spill-2.c: Ditto.
+ * gcc.target/riscv/rvv/base/spill-3.c: Ditto.
+ * gcc.target/riscv/rvv/base/spill-5.c: Ditto.
+ * gcc.target/riscv/rvv/base/spill-9.c: Ditto.
+
+2023-04-19 Pan Li
+
+ * gcc.target/riscv/rvv/base/mask_insn_shortcut.c: Update check
+ condition.
+ * gcc.target/riscv/simplify_ior_optimization.c: New test.
+
+2023-04-19 Uros Bizjak
+
+ PR target/78904
+ PR target/78952
+ * gcc.target/i386/pr78952-3.c: New test.
+
+2023-04-19 Xi Ruoyao
+
+ PR target/109465
+ * gcc.target/loongarch/pr109465-1.c: New test.
+ * gcc.target/loongarch/pr109465-2.c: New test.
+ * gcc.target/loongarch/pr109465-3.c: New test.
+
+2023-04-19 Xi Ruoyao
+
+ * gcc.target/loongarch/va_arg.c: New test.
+
+2023-04-19 Jakub Jelinek
+
+ PR tree-optimization/109011
+ * gcc.dg/vect/pr109011-1.c: New test.
+
+2023-04-19 Prathamesh Kulkarni
+
+ * gcc.target/aarch64/vec-set-zero.c: New test.
+
+2023-04-19 Kyrylo Tkachov
+
+ PR target/108840
+ * gcc.target/aarch64/pr108840.c: New test.
+
+2023-04-19 Jakub Jelinek
+
+ PR tree-optimization/109524
+ * g++.dg/pr109524.C (nn::nn): Change argument type from nn & to
+ const nn &.
+
 2023-04-18 Uros Bizjak PR target/94908
-- cgit v1.1
From c2dac2e5fbbcdda013aa7b0609d579abec8120ec Mon Sep 17 00:00:00 2001 From: "Hu, Lin1" Date: Mon, 16 Jan 2023 11:23:09 +0800 Subject: Optimize vshuf{i,f}{32x4,64x2} ymm and vperm{i,f}128 ymm
vshuf{i,f}{32x4,64x2} ymm and vperm{i,f}128 ymm take 3 clock cycles.
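As an illustration (not part of the patch; the function name is made up), selector 0x30 builds the result's low 128-bit lane from a's low lane and its high lane from b's high lane, so no data crosses a lane boundary and the 3-clock permute is equivalent to a cheap blend:

#include <immintrin.h>

/* Lane 0 comes from a and lane 1 comes from b, each staying in place,
   so this permute can be emitted as vblendps with mask 0xf0 (240).  */
__m256 pick_halves (__m256 a, __m256 b)
{
  return _mm256_permute2f128_ps (a, b, 0x30);
}

A selector such as 0x21, by contrast, swaps the two lanes and still needs the real permute.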
We can optimize them to vblendps or vmovaps when no data crosses a 128-bit lane, as in the illustration above.
gcc/ChangeLog:
* config/i386/sse.md: Modify insn vperm{i,f} and vshuf{i,f}.
gcc/testsuite/ChangeLog:
* gcc.target/i386/avx512vl-vshuff32x4-1.c: Modify test.
* gcc.target/i386/avx512vl-vshuff64x2-1.c: Ditto.
* gcc.target/i386/avx512vl-vshufi32x4-1.c: Ditto.
* gcc.target/i386/avx512vl-vshufi64x2-1.c: Ditto.
* gcc.target/i386/opt-vperm-vshuf-1.c: New test.
* gcc.target/i386/opt-vperm-vshuf-2.c: Ditto.
* gcc.target/i386/opt-vperm-vshuf-3.c: Ditto.
--- gcc/config/i386/sse.md | 36 ++++++++++-- .../gcc.target/i386/avx512vl-vshuff32x4-1.c | 2 +- .../gcc.target/i386/avx512vl-vshuff64x2-1.c | 2 +- .../gcc.target/i386/avx512vl-vshufi32x4-1.c | 2 +- .../gcc.target/i386/avx512vl-vshufi64x2-1.c | 2 +- gcc/testsuite/gcc.target/i386/opt-vperm-vshuf-1.c | 51 ++++++++++++++++ gcc/testsuite/gcc.target/i386/opt-vperm-vshuf-2.c | 68 ++++++++++++++++++++++ gcc/testsuite/gcc.target/i386/opt-vperm-vshuf-3.c | 63 ++++++++++++++++++++ 8 files changed, 218 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/opt-vperm-vshuf-1.c create mode 100644 gcc/testsuite/gcc.target/i386/opt-vperm-vshuf-2.c create mode 100644 gcc/testsuite/gcc.target/i386/opt-vperm-vshuf-3.c (limited to 'gcc')
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 5dca8dd..b0d9c02 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -18438,6 +18438,8 @@ mask = INTVAL (operands[3]) / 2; mask |= (INTVAL (operands[5]) - 4) / 2 << 1; operands[3] = GEN_INT (mask);
+ if (INTVAL (operands[3]) == 2 && !<mask_applied>)
+ return "vblendps\t{$240, %2, %1, %0|%0, %1, %2, 240}";
 return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0|%0, %1, %2, %3}"; } [(set_attr "type" "sselog")
@@ -18596,6 +18598,9 @@ mask |= (INTVAL (operands[7]) - 8) / 4 << 1; operands[3] = GEN_INT (mask);
+
+ if (INTVAL (operands[3]) == 2 && !<mask_applied>)
+ return "vblendps\t{$240, %2, %1, %0|%0, %1, %2, 240}"; + return "vshuf32x4\t{%3, %2, %1, %0|%0, %1, %2, %3}"; } [(set_attr "type" "sselog") @@ -25664,7 +25669,28 @@ (match_operand:SI 3 "const_0_to_255_operand")] UNSPEC_VPERMTI))] "TARGET_AVX2" - "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}" + { + int mask = INTVAL (operands[3]); + if ((mask & 0xbb) == 16) + { + if (rtx_equal_p (operands[0], operands[1])) + return ""; + else + return "vmovaps\t{%1, %0|%0, %1}"; + } + if ((mask & 0xbb) == 50) + { + if (rtx_equal_p (operands[0], operands[2])) + return ""; + else + return "vmovaps\t{%2, %0|%0, %2}"; + } + if ((mask & 0xbb) == 18) + return "vblendps\t{$15, %2, %1, %0|%0, %1, %2, 15}"; + if ((mask & 0xbb) == 48) + return "vblendps\t{$240, %2, %1, %0|%0, %1, %2, 240}"; + return "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + } [(set_attr "type" "sselog") (set_attr "prefix" "vex") (set_attr "mode" "OI")]) @@ -26227,9 +26253,11 @@ && avx_vperm2f128_parallel (operands[3], mode)" { int mask = avx_vperm2f128_parallel (operands[3], mode) - 1; - if (mask == 0x12) - return "vinsert\t{$0, %x2, %1, %0|%0, %1, %x2, 0}"; - if (mask == 0x20) + if ((mask & 0xbb) == 0x12) + return "vblendps\t{$15, %2, %1, %0|%0, %1, %2, 15}"; + if ((mask & 0xbb) == 0x30) + return "vblendps\t{$240, %2, %1, %0|%0, %1, %2, 240}"; + if ((mask & 0xbb) == 0x20) return "vinsert\t{$1, %x2, %1, %0|%0, %1, %x2, 1}"; operands[3] = GEN_INT (mask); return "vperm2\t{%3, %2, %1, %0|%0, %1, %2, %3}"; diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vshuff32x4-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vshuff32x4-1.c index 6c2fb2f1..02aecf4 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vshuff32x4-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vshuff32x4-1.c @@ -12,7 +12,7 @@ volatile __mmask8 m; void extern avx512vl_test (void) { - x = _mm256_shuffle_f32x4 (x, x, 2); + x = _mm256_shuffle_f32x4 (x, x, 3); x = _mm256_mask_shuffle_f32x4 (x, m, x, x, 2); x = _mm256_maskz_shuffle_f32x4 (m, x, x, 2); } diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vshuff64x2-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vshuff64x2-1.c index 1191b40..563ded5 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vshuff64x2-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vshuff64x2-1.c @@ -12,7 +12,7 @@ volatile __mmask8 m; void extern avx512vl_test (void) { - x = _mm256_shuffle_f64x2 (x, x, 2); + x = _mm256_shuffle_f64x2 (x, x, 3); x = _mm256_mask_shuffle_f64x2 (x, m, x, x, 2); x = _mm256_maskz_shuffle_f64x2 (m, x, x, 2); } diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vshufi32x4-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vshufi32x4-1.c index ef9a441..e89c414 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vshufi32x4-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vshufi32x4-1.c @@ -12,7 +12,7 @@ volatile __mmask8 m; void extern avx512vl_test (void) { - x = _mm256_shuffle_i32x4 (x, x, 2); + x = _mm256_shuffle_i32x4 (x, x, 3); x = _mm256_mask_shuffle_i32x4 (x, m, x, x, 2); x = _mm256_maskz_shuffle_i32x4 (m, x, x, 2); } diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vshufi64x2-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vshufi64x2-1.c index 0bd117e..8e8e47e 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vshufi64x2-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vshufi64x2-1.c @@ -12,7 +12,7 @@ volatile __mmask8 m; void extern avx512vl_test (void) { - x = _mm256_shuffle_i64x2 (x, x, 2); + x = _mm256_shuffle_i64x2 (x, x, 3); x = _mm256_mask_shuffle_i64x2 (x, m, x, x, 2); x = _mm256_maskz_shuffle_i64x2 (m, x, x, 2); } diff --git 
a/gcc/testsuite/gcc.target/i386/opt-vperm-vshuf-1.c b/gcc/testsuite/gcc.target/i386/opt-vperm-vshuf-1.c new file mode 100644 index 0000000..1ee00b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/opt-vperm-vshuf-1.c @@ -0,0 +1,51 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -march=sapphirerapids" } */ +/* { dg-final { scan-assembler-times "vmovaps" 1 } } */ +/* { dg-final { scan-assembler-times "vblendps\t\\\$15" 1 } } */ +/* { dg-final { scan-assembler-times "vblendps\t\\\$240" 5 } } */ + +#include + +/* Vpermi128/Vpermf128 */ +__m256i +perm0 (__m256i a, __m256i b) +{ + return _mm256_permute2x128_si256 (a, b, 50); +} + +__m256i +perm1 (__m256i a, __m256i b) +{ + return _mm256_permute2x128_si256 (a, b, 18); +} + +__m256i +perm2 (__m256i a, __m256i b) +{ + return _mm256_permute2x128_si256 (a, b, 48); +} + +/* vshuf{i,f}{32x4,64x2} ymm .*/ +__m256i +shuff0 (__m256i a, __m256i b) +{ + return _mm256_shuffle_i32x4(a, b, 2); +} + +__m256 +shuff1 (__m256 a, __m256 b) +{ + return _mm256_shuffle_f32x4(a, b, 2); +} + +__m256i +shuff2 (__m256i a, __m256i b) +{ + return _mm256_shuffle_i64x2(a, b, 2); +} + +__m256d +shuff3 (__m256d a, __m256d b) +{ + return _mm256_shuffle_f64x2(a, b, 2); +} diff --git a/gcc/testsuite/gcc.target/i386/opt-vperm-vshuf-2.c b/gcc/testsuite/gcc.target/i386/opt-vperm-vshuf-2.c new file mode 100644 index 0000000..9775072 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/opt-vperm-vshuf-2.c @@ -0,0 +1,68 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -march=sapphirerapids" } */ +/* { dg-final { scan-assembler-not "vmovaps" } } */ +/* { dg-final { scan-assembler-not "vblendps" } } */ +/* { dg-final { scan-assembler-not "vperm2i128" } } */ +/* { dg-final { scan-assembler-not "vperm2f128" } } */ + +#include + +__m256i +perm0 (__m256i a, __m256i b) +{ + return _mm256_permute2x128_si256 (a, b, 16); +} + +__m256d +perm1 (__m256d a, __m256d b) +{ + return _mm256_permute2f128_pd (a, b, 16); +} + +__m256 +perm2 (__m256 a, __m256 b) +{ + return _mm256_permute2f128_ps (a, b, 16); +} + +__m256i +perm3 (__m256i a, __m256i b) +{ + return _mm256_permute2f128_si256 (a, b, 16); +} + +__m256i +perm4 (__m256i a, __m256i b) +{ + return _mm256_permute2x128_si256 (a, b, 20); +} + +__m256d +perm5 (__m256d a, __m256d b) +{ + return _mm256_permute2f128_pd (a, b, 20); +} + +__m256i +perm6 (__m256i a, __m256i b) +{ + return _mm256_permute2x128_si256 (a, b, 80); +} + +__m256d +perm7 (__m256d a, __m256d b) +{ + return _mm256_permute2f128_pd (a, b, 80); +} + +__m256i +perm8 (__m256i a, __m256i b) +{ + return _mm256_permute2x128_si256 (a, b, 84); +} + +__m256d +perm9 (__m256d a, __m256d b) +{ + return _mm256_permute2f128_pd (a, b, 84); +} diff --git a/gcc/testsuite/gcc.target/i386/opt-vperm-vshuf-3.c b/gcc/testsuite/gcc.target/i386/opt-vperm-vshuf-3.c new file mode 100644 index 0000000..a330b14 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/opt-vperm-vshuf-3.c @@ -0,0 +1,63 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -march=sapphirerapids" } */ +/* { dg-final { scan-assembler-times "vmov..." 
3 } } */ +/* { dg-final { scan-assembler-times "vblendps\t\\\$15" 3 } } */ +/* { dg-final { scan-assembler-times "vblendps\t\\\$240" 3 } } */ +/* { dg-final { scan-assembler-not "vperm2f128" } } */ + +#include + +/* Vpermf128 */ +__m256 +perm0 (__m256 a, __m256 b) +{ + return _mm256_permute2f128_ps (a, b, 50); +} + +__m256 +perm1 (__m256 a, __m256 b) +{ + return _mm256_permute2f128_ps (a, b, 18); +} + +__m256 +perm2 (__m256 a, __m256 b) +{ + return _mm256_permute2f128_ps (a, b, 48); +} + +__m256i +perm3 (__m256i a, __m256i b) +{ + return _mm256_permute2f128_si256 (a, b, 50); +} + +__m256i +perm4 (__m256i a, __m256i b) +{ + return _mm256_permute2f128_si256 (a, b, 18); +} + +__m256i +perm5 (__m256i a, __m256i b) +{ + return _mm256_permute2f128_si256 (a, b, 48); +} + +__m256d +perm6 (__m256d a, __m256d b) +{ + return _mm256_permute2f128_pd (a, b, 50); +} + +__m256d +perm7 (__m256d a, __m256d b) +{ + return _mm256_permute2f128_pd (a, b, 18); +} + +__m256d +perm8 (__m256d a, __m256d b) +{ + return _mm256_permute2f128_pd (a, b, 48); +} -- cgit v1.1 From fd7ecd8010c9967b79fa35d337d6fd27de303a72 Mon Sep 17 00:00:00 2001 From: "Mo, Zewei" Date: Tue, 10 Jan 2023 16:11:02 +0800 Subject: Re-arrange sections of i386 cpuid gcc/ChangeLog: * config/i386/cpuid.h: Open a new section for Extended Features Leaf (%eax == 7, %ecx == 0) and Extended Features Sub-leaf (%eax == 7, %ecx == 1). --- gcc/config/i386/cpuid.h | 61 ++++++++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 29 deletions(-) (limited to 'gcc') diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index e1d6d2f..4cc4461 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -24,15 +24,6 @@ #ifndef _CPUID_H_INCLUDED #define _CPUID_H_INCLUDED -/* %eax */ -#define bit_RAOINT (1 << 3) -#define bit_AVXVNNI (1 << 4) -#define bit_AVX512BF16 (1 << 5) -#define bit_CMPCCXADD (1 << 7) -#define bit_AMX_FP16 (1 << 21) -#define bit_HRESET (1 << 22) -#define bit_AVXIFMA (1 << 23) - /* %ecx */ #define bit_SSE3 (1 << 0) #define bit_PCLMUL (1 << 1) @@ -52,10 +43,7 @@ #define bit_RDRND (1 << 30) /* %edx */ -#define bit_AVXVNNIINT8 (1 << 4) -#define bit_AVXNECONVERT (1 << 5) #define bit_CMPXCHG8B (1 << 8) -#define bit_PREFETCHI (1 << 14) #define bit_CMOV (1 << 15) #define bit_MMX (1 << 23) #define bit_FXSAVE (1 << 24) @@ -84,19 +72,19 @@ #define bit_CLZERO (1 << 0) #define bit_WBNOINVD (1 << 9) -/* Extended Features (%eax == 7) */ +/* Extended Features Leaf (%eax == 7, %ecx == 0) */ /* %ebx */ #define bit_FSGSBASE (1 << 0) -#define bit_SGX (1 << 2) -#define bit_BMI (1 << 3) -#define bit_HLE (1 << 4) +#define bit_SGX (1 << 2) +#define bit_BMI (1 << 3) +#define bit_HLE (1 << 4) #define bit_AVX2 (1 << 5) #define bit_BMI2 (1 << 8) -#define bit_RTM (1 << 11) +#define bit_RTM (1 << 11) #define bit_AVX512F (1 << 16) #define bit_AVX512DQ (1 << 17) #define bit_RDSEED (1 << 18) -#define bit_ADX (1 << 19) +#define bit_ADX (1 << 19) #define bit_AVX512IFMA (1 << 21) #define bit_CLFLUSHOPT (1 << 23) #define bit_CLWB (1 << 24) @@ -108,40 +96,55 @@ #define bit_AVX512VL (1u << 31) /* %ecx */ -#define bit_PREFETCHWT1 (1 << 0) +#define bit_PREFETCHWT1 (1 << 0) #define bit_AVX512VBMI (1 << 1) -#define bit_PKU (1 << 3) +#define bit_PKU (1 << 3) #define bit_OSPKE (1 << 4) #define bit_WAITPKG (1 << 5) #define bit_AVX512VBMI2 (1 << 6) #define bit_SHSTK (1 << 7) #define bit_GFNI (1 << 8) #define bit_VAES (1 << 9) -#define bit_AVX512VNNI (1 << 11) #define bit_VPCLMULQDQ (1 << 10) +#define bit_AVX512VNNI (1 << 11) #define bit_AVX512BITALG 
(1 << 12) #define bit_AVX512VPOPCNTDQ (1 << 14) #define bit_RDPID (1 << 22) +#define bit_KL (1 << 23) +#define bit_CLDEMOTE (1 << 25) #define bit_MOVDIRI (1 << 27) #define bit_MOVDIR64B (1 << 28) #define bit_ENQCMD (1 << 29) -#define bit_CLDEMOTE (1 << 25) -#define bit_KL (1 << 23) /* %edx */ -#define bit_AVX5124VNNIW (1 << 2) -#define bit_AVX5124FMAPS (1 << 3) +#define bit_AVX5124VNNIW (1 << 2) +#define bit_AVX5124FMAPS (1 << 3) +#define bit_UINTR (1 << 5) #define bit_AVX512VP2INTERSECT (1 << 8) -#define bit_AVX512FP16 (1 << 23) -#define bit_IBT (1 << 20) -#define bit_UINTR (1 << 5) -#define bit_PCONFIG (1 << 18) #define bit_SERIALIZE (1 << 14) #define bit_TSXLDTRK (1 << 16) +#define bit_PCONFIG (1 << 18) +#define bit_IBT (1 << 20) #define bit_AMX_BF16 (1 << 22) +#define bit_AVX512FP16 (1 << 23) #define bit_AMX_TILE (1 << 24) #define bit_AMX_INT8 (1 << 25) + +/* Extended Features Sub-leaf (%eax == 7, %ecx == 1) */ +/* %eax */ +#define bit_RAOINT (1 << 3) +#define bit_AVXVNNI (1 << 4) +#define bit_AVX512BF16 (1 << 5) +#define bit_CMPCCXADD (1 << 7) #define bit_AMX_COMPLEX (1 << 8) +#define bit_AMX_FP16 (1 << 21) +#define bit_HRESET (1 << 22) +#define bit_AVXIFMA (1 << 23) + +/* %edx */ +#define bit_AVXVNNIINT8 (1 << 4) +#define bit_AVXNECONVERT (1 << 5) +#define bit_PREFETCHI (1 << 14) /* Extended State Enumeration Sub-leaf (%eax == 0xd, %ecx == 1) */ #define bit_XSAVEOPT (1 << 0) -- cgit v1.1 From 5ebdbdb9cfa7a378bf655d92c9fc6c3f6eda8425 Mon Sep 17 00:00:00 2001 From: Haochen Jiang Date: Thu, 15 Dec 2022 11:10:16 +0800 Subject: i386: Use macro to wrap up share builtin exceptions in builtin isa check gcc/ChangeLog: * config/i386/i386-expand.cc (ix86_check_builtin_isa_match): Correct wrong comments. Add a new macro SHARE_BUILTIN and refactor the current if clauses to macro. --- gcc/config/i386/i386-expand.cc | 72 ++++++++++++++---------------------------- 1 file changed, 24 insertions(+), 48 deletions(-) (limited to 'gcc') diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 9fa549c..f692ddc 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -12588,6 +12588,7 @@ ix86_check_builtin_isa_match (unsigned int fcode, HOST_WIDE_INT isa2 = ix86_isa_flags2; HOST_WIDE_INT bisa = ix86_builtins_isa[fcode].isa; HOST_WIDE_INT bisa2 = ix86_builtins_isa[fcode].isa2; + HOST_WIDE_INT tmp_isa = isa, tmp_isa2 = isa2; /* The general case is we require all the ISAs specified in bisa{,2} to be enabled. The exceptions are: @@ -12596,60 +12597,35 @@ ix86_check_builtin_isa_match (unsigned int fcode, OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4 (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL) or OPTION_MASK_ISA2_AVXVNNI - (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512IFMA) or + (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL) or OPTION_MASK_ISA2_AVXIFMA - (OPTION_MASK_ISA_AVXNECONVERT | OPTION_MASK_ISA2_AVX512BF16) or + (OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA2_AVX512BF16) or OPTION_MASK_ISA2_AVXNECONVERT where for each such pair it is sufficient if either of the ISAs is enabled, plus if it is ored with other options also those others. OPTION_MASK_ISA_MMX in bisa is satisfied also if TARGET_MMX_WITH_SSE. 
*/ - if (((bisa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A)) - == (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A)) - && (isa & (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A)) != 0) - isa |= (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A); - if (((bisa & (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32)) - == (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32)) - && (isa & (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32)) != 0) - isa |= (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32); - - if (((bisa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) - == (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) - && (isa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) != 0) - isa |= (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4); - - if ((((bisa & (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL)) - == (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL)) - || (bisa2 & OPTION_MASK_ISA2_AVXVNNI) != 0) - && (((isa & (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL)) - == (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL)) - || (isa2 & OPTION_MASK_ISA2_AVXVNNI) != 0)) - { - isa |= OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL; - isa2 |= OPTION_MASK_ISA2_AVXVNNI; - } - - if ((((bisa & (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL)) - == (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL)) - || (bisa2 & OPTION_MASK_ISA2_AVXIFMA) != 0) - && (((isa & (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL)) - == (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL)) - || (isa2 & OPTION_MASK_ISA2_AVXIFMA) != 0)) - { - isa |= OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL; - isa2 |= OPTION_MASK_ISA2_AVXIFMA; - } - - if ((((bisa & OPTION_MASK_ISA_AVX512VL) != 0 - && (bisa2 & OPTION_MASK_ISA2_AVX512BF16) != 0) - && (bisa2 & OPTION_MASK_ISA2_AVXNECONVERT) != 0) - && (((isa & OPTION_MASK_ISA_AVX512VL) != 0 - && (isa2 & OPTION_MASK_ISA2_AVX512BF16) != 0) - || (isa2 & OPTION_MASK_ISA2_AVXNECONVERT) != 0)) - { - isa |= OPTION_MASK_ISA_AVX512VL; - isa2 |= OPTION_MASK_ISA2_AVXNECONVERT | OPTION_MASK_ISA2_AVX512BF16; - } +#define SHARE_BUILTIN(A1, A2, B1, B2) \ + if ((((bisa & (A1)) == (A1) && (bisa2 & (A2)) == (A2)) \ + && ((bisa & (B1)) == (B1) && (bisa2 & (B2)) == (B2))) \ + && (((isa & (A1)) == (A1) && (isa2 & (A2)) == (A2)) \ + || ((isa & (B1)) == (B1) && (isa2 & (B2)) == (B2)))) \ + { \ + tmp_isa |= (A1) | (B1); \ + tmp_isa2 |= (A2) | (B2); \ + } + + SHARE_BUILTIN (OPTION_MASK_ISA_SSE, 0, OPTION_MASK_ISA_3DNOW_A, 0); + SHARE_BUILTIN (OPTION_MASK_ISA_SSE4_2, 0, OPTION_MASK_ISA_CRC32, 0); + SHARE_BUILTIN (OPTION_MASK_ISA_FMA, 0, OPTION_MASK_ISA_FMA4, 0); + SHARE_BUILTIN (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, 0, + OPTION_MASK_ISA2_AVXVNNI); + SHARE_BUILTIN (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, 0, + OPTION_MASK_ISA2_AVXIFMA); + SHARE_BUILTIN (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, 0, + OPTION_MASK_ISA2_AVXNECONVERT); + isa = tmp_isa; + isa2 = tmp_isa2; if ((bisa & OPTION_MASK_ISA_MMX) && !TARGET_MMX && TARGET_MMX_WITH_SSE /* __builtin_ia32_maskmovq requires MMX registers. */ -- cgit v1.1 From d08b0559fdeec7e56566500e9d1356da73376249 Mon Sep 17 00:00:00 2001 From: Haochen Jiang Date: Thu, 5 Jan 2023 14:58:14 +0800 Subject: i386: Add AVX512BW dependency to AVX512BITALG Since some of the AVX512BITALG intrins use 32/64 bit mask, AVX512BW should be implied. gcc/ChangeLog: * common/config/i386/i386-common.cc (OPTION_MASK_ISA_AVX512BITALG_SET): Change OPTION_MASK_ISA_AVX512F_SET to OPTION_MASK_ISA_AVX512BW_SET. 
(OPTION_MASK_ISA_AVX512F_UNSET): Remove OPTION_MASK_ISA_AVX512BITALG_SET. (OPTION_MASK_ISA_AVX512BW_UNSET): Add OPTION_MASK_ISA_AVX512BITALG_SET. * config/i386/avx512bitalgintrin.h: Do not push avx512bw. * config/i386/i386-builtin.def: Remove redundant OPTION_MASK_ISA_AVX512BW. * config/i386/sse.md (VI1_AVX512VLBW): Removed. (avx512vl_vpshufbitqmb): Change the iterator from VI1_AVX512VLBW to VI1_AVX512VL. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512bitalg-vpopcntb-1.c: Remove avx512bw. * gcc.target/i386/avx512bitalg-vpopcntb.c: Ditto. * gcc.target/i386/avx512bitalg-vpopcntbvl.c: Ditto. * gcc.target/i386/avx512bitalg-vpopcntw-1.c: Ditto. * gcc.target/i386/avx512bitalg-vpopcntw.c: Ditto. * gcc.target/i386/avx512bitalg-vpopcntwvl.c: Ditto. * gcc.target/i386/avx512bitalg-vpshufbitqmb-1.c: Ditto. * gcc.target/i386/avx512bitalg-vpshufbitqmb.c: Ditto. * gcc.target/i386/avx512bitalgvl-vpopcntb-1.c: Ditto. * gcc.target/i386/avx512bitalgvl-vpopcntw-1.c: Ditto. * gcc.target/i386/avx512bitalgvl-vpshufbitqmb-1.c: Ditto. * gcc.target/i386/pr93696-1.c: Ditto. * gcc.target/i386/pr93696-2.c: Ditto. --- gcc/common/config/i386/i386-common.cc | 8 ++--- gcc/config/i386/avx512bitalgintrin.h | 39 +++++----------------- gcc/config/i386/i386-builtin.def | 10 +++--- gcc/config/i386/sse.md | 8 ++--- .../gcc.target/i386/avx512bitalg-vpopcntb-1.c | 3 +- .../gcc.target/i386/avx512bitalg-vpopcntb.c | 2 +- .../gcc.target/i386/avx512bitalg-vpopcntbvl.c | 2 +- .../gcc.target/i386/avx512bitalg-vpopcntw-1.c | 3 +- .../gcc.target/i386/avx512bitalg-vpopcntw.c | 2 +- .../gcc.target/i386/avx512bitalg-vpopcntwvl.c | 2 +- .../gcc.target/i386/avx512bitalg-vpshufbitqmb-1.c | 2 +- .../gcc.target/i386/avx512bitalg-vpshufbitqmb.c | 2 +- .../gcc.target/i386/avx512bitalgvl-vpopcntb-1.c | 3 +- .../gcc.target/i386/avx512bitalgvl-vpopcntw-1.c | 3 +- .../i386/avx512bitalgvl-vpshufbitqmb-1.c | 2 +- gcc/testsuite/gcc.target/i386/pr93696-1.c | 2 +- gcc/testsuite/gcc.target/i386/pr93696-2.c | 2 +- 17 files changed, 32 insertions(+), 63 deletions(-) (limited to 'gcc') diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc index d90c558..f78fc0a 100644 --- a/gcc/common/config/i386/i386-common.cc +++ b/gcc/common/config/i386/i386-common.cc @@ -91,7 +91,7 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA_AVX512VPOPCNTDQ_SET \ (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512F_SET) #define OPTION_MASK_ISA_AVX512BITALG_SET \ - (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512F_SET) + (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512BW_SET) #define OPTION_MASK_ISA2_AVX512BF16_SET OPTION_MASK_ISA2_AVX512BF16 #define OPTION_MASK_ISA_RTM_SET OPTION_MASK_ISA_RTM #define OPTION_MASK_ISA_PRFCHW_SET OPTION_MASK_ISA_PRFCHW @@ -234,14 +234,14 @@ along with GCC; see the file COPYING3. 
If not see | OPTION_MASK_ISA_AVX512VL_UNSET | OPTION_MASK_ISA_AVX512IFMA_UNSET \ | OPTION_MASK_ISA_AVX512VBMI2_UNSET \ | OPTION_MASK_ISA_AVX512VNNI_UNSET \ - | OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET \ - | OPTION_MASK_ISA_AVX512BITALG_UNSET) + | OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET) #define OPTION_MASK_ISA_AVX512CD_UNSET OPTION_MASK_ISA_AVX512CD #define OPTION_MASK_ISA_AVX512PF_UNSET OPTION_MASK_ISA_AVX512PF #define OPTION_MASK_ISA_AVX512ER_UNSET OPTION_MASK_ISA_AVX512ER #define OPTION_MASK_ISA_AVX512DQ_UNSET OPTION_MASK_ISA_AVX512DQ #define OPTION_MASK_ISA_AVX512BW_UNSET \ - (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VBMI_UNSET) + (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VBMI_UNSET \ + | OPTION_MASK_ISA_AVX512BITALG_UNSET) #define OPTION_MASK_ISA_AVX512VL_UNSET OPTION_MASK_ISA_AVX512VL #define OPTION_MASK_ISA_AVX512IFMA_UNSET OPTION_MASK_ISA_AVX512IFMA #define OPTION_MASK_ISA2_AVXIFMA_UNSET OPTION_MASK_ISA2_AVXIFMA diff --git a/gcc/config/i386/avx512bitalgintrin.h b/gcc/config/i386/avx512bitalgintrin.h index aa6d652..a1c7be1 100644 --- a/gcc/config/i386/avx512bitalgintrin.h +++ b/gcc/config/i386/avx512bitalgintrin.h @@ -48,17 +48,6 @@ _mm512_popcnt_epi16 (__m512i __A) return (__m512i) __builtin_ia32_vpopcountw_v32hi ((__v32hi) __A); } -#ifdef __DISABLE_AVX512BITALG__ -#undef __DISABLE_AVX512BITALG__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX512BITALG__ */ - -#if !defined(__AVX512BITALG__) || !defined(__AVX512BW__) -#pragma GCC push_options -#pragma GCC target("avx512bitalg,avx512bw") -#define __DISABLE_AVX512BITALGBW__ -#endif /* __AVX512VLBW__ */ - extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_popcnt_epi8 (__m512i __W, __mmask64 __U, __m512i __A) @@ -114,16 +103,16 @@ _mm512_mask_bitshuffle_epi64_mask (__mmask64 __M, __m512i __A, __m512i __B) (__mmask64) __M); } -#ifdef __DISABLE_AVX512BITALGBW__ -#undef __DISABLE_AVX512BITALGBW__ +#ifdef __DISABLE_AVX512BITALG__ +#undef __DISABLE_AVX512BITALG__ #pragma GCC pop_options -#endif /* __DISABLE_AVX512BITALGBW__ */ +#endif /* __DISABLE_AVX512BITALG__ */ -#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) || !defined(__AVX512BW__) +#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) #pragma GCC push_options -#pragma GCC target("avx512bitalg,avx512vl,avx512bw") -#define __DISABLE_AVX512BITALGVLBW__ -#endif /* __AVX512VLBW__ */ +#pragma GCC target("avx512bitalg,avx512vl") +#define __DISABLE_AVX512BITALGVL__ +#endif /* __AVX512BITALGVL__ */ extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -162,18 +151,6 @@ _mm256_mask_bitshuffle_epi64_mask (__mmask32 __M, __m256i __A, __m256i __B) (__mmask32) __M); } -#ifdef __DISABLE_AVX512BITALGVLBW__ -#undef __DISABLE_AVX512BITALGVLBW__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX512BITALGVLBW__ */ - - -#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) -#pragma GCC push_options -#pragma GCC target("avx512bitalg,avx512vl") -#define __DISABLE_AVX512BITALGVL__ -#endif /* __AVX512VLBW__ */ - extern __inline __mmask16 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_bitshuffle_epi64_mask (__m128i __A, __m128i __B) @@ -278,6 +255,6 @@ _mm_maskz_popcnt_epi16 (__mmask8 __U, __m128i __A) #ifdef __DISABLE_AVX512BITALGVL__ #undef __DISABLE_AVX512BITALGVL__ #pragma GCC pop_options -#endif /* __DISABLE_AVX512BITALGBW__ */ +#endif /* __DISABLE_AVX512BITALGVL__ */ #endif /* _AVX512BITALGINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/i386-builtin.def 
b/gcc/config/i386/i386-builtin.def index f7cf105..4134183 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -2762,21 +2762,21 @@ BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_v /* BITALG */ BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_vpopcountv64qi, "__builtin_ia32_vpopcountb_v64qi", IX86_BUILTIN_VPOPCOUNTBV64QI, UNKNOWN, (int) V64QI_FTYPE_V64QI) -BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpopcountv64qi_mask, "__builtin_ia32_vpopcountb_v64qi_mask", IX86_BUILTIN_VPOPCOUNTBV64QI_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_vpopcountv64qi_mask, "__builtin_ia32_vpopcountb_v64qi_mask", IX86_BUILTIN_VPOPCOUNTBV64QI_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv32qi, "__builtin_ia32_vpopcountb_v32qi", IX86_BUILTIN_VPOPCOUNTBV32QI, UNKNOWN, (int) V32QI_FTYPE_V32QI) -BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpopcountv32qi_mask, "__builtin_ia32_vpopcountb_v32qi_mask", IX86_BUILTIN_VPOPCOUNTBV32QI_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI) +BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv32qi_mask, "__builtin_ia32_vpopcountb_v32qi_mask", IX86_BUILTIN_VPOPCOUNTBV32QI_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv16qi, "__builtin_ia32_vpopcountb_v16qi", IX86_BUILTIN_VPOPCOUNTBV16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv16qi_mask, "__builtin_ia32_vpopcountb_v16qi_mask", IX86_BUILTIN_VPOPCOUNTBV16QI_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI) BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_vpopcountv32hi, "__builtin_ia32_vpopcountw_v32hi", IX86_BUILTIN_VPOPCOUNTWV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI) -BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpopcountv32hi_mask, "__builtin_ia32_vpopcountw_v32hi_mask", IX86_BUILTIN_VPOPCOUNTQV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_vpopcountv32hi_mask, "__builtin_ia32_vpopcountw_v32hi_mask", IX86_BUILTIN_VPOPCOUNTQV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv16hi, "__builtin_ia32_vpopcountw_v16hi", IX86_BUILTIN_VPOPCOUNTWV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv16hi_mask, "__builtin_ia32_vpopcountw_v16hi_mask", IX86_BUILTIN_VPOPCOUNTQV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv8hi, "__builtin_ia32_vpopcountw_v8hi", IX86_BUILTIN_VPOPCOUNTWV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpopcountv8hi_mask, "__builtin_ia32_vpopcountw_v8hi_mask", IX86_BUILTIN_VPOPCOUNTQV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI) -BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512vl_vpshufbitqmbv64qi_mask, "__builtin_ia32_vpshufbitqmb512_mask", IX86_BUILTIN_VPSHUFBITQMB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL 
| OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512vl_vpshufbitqmbv32qi_mask, "__builtin_ia32_vpshufbitqmb256_mask", IX86_BUILTIN_VPSHUFBITQMB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, 0, CODE_FOR_avx512vl_vpshufbitqmbv64qi_mask, "__builtin_ia32_vpshufbitqmb512_mask", IX86_BUILTIN_VPSHUFBITQMB512_MASK, UNKNOWN, (int) UDI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vpshufbitqmbv32qi_mask, "__builtin_ia32_vpshufbitqmb256_mask", IX86_BUILTIN_VPSHUFBITQMB256_MASK, UNKNOWN, (int) USI_FTYPE_V32QI_V32QI_USI) BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vpshufbitqmbv16qi_mask, "__builtin_ia32_vpshufbitqmb128_mask", IX86_BUILTIN_VPSHUFBITQMB128_MASK, UNKNOWN, (int) UHI_FTYPE_V16QI_V16QI_UHI) /* AVX512_4FMAPS and AVX512_4VNNIW builtins with variable number of arguments. Defined in additional ix86_isa_flags2. */ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index b0d9c02..260dfa1 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -685,10 +685,6 @@ (define_mode_iterator VF4_128_8_256 [V4DF V4SF]) -(define_mode_iterator VI1_AVX512VLBW - [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL") - (V16QI "TARGET_AVX512VL")]) - (define_mode_attr avx512 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw") (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw") @@ -28854,8 +28850,8 @@ (define_insn "avx512vl_vpshufbitqmb" [(set (match_operand: 0 "register_operand" "=k") (unspec: - [(match_operand:VI1_AVX512VLBW 1 "register_operand" "v") - (match_operand:VI1_AVX512VLBW 2 "nonimmediate_operand" "vm")] + [(match_operand:VI1_AVX512VL 1 "register_operand" "v") + (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_VPSHUFBIT))] "TARGET_AVX512BITALG" "vpshufbitqmb\t{%2, %1, %0|%0, %1, %2}" diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb-1.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb-1.c index 697757b..93afe13 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb-1.c @@ -1,7 +1,6 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512bitalg -mavx512bw" } */ +/* { dg-options "-O2 -mavx512bitalg" } */ /* { dg-require-effective-target avx512bitalg } */ -/* { dg-require-effective-target avx512bw } */ #define AVX512BITALG #define SIZE (AVX512F_LEN / 8) diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb.c index 246f925..44b82c0 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb.c +++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mavx512bitalg -mavx512bw" } */ +/* { dg-options "-O2 -mavx512bitalg" } */ /* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntbvl.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntbvl.c index 8c7f45f..8c2dfab 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntbvl.c +++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntbvl.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 
-mavx512bitalg -mavx512bw -mavx512vl" } */ +/* { dg-options "-O2 -mavx512bitalg -mavx512vl" } */ /* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw-1.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw-1.c index 0a725fe..93e2be2 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw-1.c @@ -1,7 +1,6 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512bitalg -mavx512bw" } */ +/* { dg-options "-O2 -mavx512bitalg" } */ /* { dg-require-effective-target avx512bitalg } */ -/* { dg-require-effective-target avx512bw } */ #define AVX512BITALG #define SIZE (AVX512F_LEN / 16) diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw.c index 90663f4..2ef8589 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw.c +++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mavx512bitalg -mavx512bw" } */ +/* { dg-options "-O2 -mavx512bitalg" } */ /* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntwvl.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntwvl.c index 3a646b5..c976461 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntwvl.c +++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntwvl.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mavx512bitalg -mavx512bw -mavx512vl" } */ +/* { dg-options "-O2 -mavx512bitalg -mavx512vl" } */ /* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpshufbitqmb-1.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpshufbitqmb-1.c index 668064a..5e88148 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpshufbitqmb-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpshufbitqmb-1.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512bitalg -mavx512f -mavx512bw" } */ +/* { dg-options "-O2 -mavx512bitalg" } */ /* { dg-require-effective-target avx512bitalg } */ #define AVX512BITALG diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpshufbitqmb.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpshufbitqmb.c index 7acb0c2..75fbef8 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpshufbitqmb.c +++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpshufbitqmb.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512bitalg -mavx512vl -mavx512bw -O2" } */ +/* { dg-options "-mavx512bitalg -mavx512vl -O2" } */ /* { dg-final { scan-assembler-times 
"vpshufbitqmb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%k\[0-7\]\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpshufbitqmb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%k\[0-7\]\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpshufbitqmb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*%k\[0-7\]\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpopcntb-1.c b/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpopcntb-1.c index 607ec3f..a4e9d63 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpopcntb-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpopcntb-1.c @@ -1,8 +1,7 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512vl -mavx512bitalg -mavx512bw" } */ +/* { dg-options "-O2 -mavx512vl -mavx512bitalg" } */ /* { dg-require-effective-target avx512vl } */ /* { dg-require-effective-target avx512bitalg } */ -/* { dg-require-effective-target avx512bw } */ #define AVX512VL #define AVX512F_LEN 256 diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpopcntw-1.c b/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpopcntw-1.c index 3d7e2b5..55fa811 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpopcntw-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpopcntw-1.c @@ -1,8 +1,7 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512vl -mavx512bitalg -mavx512bw" } */ +/* { dg-options "-O2 -mavx512vl -mavx512bitalg" } */ /* { dg-require-effective-target avx512vl } */ /* { dg-require-effective-target avx512bitalg } */ -/* { dg-require-effective-target avx512bw } */ #define AVX512VL #define AVX512F_LEN 256 diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpshufbitqmb-1.c b/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpshufbitqmb-1.c index 76598c4..497e369 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpshufbitqmb-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpshufbitqmb-1.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512vl -mavx512bitalg -mavx512bw" } */ +/* { dg-options "-O2 -mavx512vl -mavx512bitalg" } */ /* { dg-require-effective-target avx512vl } */ /* { dg-require-effective-target avx512bitalg } */ diff --git a/gcc/testsuite/gcc.target/i386/pr93696-1.c b/gcc/testsuite/gcc.target/i386/pr93696-1.c index 128bb98..70f0f8a 100644 --- a/gcc/testsuite/gcc.target/i386/pr93696-1.c +++ b/gcc/testsuite/gcc.target/i386/pr93696-1.c @@ -1,6 +1,6 @@ /* PR target/93696 */ /* { dg-do compile } */ -/* { dg-options "-O2 -mavx512bitalg -mavx512vpopcntdq -mavx512vl -mavx512bw -masm=att" } */ +/* { dg-options "-O2 -mavx512bitalg -mavx512vpopcntdq -mavx512vl -masm=att" } */ /* { dg-final { scan-assembler-times "vpopcnt\[bwdq]\t%\[xyz]mm1, %\[xyz]mm0\{%k\[0-7]\}\[^\{]" 12 } } */ /* { dg-final { scan-assembler-not "vmovdq\[au]\[0-9]" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr93696-2.c b/gcc/testsuite/gcc.target/i386/pr93696-2.c index 25a298a..e6aabd4 100644 --- a/gcc/testsuite/gcc.target/i386/pr93696-2.c +++ b/gcc/testsuite/gcc.target/i386/pr93696-2.c @@ -1,6 +1,6 @@ /* PR target/93696 */ /* { dg-do compile } */ -/* { dg-options "-O2 -mavx512bitalg -mavx512vpopcntdq -mavx512vl -mavx512bw -masm=att" } */ +/* { dg-options "-O2 -mavx512bitalg -mavx512vpopcntdq -mavx512vl -masm=att" } */ /* { dg-final { scan-assembler-times "vpopcnt\[bwdq]\t%\[xyz]mm1, %\[xyz]mm0\{%k\[0-7]\}\{z\}" 12 } } */ /* { dg-final { scan-assembler-not "vmovdq\[au]\[0-9]" } } */ -- cgit v1.1 From 4fb12ae93ddf6dea9a30041cecc94911d7863556 Mon Sep 17 00:00:00 2001 From: 
Haochen Jiang Date: Mon, 9 Jan 2023 16:41:17 +0800 Subject: i386: Add AVX512BW dependency to AVX512VBMI2 gcc/ChangeLog: * common/config/i386/i386-common.cc (OPTION_MASK_ISA_AVX512VBMI2_SET): Change OPTION_MASK_ISA_AVX512F_SET to OPTION_MASK_ISA_AVX512BW_SET. (OPTION_MASK_ISA_AVX512F_UNSET): Remove OPTION_MASK_ISA_AVX512VBMI2_UNSET. (OPTION_MASK_ISA_AVX512BW_UNSET): Add OPTION_MASK_ISA_AVX512VBMI2_UNSET. * config/i386/avx512vbmi2intrin.h: Do not push avx512bw. * config/i386/avx512vbmi2vlintrin.h: Ditto. * config/i386/i386-builtin.def: Remove OPTION_MASK_ISA_AVX512BW. * config/i386/sse.md (VI12_AVX512VLBW): Removed. (VI12_VI48F_AVX512VLBW): Rename to VI12_VI48F_AVX512VL. (compress_mask): Change iterator from VI12_AVX512VLBW to VI12_AVX512VL. (compressstore_mask): Ditto. (expand_mask): Ditto. (expand_maskz): Ditto. (*expand_mask): Change iterator from VI12_VI48F_AVX512VLBW to VI12_VI48F_AVX512VL. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512bw-pr100267-1.c: Remove avx512f and avx512bw. * gcc.target/i386/avx512bw-pr100267-b-2.c: Ditto. * gcc.target/i386/avx512bw-pr100267-d-2.c: Ditto. * gcc.target/i386/avx512bw-pr100267-q-2.c: Ditto. * gcc.target/i386/avx512bw-pr100267-w-2.c: Ditto. * gcc.target/i386/avx512f-vpcompressb-1.c: Ditto. * gcc.target/i386/avx512f-vpcompressb-2.c: Ditto. * gcc.target/i386/avx512f-vpcompressw-1.c: Ditto. * gcc.target/i386/avx512f-vpcompressw-2.c: Ditto. * gcc.target/i386/avx512f-vpexpandb-1.c: Ditto. * gcc.target/i386/avx512f-vpexpandb-2.c: Ditto. * gcc.target/i386/avx512f-vpexpandw-1.c: Ditto. * gcc.target/i386/avx512f-vpexpandw-2.c: Ditto. * gcc.target/i386/avx512f-vpshld-1.c: Ditto. * gcc.target/i386/avx512f-vpshldd-2.c: Ditto. * gcc.target/i386/avx512f-vpshldq-2.c: Ditto. * gcc.target/i386/avx512f-vpshldv-1.c: Ditto. * gcc.target/i386/avx512f-vpshldvd-2.c: Ditto. * gcc.target/i386/avx512f-vpshldvq-2.c: Ditto. * gcc.target/i386/avx512f-vpshldvw-2.c: Ditto. * gcc.target/i386/avx512f-vpshrdd-2.c: Ditto. * gcc.target/i386/avx512f-vpshrdq-2.c: Ditto. * gcc.target/i386/avx512f-vpshrdv-1.c: Ditto. * gcc.target/i386/avx512f-vpshrdvd-2.c: Ditto. * gcc.target/i386/avx512f-vpshrdvq-2.c: Ditto. * gcc.target/i386/avx512f-vpshrdvw-2.c: Ditto. * gcc.target/i386/avx512f-vpshrdw-2.c: Ditto. * gcc.target/i386/avx512vbmi2-vpshld-1.c: Ditto. * gcc.target/i386/avx512vbmi2-vpshrd-1.c: Ditto. * gcc.target/i386/avx512vl-vpcompressb-1.c: Ditto. * gcc.target/i386/avx512vl-vpcompressb-2.c: Ditto. * gcc.target/i386/avx512vl-vpcompressw-2.c: Ditto. * gcc.target/i386/avx512vl-vpexpandb-1.c: Ditto. * gcc.target/i386/avx512vl-vpexpandb-2.c: Ditto. * gcc.target/i386/avx512vl-vpexpandw-1.c: Ditto. * gcc.target/i386/avx512vl-vpexpandw-2.c: Ditto. * gcc.target/i386/avx512vl-vpshldd-2.c: Ditto. * gcc.target/i386/avx512vl-vpshldq-2.c: Ditto. * gcc.target/i386/avx512vl-vpshldv-1.c: Ditto. * gcc.target/i386/avx512vl-vpshldvd-2.c: Ditto. * gcc.target/i386/avx512vl-vpshldvq-2.c: Ditto. * gcc.target/i386/avx512vl-vpshldvw-2.c: Ditto. * gcc.target/i386/avx512vl-vpshrdd-2.c: Ditto. * gcc.target/i386/avx512vl-vpshrdq-2.c: Ditto. * gcc.target/i386/avx512vl-vpshrdv-1.c: Ditto. * gcc.target/i386/avx512vl-vpshrdvd-2.c: Ditto. * gcc.target/i386/avx512vl-vpshrdvq-2.c: Ditto. * gcc.target/i386/avx512vl-vpshrdvw-2.c: Ditto. * gcc.target/i386/avx512vl-vpshrdw-2.c: Ditto. * gcc.target/i386/avx512vlbw-pr100267-1.c: Ditto. * gcc.target/i386/avx512vlbw-pr100267-b-2.c: Ditto. * gcc.target/i386/avx512vlbw-pr100267-w-2.c: Ditto. 
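As with the AVX512BITALG change above, the motivation is that the 512-bit byte/word VBMI2 intrinsics take __mmask32/__mmask64 arguments, and those wide mask types come from AVX512BW. A minimal sketch of code that should now build with -mavx512vbmi2 alone (illustrative only; the intrinsic signature is taken from avx512vbmi2intrin.h below, while the wrapper name compress_bytes is hypothetical):

#include <immintrin.h>

/* __mmask64 is a 64-bit byte mask and only exists with AVX512BW, so the
   512-bit compress form needs it; with the new implicit dependency,
   -mavx512vbmi2 by itself is sufficient to compile this.  */
__m512i
compress_bytes (__m512i src, __mmask64 keep, __m512i data)
{
  return _mm512_mask_compress_epi8 (src, keep, data);
}
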
--- gcc/common/config/i386/i386-common.cc | 5 +-- gcc/config/i386/avx512vbmi2intrin.h | 18 ++------ gcc/config/i386/avx512vbmi2vlintrin.h | 21 ++------- gcc/config/i386/i386-builtin.def | 48 ++++++++++---------- gcc/config/i386/sse.md | 51 ++++++++++------------ .../gcc.target/i386/avx512bw-pr100267-1.c | 2 +- .../gcc.target/i386/avx512bw-pr100267-b-2.c | 3 +- .../gcc.target/i386/avx512bw-pr100267-d-2.c | 3 +- .../gcc.target/i386/avx512bw-pr100267-q-2.c | 3 +- .../gcc.target/i386/avx512bw-pr100267-w-2.c | 3 +- .../gcc.target/i386/avx512f-vpcompressb-1.c | 2 +- .../gcc.target/i386/avx512f-vpcompressb-2.c | 3 +- .../gcc.target/i386/avx512f-vpcompressw-1.c | 2 +- .../gcc.target/i386/avx512f-vpcompressw-2.c | 3 +- .../gcc.target/i386/avx512f-vpexpandb-1.c | 2 +- .../gcc.target/i386/avx512f-vpexpandb-2.c | 3 +- .../gcc.target/i386/avx512f-vpexpandw-1.c | 2 +- .../gcc.target/i386/avx512f-vpexpandw-2.c | 3 +- gcc/testsuite/gcc.target/i386/avx512f-vpshld-1.c | 2 +- gcc/testsuite/gcc.target/i386/avx512f-vpshldd-2.c | 3 +- gcc/testsuite/gcc.target/i386/avx512f-vpshldq-2.c | 3 +- gcc/testsuite/gcc.target/i386/avx512f-vpshldv-1.c | 2 +- gcc/testsuite/gcc.target/i386/avx512f-vpshldvd-2.c | 3 +- gcc/testsuite/gcc.target/i386/avx512f-vpshldvq-2.c | 3 +- gcc/testsuite/gcc.target/i386/avx512f-vpshldvw-2.c | 3 +- gcc/testsuite/gcc.target/i386/avx512f-vpshrdd-2.c | 3 +- gcc/testsuite/gcc.target/i386/avx512f-vpshrdq-2.c | 3 +- gcc/testsuite/gcc.target/i386/avx512f-vpshrdv-1.c | 2 +- gcc/testsuite/gcc.target/i386/avx512f-vpshrdvd-2.c | 3 +- gcc/testsuite/gcc.target/i386/avx512f-vpshrdvq-2.c | 3 +- gcc/testsuite/gcc.target/i386/avx512f-vpshrdvw-2.c | 3 +- gcc/testsuite/gcc.target/i386/avx512f-vpshrdw-2.c | 3 +- .../gcc.target/i386/avx512vbmi2-vpshld-1.c | 2 +- .../gcc.target/i386/avx512vbmi2-vpshrd-1.c | 2 +- .../gcc.target/i386/avx512vl-vpcompressb-1.c | 2 +- .../gcc.target/i386/avx512vl-vpcompressb-2.c | 2 +- .../gcc.target/i386/avx512vl-vpcompressw-2.c | 2 +- .../gcc.target/i386/avx512vl-vpexpandb-1.c | 2 +- .../gcc.target/i386/avx512vl-vpexpandb-2.c | 2 +- .../gcc.target/i386/avx512vl-vpexpandw-1.c | 2 +- .../gcc.target/i386/avx512vl-vpexpandw-2.c | 2 +- gcc/testsuite/gcc.target/i386/avx512vl-vpshldd-2.c | 2 +- gcc/testsuite/gcc.target/i386/avx512vl-vpshldq-2.c | 2 +- gcc/testsuite/gcc.target/i386/avx512vl-vpshldv-1.c | 2 +- .../gcc.target/i386/avx512vl-vpshldvd-2.c | 2 +- .../gcc.target/i386/avx512vl-vpshldvq-2.c | 2 +- .../gcc.target/i386/avx512vl-vpshldvw-2.c | 2 +- gcc/testsuite/gcc.target/i386/avx512vl-vpshrdd-2.c | 2 +- gcc/testsuite/gcc.target/i386/avx512vl-vpshrdq-2.c | 2 +- gcc/testsuite/gcc.target/i386/avx512vl-vpshrdv-1.c | 2 +- .../gcc.target/i386/avx512vl-vpshrdvd-2.c | 2 +- .../gcc.target/i386/avx512vl-vpshrdvq-2.c | 2 +- .../gcc.target/i386/avx512vl-vpshrdvw-2.c | 2 +- gcc/testsuite/gcc.target/i386/avx512vl-vpshrdw-2.c | 2 +- .../gcc.target/i386/avx512vlbw-pr100267-1.c | 2 +- .../gcc.target/i386/avx512vlbw-pr100267-b-2.c | 2 +- .../gcc.target/i386/avx512vlbw-pr100267-w-2.c | 2 +- 57 files changed, 106 insertions(+), 160 deletions(-) (limited to 'gcc') diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc index f78fc0a..315db85 100644 --- a/gcc/common/config/i386/i386-common.cc +++ b/gcc/common/config/i386/i386-common.cc @@ -82,7 +82,7 @@ along with GCC; see the file COPYING3. 
If not see #define OPTION_MASK_ISA2_AVX5124FMAPS_SET OPTION_MASK_ISA2_AVX5124FMAPS #define OPTION_MASK_ISA2_AVX5124VNNIW_SET OPTION_MASK_ISA2_AVX5124VNNIW #define OPTION_MASK_ISA_AVX512VBMI2_SET \ - (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512F_SET) + (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW_SET) #define OPTION_MASK_ISA_AVX512FP16_SET OPTION_MASK_ISA_AVX512BW_SET #define OPTION_MASK_ISA2_AVX512FP16_SET OPTION_MASK_ISA2_AVX512FP16 #define OPTION_MASK_ISA_AVX512VNNI_SET \ @@ -232,7 +232,6 @@ along with GCC; see the file COPYING3. If not see | OPTION_MASK_ISA_AVX512PF_UNSET | OPTION_MASK_ISA_AVX512ER_UNSET \ | OPTION_MASK_ISA_AVX512DQ_UNSET | OPTION_MASK_ISA_AVX512BW_UNSET \ | OPTION_MASK_ISA_AVX512VL_UNSET | OPTION_MASK_ISA_AVX512IFMA_UNSET \ - | OPTION_MASK_ISA_AVX512VBMI2_UNSET \ | OPTION_MASK_ISA_AVX512VNNI_UNSET \ | OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET) #define OPTION_MASK_ISA_AVX512CD_UNSET OPTION_MASK_ISA_AVX512CD @@ -241,7 +240,7 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA_AVX512DQ_UNSET OPTION_MASK_ISA_AVX512DQ #define OPTION_MASK_ISA_AVX512BW_UNSET \ (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VBMI_UNSET \ - | OPTION_MASK_ISA_AVX512BITALG_UNSET) + | OPTION_MASK_ISA_AVX512VBMI2_UNSET | OPTION_MASK_ISA_AVX512BITALG_UNSET) #define OPTION_MASK_ISA_AVX512VL_UNSET OPTION_MASK_ISA_AVX512VL #define OPTION_MASK_ISA_AVX512IFMA_UNSET OPTION_MASK_ISA_AVX512IFMA #define OPTION_MASK_ISA2_AVXIFMA_UNSET OPTION_MASK_ISA2_AVXIFMA diff --git a/gcc/config/i386/avx512vbmi2intrin.h b/gcc/config/i386/avx512vbmi2intrin.h index 528d193..ca00f8a 100644 --- a/gcc/config/i386/avx512vbmi2intrin.h +++ b/gcc/config/i386/avx512vbmi2intrin.h @@ -326,18 +326,6 @@ _mm512_maskz_shldv_epi64 (__mmask8 __A, __m512i __B, __m512i __C, __m512i __D) (__v8di) __D, (__mmask8)__A); } -#ifdef __DISABLE_AVX512VBMI2__ -#undef __DISABLE_AVX512VBMI2__ - -#pragma GCC pop_options -#endif /* __DISABLE_AVX512VBMI2__ */ - -#if !defined(__AVX512VBMI2__) || !defined(__AVX512BW__) -#pragma GCC push_options -#pragma GCC target("avx512vbmi2,avx512bw") -#define __DISABLE_AVX512VBMI2BW__ -#endif /* __AVX512VBMI2BW__ */ - extern __inline __m512i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_compress_epi8 (__m512i __A, __mmask64 __B, __m512i __C) @@ -548,10 +536,10 @@ _mm512_maskz_shldv_epi16 (__mmask32 __A, __m512i __B, __m512i __C, __m512i __D) (__v32hi) __C, (__v32hi) __D, (__mmask32)__A); } -#ifdef __DISABLE_AVX512VBMI2BW__ -#undef __DISABLE_AVX512VBMI2BW__ +#ifdef __DISABLE_AVX512VBMI2__ +#undef __DISABLE_AVX512VBMI2__ #pragma GCC pop_options -#endif /* __DISABLE_AVX512VBMI2BW__ */ +#endif /* __DISABLE_AVX512VBMI2__ */ #endif /* __AVX512VBMI2INTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx512vbmi2vlintrin.h b/gcc/config/i386/avx512vbmi2vlintrin.h index 86efca2..92cae8c 100644 --- a/gcc/config/i386/avx512vbmi2vlintrin.h +++ b/gcc/config/i386/avx512vbmi2vlintrin.h @@ -957,21 +957,6 @@ _mm_maskz_shldv_epi64 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) (__v2di) __D, (__mmask8)__A); } - - - -#ifdef __DISABLE_AVX512VBMI2VL__ -#undef __DISABLE_AVX512VBMI2VL__ -#pragma GCC pop_options -#endif /* __DISABLE_AVX512VBMIVL__ */ - -#if !defined(__AVX512VL__) || !defined(__AVX512VBMI2__) || \ - !defined(__AVX512BW__) -#pragma GCC push_options -#pragma GCC target("avx512vbmi2,avx512vl,avx512bw") -#define __DISABLE_AVX512VBMI2VLBW__ -#endif /* __AVX512VBMIVLBW__ */ - extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) _mm256_mask_compress_epi8 (__m256i __A, __mmask32 __B, __m256i __C) @@ -1029,9 +1014,9 @@ _mm256_maskz_expandloadu_epi8 (__mmask32 __A, const void * __B) (__v32qi) _mm256_setzero_si256 (), (__mmask32) __A); } -#ifdef __DISABLE_AVX512VBMI2VLBW__ -#undef __DISABLE_AVX512VBMI2VLBW__ +#ifdef __DISABLE_AVX512VBMI2VL__ +#undef __DISABLE_AVX512VBMI2VL__ #pragma GCC pop_options -#endif /* __DISABLE_AVX512VBMIVLBW__ */ +#endif /* __DISABLE_AVX512VBMIVL__ */ #endif /* _AVX512VBMIVLINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 4134183..f7b10a6a 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -430,20 +430,20 @@ BDESC (OPTION_MASK_ISA_PKU, 0, CODE_FOR_rdpkru, "__builtin_ia32_rdpkru", IX86_B BDESC (OPTION_MASK_ISA_PKU, 0, CODE_FOR_wrpkru, "__builtin_ia32_wrpkru", IX86_BUILTIN_WRPKRU, UNKNOWN, (int) VOID_FTYPE_UNSIGNED) /* VBMI2 */ -BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_compressstorev64qi_mask, "__builtin_ia32_compressstoreuqi512_mask", IX86_BUILTIN_PCOMPRESSBSTORE512, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_compressstorev32hi_mask, "__builtin_ia32_compressstoreuhi512_mask", IX86_BUILTIN_PCOMPRESSWSTORE512, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_compressstorev32qi_mask, "__builtin_ia32_compressstoreuqi256_mask", IX86_BUILTIN_PCOMPRESSBSTORE256, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_compressstorev64qi_mask, "__builtin_ia32_compressstoreuqi512_mask", IX86_BUILTIN_PCOMPRESSBSTORE512, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_compressstorev32hi_mask, "__builtin_ia32_compressstoreuhi512_mask", IX86_BUILTIN_PCOMPRESSWSTORE512, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressstorev32qi_mask, "__builtin_ia32_compressstoreuqi256_mask", IX86_BUILTIN_PCOMPRESSBSTORE256, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressstorev16qi_mask, "__builtin_ia32_compressstoreuqi128_mask", IX86_BUILTIN_PCOMPRESSBSTORE128, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressstorev16hi_mask, "__builtin_ia32_compressstoreuhi256_mask", IX86_BUILTIN_PCOMPRESSWSTORE256, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressstorev8hi_mask, "__builtin_ia32_compressstoreuhi128_mask", IX86_BUILTIN_PCOMPRESSWSTORE128, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_expandv64qi_mask, "__builtin_ia32_expandloadqi512_mask", IX86_BUILTIN_PEXPANDBLOAD512, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_expandv64qi_maskz, "__builtin_ia32_expandloadqi512_maskz", IX86_BUILTIN_PEXPANDBLOAD512Z, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_expandv32hi_mask, "__builtin_ia32_expandloadhi512_mask", IX86_BUILTIN_PEXPANDWLOAD512, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI) -BDESC 
(OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_expandv32hi_maskz, "__builtin_ia32_expandloadhi512_maskz", IX86_BUILTIN_PEXPANDWLOAD512Z, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv64qi_mask, "__builtin_ia32_expandloadqi512_mask", IX86_BUILTIN_PEXPANDBLOAD512, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv64qi_maskz, "__builtin_ia32_expandloadqi512_maskz", IX86_BUILTIN_PEXPANDBLOAD512Z, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv32hi_mask, "__builtin_ia32_expandloadhi512_mask", IX86_BUILTIN_PEXPANDWLOAD512, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv32hi_maskz, "__builtin_ia32_expandloadhi512_maskz", IX86_BUILTIN_PEXPANDWLOAD512Z, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_expandv32qi_mask, "__builtin_ia32_expandloadqi256_mask", IX86_BUILTIN_PEXPANDBLOAD256, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_expandv32qi_maskz, "__builtin_ia32_expandloadqi256_maskz", IX86_BUILTIN_PEXPANDBLOAD256Z, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv32qi_mask, "__builtin_ia32_expandloadqi256_mask", IX86_BUILTIN_PEXPANDBLOAD256, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv32qi_maskz, "__builtin_ia32_expandloadqi256_maskz", IX86_BUILTIN_PEXPANDBLOAD256Z, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv16hi_mask, "__builtin_ia32_expandloadhi256_mask", IX86_BUILTIN_PEXPANDWLOAD256, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv16hi_maskz, "__builtin_ia32_expandloadhi256_maskz", IX86_BUILTIN_PEXPANDWLOAD256Z, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_UHI) @@ -2553,18 +2553,18 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512 BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI) /* VBMI2 */ -BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_compressv64qi_mask, "__builtin_ia32_compressqi512_mask", IX86_BUILTIN_PCOMPRESSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_compressv32hi_mask, "__builtin_ia32_compresshi512_mask", IX86_BUILTIN_PCOMPRESSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_compressv32qi_mask, "__builtin_ia32_compressqi256_mask", IX86_BUILTIN_PCOMPRESSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_compressv64qi_mask, "__builtin_ia32_compressqi512_mask", IX86_BUILTIN_PCOMPRESSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_compressv32hi_mask, "__builtin_ia32_compresshi512_mask", IX86_BUILTIN_PCOMPRESSW512, 
UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressv32qi_mask, "__builtin_ia32_compressqi256_mask", IX86_BUILTIN_PCOMPRESSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressv16qi_mask, "__builtin_ia32_compressqi128_mask", IX86_BUILTIN_PCOMPRESSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressv16hi_mask, "__builtin_ia32_compresshi256_mask", IX86_BUILTIN_PCOMPRESSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_compressv8hi_mask, "__builtin_ia32_compresshi128_mask", IX86_BUILTIN_PCOMPRESSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_expandv64qi_mask, "__builtin_ia32_expandqi512_mask", IX86_BUILTIN_PEXPANDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_expandv64qi_maskz, "__builtin_ia32_expandqi512_maskz", IX86_BUILTIN_PEXPANDB512Z, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_expandv32hi_mask, "__builtin_ia32_expandhi512_mask", IX86_BUILTIN_PEXPANDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_expandv32hi_maskz, "__builtin_ia32_expandhi512_maskz", IX86_BUILTIN_PEXPANDW512Z, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_expandv32qi_mask, "__builtin_ia32_expandqi256_mask", IX86_BUILTIN_PEXPANDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_expandv32qi_maskz, "__builtin_ia32_expandqi256_maskz", IX86_BUILTIN_PEXPANDB256Z, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv64qi_mask, "__builtin_ia32_expandqi512_mask", IX86_BUILTIN_PEXPANDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv64qi_maskz, "__builtin_ia32_expandqi512_maskz", IX86_BUILTIN_PEXPANDB512Z, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv32hi_mask, "__builtin_ia32_expandhi512_mask", IX86_BUILTIN_PEXPANDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_expandv32hi_maskz, "__builtin_ia32_expandhi512_maskz", IX86_BUILTIN_PEXPANDW512Z, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv32qi_mask, "__builtin_ia32_expandqi256_mask", IX86_BUILTIN_PEXPANDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv32qi_maskz, "__builtin_ia32_expandqi256_maskz", IX86_BUILTIN_PEXPANDB256Z, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv16qi_mask, "__builtin_ia32_expandqi128_mask", IX86_BUILTIN_PEXPANDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv16qi_maskz, "__builtin_ia32_expandqi128_maskz", 
IX86_BUILTIN_PEXPANDB128Z, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv16hi_mask, "__builtin_ia32_expandhi256_mask", IX86_BUILTIN_PEXPANDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI) @@ -2572,7 +2572,7 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expan BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv8hi_mask, "__builtin_ia32_expandhi128_mask", IX86_BUILTIN_PEXPANDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv8hi_maskz, "__builtin_ia32_expandhi128_maskz", IX86_BUILTIN_PEXPANDW128Z, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v32hi, "__builtin_ia32_vpshrd_v32hi", IX86_BUILTIN_VPSHRDV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT) -BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshrd_v32hi_mask, "__builtin_ia32_vpshrd_v32hi_mask", IX86_BUILTIN_VPSHRDV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrd_v32hi_mask, "__builtin_ia32_vpshrd_v32hi_mask", IX86_BUILTIN_VPSHRDV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v16hi, "__builtin_ia32_vpshrd_v16hi", IX86_BUILTIN_VPSHRDV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v16hi_mask, "__builtin_ia32_vpshrd_v16hi_mask", IX86_BUILTIN_VPSHRDV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT_V16HI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v8hi, "__builtin_ia32_vpshrd_v8hi", IX86_BUILTIN_VPSHRDV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT) @@ -2590,7 +2590,7 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshr BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v2di, "__builtin_ia32_vpshrd_v2di", IX86_BUILTIN_VPSHRDV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrd_v2di_mask, "__builtin_ia32_vpshrd_v2di_mask", IX86_BUILTIN_VPSHRDV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v32hi, "__builtin_ia32_vpshld_v32hi", IX86_BUILTIN_VPSHLDV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT) -BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshld_v32hi_mask, "__builtin_ia32_vpshld_v32hi_mask", IX86_BUILTIN_VPSHLDV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshld_v32hi_mask, "__builtin_ia32_vpshld_v32hi_mask", IX86_BUILTIN_VPSHLDV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_INT_V32HI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v16hi, "__builtin_ia32_vpshld_v16hi", IX86_BUILTIN_VPSHLDV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v16hi_mask, "__builtin_ia32_vpshld_v16hi_mask", IX86_BUILTIN_VPSHLDV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT_V16HI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v8hi, "__builtin_ia32_vpshld_v8hi", IX86_BUILTIN_VPSHLDV8HI, UNKNOWN, (int) 
V8HI_FTYPE_V8HI_V8HI_INT) @@ -2609,8 +2609,8 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshl BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v2di_mask, "__builtin_ia32_vpshld_v2di_mask", IX86_BUILTIN_VPSHLDV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT) BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v32hi, "__builtin_ia32_vpshrdv_v32hi", IX86_BUILTIN_VPSHRDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshrdv_v32hi_mask, "__builtin_ia32_vpshrdv_v32hi_mask", IX86_BUILTIN_VPSHRDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshrdv_v32hi_maskz, "__builtin_ia32_vpshrdv_v32hi_maskz", IX86_BUILTIN_VPSHRDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v32hi_mask, "__builtin_ia32_vpshrdv_v32hi_mask", IX86_BUILTIN_VPSHRDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v32hi_maskz, "__builtin_ia32_vpshrdv_v32hi_maskz", IX86_BUILTIN_VPSHRDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi, "__builtin_ia32_vpshrdv_v16hi", IX86_BUILTIN_VPSHRDVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_mask, "__builtin_ia32_vpshrdv_v16hi_mask", IX86_BUILTIN_VPSHRDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_maskz, "__builtin_ia32_vpshrdv_v16hi_maskz", IX86_BUILTIN_VPSHRDVV16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI) @@ -2637,8 +2637,8 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshr BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_maskz, "__builtin_ia32_vpshrdv_v2di_maskz", IX86_BUILTIN_VPSHRDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v32hi, "__builtin_ia32_vpshldv_v32hi", IX86_BUILTIN_VPSHLDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshldv_v32hi_mask, "__builtin_ia32_vpshldv_v32hi_mask", IX86_BUILTIN_VPSHLDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) -BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshldv_v32hi_maskz, "__builtin_ia32_vpshldv_v32hi_maskz", IX86_BUILTIN_VPSHLDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v32hi_mask, "__builtin_ia32_vpshldv_v32hi_mask", IX86_BUILTIN_VPSHLDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v32hi_maskz, "__builtin_ia32_vpshldv_v32hi_maskz", IX86_BUILTIN_VPSHLDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi, "__builtin_ia32_vpshldv_v16hi", IX86_BUILTIN_VPSHLDVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_mask, 
"__builtin_ia32_vpshldv_v16hi_mask", IX86_BUILTIN_VPSHLDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI) BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_maskz, "__builtin_ia32_vpshldv_v16hi_maskz", IX86_BUILTIN_VPSHLDVV16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 260dfa1..b5236ad 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -274,12 +274,6 @@ V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL") V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")]) -;; Same iterator, but without supposed TARGET_AVX512BW -(define_mode_iterator VI12_AVX512VLBW - [(V64QI "TARGET_AVX512BW") (V16QI "TARGET_AVX512VL") - (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") (V32HI "TARGET_AVX512BW") - (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")]) - (define_mode_iterator VI1_AVX512VL [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")]) @@ -862,16 +856,15 @@ (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")]) -(define_mode_iterator VI12_VI48F_AVX512VLBW +(define_mode_iterator VI12_VI48F_AVX512VL [(V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F") (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL") - (V64QI "TARGET_AVX512BW") (V16QI "TARGET_AVX512VL") - (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") (V32HI "TARGET_AVX512BW") - (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")]) + V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL") + V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")]) (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF]) @@ -27454,10 +27447,10 @@ (set_attr "mode" "")]) (define_insn "compress_mask" - [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v") - (unspec:VI12_AVX512VLBW - [(match_operand:VI12_AVX512VLBW 1 "register_operand" "v") - (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand" "0C") + [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") + (unspec:VI12_AVX512VL + [(match_operand:VI12_AVX512VL 1 "register_operand" "v") + (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C") (match_operand: 3 "register_operand" "Yk")] UNSPEC_COMPRESS))] "TARGET_AVX512VBMI2" @@ -27481,9 +27474,9 @@ (set_attr "mode" "")]) (define_insn "compressstore_mask" - [(set (match_operand:VI12_AVX512VLBW 0 "memory_operand" "=m") - (unspec:VI12_AVX512VLBW - [(match_operand:VI12_AVX512VLBW 1 "register_operand" "x") + [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m") + (unspec:VI12_AVX512VL + [(match_operand:VI12_AVX512VL 1 "register_operand" "x") (match_dup 0) (match_operand: 2 "register_operand" "Yk")] UNSPEC_COMPRESS_STORE))] @@ -27519,10 +27512,10 @@ (set_attr "mode" "")]) (define_insn "expand_mask" - [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v,v") - (unspec:VI12_AVX512VLBW - [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand" "v,m") - (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand" "0C,0C") + [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v") + (unspec:VI12_AVX512VL + [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m") + (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C,0C") (match_operand: 3 "register_operand" "Yk,Yk")] UNSPEC_EXPAND))] 
"TARGET_AVX512VBMI2" @@ -27533,10 +27526,10 @@ (set_attr "mode" "")]) (define_insn_and_split "*expand_mask" - [(set (match_operand:VI12_VI48F_AVX512VLBW 0 "register_operand") - (unspec:VI12_VI48F_AVX512VLBW - [(match_operand:VI12_VI48F_AVX512VLBW 1 "nonimmediate_operand") - (match_operand:VI12_VI48F_AVX512VLBW 2 "nonimm_or_0_operand") + [(set (match_operand:VI12_VI48F_AVX512VL 0 "register_operand") + (unspec:VI12_VI48F_AVX512VL + [(match_operand:VI12_VI48F_AVX512VL 1 "nonimmediate_operand") + (match_operand:VI12_VI48F_AVX512VL 2 "nonimm_or_0_operand") (match_operand 3 "const_int_operand")] UNSPEC_EXPAND))] "ix86_pre_reload_split () @@ -27589,10 +27582,10 @@ }) (define_expand "expand_maskz" - [(set (match_operand:VI12_AVX512VLBW 0 "register_operand") - (unspec:VI12_AVX512VLBW - [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand") - (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand") + [(set (match_operand:VI12_AVX512VL 0 "register_operand") + (unspec:VI12_AVX512VL + [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand") + (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand") (match_operand: 3 "register_operand")] UNSPEC_EXPAND))] "TARGET_AVX512VBMI2" diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-1.c index ce83d63..33af0d9 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512bw -mavx512vbmi2 -O2" } */ +/* { dg-options "-mavx512vbmi2 -O2" } */ /* { dg-final { scan-assembler-times "vpexpandb\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpexpandb\[ \\t\]+\[^\{\n\(]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpexpandw\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-b-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-b-2.c index 424b485..161c217 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-b-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-b-2.c @@ -1,6 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512f -mavx512vbmi2 -mavx512bw" } */ -/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O2 -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vbmi2 } */ #define AVX512F diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-d-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-d-2.c index 24790b2..c7416da 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-d-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-d-2.c @@ -1,6 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512f -mavx512vbmi2 -mavx512bw" } */ -/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O2 -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vbmi2 } */ #define AVX512F diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-q-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-q-2.c index 119b50e..797ee90 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-q-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-q-2.c @@ -1,6 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512f -mavx512vbmi2 -mavx512bw" } */ -/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O2 -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vbmi2 } */ #define AVX512F diff --git 
a/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-w-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-w-2.c index 926e04d..94660f2 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-w-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-w-2.c @@ -1,6 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512f -mavx512vbmi2 -mavx512bw" } */ -/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O2 -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vbmi2 } */ #define AVX512F diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcompressb-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcompressb-1.c index c449d95..0ee8fe4 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpcompressb-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcompressb-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512f -mavx512vbmi2 -mavx512bw -O2" } */ +/* { dg-options "-mavx512vbmi2 -O2" } */ /* { dg-final { scan-assembler-times "vpcompressb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpcompressb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpcompressb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcompressb-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcompressb-2.c index 4f15963..773fce2 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpcompressb-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcompressb-2.c @@ -1,6 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512f -mavx512bw -mavx512vbmi2" } */ -/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O2 -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vbmi2 } */ #define AVX512F diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcompressw-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcompressw-1.c index 2da92a4..11f4ba4 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpcompressw-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcompressw-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512f -mavx512vbmi2 -mavx512bw -O2" } */ +/* { dg-options "-mavx512vbmi2 -O2" } */ /* { dg-final { scan-assembler-times "vpcompressw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpcompressw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpcompressw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpcompressw-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpcompressw-2.c index 20da539..45866b6 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpcompressw-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpcompressw-2.c @@ -1,6 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512f -mavx512bw -mavx512vbmi2" } */ -/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O2 -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vbmi2 } */ #define AVX512F diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpexpandb-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpexpandb-1.c index fb0c58e..ed96b53 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpexpandb-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpexpandb-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512vbmi2 -mavx512bw -mavx512f -O2" } */ +/* { dg-options 
"-mavx512vbmi2 -O2" } */ /* { dg-final { scan-assembler-times "vpexpandb\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpexpandb\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpexpandb\[ \\t\]+\[^\{\n\(]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpexpandb-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpexpandb-2.c index 0105ddb..88dc48c 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpexpandb-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpexpandb-2.c @@ -1,6 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512f -mavx512vbmi2 -mavx512bw" } */ -/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O2 -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vbmi2 } */ #define AVX512F diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpexpandw-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpexpandw-1.c index 49d9fb8..9f56881 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpexpandw-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpexpandw-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512vbmi2 -mavx512bw -mavx512f -O2" } */ +/* { dg-options "-mavx512vbmi2 -O2" } */ /* { dg-final { scan-assembler-times "vpexpandw\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpexpandw\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpexpandw\[ \\t\]+\[^\{\n\(]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpexpandw-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpexpandw-2.c index fdad38b..5c090a3 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpexpandw-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpexpandw-2.c @@ -1,6 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512f -mavx512vbmi2 -mavx512bw" } */ -/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O2 -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vbmi2 } */ #define AVX512F diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpshld-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpshld-1.c index f465ce2..f9c25008 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpshld-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpshld-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512f -mavx512vbmi2 -mavx512bw -O2" } */ +/* { dg-options "-mavx512vbmi2 -O2" } */ /* { dg-final { scan-assembler-times "vpshldw\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpshldw\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpshldw\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpshldd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpshldd-2.c index 5ddf493..4c700f1 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpshldd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpshldd-2.c @@ -1,6 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512f -mavx512vbmi2 -mavx512bw" } */ -/* { dg-require-effective-target 
avx512f } */ +/* { dg-options "-O2 -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vbmi2 } */ #define AVX512F diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpshldq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpshldq-2.c index 0377aaa..1d23759 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpshldq-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpshldq-2.c @@ -1,6 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512f -mavx512vbmi2 -mavx512bw" } */ -/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O2 -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vbmi2 } */ #define AVX512F diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpshldv-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpshldv-1.c index 3427b04..6b1dd16 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpshldv-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpshldv-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512f -mavx512vbmi2 -mavx512bw -O2" } */ +/* { dg-options "-mavx512vbmi2 -O2" } */ /* { dg-final { scan-assembler-times "vpshldvw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpshldvw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpshldvw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpshldvd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpshldvd-2.c index 4637075..a38869e 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpshldvd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpshldvd-2.c @@ -1,6 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512f -mavx512vbmi2 -mavx512bw" } */ -/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O2 -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vbmi2 } */ #define AVX512F diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpshldvq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpshldvq-2.c index 4436f01..2eeb349 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpshldvq-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpshldvq-2.c @@ -1,6 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512f -mavx512vbmi2 -mavx512bw" } */ -/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O2 -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vbmi2 } */ #define AVX512F diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpshldvw-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpshldvw-2.c index 5473a57..6a31a4d 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpshldvw-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpshldvw-2.c @@ -1,6 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512f -mavx512vbmi2 -mavx512bw" } */ -/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O2 -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vbmi2 } */ #define AVX512F diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpshrdd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpshrdd-2.c index 54dd369..2c3a429 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpshrdd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpshrdd-2.c @@ -1,6 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512f -mavx512vbmi2 -mavx512bw" } */ -/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O2 -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vbmi2 } 
*/ #define AVX512F diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpshrdq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpshrdq-2.c index 4997c70..89bafc3 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpshrdq-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpshrdq-2.c @@ -1,6 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512f -mavx512vbmi2 -mavx512bw" } */ -/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O2 -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vbmi2 } */ #define AVX512F diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpshrdv-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vpshrdv-1.c index 6dd3f0f..5e12470 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpshrdv-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpshrdv-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512f -mavx512vbmi2 -mavx512bw -O2" } */ +/* { dg-options "-mavx512vbmi2 -O2" } */ /* { dg-final { scan-assembler-times "vpshrdvw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpshrdvw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpshrdvw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpshrdvd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpshrdvd-2.c index 6e08095..d280579 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpshrdvd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpshrdvd-2.c @@ -1,6 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512f -mavx512bw -mavx512vbmi2" } */ -/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O2 -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vbmi2 } */ #define AVX512F diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpshrdvq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpshrdvq-2.c index 5810fa0..44378a6 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpshrdvq-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpshrdvq-2.c @@ -1,6 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512f -mavx512bw -mavx512vbmi2" } */ -/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O2 -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vbmi2 } */ #define AVX512F diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpshrdvw-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpshrdvw-2.c index 1699c26..c7131a0 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpshrdvw-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpshrdvw-2.c @@ -1,6 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512f -mavx512bw -mavx512vbmi2" } */ -/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O2 -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vbmi2 } */ #define AVX512F diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpshrdw-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpshrdw-2.c index 67596eb..2dab245 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vpshrdw-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpshrdw-2.c @@ -1,6 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512f -mavx512vbmi2 -mavx512bw" } */ -/* { dg-require-effective-target avx512f } */ +/* { dg-options "-O2 -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vbmi2 } */ #define AVX512F diff --git a/gcc/testsuite/gcc.target/i386/avx512vbmi2-vpshld-1.c 
b/gcc/testsuite/gcc.target/i386/avx512vbmi2-vpshld-1.c index 0b29923..a61ff98 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vbmi2-vpshld-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vbmi2-vpshld-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512vbmi2 -mavx512bw -O2" } */ +/* { dg-options "-mavx512vbmi2 -O2" } */ /* { dg-final { scan-assembler-times "vpshldw\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpshldw\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpshldw\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vbmi2-vpshrd-1.c b/gcc/testsuite/gcc.target/i386/avx512vbmi2-vpshrd-1.c index bb4de78..7bf5967 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vbmi2-vpshrd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vbmi2-vpshrd-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512vbmi2 -mavx512bw -O2" } */ +/* { dg-options "-mavx512vbmi2 -O2" } */ /* { dg-final { scan-assembler-times "vpshrdw\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpshrdw\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpshrdw\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%zmm\[0-9\]+\[^\\n\\r]*%zmm\[0-9\]+\[^\\n\\r\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcompressb-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcompressb-1.c index 7e3aef9..ce4410a 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vpcompressb-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcompressb-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512vl -mavx512vbmi2 -mavx512bw -O2" } */ +/* { dg-options "-mavx512vl -mavx512vbmi2 -O2" } */ /* { dg-final { scan-assembler-times "vpcompressb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpcompressb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpcompressb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcompressb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcompressb-2.c index e620772..dc65a21 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vpcompressb-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcompressb-2.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512vl -mavx512bw -mavx512vbmi2" } */ +/* { dg-options "-O2 -mavx512vl -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vl } */ /* { dg-require-effective-target avx512vbmi2 } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpcompressw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpcompressw-2.c index 012ac10..a56c1b9 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vpcompressw-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpcompressw-2.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512vl -mavx512bw -mavx512vbmi2" } */ +/* { dg-options "-O2 -mavx512vl -mavx512vbmi2" } */ /* { 
dg-require-effective-target avx512vl } */ /* { dg-require-effective-target avx512vbmi2 } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandb-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandb-1.c index 96e0d81..5600bd4 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandb-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandb-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512vbmi2 -mavx512bw -mavx512vl -O2" } */ +/* { dg-options "-mavx512vbmi2 -mavx512vl -O2" } */ /* { dg-final { scan-assembler-times "vpexpandb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpexpandb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpexpandb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandb-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandb-2.c index 280aeda..3a3bed6 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandb-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandb-2.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512vl -mavx512vbmi2 -mavx512bw" } */ +/* { dg-options "-O2 -mavx512vl -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vl } */ /* { dg-require-effective-target avx512vbmi2 } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandw-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandw-1.c index ac5c34a..9a897ec 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandw-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandw-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512vbmi2 -mavx512bw -mavx512vl -O2" } */ +/* { dg-options "-mavx512vbmi2 -mavx512vl -O2" } */ /* { dg-final { scan-assembler-times "vpexpandw\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpexpandw\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpexpandw\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandw-2.c index 2c1e004..48ec1a9 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandw-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpexpandw-2.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512vl -mavx512vbmi2 -mavx512bw" } */ +/* { dg-options "-O2 -mavx512vl -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vl } */ /* { dg-require-effective-target avx512vbmi2 } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpshldd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpshldd-2.c index d47e4e6..99d5154 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vpshldd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpshldd-2.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512vl -mavx512vbmi2 -mavx512bw" } */ +/* { dg-options "-O2 -mavx512vl -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vl } */ /* { dg-require-effective-target avx512vbmi2 } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpshldq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpshldq-2.c index 7a5575e..a95b443 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vpshldq-2.c +++ 
b/gcc/testsuite/gcc.target/i386/avx512vl-vpshldq-2.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512vl -mavx512vbmi2 -mavx512bw" } */ +/* { dg-options "-O2 -mavx512vl -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vl } */ /* { dg-require-effective-target avx512vbmi2 } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpshldv-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpshldv-1.c index 9569552..79248e02 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vpshldv-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpshldv-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512vl -mavx512vbmi2 -mavx512bw -O2" } */ +/* { dg-options "-mavx512vl -mavx512vbmi2 -O2" } */ /* { dg-final { scan-assembler-times "vpshldvw\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpshldvw\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpshldvw\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpshldvd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpshldvd-2.c index cd2c751..58481c4 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vpshldvd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpshldvd-2.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512vl -mavx512vbmi2 -mavx512bw" } */ +/* { dg-options "-O2 -mavx512vl -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vl } */ /* { dg-require-effective-target avx512vbmi2 } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpshldvq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpshldvq-2.c index 451487d..54e8193 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vpshldvq-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpshldvq-2.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512vl -mavx512vbmi2 -mavx512bw" } */ +/* { dg-options "-O2 -mavx512vl -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vl } */ /* { dg-require-effective-target avx512vbmi2 } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpshldvw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpshldvw-2.c index fa593f5..8d81007 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vpshldvw-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpshldvw-2.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512vl -mavx512vbmi2 -mavx512bw" } */ +/* { dg-options "-O2 -mavx512vl -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vl } */ /* { dg-require-effective-target avx512vbmi2 } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdd-2.c index bf22915..3b2c29d 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdd-2.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512vl -mavx512vbmi2 -mavx512bw" } */ +/* { dg-options "-O2 -mavx512vl -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vl } */ /* { dg-require-effective-target avx512vbmi2 } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdq-2.c index 61e0708..02adfbf 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdq-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdq-2.c @@ -1,5 +1,5 @@ /* { dg-do run } 
*/ -/* { dg-options "-O2 -mavx512vl -mavx512vbmi2 -mavx512bw" } */ +/* { dg-options "-O2 -mavx512vl -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vl } */ /* { dg-require-effective-target avx512vbmi2 } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdv-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdv-1.c index 4e6ceb2..243878c 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdv-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdv-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512vl -mavx512vbmi2 -mavx512bw -O2" } */ +/* { dg-options "-mavx512vl -mavx512vbmi2 -O2" } */ /* { dg-final { scan-assembler-times "vpshrdvw\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpshrdvw\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpshrdvw\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdvd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdvd-2.c index 6d8ab79b..a9e47ba 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdvd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdvd-2.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512vl -mavx512bw -mavx512vbmi2" } */ +/* { dg-options "-O2 -mavx512vl -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vl } */ /* { dg-require-effective-target avx512vbmi2 } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdvq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdvq-2.c index da74a62..9b4f2f2 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdvq-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdvq-2.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512vl -mavx512bw -mavx512vbmi2" } */ +/* { dg-options "-O2 -mavx512vl -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vl } */ /* { dg-require-effective-target avx512vbmi2 } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdvw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdvw-2.c index 50a3c00..2b161fc 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdvw-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdvw-2.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512vl -mavx512bw -mavx512vbmi2" } */ +/* { dg-options "-O2 -mavx512vl -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vl } */ /* { dg-require-effective-target avx512vbmi2 } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdw-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdw-2.c index 507034b..bfb32af 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdw-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpshrdw-2.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512vl -mavx512bw -mavx512vbmi2" } */ +/* { dg-options "-O2 -mavx512vl -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vl } */ /* { dg-require-effective-target avx512vbmi2 } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-1.c b/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-1.c index 135dbd7..2f7d515 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-1.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-mavx512bw -mavx512vbmi2 -mavx512vl -O2" } */ +/* 
{ dg-options "-mavx512vbmi2 -mavx512vl -O2" } */ /* { dg-final { scan-assembler-times "vpexpandb\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpexpandb\[ \\t\]+\[^\{\n\(]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpexpandw\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-b-2.c b/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-b-2.c index d54e803..688d1be 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-b-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-b-2.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512vl -mavx512vbmi2 -mavx512bw" } */ +/* { dg-options "-O2 -mavx512vl -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vl } */ /* { dg-require-effective-target avx512vbmi2 } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-w-2.c b/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-w-2.c index a46ca78..ed061a9 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-w-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-w-2.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-options "-O2 -mavx512vl -mavx512vbmi2 -mavx512bw" } */ +/* { dg-options "-O2 -mavx512vl -mavx512vbmi2" } */ /* { dg-require-effective-target avx512vl } */ /* { dg-require-effective-target avx512vbmi2 } */ -- cgit v1.1 From e8571019066d9820c5cd4b3019b816203d438e83 Mon Sep 17 00:00:00 2001 From: Haochen Jiang Date: Tue, 10 Jan 2023 09:35:08 +0800 Subject: i386: Fix vpblendm{b,w} intrins and insns For vpblendm{b,w}, they actually do not have constant parameters. Therefore, there is no need for them to be wrapped in __OPTIMIZE__. Also, we should check TARGET_AVX512VL for 128/256-bit vectors. gcc/ChangeLog: * config/i386/avx512vlbwintrin.h (_mm_mask_blend_epi16): Remove __OPTIMIZE__ wrapper. (_mm_mask_blend_epi8): Ditto. (_mm256_mask_blend_epi16): Ditto. (_mm256_mask_blend_epi8): Ditto. * config/i386/avx512vlintrin.h (_mm256_mask_blend_pd): Ditto. (_mm256_mask_blend_ps): Ditto. (_mm256_mask_blend_epi64): Ditto. (_mm256_mask_blend_epi32): Ditto. (_mm_mask_blend_pd): Ditto. (_mm_mask_blend_ps): Ditto. (_mm_mask_blend_epi64): Ditto. (_mm_mask_blend_epi32): Ditto. * config/i386/sse.md (VF_AVX512BWHFBF16): Removed. (VF_AVX512HFBFVL): Move it before the first usage. (_blendm): Change iterator from VF_AVX512BWHFBF16 to VF_AVX512HFBFVL.
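[Editor's illustration, not part of the patch: since the blend intrinsics take no constant operand, they can be used without __OPTIMIZE__, e.g. at -O0, as long as AVX512VL is enabled (plus AVX512BW for the byte/word forms). A minimal sketch; the function names, file name, and compile line below are hypothetical.]

#include <immintrin.h>

/* vpblendmw: word i of the result is taken from b where bit i of the
   mask k is set, and from a otherwise.  No immediate operand is
   involved, hence no __OPTIMIZE__ guard is required.  */
__m256i
blend_words (__mmask16 k, __m256i a, __m256i b)
{
  return _mm256_mask_blend_epi16 (k, a, b);
}

/* The 128-bit float form needs only AVX512VL (AVX512F implied).  */
__m128
blend_floats (__mmask8 k, __m128 a, __m128 b)
{
  return _mm_mask_blend_ps (k, a, b);
}

/* Hypothetical compile test: gcc -O0 -mavx512vl -mavx512bw -c blend.c */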
--- gcc/config/i386/avx512vlbwintrin.h | 92 ++++++++----------- gcc/config/i386/avx512vlintrin.h | 184 +++++++++++++++---------------------- gcc/config/i386/sse.md | 17 ++-- 3 files changed, 115 insertions(+), 178 deletions(-) (limited to 'gcc') diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h index 0232783..9d2aba2 100644 --- a/gcc/config/i386/avx512vlbwintrin.h +++ b/gcc/config/i386/avx512vlbwintrin.h @@ -259,6 +259,42 @@ _mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P) extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W) +{ + return (__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) __A, + (__v8hi) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W) +{ + return (__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) __A, + (__v16qi) __W, + (__mmask16) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W) +{ + return (__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) __A, + (__v16hi) __W, + (__mmask16) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W) +{ + return (__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) __A, + (__v32qi) __W, + (__mmask32) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cvtepi16_epi8 (__m256i __A) { @@ -1442,42 +1478,6 @@ _mm_maskz_dbsad_epu8 (__mmask8 __U, __m128i __A, __m128i __B, (__mmask8) __U); } -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W) -{ - return (__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) __A, - (__v8hi) __W, - (__mmask8) __U); -} - -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W) -{ - return (__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) __A, - (__v16qi) __W, - (__mmask16) __U); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W) -{ - return (__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) __A, - (__v16hi) __W, - (__mmask16) __U); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W) -{ - return (__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) __A, - (__v32qi) __W, - (__mmask32) __U); -} - extern __inline __mmask8 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_cmp_epi16_mask (__mmask8 __U, __m128i __X, __m128i __Y, @@ -1986,26 +1986,6 @@ _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, int __B) (__v8hi)(__m128i)_mm_setzero_si128(), \ (__mmask8)(U))) -#define _mm_mask_blend_epi16(__U, __A, __W) \ - ((__m128i) __builtin_ia32_blendmw_128_mask ((__v8hi) (__A), \ - (__v8hi) (__W), \ - (__mmask8) (__U))) - -#define _mm_mask_blend_epi8(__U, __A, __W) \ - ((__m128i) __builtin_ia32_blendmb_128_mask ((__v16qi) (__A), \ - (__v16qi) (__W), \ - (__mmask16) (__U))) - -#define _mm256_mask_blend_epi16(__U, 
__A, __W) \ - ((__m256i) __builtin_ia32_blendmw_256_mask ((__v16hi) (__A), \ - (__v16hi) (__W), \ - (__mmask16) (__U))) - -#define _mm256_mask_blend_epi8(__U, __A, __W) \ - ((__m256i) __builtin_ia32_blendmb_256_mask ((__v32qi) (__A), \ - (__v32qi) (__W), \ - (__mmask32) (__U))) - #define _mm_cmp_epi16_mask(X, Y, P) \ ((__mmask8) __builtin_ia32_cmpw128_mask ((__v8hi)(__m128i)(X), \ (__v8hi)(__m128i)(Y), (int)(P),\ diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h index 758b71a..4a717a7 100644 --- a/gcc/config/i386/avx512vlintrin.h +++ b/gcc/config/i386/avx512vlintrin.h @@ -935,6 +935,78 @@ _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A) (__mmask8) __U); } +extern __inline __m256d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) +{ + return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A, + (__v4df) __W, + (__mmask8) __U); +} + +extern __inline __m256 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) +{ + return (__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) __A, + (__v8sf) __W, + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) +{ + return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A, + (__v4di) __W, + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) +{ + return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A, + (__v8si) __W, + (__mmask8) __U); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) +{ + return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A, + (__v2df) __W, + (__mmask8) __U); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) +{ + return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A, + (__v4sf) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) +{ + return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A, + (__v2di) __W, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) +{ + return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A, + (__v4si) __W, + (__mmask8) __U); +} + extern __inline __m256i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_mask_abs_epi32 (__m256i __W, __mmask8 __U, __m256i __A) @@ -12262,78 +12334,6 @@ _mm_maskz_permute_ps (__mmask8 __U, __m128 __X, const int __C) (__mmask8) __U); } -extern __inline __m256d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) -{ - return (__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) __A, - (__v4df) __W, - (__mmask8) __U); -} - -extern __inline __m256 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) -{ - return (__m256) 
__builtin_ia32_blendmps_256_mask ((__v8sf) __A, - (__v8sf) __W, - (__mmask8) __U); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) -{ - return (__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) __A, - (__v4di) __W, - (__mmask8) __U); -} - -extern __inline __m256i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) -{ - return (__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) __A, - (__v8si) __W, - (__mmask8) __U); -} - -extern __inline __m128d -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) -{ - return (__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) __A, - (__v2df) __W, - (__mmask8) __U); -} - -extern __inline __m128 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) -{ - return (__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) __A, - (__v4sf) __W, - (__mmask8) __U); -} - -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) -{ - return (__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) __A, - (__v2di) __W, - (__mmask8) __U); -} - -extern __inline __m128i -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) -{ - return (__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) __A, - (__v4si) __W, - (__mmask8) __U); -} - extern __inline __mmask8 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_cmp_epi64_mask (__m256i __X, __m256i __Y, const int __P) @@ -13717,46 +13717,6 @@ _mm256_permutex_pd (__m256d __X, const int __M) (__v4sf)(__m128)_mm_setzero_ps (), \ (__mmask8)(U))) -#define _mm256_mask_blend_pd(__U, __A, __W) \ - ((__m256d) __builtin_ia32_blendmpd_256_mask ((__v4df) (__A), \ - (__v4df) (__W), \ - (__mmask8) (__U))) - -#define _mm256_mask_blend_ps(__U, __A, __W) \ - ((__m256) __builtin_ia32_blendmps_256_mask ((__v8sf) (__A), \ - (__v8sf) (__W), \ - (__mmask8) (__U))) - -#define _mm256_mask_blend_epi64(__U, __A, __W) \ - ((__m256i) __builtin_ia32_blendmq_256_mask ((__v4di) (__A), \ - (__v4di) (__W), \ - (__mmask8) (__U))) - -#define _mm256_mask_blend_epi32(__U, __A, __W) \ - ((__m256i) __builtin_ia32_blendmd_256_mask ((__v8si) (__A), \ - (__v8si) (__W), \ - (__mmask8) (__U))) - -#define _mm_mask_blend_pd(__U, __A, __W) \ - ((__m128d) __builtin_ia32_blendmpd_128_mask ((__v2df) (__A), \ - (__v2df) (__W), \ - (__mmask8) (__U))) - -#define _mm_mask_blend_ps(__U, __A, __W) \ - ((__m128) __builtin_ia32_blendmps_128_mask ((__v4sf) (__A), \ - (__v4sf) (__W), \ - (__mmask8) (__U))) - -#define _mm_mask_blend_epi64(__U, __A, __W) \ - ((__m128i) __builtin_ia32_blendmq_128_mask ((__v2di) (__A), \ - (__v2di) (__W), \ - (__mmask8) (__U))) - -#define _mm_mask_blend_epi32(__U, __A, __W) \ - ((__m128i) __builtin_ia32_blendmd_128_mask ((__v4si) (__A), \ - (__v4si) (__W), \ - (__mmask8) (__U))) - #define _mm256_cmp_epu32_mask(X, Y, P) \ ((__mmask8) __builtin_ia32_ucmpd256_mask ((__v8si)(__m256i)(X), \ (__v8si)(__m256i)(Y), (int)(P),\ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index b5236ad..ddc9fd2 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -444,8 +444,9 @@ [(V32HF "TARGET_AVX512FP16") (V16HF 
"TARGET_AVX512FP16") (V8HF "TARGET_AVX512FP16") V32BF V16BF V8BF]) -(define_mode_iterator VF_AVX512BWHFBF16 - [V32HF V16HF V8HF V32BF V16BF V8BF]) +(define_mode_iterator VF_AVX512HFBFVL + [V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL") + V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")]) (define_mode_iterator VF_AVX512FP16VL [V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")]) @@ -1585,10 +1586,10 @@ (set_attr "mode" "")]) (define_insn "_blendm" - [(set (match_operand:VF_AVX512BWHFBF16 0 "register_operand" "=v,v") - (vec_merge:VF_AVX512BWHFBF16 - (match_operand:VF_AVX512BWHFBF16 2 "nonimmediate_operand" "vm,vm") - (match_operand:VF_AVX512BWHFBF16 1 "nonimm_or_0_operand" "0C,v") + [(set (match_operand:VF_AVX512HFBFVL 0 "register_operand" "=v,v") + (vec_merge:VF_AVX512HFBFVL + (match_operand:VF_AVX512HFBFVL 2 "nonimmediate_operand" "vm,vm") + (match_operand:VF_AVX512HFBFVL 1 "nonimm_or_0_operand" "0C,v") (match_operand: 3 "register_operand" "Yk,Yk")))] "TARGET_AVX512BW" "@ @@ -4545,10 +4546,6 @@ DONE; }) -(define_mode_iterator VF_AVX512HFBFVL - [V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL") - V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")]) - (define_expand "vcond" [(set (match_operand:VF_AVX512HFBFVL 0 "register_operand") (if_then_else:VF_AVX512HFBFVL -- cgit v1.1 From 4246611d1915f1664c01f286dbeb946dd06e2a4d Mon Sep 17 00:00:00 2001 From: Haochen Jiang Date: Fri, 10 Mar 2023 10:38:50 +0800 Subject: i386: Add PCLMUL dependency for VPCLMULQDQ Currently in GCC, the 128 bit intrin for instruction vpclmulqdq is under PCLMUL ISA. Because there is no dependency between ISA set PCLMUL and VPCLMULQDQ, The 128 bit intrin is not available when we just use compiler flag -mvpclmulqdq. But it should according to Intel SDM. Since VPCLMULQDQ is a VEX/EVEX promotion for PCLMUL, it is natural to add dependency between them. Also, with -mvpclmulqdq, we can use ymm under VEX encoding, so VPCLMULQDQ should imply AVX. gcc/ChangeLog: * common/config/i386/i386-common.cc (OPTION_MASK_ISA_VPCLMULQDQ_SET): Add OPTION_MASK_ISA_PCLMUL_SET and OPTION_MASK_ISA_AVX_SET. (OPTION_MASK_ISA_AVX_UNSET): Add OPTION_MASK_ISA_VPCLMULQDQ_UNSET. (OPTION_MASK_ISA_PCLMUL_UNSET): Ditto. * config/i386/i386.md (vpclmulqdqvl): New. * config/i386/sse.md (pclmulqdq): Add evex encoding. * config/i386/vpclmulqdqintrin.h: Remove redudant avx target push. gcc/testsuite/ChangeLog: * gcc.target/i386/vpclmulqdq.c: Add compile test for xmm. --- gcc/common/config/i386/i386-common.cc | 9 ++++++--- gcc/config/i386/i386.md | 4 +++- gcc/config/i386/sse.md | 11 ++++++----- gcc/config/i386/vpclmulqdqintrin.h | 4 ++-- gcc/testsuite/gcc.target/i386/vpclmulqdq.c | 3 +++ 5 files changed, 20 insertions(+), 11 deletions(-) (limited to 'gcc') diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc index 315db85..c7954da 100644 --- a/gcc/common/config/i386/i386-common.cc +++ b/gcc/common/config/i386/i386-common.cc @@ -171,7 +171,9 @@ along with GCC; see the file COPYING3. 
If not see #define OPTION_MASK_ISA_GFNI_SET OPTION_MASK_ISA_GFNI #define OPTION_MASK_ISA_SHSTK_SET OPTION_MASK_ISA_SHSTK #define OPTION_MASK_ISA2_VAES_SET OPTION_MASK_ISA2_VAES -#define OPTION_MASK_ISA_VPCLMULQDQ_SET OPTION_MASK_ISA_VPCLMULQDQ +#define OPTION_MASK_ISA_VPCLMULQDQ_SET \ + (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_PCLMUL_SET \ + | OPTION_MASK_ISA_AVX_SET) #define OPTION_MASK_ISA_MOVDIRI_SET OPTION_MASK_ISA_MOVDIRI #define OPTION_MASK_ISA2_MOVDIR64B_SET OPTION_MASK_ISA2_MOVDIR64B #define OPTION_MASK_ISA2_WAITPKG_SET OPTION_MASK_ISA2_WAITPKG @@ -211,7 +213,7 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA_AVX_UNSET \ (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \ | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET \ - | OPTION_MASK_ISA_AVX2_UNSET ) + | OPTION_MASK_ISA_AVX2_UNSET | OPTION_MASK_ISA_VPCLMULQDQ_UNSET) #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA #define OPTION_MASK_ISA_FXSR_UNSET OPTION_MASK_ISA_FXSR #define OPTION_MASK_ISA_XSAVE_UNSET \ @@ -314,7 +316,8 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES #define OPTION_MASK_ISA_SHA_UNSET OPTION_MASK_ISA_SHA -#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL +#define OPTION_MASK_ISA_PCLMUL_UNSET \ + (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_VPCLMULQDQ_UNSET) #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM #define OPTION_MASK_ISA2_PCONFIG_UNSET OPTION_MASK_ISA2_PCONFIG #define OPTION_MASK_ISA2_WBNOINVD_UNSET OPTION_MASK_ISA2_WBNOINVD diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 01d5199..8afa400 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -842,7 +842,7 @@ avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f, avx512bw,noavx512bw,avx512dq,noavx512dq,fma_or_avx512vl, avx512vl,noavx512vl,avxvnni,avx512vnnivl,avx512fp16,avxifma, - avx512ifmavl,avxneconvert,avx512bf16vl" + avx512ifmavl,avxneconvert,avx512bf16vl,vpclmulqdqvl" (const_string "base")) ;; Define instruction set of MMX instructions @@ -904,6 +904,8 @@ (eq_attr "isa" "avxneconvert") (symbol_ref "TARGET_AVXNECONVERT") (eq_attr "isa" "avx512bf16vl") (symbol_ref "TARGET_AVX512BF16 && TARGET_AVX512VL") + (eq_attr "isa" "vpclmulqdqvl") + (symbol_ref "TARGET_VPCLMULQDQ && TARGET_AVX512VL") (eq_attr "mmx_isa" "native") (symbol_ref "!TARGET_MMX_WITH_SSE") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index ddc9fd2..5594ea6 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -25196,20 +25196,21 @@ (set_attr "mode" "TI")]) (define_insn "pclmulqdq" - [(set (match_operand:V2DI 0 "register_operand" "=x,x") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") - (match_operand:V2DI 2 "vector_operand" "xBm,xm") + [(set (match_operand:V2DI 0 "register_operand" "=x,x,v") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v") + (match_operand:V2DI 2 "vector_operand" "xBm,xm,vm") (match_operand:SI 3 "const_0_to_255_operand")] UNSPEC_PCLMUL))] "TARGET_PCLMUL" "@ pclmulqdq\t{%3, %2, %0|%0, %2, %3} + vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3} vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,avx,vpclmulqdqvl") (set_attr "type" "sselog1") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") - (set_attr "prefix" "orig,vex") + (set_attr "prefix" "orig,vex,evex") (set_attr "mode" "TI")]) (define_expand "avx_vzeroall" diff --git a/gcc/config/i386/vpclmulqdqintrin.h 
b/gcc/config/i386/vpclmulqdqintrin.h index ba93fc4..2c83b60 100644 --- a/gcc/config/i386/vpclmulqdqintrin.h +++ b/gcc/config/i386/vpclmulqdqintrin.h @@ -53,9 +53,9 @@ _mm512_clmulepi64_epi128 (__m512i __A, __m512i __B, const int __C) #pragma GCC pop_options #endif /* __DISABLE_VPCLMULQDQF__ */ -#if !defined(__VPCLMULQDQ__) || !defined(__AVX__) +#if !defined(__VPCLMULQDQ__) #pragma GCC push_options -#pragma GCC target("vpclmulqdq,avx") +#pragma GCC target("vpclmulqdq") #define __DISABLE_VPCLMULQDQ__ #endif /* __VPCLMULQDQ__ */ diff --git a/gcc/testsuite/gcc.target/i386/vpclmulqdq.c b/gcc/testsuite/gcc.target/i386/vpclmulqdq.c index d93f776..27b2fd7 100644 --- a/gcc/testsuite/gcc.target/i386/vpclmulqdq.c +++ b/gcc/testsuite/gcc.target/i386/vpclmulqdq.c @@ -2,16 +2,19 @@ /* { dg-options "-mvpclmulqdq -mavx512vl -mavx512f -O2" } */ /* { dg-final { scan-assembler-times "vpclmulqdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpclmulqdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpclmulqdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> volatile __m512i x1, x2; volatile __m256i x3, x4; +volatile __m128i x5, x6; void extern avx512vl_test (void) { x1 = _mm512_clmulepi64_epi128(x1, x2, 3); x3 = _mm256_clmulepi64_epi128(x3, x4, 3); + x5 = _mm_clmulepi64_si128(x5, x6, 3); } -- cgit v1.1 From ca3bd377c7eae2ece01a1bb686a920daad179a89 Mon Sep 17 00:00:00 2001 From: "Hu, Lin1" Date: Thu, 16 Feb 2023 09:10:16 +0800 Subject: Add reduce_*_ep[i|u][8|16] series intrinsics gcc/ChangeLog: * config/i386/avx2intrin.h (_MM_REDUCE_OPERATOR_BASIC_EPI16): New macro. (_MM_REDUCE_OPERATOR_MAX_MIN_EP16): Ditto. (_MM256_REDUCE_OPERATOR_BASIC_EPI16): Ditto. (_MM256_REDUCE_OPERATOR_MAX_MIN_EP16): Ditto. (_MM_REDUCE_OPERATOR_BASIC_EPI8): Ditto. (_MM_REDUCE_OPERATOR_MAX_MIN_EP8): Ditto. (_MM256_REDUCE_OPERATOR_BASIC_EPI8): Ditto. (_MM256_REDUCE_OPERATOR_MAX_MIN_EP8): Ditto. (_mm_reduce_add_epi16): New intrinsics. (_mm_reduce_mul_epi16): Ditto. (_mm_reduce_and_epi16): Ditto. (_mm_reduce_or_epi16): Ditto. (_mm_reduce_max_epi16): Ditto. (_mm_reduce_max_epu16): Ditto. (_mm_reduce_min_epi16): Ditto. (_mm_reduce_min_epu16): Ditto. (_mm256_reduce_add_epi16): Ditto. (_mm256_reduce_mul_epi16): Ditto. (_mm256_reduce_and_epi16): Ditto. (_mm256_reduce_or_epi16): Ditto. (_mm256_reduce_max_epi16): Ditto. (_mm256_reduce_max_epu16): Ditto. (_mm256_reduce_min_epi16): Ditto. (_mm256_reduce_min_epu16): Ditto. (_mm_reduce_add_epi8): Ditto. (_mm_reduce_mul_epi8): Ditto. (_mm_reduce_and_epi8): Ditto. (_mm_reduce_or_epi8): Ditto. (_mm_reduce_max_epi8): Ditto. (_mm_reduce_max_epu8): Ditto. (_mm_reduce_min_epi8): Ditto. (_mm_reduce_min_epu8): Ditto. (_mm256_reduce_add_epi8): Ditto. (_mm256_reduce_mul_epi8): Ditto. (_mm256_reduce_and_epi8): Ditto. (_mm256_reduce_or_epi8): Ditto. (_mm256_reduce_max_epi8): Ditto. (_mm256_reduce_max_epu8): Ditto. (_mm256_reduce_min_epi8): Ditto. (_mm256_reduce_min_epu8): Ditto. * config/i386/avx512vlbwintrin.h: (_mm_mask_reduce_add_epi16): Ditto. (_mm_mask_reduce_mul_epi16): Ditto. (_mm_mask_reduce_and_epi16): Ditto. (_mm_mask_reduce_or_epi16): Ditto. (_mm_mask_reduce_max_epi16): Ditto. (_mm_mask_reduce_max_epu16): Ditto. (_mm_mask_reduce_min_epi16): Ditto. (_mm_mask_reduce_min_epu16): Ditto. (_mm256_mask_reduce_add_epi16): Ditto.
(_mm256_mask_reduce_mul_epi16): Ditto. (_mm256_mask_reduce_and_epi16): Ditto. (_mm256_mask_reduce_or_epi16): Ditto. (_mm256_mask_reduce_max_epi16): Ditto. (_mm256_mask_reduce_max_epu16): Ditto. (_mm256_mask_reduce_min_epi16): Ditto. (_mm256_mask_reduce_min_epu16): Ditto. (_mm_mask_reduce_add_epi8): Ditto. (_mm_mask_reduce_mul_epi8): Ditto. (_mm_mask_reduce_and_epi8): Ditto. (_mm_mask_reduce_or_epi8): Ditto. (_mm_mask_reduce_max_epi8): Ditto. (_mm_mask_reduce_max_epu8): Ditto. (_mm_mask_reduce_min_epi8): Ditto. (_mm_mask_reduce_min_epu8): Ditto. (_mm256_mask_reduce_add_epi8): Ditto. (_mm256_mask_reduce_mul_epi8): Ditto. (_mm256_mask_reduce_and_epi8): Ditto. (_mm256_mask_reduce_or_epi8): Ditto. (_mm256_mask_reduce_max_epi8): Ditto. (_mm256_mask_reduce_max_epu8): Ditto. (_mm256_mask_reduce_min_epi8): Ditto. (_mm256_mask_reduce_min_epu8): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512vlbw-reduce-op-1.c: New test. --- gcc/config/i386/avx2intrin.h | 347 +++++++++++++++++++++ gcc/config/i386/avx512vlbwintrin.h | 256 +++++++++++++++ .../gcc.target/i386/avx512vlbw-reduce-op-1.c | 206 ++++++++++++ 3 files changed, 809 insertions(+) create mode 100644 gcc/testsuite/gcc.target/i386/avx512vlbw-reduce-op-1.c (limited to 'gcc') diff --git a/gcc/config/i386/avx2intrin.h b/gcc/config/i386/avx2intrin.h index 1b9c816..9b8c13b 100644 --- a/gcc/config/i386/avx2intrin.h +++ b/gcc/config/i386/avx2intrin.h @@ -1915,6 +1915,353 @@ _mm256_mask_i64gather_epi32 (__m128i __src, int const *__base, (int) (SCALE)) #endif /* __OPTIMIZE__ */ +#define _MM_REDUCE_OPERATOR_BASIC_EPI16(op) \ + __v8hi __T1 = (__v8hi)__W; \ + __v8hi __T2 = __builtin_shufflevector (__T1, __T1, 4, 5, 6, 7, 4, 5, 6, 7); \ + __v8hi __T3 = __T1 op __T2; \ + __v8hi __T4 = __builtin_shufflevector (__T3, __T3, 2, 3, 2, 3, 4, 5, 6, 7); \ + __v8hi __T5 = __T3 op __T4; \ + __v8hi __T6 = __builtin_shufflevector (__T5, __T5, 1, 1, 2, 3, 4, 5, 6, 7); \ + __v8hi __T7 = __T5 op __T6; \ + return __T7[0] + +extern __inline short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_add_epi16 (__m128i __W) +{ + _MM_REDUCE_OPERATOR_BASIC_EPI16 (+); +} + +extern __inline short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_mul_epi16 (__m128i __W) +{ + _MM_REDUCE_OPERATOR_BASIC_EPI16 (*); +} + +extern __inline short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_and_epi16 (__m128i __W) +{ + _MM_REDUCE_OPERATOR_BASIC_EPI16 (&); +} + +extern __inline short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_or_epi16 (__m128i __W) +{ + _MM_REDUCE_OPERATOR_BASIC_EPI16 (|); +} + +#define _MM_REDUCE_OPERATOR_MAX_MIN_EP16(op) \ + __m128i __T1 = (__m128i)__builtin_shufflevector ((__v8hi)__V, \ + (__v8hi)__V, 4, 5, 6, 7, 4, 5, 6, 7); \ + __m128i __T2 = _mm_##op (__V, __T1); \ + __m128i __T3 = (__m128i)__builtin_shufflevector ((__v8hi)__T2, \ + (__v8hi)__T2, 2, 3, 2, 3, 4, 5, 6, 7); \ + __m128i __T4 = _mm_##op (__T2, __T3); \ + __m128i __T5 = (__m128i)__builtin_shufflevector ((__v8hi)__T4, \ + (__v8hi)__T4, 1, 1, 2, 3, 4, 5, 6, 7); \ + __v8hi __T6 = (__v8hi)_mm_##op (__T4, __T5); \ + return __T6[0] + +extern __inline short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_max_epi16 (__m128i __V) +{ + _MM_REDUCE_OPERATOR_MAX_MIN_EP16 (max_epi16); +} + +extern __inline unsigned short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_max_epu16 (__m128i __V) +{ + 
_MM_REDUCE_OPERATOR_MAX_MIN_EP16 (max_epu16); +} + +extern __inline short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_min_epi16 (__m128i __V) +{ + _MM_REDUCE_OPERATOR_MAX_MIN_EP16 (min_epi16); +} + +extern __inline unsigned short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_min_epu16 (__m128i __V) +{ + _MM_REDUCE_OPERATOR_MAX_MIN_EP16 (min_epu16); +} + +#define _MM256_REDUCE_OPERATOR_BASIC_EPI16(op) \ + __v8hi __T1 = (__v8hi)_mm256_extracti128_si256 (__W, 0); \ + __v8hi __T2 = (__v8hi)_mm256_extracti128_si256 (__W, 1); \ + __v8hi __T3 = __T1 op __T2; \ + __v8hi __T4 = __builtin_shufflevector (__T3, __T3, 4, 5, 6, 7, 4, 5, 6, 7); \ + __v8hi __T5 = __T3 op __T4; \ + __v8hi __T6 = __builtin_shufflevector (__T5, __T5, 2, 3, 2, 3, 4, 5, 6, 7); \ + __v8hi __T7 = __T5 op __T6; \ + __v8hi __T8 = __builtin_shufflevector (__T7, __T7, 1, 1, 2, 3, 4, 5, 6, 7); \ + __v8hi __T9 = __T7 op __T8; \ + return __T9[0] + +extern __inline short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_reduce_add_epi16 (__m256i __W) +{ + _MM256_REDUCE_OPERATOR_BASIC_EPI16 (+); +} + +extern __inline short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_reduce_mul_epi16 (__m256i __W) +{ + _MM256_REDUCE_OPERATOR_BASIC_EPI16 (*); +} + +extern __inline short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_reduce_and_epi16 (__m256i __W) +{ + _MM256_REDUCE_OPERATOR_BASIC_EPI16 (&); +} + +extern __inline short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_reduce_or_epi16 (__m256i __W) +{ + _MM256_REDUCE_OPERATOR_BASIC_EPI16 (|); +} + +#define _MM256_REDUCE_OPERATOR_MAX_MIN_EP16(op) \ + __m128i __T1 = _mm256_extracti128_si256 (__V, 0); \ + __m128i __T2 = _mm256_extracti128_si256 (__V, 1); \ + __m128i __T3 = _mm_##op (__T1, __T2); \ + __m128i __T4 = (__m128i)__builtin_shufflevector ((__v8hi)__T3, \ + (__v8hi)__T3, 4, 5, 6, 7, 4, 5, 6, 7); \ + __m128i __T5 = _mm_##op (__T3, __T4); \ + __m128i __T6 = (__m128i)__builtin_shufflevector ((__v8hi)__T5, \ + (__v8hi)__T5, 2, 3, 2, 3, 4, 5, 6, 7); \ + __m128i __T7 = _mm_##op (__T5, __T6); \ + __m128i __T8 = (__m128i)__builtin_shufflevector ((__v8hi)__T7, \ + (__v8hi)__T7, 1, 1, 2, 3, 4, 5, 6, 7); \ + __v8hi __T9 = (__v8hi)_mm_##op (__T7, __T8); \ + return __T9[0] + +extern __inline short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_reduce_max_epi16 (__m256i __V) +{ + _MM256_REDUCE_OPERATOR_MAX_MIN_EP16 (max_epi16); +} + +extern __inline unsigned short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_reduce_max_epu16 (__m256i __V) +{ + _MM256_REDUCE_OPERATOR_MAX_MIN_EP16 (max_epu16); +} + +extern __inline short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_reduce_min_epi16 (__m256i __V) +{ + _MM256_REDUCE_OPERATOR_MAX_MIN_EP16 (min_epi16); +} + +extern __inline unsigned short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_reduce_min_epu16 (__m256i __V) +{ + _MM256_REDUCE_OPERATOR_MAX_MIN_EP16 (min_epu16); +} + +#define _MM_REDUCE_OPERATOR_BASIC_EPI8(op) \ + __v16qi __T1 = (__v16qi)__W; \ + __v16qi __T2 = __builtin_shufflevector (__T1, __T1, \ + 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15); \ + __v16qi __T3 = __T1 op __T2; \ + __v16qi __T4 = __builtin_shufflevector (__T3, __T3, \ + 4, 5, 6, 7, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \ + __v16qi __T5 = __T3 op __T4; \ + 
__v16qi __T6 = __builtin_shufflevector (__T5, __T5, \ + 2, 3, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \ + __v16qi __T7 = __T5 op __T6; \ + __v16qi __T8 = __builtin_shufflevector (__T7, __T7, \ + 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \ + __v16qi __T9 = __T7 op __T8; \ + return __T9[0] + +extern __inline char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_add_epi8 (__m128i __W) +{ + _MM_REDUCE_OPERATOR_BASIC_EPI8 (+); +} + +extern __inline char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_mul_epi8 (__m128i __W) +{ + _MM_REDUCE_OPERATOR_BASIC_EPI8 (*); +} + +extern __inline char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_and_epi8 (__m128i __W) +{ + _MM_REDUCE_OPERATOR_BASIC_EPI8 (&); +} + +extern __inline char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_or_epi8 (__m128i __W) +{ + _MM_REDUCE_OPERATOR_BASIC_EPI8 (|); +} + +#define _MM_REDUCE_OPERATOR_MAX_MIN_EP8(op) \ + __m128i __T1 = (__m128i)__builtin_shufflevector ((__v16qi)__V, (__v16qi)__V, \ + 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15); \ + __m128i __T2 = _mm_##op (__V, __T1); \ + __m128i __T3 = (__m128i)__builtin_shufflevector ((__v16qi)__T2, \ + (__v16qi)__T2, \ + 4, 5, 6, 7, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \ + __m128i __T4 = _mm_##op (__T2, __T3); \ + __m128i __T5 = (__m128i)__builtin_shufflevector ((__v16qi)__T4, \ + (__v16qi)__T4, \ + 2, 3, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \ + __m128i __T6 = _mm_##op (__T4, __T5); \ + __m128i __T7 = (__m128i)__builtin_shufflevector ((__v16qi)__T6, \ + (__v16qi)__T6, \ + 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \ + __v16qi __T8 = (__v16qi)_mm_##op (__T6, __T7); \ + return __T8[0] + +extern __inline signed char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_max_epi8 (__m128i __V) +{ + _MM_REDUCE_OPERATOR_MAX_MIN_EP8 (max_epi8); +} + +extern __inline unsigned char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_max_epu8 (__m128i __V) +{ + _MM_REDUCE_OPERATOR_MAX_MIN_EP8 (max_epu8); +} + +extern __inline signed char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_min_epi8 (__m128i __V) +{ + _MM_REDUCE_OPERATOR_MAX_MIN_EP8 (min_epi8); +} + +extern __inline unsigned char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_reduce_min_epu8 (__m128i __V) +{ + _MM_REDUCE_OPERATOR_MAX_MIN_EP8 (min_epu8); +} + +#define _MM256_REDUCE_OPERATOR_BASIC_EPI8(op) \ + __v16qi __T1 = (__v16qi)_mm256_extracti128_si256 (__W, 0); \ + __v16qi __T2 = (__v16qi)_mm256_extracti128_si256 (__W, 1); \ + __v16qi __T3 = __T1 op __T2; \ + __v16qi __T4 = __builtin_shufflevector (__T3, __T3, \ + 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15); \ + __v16qi __T5 = __T3 op __T4; \ + __v16qi __T6 = __builtin_shufflevector (__T5, __T5, \ + 4, 5, 6, 7, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \ + __v16qi __T7 = __T5 op __T6; \ + __v16qi __T8 = __builtin_shufflevector (__T7, __T7, \ + 2, 3, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \ + __v16qi __T9 = __T7 op __T8; \ + __v16qi __T10 = __builtin_shufflevector (__T9, __T9, \ + 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \ + __v16qi __T11 = __T9 op __T10; \ + return __T11[0] + +extern __inline char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_reduce_add_epi8 (__m256i __W) +{ + 
_MM256_REDUCE_OPERATOR_BASIC_EPI8 (+); +} + +extern __inline char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_reduce_mul_epi8 (__m256i __W) +{ + _MM256_REDUCE_OPERATOR_BASIC_EPI8 (*); +} + +extern __inline char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_reduce_and_epi8 (__m256i __W) +{ + _MM256_REDUCE_OPERATOR_BASIC_EPI8 (&); +} + +extern __inline char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_reduce_or_epi8 (__m256i __W) +{ + _MM256_REDUCE_OPERATOR_BASIC_EPI8 (|); +} + +#define _MM256_REDUCE_OPERATOR_MAX_MIN_EP8(op) \ + __m128i __T1 = _mm256_extracti128_si256 (__V, 0); \ + __m128i __T2 = _mm256_extracti128_si256 (__V, 1); \ + __m128i __T3 = _mm_##op (__T1, __T2); \ + __m128i __T4 = (__m128i)__builtin_shufflevector ((__v16qi)__T3, \ + (__v16qi)__T3, \ + 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15); \ + __m128i __T5 = _mm_##op (__T3, __T4); \ + __m128i __T6 = (__m128i)__builtin_shufflevector ((__v16qi)__T5, \ + (__v16qi)__T5, \ + 4, 5, 6, 7, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \ + __m128i __T7 = _mm_##op (__T5, __T6); \ + __m128i __T8 = (__m128i)__builtin_shufflevector ((__v16qi)__T7, \ + (__v16qi)__T7, \ + 2, 3, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \ + __m128i __T9 = _mm_##op (__T7, __T8); \ + __m128i __T10 = (__m128i)__builtin_shufflevector ((__v16qi)__T9, \ + (__v16qi)__T9, \ + 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \ + __v16qi __T11 = (__v16qi)_mm_##op (__T9, __T10); \ + return __T11[0] + +extern __inline signed char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_reduce_max_epi8 (__m256i __V) +{ + _MM256_REDUCE_OPERATOR_MAX_MIN_EP8 (max_epi8); +} + +extern __inline unsigned char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_reduce_max_epu8 (__m256i __V) +{ + _MM256_REDUCE_OPERATOR_MAX_MIN_EP8 (max_epu8); +} + +extern __inline signed char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_reduce_min_epi8 (__m256i __V) +{ + _MM256_REDUCE_OPERATOR_MAX_MIN_EP8 (min_epi8); +} + +extern __inline unsigned char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_reduce_min_epu8 (__m256i __V) +{ + _MM256_REDUCE_OPERATOR_MAX_MIN_EP8 (min_epu8); +} + #ifdef __DISABLE_AVX2__ #undef __DISABLE_AVX2__ #pragma GCC pop_options diff --git a/gcc/config/i386/avx512vlbwintrin.h b/gcc/config/i386/avx512vlbwintrin.h index 9d2aba2..3654cf2 100644 --- a/gcc/config/i386/avx512vlbwintrin.h +++ b/gcc/config/i386/avx512vlbwintrin.h @@ -4730,6 +4730,262 @@ _mm256_mask_cmple_epi16_mask (__mmask16 __M, __m256i __X, __m256i __Y) (__mmask16) __M); } +extern __inline short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_reduce_add_epi16 (__mmask8 __M, __m128i __W) +{ + __W = _mm_maskz_mov_epi16 (__M, __W); + _MM_REDUCE_OPERATOR_BASIC_EPI16 (+); +} + +extern __inline short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_reduce_mul_epi16 (__mmask8 __M, __m128i __W) +{ + __W = _mm_mask_mov_epi16 (_mm_set1_epi16 (1), __M, __W); + _MM_REDUCE_OPERATOR_BASIC_EPI16 (*); +} + +extern __inline short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_reduce_and_epi16 (__mmask8 __M, __m128i __W) +{ + __W = _mm_mask_mov_epi16 (_mm_set1_epi16 (-1), __M, __W); + _MM_REDUCE_OPERATOR_BASIC_EPI16 (&); +} + +extern __inline short +__attribute__ ((__gnu_inline__, __always_inline__,
__artificial__)) +_mm_mask_reduce_or_epi16 (__mmask8 __M, __m128i __W) +{ + __W = _mm_maskz_mov_epi16 (__M, __W); + _MM_REDUCE_OPERATOR_BASIC_EPI16 (|); +} + +extern __inline short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_reduce_max_epi16 (__mmask8 __M, __m128i __V) +{ + __V = _mm_mask_mov_epi16 (_mm_set1_epi16 (-32767-1), __M, __V); + _MM_REDUCE_OPERATOR_MAX_MIN_EP16 (max_epi16); +} + +extern __inline unsigned short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_reduce_max_epu16 (__mmask8 __M, __m128i __V) +{ + __V = _mm_maskz_mov_epi16 (__M, __V); + _MM_REDUCE_OPERATOR_MAX_MIN_EP16 (max_epu16); +} + +extern __inline short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_reduce_min_epi16 (__mmask8 __M, __m128i __V) +{ + __V = _mm_mask_mov_epi16 (_mm_set1_epi16 (32767), __M, __V); + _MM_REDUCE_OPERATOR_MAX_MIN_EP16 (min_epi16); +} + +extern __inline unsigned short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_reduce_min_epu16 (__mmask8 __M, __m128i __V) +{ + __V = _mm_mask_mov_epi16 (_mm_set1_epi16 (-1), __M, __V); + _MM_REDUCE_OPERATOR_MAX_MIN_EP16 (min_epu16); +} + +extern __inline short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_reduce_add_epi16 (__mmask16 __M, __m256i __W) +{ + __W = _mm256_maskz_mov_epi16 (__M, __W); + _MM256_REDUCE_OPERATOR_BASIC_EPI16 (+); +} + +extern __inline short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_reduce_mul_epi16 (__mmask16 __M, __m256i __W) +{ + __W = _mm256_mask_mov_epi16 (_mm256_set1_epi16 (1), __M, __W); + _MM256_REDUCE_OPERATOR_BASIC_EPI16 (*); +} + +extern __inline short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_reduce_and_epi16 (__mmask16 __M, __m256i __W) +{ + __W = _mm256_mask_mov_epi16 (_mm256_set1_epi16 (-1), __M, __W); + _MM256_REDUCE_OPERATOR_BASIC_EPI16 (&); +} + +extern __inline short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_reduce_or_epi16 (__mmask16 __M, __m256i __W) +{ + __W = _mm256_maskz_mov_epi16 (__M, __W); + _MM256_REDUCE_OPERATOR_BASIC_EPI16 (|); +} + +extern __inline short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_reduce_max_epi16 (__mmask16 __M, __m256i __V) +{ + __V = _mm256_mask_mov_epi16 (_mm256_set1_epi16 (-32767-1), __M, __V); + _MM256_REDUCE_OPERATOR_MAX_MIN_EP16 (max_epi16); +} + +extern __inline unsigned short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_reduce_max_epu16 (__mmask16 __M, __m256i __V) +{ + __V = _mm256_maskz_mov_epi16 (__M, __V); + _MM256_REDUCE_OPERATOR_MAX_MIN_EP16 (max_epu16); +} + +extern __inline short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_reduce_min_epi16 (__mmask16 __M, __m256i __V) +{ + __V = _mm256_mask_mov_epi16 (_mm256_set1_epi16 (32767), __M, __V); + _MM256_REDUCE_OPERATOR_MAX_MIN_EP16 (min_epi16); +} + +extern __inline unsigned short +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_reduce_min_epu16 (__mmask16 __M, __m256i __V) +{ + __V = _mm256_mask_mov_epi16 (_mm256_set1_epi16 (-1), __M, __V); + _MM256_REDUCE_OPERATOR_MAX_MIN_EP16 (min_epu16); +} + +extern __inline char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_reduce_add_epi8 (__mmask16 __M, __m128i __W) +{ + __W = _mm_maskz_mov_epi8 (__M, __W); +
_MM_REDUCE_OPERATOR_BASIC_EPI8 (+); +} + +extern __inline char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_reduce_mul_epi8 (__mmask16 __M, __m128i __W) +{ + __W = _mm_mask_mov_epi8 (_mm_set1_epi8 (1), __M, __W); + _MM_REDUCE_OPERATOR_BASIC_EPI8 (*); +} + +extern __inline char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_reduce_and_epi8 (__mmask16 __M, __m128i __W) +{ + __W = _mm_mask_mov_epi8 (_mm_set1_epi8 (-1), __M, __W); + _MM_REDUCE_OPERATOR_BASIC_EPI8 (&); +} + +extern __inline char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_reduce_or_epi8 (__mmask16 __M, __m128i __W) +{ + __W = _mm_maskz_mov_epi8 (__M, __W); + _MM_REDUCE_OPERATOR_BASIC_EPI8 (|); +} + +extern __inline signed char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_reduce_max_epi8 (__mmask16 __M, __m128i __V) +{ + __V = _mm_mask_mov_epi8 (_mm_set1_epi8 (-127-1), __M, __V); + _MM_REDUCE_OPERATOR_MAX_MIN_EP8 (max_epi8); +} + +extern __inline unsigned char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_reduce_max_epu8 (__mmask16 __M, __m128i __V) +{ + __V = _mm_maskz_mov_epi8 (__M, __V); + _MM_REDUCE_OPERATOR_MAX_MIN_EP8 (max_epu8); +} + +extern __inline signed char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_reduce_min_epi8 (__mmask16 __M, __m128i __V) +{ + __V = _mm_mask_mov_epi8 (_mm_set1_epi8 (127), __M, __V); + _MM_REDUCE_OPERATOR_MAX_MIN_EP8 (min_epi8); +} + +extern __inline unsigned char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_reduce_min_epu8 (__mmask16 __M, __m128i __V) +{ + __V = _mm_mask_mov_epi8 (_mm_set1_epi8 (-1), __M, __V); + _MM_REDUCE_OPERATOR_MAX_MIN_EP8 (min_epu8); +} + +extern __inline char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_reduce_add_epi8 (__mmask32 __M, __m256i __W) +{ + __W = _mm256_maskz_mov_epi8 (__M, __W); + _MM256_REDUCE_OPERATOR_BASIC_EPI8 (+); +} + +extern __inline char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_reduce_mul_epi8 (__mmask32 __M, __m256i __W) +{ + __W = _mm256_mask_mov_epi8 (_mm256_set1_epi8 (1), __M, __W); + _MM256_REDUCE_OPERATOR_BASIC_EPI8 (*); +} + +extern __inline char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_reduce_and_epi8 (__mmask32 __M, __m256i __W) +{ + __W = _mm256_mask_mov_epi8 (_mm256_set1_epi8 (-1), __M, __W); + _MM256_REDUCE_OPERATOR_BASIC_EPI8 (&); +} + +extern __inline char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_reduce_or_epi8 (__mmask32 __M, __m256i __W) +{ + __W = _mm256_maskz_mov_epi8 (__M, __W); + _MM256_REDUCE_OPERATOR_BASIC_EPI8 (|); +} + +extern __inline signed char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_reduce_max_epi8 (__mmask32 __M, __m256i __V) +{ + __V = _mm256_mask_mov_epi8 (_mm256_set1_epi8 (-127-1), __M, __V); + _MM256_REDUCE_OPERATOR_MAX_MIN_EP8 (max_epi8); +} + +extern __inline unsigned char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_reduce_max_epu8 (__mmask32 __M, __m256i __V) +{ + __V = _mm256_maskz_mov_epi8 (__M, __V); + _MM256_REDUCE_OPERATOR_MAX_MIN_EP8 (max_epu8); +} + +extern __inline signed char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_reduce_min_epi8 (__mmask32 __M, __m256i __V) +{ + __V = _mm256_mask_mov_epi8 
(_mm256_set1_epi8 (127), __M, __V); + _MM256_REDUCE_OPERATOR_MAX_MIN_EP8 (min_epi8); +} + +extern __inline unsigned char +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_reduce_min_epu8 (__mmask32 __M, __m256i __V) +{ + __V = _mm256_mask_mov_epi8 (_mm256_set1_epi8 (-1), __M, __V); + _MM256_REDUCE_OPERATOR_MAX_MIN_EP8 (min_epu8); +} + #ifdef __DISABLE_AVX512VLBW__ #undef __DISABLE_AVX512VLBW__ #pragma GCC pop_options diff --git a/gcc/testsuite/gcc.target/i386/avx512vlbw-reduce-op-1.c b/gcc/testsuite/gcc.target/i386/avx512vlbw-reduce-op-1.c new file mode 100644 index 0000000..146ef6b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vlbw-reduce-op-1.c @@ -0,0 +1,206 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512bw -mavx512vl" } */ +/* { dg-require-effective-target avx512bw } */ +/* { dg-require-effective-target avx512vl } */ + +#define AVX512BW +#define AVX512VL + +#include "avx512f-helper.h" + +#define FUNC_TEST_REDUCE_BASIC(opname) \ + FUNC_TEST_REDUCE_OP (, short, epi16, opname, __m128i, __mmask8) \ + FUNC_TEST_REDUCE_OP (256, short, epi16, opname, __m256i, __mmask16) \ + FUNC_TEST_REDUCE_OP (, char, epi8, opname, __m128i, __mmask16) \ + FUNC_TEST_REDUCE_OP (256, char, epi8, opname, __m256i, __mmask32) + +#define FUNC_TEST_REDUCE_MAX_MIN(opname) \ + FUNC_TEST_REDUCE_OP (, short, epi16, opname, __m128i, __mmask8) \ + FUNC_TEST_REDUCE_OP (256, short, epi16, opname, __m256i, __mmask16) \ + FUNC_TEST_REDUCE_OP (, char, epi8, opname, __m128i, __mmask16) \ + FUNC_TEST_REDUCE_OP (256, char, epi8, opname, __m256i, __mmask32) \ + FUNC_TEST_REDUCE_OP (, unsigned short, epu16, opname, __m128i, __mmask8) \ + FUNC_TEST_REDUCE_OP (256, unsigned short, epu16, \ + opname, __m256i, __mmask16) \ + FUNC_TEST_REDUCE_OP (, unsigned char, epu8, opname, __m128i, __mmask16) \ + FUNC_TEST_REDUCE_OP (256, unsigned char, epu8, opname, __m256i, __mmask32) + +#define FUNC_TEST_REDUCE_OP(len, rtype, type, opname, argtype, masktype) \ + __attribute__((noinline, noclone)) rtype \ + test_##len##_reduce_##opname##_##type (argtype a) \ + { \ + return _mm##len##_reduce_##opname##_##type (a); \ + } \ + __attribute__((noinline, noclone)) rtype \ + test_##len##_mask_reduce_##opname##_##type (masktype u, argtype a) \ + { \ + return _mm##len##_mask_reduce_##opname##_##type (u, a); \ + } + +FUNC_TEST_REDUCE_BASIC (add) +FUNC_TEST_REDUCE_BASIC (mul) +FUNC_TEST_REDUCE_BASIC (and) +FUNC_TEST_REDUCE_BASIC (or) +FUNC_TEST_REDUCE_MAX_MIN (max) +FUNC_TEST_REDUCE_MAX_MIN (min) + +#define TESTOP(len, opname, op, type, suffix, neutral) \ + do { \ + type r1 = _mm##len##_reduce_##opname##_##suffix (v.x); \ + type r2 = test_##len##_reduce_##opname##_##suffix (v.x); \ + type r3 = neutral; \ + if (r1 != r2) \ + __builtin_abort (); \ + for (int i = 0; i < SIZE; i++) \ + r3 = r3 op v.a[i]; \ + if (r1 != r3) \ + __builtin_abort (); \ + type r4 = _mm##len##_mask_reduce_##opname##_##suffix (MASK_VALUE, v.x); \ + type r5 = test_##len##_mask_reduce_##opname##_##suffix (MASK_VALUE, v.x); \ + if (r4 != r5) \ + __builtin_abort (); \ + r3 = neutral; \ + for (int i = 0; i < SIZE; i++) \ + if (MASK_VALUE & (1 << i)) \ + r3 = r3 op v.a[i]; \ + if (r4 != r3) \ + __builtin_abort (); \ + type r6 = _mm##len##_mask_reduce_##opname##_##suffix (0, v.x); \ + type r7 = test_##len##_mask_reduce_##opname##_##suffix (0, v.x); \ + if (r6 != r7 || r6 != neutral) \ + __builtin_abort (); \ + } while (0) + +#undef AVX512F_LEN +#define AVX512F_LEN 128 + +#undef SIZE +#define SIZE (AVX512F_LEN / 8) +#include "avx512f-mask-type.h" 
+ +#define TEST_128_EPI8(c1, c2, c3, c4, c5, c6, c7, c8, \ + c9, c10, c11, c12, c13, c14, c15, c16) \ + do { \ + UNION_TYPE (AVX512F_LEN, i_b) v; \ + v.x = _mm_set_epi8 (c1, c2, c3, c4, c5, c6, c7, c8, \ + c9, c10, c11, c12, c13, c14, c15, c16); \ + TESTOP (, add, +, char, epi8, 0); \ + TESTOP (, mul, *, char, epi8, 1); \ + TESTOP (, and, &, char, epi8, (char) ~0); \ + TESTOP (, or, |, char, epi8, 0); \ + TESTOP (, min, < v.a[i] ? r3 :, char, epi8, __SCHAR_MAX__); \ + TESTOP (, max, > v.a[i] ? r3 :, char, epi8, -__SCHAR_MAX__ - 1); \ + TESTOP (, min, < (unsigned char) v.a[i] ? r3 :, unsigned char, epu8, (unsigned char) ~0U); \ + TESTOP (, max, > (unsigned char) v.a[i] ? r3 :, unsigned char, epu8, 0); \ + } while (0) + +static void +test_128_epi8 (void) +{ + TEST_128_EPI8 (1, 2, 3, 4, 5, 6, 6, 5, 4, 3, 2, 1, 7, 6, 5, 4); + TEST_128_EPI8 (-1, 15, -1, 7, -1, 7, -1, -1, 6, 6, -1, -1, -1, -1, 7, 6); +} + +#undef SIZE +#define SIZE (AVX512F_LEN / 16) +#include "avx512f-mask-type.h" + +#define TEST_128_EPI16(c1, c2, c3, c4, c5, c6, c7, c8) \ + do { \ + UNION_TYPE (AVX512F_LEN, i_w) v; \ + v.x = _mm_set_epi16 (c1, c2, c3, c4, c5, c6, c7, c8); \ + TESTOP (, add, +, short, epi16, 0); \ + TESTOP (, mul, *, short, epi16, 1); \ + TESTOP (, and, &, short, epi16, (short) ~0); \ + TESTOP (, or, |, short, epi16, 0); \ + TESTOP (, min, < v.a[i] ? r3 :, short, epi16, __SHRT_MAX__); \ + TESTOP (, max, > v.a[i] ? r3 :, short, epi16, -__SHRT_MAX__ - 1); \ + TESTOP (, min, < (unsigned short) v.a[i] ? r3 :, unsigned short, epu16,(unsigned short) ~0U); \ + TESTOP (, max, > (unsigned short) v.a[i] ? r3 :, unsigned short, epu16, 0); \ + } while (0) + +static void +test_128_epi16 (void) +{ + TEST_128_EPI16 (1, 2, 3, 4, 5, 6, 6, 5); + TEST_128_EPI16 (-1, 15, -1, 7, -1, 7, -1, -1); +} + +void +test_128 (void) +{ + test_128_epi8 (); + test_128_epi16 (); +} + +#undef AVX512F_LEN +#define AVX512F_LEN 256 + +#undef SIZE +#define SIZE (AVX512F_LEN / 8) +#include "avx512f-mask-type.h" + +#define TEST_256_EPI8(c1, c2, c3, c4, c5, c6, c7, c8, \ + c9, c10, c11, c12, c13, c14, c15, c16, \ + c17, c18, c19, c20, c21, c22, c23, c24, \ + c25, c26, c27, c28, c29, c30, c31, c32) \ + do { \ + UNION_TYPE (AVX512F_LEN, i_b) v; \ + v.x = _mm256_set_epi8 (c1, c2, c3, c4, c5, c6, c7, c8, \ + c9, c10, c11, c12, c13, c14, c15, c16, \ + c17, c18, c19, c20, c21, c22, c23, c24, \ + c25, c26, c27, c28, c29, c30, c31, c32); \ + TESTOP (256, add, +, char, epi8, 0); \ + TESTOP (256, mul, *, char, epi8, 1); \ + TESTOP (256, and, &, char, epi8, (char) ~0); \ + TESTOP (256, or, |, char, epi8, 0); \ + TESTOP (256, min, < v.a[i] ? r3 :, char, epi8, __SCHAR_MAX__); \ + TESTOP (256, max, > v.a[i] ? r3 :, char, epi8, -__SCHAR_MAX__ - 1); \ + TESTOP (256, min, < (unsigned char) v.a[i] ? r3 :, \ + unsigned char, epu8, (unsigned char)~0U); \ + TESTOP (256, max, > (unsigned char) v.a[i] ? 
r3 :, \ + unsigned char, epu8, 0); \ + } while (0) + +static void +test_256_epi8 (void) +{ + TEST_256_EPI8 (1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 12, 11, 10, 9, 9, 7, 6, 5, 4, 3, 2, 1, 7, 6, 5, 4, 7, 10, 11, 12); + TEST_256_EPI8 (-1, 15, -1, 7, -1, 7, -1, -1, 6, 6, -1, -1, -1, -1, 7, 6, -1, 30, -1, 28, -1, 26, -1, 24, -1, 22, -1, -1, -1, -1, 17, 16); +} + +#undef SIZE +#define SIZE (AVX512F_LEN / 16) +#include "avx512f-mask-type.h" + +#define TEST_256_EPI16(c1, c2, c3, c4, c5, c6, c7, c8, \ + c9, c10, c11, c12, c13, c14, c15, c16) \ + do { \ + UNION_TYPE (AVX512F_LEN, i_w) v; \ + v.x = _mm256_set_epi16 (c1, c2, c3, c4, c5, c6, c7, c8, \ + c9, c10, c11, c12, c13, c14, c15, c16); \ + TESTOP (256, add, +, short, epi16, 0); \ + TESTOP (256, mul, *, short, epi16, 1); \ + TESTOP (256, and, &, short, epi16, (short) ~0); \ + TESTOP (256, or, |, short, epi16, 0); \ + TESTOP (256, min, < v.a[i] ? r3 :, short, epi16, __SHRT_MAX__); \ + TESTOP (256, max, > v.a[i] ? r3 :, short, epi16, -__SHRT_MAX__ - 1);\ + TESTOP (256, min, < (unsigned short) v.a[i] ? r3 :, \ + unsigned short, epu16, (unsigned short) ~0U); \ + TESTOP (256, max, > (unsigned short) v.a[i] ? r3 :, \ + unsigned short, epu16, 0); \ + } while (0) + +static void +test_256_epi16 (void) +{ + TEST_256_EPI16 (9, 7, 6, 5, 4, 3, 2, 1, 7, 6, 5, 4, 7, 10, 11, 12); + TEST_256_EPI16 (-1, 15, -1, 7, -1, 7, -1, -1, 6, 6, -1, -1, -1, -1, 7, 6); +} + +void +test_256 (void) +{ + test_256_epi8 (); + test_256_epi16 (); +} -- cgit v1.1 From 24a8acc1662c37003a7b54814bf840019fec2190 Mon Sep 17 00:00:00 2001 From: Haochen Jiang Date: Fri, 10 Mar 2023 13:40:09 +0800 Subject: i386: Share AES xmm intrin with VAES Currently in GCC, the 128 bit intrin for instruction vaes{enc,dec}{last,} is under the AES ISA. Because there is no dependency between the ISA sets AES and VAES, the 128 bit intrin is not available when we use the compiler flags -mvaes -mavx512vl, and there is no other way to use that intrin; but it should be available, according to the Intel SDM. Although VAES aims to be a VEX/EVEX promotion of AES, it covers only part of it. Therefore, we share the AES xmm intrin with VAES. Also, since -mvaes indicates that we could use VEX encoding for ymm, we should imply AVX for VAES. gcc/ChangeLog: * common/config/i386/i386-common.cc (OPTION_MASK_ISA2_AVX_UNSET): Add OPTION_MASK_ISA2_VAES_UNSET. (ix86_handle_option): Set AVX flag for VAES. * config/i386/i386-builtins.cc (ix86_init_mmx_sse_builtins): Add OPTION_MASK_ISA2_VAES_UNSET. (def_builtin): Share builtin between AES and VAES. * config/i386/i386-expand.cc (ix86_check_builtin_isa_match): Ditto. * config/i386/i386.md (aes): New isa attribute. * config/i386/sse.md (aesenc): Add pattern for VAES with xmm. (aesenclast): Ditto. (aesdec): Ditto. (aesdeclast): Ditto. * config/i386/vaesintrin.h: Remove redundant avx target push. * config/i386/wmmintrin.h (_mm_aesdec_si128): Change to macro. (_mm_aesdeclast_si128): Ditto. (_mm_aesenc_si128): Ditto. (_mm_aesenclast_si128): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512fvl-vaes-1.c: Add VAES xmm test. * gcc.target/i386/pr109117-1.c: Modify error message.
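To make the effect concrete, here is a minimal sketch (not part of the patch; the file and function names are hypothetical): before this commit the translation unit below required -maes, because the 128 bit intrin was gated on the AES ISA alone; with the patch it also compiles with just -mvaes -mavx512vl, emitting the EVEX-encoded vaesenc on xmm registers.

/* aes128-sketch.c -- compile with: gcc -O2 -mvaes -mavx512vl -c aes128-sketch.c  */
#include <immintrin.h>

__m128i
encrypt_one_round (__m128i state, __m128i round_key)
{
  /* One AES encryption round; after this commit the 128 bit intrin is
     reachable from VAES without -maes.  */
  return _mm_aesenc_si128 (state, round_key);
}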
--- gcc/common/config/i386/i386-common.cc | 5 +- gcc/config/i386/i386-builtins.cc | 21 +++++---- gcc/config/i386/i386-expand.cc | 1 + gcc/config/i386/i386.md | 3 +- gcc/config/i386/sse.md | 60 +++++++++++++----------- gcc/config/i386/vaesintrin.h | 4 +- gcc/config/i386/wmmintrin.h | 29 ++++-------- gcc/testsuite/gcc.target/i386/avx512fvl-vaes-1.c | 11 +++++ gcc/testsuite/gcc.target/i386/pr109117-1.c | 4 +- 9 files changed, 75 insertions(+), 63 deletions(-) (limited to 'gcc') diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc index c7954da..bf126f1 100644 --- a/gcc/common/config/i386/i386-common.cc +++ b/gcc/common/config/i386/i386-common.cc @@ -348,7 +348,8 @@ along with GCC; see the file COPYING3. If not see | OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET) #define OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET \ OPTION_MASK_ISA2_SSE_UNSET -#define OPTION_MASK_ISA2_AVX_UNSET OPTION_MASK_ISA2_AVX2_UNSET +#define OPTION_MASK_ISA2_AVX_UNSET \ + (OPTION_MASK_ISA2_AVX2_UNSET | OPTION_MASK_ISA2_VAES_UNSET) #define OPTION_MASK_ISA2_SSE4_2_UNSET OPTION_MASK_ISA2_AVX_UNSET #define OPTION_MASK_ISA2_SSE4_1_UNSET OPTION_MASK_ISA2_SSE4_2_UNSET #define OPTION_MASK_ISA2_SSE4_UNSET OPTION_MASK_ISA2_SSE4_1_UNSET @@ -685,6 +686,8 @@ ix86_handle_option (struct gcc_options *opts, { opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_VAES_SET; opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_VAES_SET; + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET; } else { diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc index fc0c82b..28f404d 100644 --- a/gcc/config/i386/i386-builtins.cc +++ b/gcc/config/i386/i386-builtins.cc @@ -279,14 +279,15 @@ def_builtin (HOST_WIDE_INT mask, HOST_WIDE_INT mask2, if (((mask2 == 0 || (mask2 & ix86_isa_flags2) != 0) && (mask == 0 || (mask & ix86_isa_flags) != 0)) || ((mask & OPTION_MASK_ISA_MMX) != 0 && TARGET_MMX_WITH_SSE) - /* "Unified" builtin used by either AVXVNNI/AVXIFMA intrinsics - or AVX512VNNIVL/AVX512IFMAVL non-mask intrinsics should be - defined whenever avxvnni/avxifma or avx512vnni/avxifma && - avx512vl exist. */ + /* "Unified" builtin used by either AVXVNNI/AVXIFMA/AES intrinsics + or AVX512VNNIVL/AVX512IFMAVL/VAESVL non-mask intrinsics should be + defined whenever avxvnni/avxifma/aes or avx512vnni/avx512ifma/vaes + && avx512vl exist. 
*/ || (mask2 == OPTION_MASK_ISA2_AVXVNNI) || (mask2 == OPTION_MASK_ISA2_AVXIFMA) || (mask2 == (OPTION_MASK_ISA2_AVXNECONVERT | OPTION_MASK_ISA2_AVX512BF16)) + || ((mask2 & OPTION_MASK_ISA2_VAES) != 0) || (lang_hooks.builtin_function == lang_hooks.builtin_function_ext_scope)) { @@ -661,16 +662,20 @@ ix86_init_mmx_sse_builtins (void) VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT); /* AES */ - def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, + def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, + OPTION_MASK_ISA2_VAES, "__builtin_ia32_aesenc128", V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128); - def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, + def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, + OPTION_MASK_ISA2_VAES, "__builtin_ia32_aesenclast128", V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128); - def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, + def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, + OPTION_MASK_ISA2_VAES, "__builtin_ia32_aesdec128", V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128); - def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, + def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, + OPTION_MASK_ISA2_VAES, "__builtin_ia32_aesdeclast128", V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128); def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0, diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index f692ddc..634fe61 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -12624,6 +12624,7 @@ ix86_check_builtin_isa_match (unsigned int fcode, OPTION_MASK_ISA2_AVXIFMA); SHARE_BUILTIN (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, 0, OPTION_MASK_ISA2_AVXNECONVERT); + SHARE_BUILTIN (OPTION_MASK_ISA_AES, 0, 0, OPTION_MASK_ISA2_VAES); isa = tmp_isa; isa2 = tmp_isa2; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 8afa400..f8698ea 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -837,7 +837,7 @@ ;; Used to control the "enabled" attribute on a per-instruction basis. 
(define_attr "isa" "base,x64,nox64,x64_sse2,x64_sse4,x64_sse4_noavx, - x64_avx,x64_avx512bw,x64_avx512dq, + x64_avx,x64_avx512bw,x64_avx512dq,aes, sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx, avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f, avx512bw,noavx512bw,avx512dq,noavx512dq,fma_or_avx512vl, @@ -864,6 +864,7 @@ (symbol_ref "TARGET_64BIT && TARGET_AVX512BW") (eq_attr "isa" "x64_avx512dq") (symbol_ref "TARGET_64BIT && TARGET_AVX512DQ") + (eq_attr "isa" "aes") (symbol_ref "TARGET_AES") (eq_attr "isa" "sse_noavx") (symbol_ref "TARGET_SSE && !TARGET_AVX") (eq_attr "isa" "sse2") (symbol_ref "TARGET_SSE2") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 5594ea6..f14a9c2 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -25108,67 +25108,71 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define_insn "aesenc" - [(set (match_operand:V2DI 0 "register_operand" "=x,x") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") - (match_operand:V2DI 2 "vector_operand" "xBm,xm")] + [(set (match_operand:V2DI 0 "register_operand" "=x,x,v") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v") + (match_operand:V2DI 2 "vector_operand" "xBm,xm,vm")] UNSPEC_AESENC))] - "TARGET_AES" + "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)" "@ aesenc\t{%2, %0|%0, %2} + vaesenc\t{%2, %1, %0|%0, %1, %2} vaesenc\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,aes,avx512vl") (set_attr "type" "sselog1") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") - (set_attr "btver2_decode" "double,double") + (set_attr "prefix" "orig,vex,evex") + (set_attr "btver2_decode" "double,double,double") (set_attr "mode" "TI")]) (define_insn "aesenclast" - [(set (match_operand:V2DI 0 "register_operand" "=x,x") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") - (match_operand:V2DI 2 "vector_operand" "xBm,xm")] + [(set (match_operand:V2DI 0 "register_operand" "=x,x,v") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v") + (match_operand:V2DI 2 "vector_operand" "xBm,xm,vm")] UNSPEC_AESENCLAST))] - "TARGET_AES" + "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)" "@ aesenclast\t{%2, %0|%0, %2} + vaesenclast\t{%2, %1, %0|%0, %1, %2} vaesenclast\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,aes,avx512vl") (set_attr "type" "sselog1") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") - (set_attr "btver2_decode" "double,double") + (set_attr "prefix" "orig,vex,evex") + (set_attr "btver2_decode" "double,double,double") (set_attr "mode" "TI")]) (define_insn "aesdec" - [(set (match_operand:V2DI 0 "register_operand" "=x,x") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") - (match_operand:V2DI 2 "vector_operand" "xBm,xm")] + [(set (match_operand:V2DI 0 "register_operand" "=x,x,v") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v") + (match_operand:V2DI 2 "vector_operand" "xBm,xm,vm")] UNSPEC_AESDEC))] - "TARGET_AES" + "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)" "@ aesdec\t{%2, %0|%0, %2} + vaesdec\t{%2, %1, %0|%0, %1, %2} vaesdec\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,aes,avx512vl") (set_attr "type" "sselog1") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") - (set_attr "btver2_decode" "double,double") + (set_attr "prefix" "orig,vex,evex") + (set_attr "btver2_decode" "double,double,double") (set_attr "mode" "TI")]) (define_insn 
"aesdeclast" - [(set (match_operand:V2DI 0 "register_operand" "=x,x") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") - (match_operand:V2DI 2 "vector_operand" "xBm,xm")] + [(set (match_operand:V2DI 0 "register_operand" "=x,x,v") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v") + (match_operand:V2DI 2 "vector_operand" "xBm,xm,vm")] UNSPEC_AESDECLAST))] - "TARGET_AES" + "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)" "@ aesdeclast\t{%2, %0|%0, %2} + vaesdeclast\t{%2, %1, %0|%0, %1, %2} vaesdeclast\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx") + [(set_attr "isa" "noavx,aes,avx512vl") (set_attr "type" "sselog1") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex") - (set_attr "btver2_decode" "double,double") + (set_attr "prefix" "orig,vex,evex") + (set_attr "btver2_decode" "double,double,double") (set_attr "mode" "TI")]) (define_insn "aesimc" diff --git a/gcc/config/i386/vaesintrin.h b/gcc/config/i386/vaesintrin.h index 0f1cffe..58fc19c 100644 --- a/gcc/config/i386/vaesintrin.h +++ b/gcc/config/i386/vaesintrin.h @@ -24,9 +24,9 @@ #ifndef __VAESINTRIN_H_INCLUDED #define __VAESINTRIN_H_INCLUDED -#if !defined(__VAES__) || !defined(__AVX__) +#if !defined(__VAES__) #pragma GCC push_options -#pragma GCC target("vaes,avx") +#pragma GCC target("vaes") #define __DISABLE_VAES__ #endif /* __VAES__ */ diff --git a/gcc/config/i386/wmmintrin.h b/gcc/config/i386/wmmintrin.h index ae15cea..da314db 100644 --- a/gcc/config/i386/wmmintrin.h +++ b/gcc/config/i386/wmmintrin.h @@ -40,36 +40,23 @@ /* Performs 1 round of AES decryption of the first m128i using the second m128i as a round key. */ -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_aesdec_si128 (__m128i __X, __m128i __Y) -{ - return (__m128i) __builtin_ia32_aesdec128 ((__v2di)__X, (__v2di)__Y); -} +#define _mm_aesdec_si128(X, Y) \ + (__m128i) __builtin_ia32_aesdec128 ((__v2di) (X), (__v2di) (Y)) /* Performs the last round of AES decryption of the first m128i using the second m128i as a round key. */ -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_aesdeclast_si128 (__m128i __X, __m128i __Y) -{ - return (__m128i) __builtin_ia32_aesdeclast128 ((__v2di)__X, - (__v2di)__Y); -} +#define _mm_aesdeclast_si128(X, Y) \ + (__m128i) __builtin_ia32_aesdeclast128 ((__v2di) (X), (__v2di) (Y)) /* Performs 1 round of AES encryption of the first m128i using the second m128i as a round key. */ -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_aesenc_si128 (__m128i __X, __m128i __Y) -{ - return (__m128i) __builtin_ia32_aesenc128 ((__v2di)__X, (__v2di)__Y); -} +#define _mm_aesenc_si128(X, Y) \ + (__m128i) __builtin_ia32_aesenc128 ((__v2di) (X), (__v2di) (Y)) /* Performs the last round of AES encryption of the first m128i using the second m128i as a round key. */ -extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_aesenclast_si128 (__m128i __X, __m128i __Y) -{ - return (__m128i) __builtin_ia32_aesenclast128 ((__v2di)__X, (__v2di)__Y); -} +#define _mm_aesenclast_si128(X, Y) \ + (__m128i) __builtin_ia32_aesenclast128 ((__v2di) (X), (__v2di) (Y)) /* Performs the InverseMixColumn operation on the source m128i and stores the result into m128i destination. 
*/ diff --git a/gcc/testsuite/gcc.target/i386/avx512fvl-vaes-1.c b/gcc/testsuite/gcc.target/i386/avx512fvl-vaes-1.c index c65b570..f35742e 100644 --- a/gcc/testsuite/gcc.target/i386/avx512fvl-vaes-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512fvl-vaes-1.c @@ -10,10 +10,16 @@ /* { dg-final { scan-assembler-times "vaesenc\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\{\n\]*%ymm\[0-9\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vaesenclast\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\{\n\]*%ymm\[0-9\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vaesdec\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vaesdeclast\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vaesenc\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vaesenclast\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ + #include <immintrin.h> volatile __m512i x,y; volatile __m256i x256, y256; +volatile __m128i x128, y128; void extern avx512f_test (void) @@ -27,4 +33,9 @@ avx512f_test (void) x256 = _mm256_aesdeclast_epi128 (x256, y256); x256 = _mm256_aesenc_epi128 (x256, y256); x256 = _mm256_aesenclast_epi128 (x256, y256); + + x128 = _mm_aesdec_si128 (x128, y128); + x128 = _mm_aesdeclast_si128 (x128, y128); + x128 = _mm_aesenc_si128 (x128, y128); + x128 = _mm_aesenclast_si128 (x128, y128); } diff --git a/gcc/testsuite/gcc.target/i386/pr109117-1.c b/gcc/testsuite/gcc.target/i386/pr109117-1.c index 87a5c0e..1c4da99 100644 --- a/gcc/testsuite/gcc.target/i386/pr109117-1.c +++ b/gcc/testsuite/gcc.target/i386/pr109117-1.c @@ -10,5 +10,5 @@ volatile __m128i res; void foo (void) { - res = __builtin_ia32_vaesdec_v16qi (x, y); /* { dg-warning "implicit declaration of function" } */ -} /* { dg-error "incompatible types when assigning to type" "" { target *-*-* } .-1 } */ + res = __builtin_ia32_vaesdec_v16qi (x, y); /* { dg-error "incompatible types when assigning to type" } */ +} -- cgit v1.1 From 57e7229a29ca0e9929b61051e4f5857f0b41b6c7 Mon Sep 17 00:00:00 2001 From: Jiufu Guo Date: Tue, 18 Apr 2023 15:56:53 +0800 Subject: PR testsuite/106879 FAIL: gcc.dg/vect/bb-slp-layout-19.c on powerpc64 On P7, the option -mno-allow-movmisalign is added during testing, which prevents SLP from happening on this case. Like PR65484 and PR87306, this patch uses vect_hw_misalign to guard the check on powerpc targets. gcc/testsuite/ChangeLog: PR testsuite/106879 * gcc.dg/vect/bb-slp-layout-19.c: Modify to guard the check with vect_hw_misalign on POWERs. --- gcc/testsuite/gcc.dg/vect/bb-slp-layout-19.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-layout-19.c b/gcc/testsuite/gcc.dg/vect/bb-slp-layout-19.c index f075a83..847a07b 100644 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-layout-19.c +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-layout-19.c @@ -31,4 +31,9 @@ void f() e[3] = b3; } -/* { dg-final { scan-tree-dump-times "add new stmt: \[^\\n\\r\]* = VEC_PERM_EXPR" 3 "slp1" { target { vect_int_mult && vect_perm } } } } */ +/* On older powerpc hardware (POWER7 and earlier), the default flag + -mno-allow-movmisalign prevents vectorization. On POWER8 and later, + when vect_hw_misalign is true, vectorization occurs. For other + targets, !
vect_no_align is a sufficient test. */ + +/* { dg-final { scan-tree-dump-times "add new stmt: \[^\\n\\r\]* = VEC_PERM_EXPR" 3 "slp1" { target { { vect_int_mult && vect_perm } && { { ! powerpc*-*-* } || { vect_hw_misalign } } } } } } */ -- cgit v1.1 From bd4a1a547242a924663712ac7a13799433cdf476 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 20 Apr 2023 09:43:04 +0200 Subject: testsuite: Fix up g++.dg/ext/int128-8.C testcase [PR109560] The testcase needs to be restricted to int128 effective targets; it expectedly fails on i386 and other 32-bit targets. 2023-04-20 Jakub Jelinek PR c++/108099 PR testsuite/109560 * g++.dg/ext/int128-8.C: Require int128 effective target. --- gcc/testsuite/g++.dg/ext/int128-8.C | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/g++.dg/ext/int128-8.C b/gcc/testsuite/g++.dg/ext/int128-8.C index 14bbc49..7e909d5 100644 --- a/gcc/testsuite/g++.dg/ext/int128-8.C +++ b/gcc/testsuite/g++.dg/ext/int128-8.C @@ -1,5 +1,5 @@ // PR c++/108099 -// { dg-do compile { target c++11 } } +// { dg-do compile { target { c++11 && int128 } } } // { dg-options "" } using u128 = unsigned __int128_t; -- cgit v1.1 From 974326fd5199a8ae8482f2a521b1edd449a2fa9e Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Mon, 20 Feb 2023 15:02:43 +0100 Subject: Remove duplicate DFS walks from DF init The following removes unused CFG order computes from rest_of_handle_df_initialize. The CFG orders are computed from df_analyze (). This also removes code duplication that would have to be kept in sync. * df-core.cc (rest_of_handle_df_initialize): Remove computation of df->postorder, df->postorder_inverted and df->n_blocks. --- gcc/df-core.cc | 5 ----- 1 file changed, 5 deletions(-) (limited to 'gcc') diff --git a/gcc/df-core.cc b/gcc/df-core.cc index 3286ffd..de5cbd0 100644 --- a/gcc/df-core.cc +++ b/gcc/df-core.cc @@ -701,11 +701,6 @@ rest_of_handle_df_initialize (void) if (optimize > 1) df_live_add_problem (); - df->postorder = XNEWVEC (int, last_basic_block_for_fn (cfun)); - df->n_blocks = post_order_compute (df->postorder, true, true); - inverted_post_order_compute (&df->postorder_inverted); - gcc_assert ((unsigned) df->n_blocks == df->postorder_inverted.length ()); - df->hard_regs_live_count = XCNEWVEC (unsigned int, FIRST_PSEUDO_REGISTER); df_hard_reg_init (); -- cgit v1.1 From 705b0d2b62318b3935214f08a1cf023b1117acb8 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 20 Apr 2023 11:55:16 +0200 Subject: tree-vect-patterns: Pattern recognize ctz or ffs using clz, popcount or ctz [PR109011] The following patch allows vectorizing __builtin_ffs*/.FFS even if we just have vector .CTZ support, or __builtin_ffs*/.FFS/__builtin_ctz*/.CTZ if we just have vector .CLZ or .POPCOUNT support. It uses various expansions from the Hacker's Delight book as well as GCC's expansion, in particular:
.CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
.CTZ (X) = .POPCOUNT ((X - 1) & ~X)
.CTZ (X) = (PREC - 1) - .CLZ (X & -X)
.FFS (X) = PREC - .CLZ (X & -X)
.CTZ (X) = PREC - .POPCOUNT (X | -X)
.FFS (X) = (PREC + 1) - .POPCOUNT (X | -X)
.FFS (X) = .CTZ (X) + 1
where the first one can only be used if both CTZ and CLZ have a value defined at zero (kind 2) and both have the value PREC there. If, for the other forms, the original has a value defined at zero and the replacement doesn't, or doesn't have a matching value for that case, a COND_EXPR is added for that afterwards. The patch also modifies vect_recog_popcount_clz_ctz_ffs_pattern such that the two can work together.
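As a concrete check of these expansions, here is a scalar sketch (not part of the patch; the helper names are hypothetical and PREC is 32 for unsigned int) that mirrors the .POPCOUNT forms the pattern emits, including the COND_EXPR-style guard for the value at zero:

/* identities-sketch.c -- verify the .CTZ/.FFS expansions via popcount.  */
#include <assert.h>

static int
ctz_via_popcount (unsigned int x)
{
  /* .CTZ (X) = .POPCOUNT ((X - 1) & ~X); the identity is also defined
     at X == 0, where it yields PREC (32).  */
  return __builtin_popcount ((x - 1) & ~x);
}

static int
ffs_via_popcount (unsigned int x)
{
  /* .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X); at X == 0 the identity
     yields PREC + 1, so a select (the pattern's COND_EXPR) restores the
     defined .FFS (0) == 0.  */
  int v = (32 + 1) - __builtin_popcount (x | -x);
  return x ? v : 0;
}

int
main (void)
{
  for (unsigned int x = 1; x < (1U << 20); x++)
    {
      assert (ctz_via_popcount (x) == __builtin_ctz (x));
      assert (ffs_via_popcount (x) == __builtin_ffs (x));
    }
  assert (ctz_via_popcount (0) == 32);
  assert (ffs_via_popcount (0) == 0);
  return 0;
}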
2023-04-20 Jakub Jelinek PR tree-optimization/109011 * tree-vect-patterns.cc (vect_recog_ctz_ffs_pattern): New function. (vect_recog_popcount_clz_ctz_ffs_pattern): Move vect_pattern_detected call later. Don't punt for IFN_CTZ or IFN_FFS if it doesn't have direct optab support, but has instead IFN_CLZ, IFN_POPCOUNT or for IFN_FFS IFN_CTZ support, use vect_recog_ctz_ffs_pattern for that case. (vect_vect_recog_func_ptrs): Add ctz_ffs entry. * gcc.dg/vect/pr109011-1.c: Remove -mpower9-vector from dg-additional-options. (baz, qux): Remove functions and corresponding dg-final. * gcc.dg/vect/pr109011-2.c: New test. * gcc.dg/vect/pr109011-3.c: New test. * gcc.dg/vect/pr109011-4.c: New test. * gcc.dg/vect/pr109011-5.c: New test. --- gcc/testsuite/gcc.dg/vect/pr109011-1.c | 19 -- gcc/testsuite/gcc.dg/vect/pr109011-2.c | 35 ++++ gcc/testsuite/gcc.dg/vect/pr109011-3.c | 32 ++++ gcc/testsuite/gcc.dg/vect/pr109011-4.c | 35 ++++ gcc/testsuite/gcc.dg/vect/pr109011-5.c | 32 ++++ gcc/tree-vect-patterns.cc | 313 ++++++++++++++++++++++++++++++++- 6 files changed, 442 insertions(+), 24 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/pr109011-2.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr109011-3.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr109011-4.c create mode 100644 gcc/testsuite/gcc.dg/vect/pr109011-5.c (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/vect/pr109011-1.c b/gcc/testsuite/gcc.dg/vect/pr109011-1.c index 707a82a..16a5826 100644 --- a/gcc/testsuite/gcc.dg/vect/pr109011-1.c +++ b/gcc/testsuite/gcc.dg/vect/pr109011-1.c @@ -4,7 +4,6 @@ /* { dg-additional-options "-mavx512cd" { target { { i?86-*-* x86_64-*-* } && avx512cd } } } */ /* { dg-additional-options "-mavx512vpopcntdq" { target { { i?86-*-* x86_64-*-* } && avx512vpopcntdq } } } */ /* { dg-additional-options "-mpower8-vector" { target powerpc_p8vector_ok } } */ -/* { dg-additional-options "-mpower9-vector" { target powerpc_p9vector_ok } } */ /* { dg-additional-options "-march=z13 -mzarch" { target s390_vx } } */ void @@ -28,21 +27,3 @@ bar (long long *p, long long *q) /* { dg-final { scan-tree-dump-times " = \.CLZ \\\(" 1 "optimized" { target { { i?86-*-* x86_64-*-* } && avx512cd } } } } */ /* { dg-final { scan-tree-dump-times " = \.CLZ \\\(" 1 "optimized" { target { powerpc_p8vector_ok || s390_vx } } } } */ - -void -baz (long long *p, long long *q) -{ -#pragma omp simd - for (int i = 0; i < 2048; ++i) - p[i] = __builtin_ctzll (q[i]); -} - -/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(" 1 "optimized" { target { powerpc_p9vector_ok || s390_vx } } } } */ - -void -qux (long long *p, long long *q) -{ -#pragma omp simd - for (int i = 0; i < 2048; ++i) - p[i] = __builtin_ffsll (q[i]); -} diff --git a/gcc/testsuite/gcc.dg/vect/pr109011-2.c b/gcc/testsuite/gcc.dg/vect/pr109011-2.c new file mode 100644 index 0000000..191af89 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr109011-2.c @@ -0,0 +1,35 @@ +/* PR tree-optimization/109011 */ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-unroll-loops --param=vect-epilogues-nomask=0 -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx512cd -mbmi -mlzcnt -mno-avx512vpopcntdq" { target { { { { i?86-*-* x86_64-*-* } && avx512cd } && lzcnt } && bmi } } } */ +/* { dg-additional-options "-mpower9-vector" { target powerpc_p9vector_ok } } */ +/* { dg-additional-options "-march=z13 -mzarch" { target s390_vx } } */ + +void +foo (int *p, int *q) +{ +#pragma omp simd + for (int i = 0; i < 2048; ++i) + p[i] = __builtin_ctz (q[i]); +} + +void +bar (int *p, int *q) +{ +#pragma omp 
simd + for (int i = 0; i < 2048; ++i) + p[i] = q[i] ? __builtin_ctz (q[i]) : __SIZEOF_INT__ * __CHAR_BIT__; +} + +void +baz (int *p, int *q) +{ +#pragma omp simd + for (int i = 0; i < 2048; ++i) + p[i] = __builtin_ffs (q[i]); +} + +/* { dg-final { scan-tree-dump-times " = \.CLZ \\\(" 3 "optimized" { target { { { { i?86-*-* x86_64-*-* } && avx512cd } && lzcnt } && bmi } } } } */ +/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(" 4 "optimized" { target powerpc_p9vector_ok } } } */ +/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(" 2 "optimized" { target s390_vx } } } */ +/* { dg-final { scan-tree-dump-times " = \.POPCOUNT \\\(" 1 "optimized" { target s390_vx } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr109011-3.c b/gcc/testsuite/gcc.dg/vect/pr109011-3.c new file mode 100644 index 0000000..2e631fc --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr109011-3.c @@ -0,0 +1,32 @@ +/* PR tree-optimization/109011 */ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-unroll-loops --param=vect-epilogues-nomask=0 -fdump-tree-optimized" } */ +/* { dg-additional-options "-mno-avx512cd -mbmi -mlzcnt -mavx512vpopcntdq" { target { { { { i?86-*-* x86_64-*-* } && avx512vpopcntdq } && lzcnt } && bmi } } } */ +/* { dg-additional-options "-mpower8-vector -mno-power9-vector" { target powerpc_p8vector_ok } } */ + +void +foo (int *p, int *q) +{ +#pragma omp simd + for (int i = 0; i < 2048; ++i) + p[i] = __builtin_ctz (q[i]); +} + +void +bar (int *p, int *q) +{ +#pragma omp simd + for (int i = 0; i < 2048; ++i) + p[i] = q[i] ? __builtin_ctz (q[i]) : __SIZEOF_INT__ * __CHAR_BIT__; +} + +void +baz (int *p, int *q) +{ +#pragma omp simd + for (int i = 0; i < 2048; ++i) + p[i] = __builtin_ffs (q[i]); +} + +/* { dg-final { scan-tree-dump-times " = \.POPCOUNT \\\(" 3 "optimized" { target { { { { i?86-*-* x86_64-*-* } && avx512vpopcntdq } && lzcnt } && bmi } } } } */ +/* { dg-final { scan-tree-dump-times " = \.CLZ \\\(" 3 "optimized" { target powerpc_p8vector_ok } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr109011-4.c b/gcc/testsuite/gcc.dg/vect/pr109011-4.c new file mode 100644 index 0000000..ce1ee02 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr109011-4.c @@ -0,0 +1,35 @@ +/* PR tree-optimization/109011 */ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-unroll-loops --param=vect-epilogues-nomask=0 -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx512cd -mbmi -mlzcnt -mno-avx512vpopcntdq" { target { { { { i?86-*-* x86_64-*-* } && avx512cd } && lzcnt } && bmi } } } */ +/* { dg-additional-options "-mpower9-vector" { target powerpc_p9vector_ok } } */ +/* { dg-additional-options "-march=z13 -mzarch" { target s390_vx } } */ + +void +foo (long long *p, long long *q) +{ +#pragma omp simd + for (int i = 0; i < 2048; ++i) + p[i] = __builtin_ctzll (q[i]); +} + +void +bar (long long *p, long long *q) +{ +#pragma omp simd + for (int i = 0; i < 2048; ++i) + p[i] = q[i] ? 
__builtin_ctzll (q[i]) : __SIZEOF_LONG_LONG__ * __CHAR_BIT__; +} + +void +baz (long long *p, long long *q) +{ +#pragma omp simd + for (int i = 0; i < 2048; ++i) + p[i] = __builtin_ffsll (q[i]); +} + +/* { dg-final { scan-tree-dump-times " = \.CLZ \\\(" 3 "optimized" { target { { { { i?86-*-* x86_64-*-* } && avx512cd } && lzcnt } && bmi } } } } */ +/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(" 4 "optimized" { target powerpc_p9vector_ok } } } */ +/* { dg-final { scan-tree-dump-times " = \.CTZ \\\(" 2 "optimized" { target s390_vx } } } */ +/* { dg-final { scan-tree-dump-times " = \.POPCOUNT \\\(" 1 "optimized" { target s390_vx } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/pr109011-5.c b/gcc/testsuite/gcc.dg/vect/pr109011-5.c new file mode 100644 index 0000000..51168ef --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/pr109011-5.c @@ -0,0 +1,32 @@ +/* PR tree-optimization/109011 */ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-unroll-loops --param=vect-epilogues-nomask=0 -fdump-tree-optimized" } */ +/* { dg-additional-options "-mno-avx512cd -mbmi -mlzcnt -mavx512vpopcntdq" { target { { { { i?86-*-* x86_64-*-* } && avx512vpopcntdq } && lzcnt } && bmi } } } */ +/* { dg-additional-options "-mpower8-vector -mno-power9-vector" { target powerpc_p8vector_ok } } */ + +void +foo (long long *p, long long *q) +{ +#pragma omp simd + for (int i = 0; i < 2048; ++i) + p[i] = __builtin_ctzll (q[i]); +} + +void +bar (long long *p, long long *q) +{ +#pragma omp simd + for (int i = 0; i < 2048; ++i) + p[i] = q[i] ? __builtin_ctzll (q[i]) : __SIZEOF_LONG_LONG__ * __CHAR_BIT__; +} + +void +baz (long long *p, long long *q) +{ +#pragma omp simd + for (int i = 0; i < 2048; ++i) + p[i] = __builtin_ffsll (q[i]); +} + +/* { dg-final { scan-tree-dump-times " = \.POPCOUNT \\\(" 3 "optimized" { target { { { { i?86-*-* x86_64-*-* } && avx512vpopcntdq } && lzcnt } && bmi } } } } */ +/* { dg-final { scan-tree-dump-times " = \.CLZ \\\(" 3 "optimized" { target powerpc_p8vector_ok } } } */ diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index 633998e..d1b86e8 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -1501,6 +1501,266 @@ vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info, "vect_recog_widen_minus_pattern"); } +/* Function vect_recog_ctz_ffs_pattern + + Try to find the following pattern: + + TYPE1 A; + TYPE1 B; + + B = __builtin_ctz{,l,ll} (A); + + or + + B = __builtin_ffs{,l,ll} (A); + + Input: + + * STMT_VINFO: The stmt from which the pattern search begins. + here it starts with B = __builtin_* (A); + + Output: + + * TYPE_OUT: The vector type of the output of this pattern. + + * Return value: A new stmt that will be used to replace the sequence of + stmts that constitute the pattern, using clz or popcount builtins. 
*/ + +static gimple * +vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo, + tree *type_out) +{ + gimple *call_stmt = stmt_vinfo->stmt; + gimple *pattern_stmt; + tree rhs_oprnd, rhs_type, lhs_oprnd, lhs_type, vec_type, vec_rhs_type; + tree new_var; + internal_fn ifn = IFN_LAST, ifnnew = IFN_LAST; + bool defined_at_zero = true, defined_at_zero_new = false; + int val = 0, val_new = 0; + int prec; + int sub = 0, add = 0; + location_t loc; + + if (!is_gimple_call (call_stmt)) + return NULL; + + if (gimple_call_num_args (call_stmt) != 1) + return NULL; + + rhs_oprnd = gimple_call_arg (call_stmt, 0); + rhs_type = TREE_TYPE (rhs_oprnd); + lhs_oprnd = gimple_call_lhs (call_stmt); + if (!lhs_oprnd) + return NULL; + lhs_type = TREE_TYPE (lhs_oprnd); + if (!INTEGRAL_TYPE_P (lhs_type) + || !INTEGRAL_TYPE_P (rhs_type) + || !type_has_mode_precision_p (rhs_type) + || TREE_CODE (rhs_oprnd) != SSA_NAME) + return NULL; + + switch (gimple_call_combined_fn (call_stmt)) + { + CASE_CFN_CTZ: + ifn = IFN_CTZ; + if (!gimple_call_internal_p (call_stmt) + || CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type), + val) != 2) + defined_at_zero = false; + break; + CASE_CFN_FFS: + ifn = IFN_FFS; + break; + default: + return NULL; + } + + prec = TYPE_PRECISION (rhs_type); + loc = gimple_location (call_stmt); + + vec_type = get_vectype_for_scalar_type (vinfo, lhs_type); + if (!vec_type) + return NULL; + + vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type); + if (!vec_rhs_type) + return NULL; + + /* Do it only if the backend doesn't have ctz<vector_mode>2 or + ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or + popcount<vector_mode>2. */ + if (!vec_type + || direct_internal_fn_supported_p (ifn, vec_rhs_type, + OPTIMIZE_FOR_SPEED)) + return NULL; + + if (ifn == IFN_FFS + && direct_internal_fn_supported_p (IFN_CTZ, vec_rhs_type, + OPTIMIZE_FOR_SPEED)) + { + ifnnew = IFN_CTZ; + defined_at_zero_new + = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type), + val_new) == 2; + } + else if (direct_internal_fn_supported_p (IFN_CLZ, vec_rhs_type, + OPTIMIZE_FOR_SPEED)) + { + ifnnew = IFN_CLZ; + defined_at_zero_new + = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type), + val_new) == 2; + } + if ((ifnnew == IFN_LAST + || (defined_at_zero && !defined_at_zero_new)) + && direct_internal_fn_supported_p (IFN_POPCOUNT, vec_rhs_type, + OPTIMIZE_FOR_SPEED)) + { + ifnnew = IFN_POPCOUNT; + defined_at_zero_new = true; + val_new = prec; + } + if (ifnnew == IFN_LAST) + return NULL; + + vect_pattern_detected ("vec_recog_ctz_ffs_pattern", call_stmt); + + if ((ifnnew == IFN_CLZ + && defined_at_zero + && defined_at_zero_new + && val == prec + && val_new == prec) + || (ifnnew == IFN_POPCOUNT && ifn == IFN_CTZ)) + { + /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X) + .CTZ (X) = .POPCOUNT ((X - 1) & ~X).
*/ + if (ifnnew == IFN_CLZ) + sub = prec; + val_new = prec; + + if (!TYPE_UNSIGNED (rhs_type)) + { + rhs_type = unsigned_type_for (rhs_type); + vec_rhs_type = get_vectype_for_scalar_type (vinfo, rhs_type); + new_var = vect_recog_temp_ssa_var (rhs_type, NULL); + pattern_stmt = gimple_build_assign (new_var, NOP_EXPR, rhs_oprnd); + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, + vec_rhs_type); + rhs_oprnd = new_var; + } + + tree m1 = vect_recog_temp_ssa_var (rhs_type, NULL); + pattern_stmt = gimple_build_assign (m1, PLUS_EXPR, rhs_oprnd, + build_int_cst (rhs_type, -1)); + gimple_set_location (pattern_stmt, loc); + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type); + + new_var = vect_recog_temp_ssa_var (rhs_type, NULL); + pattern_stmt = gimple_build_assign (new_var, BIT_NOT_EXPR, rhs_oprnd); + gimple_set_location (pattern_stmt, loc); + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type); + rhs_oprnd = new_var; + + new_var = vect_recog_temp_ssa_var (rhs_type, NULL); + pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR, + m1, rhs_oprnd); + gimple_set_location (pattern_stmt, loc); + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type); + rhs_oprnd = new_var; + } + else if (ifnnew == IFN_CLZ) + { + /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X) + .FFS (X) = PREC - .CLZ (X & -X). */ + sub = prec - (ifn == IFN_CTZ); + val_new = sub - val_new; + + tree neg = vect_recog_temp_ssa_var (rhs_type, NULL); + pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd); + gimple_set_location (pattern_stmt, loc); + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type); + + new_var = vect_recog_temp_ssa_var (rhs_type, NULL); + pattern_stmt = gimple_build_assign (new_var, BIT_AND_EXPR, + rhs_oprnd, neg); + gimple_set_location (pattern_stmt, loc); + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type); + rhs_oprnd = new_var; + } + else if (ifnnew == IFN_POPCOUNT) + { + /* .CTZ (X) = PREC - .POPCOUNT (X | -X) + .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X). */ + sub = prec + (ifn == IFN_FFS); + val_new = sub; + + tree neg = vect_recog_temp_ssa_var (rhs_type, NULL); + pattern_stmt = gimple_build_assign (neg, NEGATE_EXPR, rhs_oprnd); + gimple_set_location (pattern_stmt, loc); + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type); + + new_var = vect_recog_temp_ssa_var (rhs_type, NULL); + pattern_stmt = gimple_build_assign (new_var, BIT_IOR_EXPR, + rhs_oprnd, neg); + gimple_set_location (pattern_stmt, loc); + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_rhs_type); + rhs_oprnd = new_var; + } + else if (ifnnew == IFN_CTZ) + { + /* .FFS (X) = .CTZ (X) + 1. */ + add = 1; + val_new++; + } + + /* Create B = .IFNNEW (A). 
*/ + new_var = vect_recog_temp_ssa_var (lhs_type, NULL); + pattern_stmt = gimple_build_call_internal (ifnnew, 1, rhs_oprnd); + gimple_call_set_lhs (pattern_stmt, new_var); + gimple_set_location (pattern_stmt, loc); + *type_out = vec_type; + + if (sub) + { + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type); + tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL); + pattern_stmt = gimple_build_assign (ret_var, MINUS_EXPR, + build_int_cst (lhs_type, sub), + new_var); + gimple_set_location (pattern_stmt, loc); + new_var = ret_var; + } + else if (add) + { + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type); + tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL); + pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var, + build_int_cst (lhs_type, add)); + gimple_set_location (pattern_stmt, loc); + new_var = ret_var; + } + + if (defined_at_zero + && (!defined_at_zero_new || val != val_new)) + { + append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type); + tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL); + rhs_oprnd = gimple_call_arg (call_stmt, 0); + rhs_type = TREE_TYPE (rhs_oprnd); + tree cmp = build2_loc (loc, NE_EXPR, boolean_type_node, + rhs_oprnd, build_zero_cst (rhs_type)); + pattern_stmt = gimple_build_assign (ret_var, COND_EXPR, cmp, + new_var, + build_int_cst (lhs_type, val)); + } + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "created pattern stmt: %G", pattern_stmt); + + return pattern_stmt; +} + /* Function vect_recog_popcount_clz_ctz_ffs_pattern Try to find the following pattern: @@ -1680,15 +1940,42 @@ vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo, gcc_unreachable (); } - vect_pattern_detected ("vec_recog_popcount_clz_ctz_ffs_pattern", - call_stmt); vec_type = get_vectype_for_scalar_type (vinfo, lhs_type); /* Do it only if the backend has popcount<vector_mode>2 etc. pattern. */ - if (!vec_type - || !direct_internal_fn_supported_p (ifn, vec_type, - OPTIMIZE_FOR_SPEED)) + if (!vec_type) return NULL; + bool supported + = direct_internal_fn_supported_p (ifn, vec_type, OPTIMIZE_FOR_SPEED); + if (!supported) + switch (ifn) + { + case IFN_POPCOUNT: + case IFN_CLZ: + return NULL; + case IFN_FFS: + /* vect_recog_ctz_ffs_pattern can implement ffs using ctz. */ + if (direct_internal_fn_supported_p (IFN_CTZ, vec_type, + OPTIMIZE_FOR_SPEED)) + break; + /* FALLTHRU */ + case IFN_CTZ: + /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using + clz or popcount. */ + if (direct_internal_fn_supported_p (IFN_CLZ, vec_type, + OPTIMIZE_FOR_SPEED)) + break; + if (direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type, + OPTIMIZE_FOR_SPEED)) + break; + return NULL; + default: + gcc_unreachable (); + } + + vect_pattern_detected ("vec_recog_popcount_clz_ctz_ffs_pattern", + call_stmt); + /* Create B = .POPCOUNT (A).
*/ new_var = vect_recog_temp_ssa_var (lhs_type, NULL); pattern_stmt = gimple_build_call_internal (ifn, 1, unprom_diff.op); @@ -1702,11 +1989,26 @@ vect_recog_popcount_clz_ctz_ffs_pattern (vec_info *vinfo, if (addend) { + gcc_assert (supported); append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, vec_type); tree ret_var = vect_recog_temp_ssa_var (lhs_type, NULL); pattern_stmt = gimple_build_assign (ret_var, PLUS_EXPR, new_var, build_int_cst (lhs_type, addend)); } + else if (!supported) + { + stmt_vec_info new_stmt_info = vinfo->add_stmt (pattern_stmt); + STMT_VINFO_VECTYPE (new_stmt_info) = vec_type; + pattern_stmt + = vect_recog_ctz_ffs_pattern (vinfo, new_stmt_info, type_out); + if (pattern_stmt == NULL) + return NULL; + if (gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info)) + { + gimple_seq *pseq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo); + gimple_seq_add_seq_without_update (pseq, seq); + } + } return pattern_stmt; } @@ -6150,6 +6452,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = { { vect_recog_widen_sum_pattern, "widen_sum" }, { vect_recog_pow_pattern, "pow" }, { vect_recog_popcount_clz_ctz_ffs_pattern, "popcount_clz_ctz_ffs" }, + { vect_recog_ctz_ffs_pattern, "ctz_ffs" }, { vect_recog_widen_shift_pattern, "widen_shift" }, { vect_recog_rotate_pattern, "rotate" }, { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" }, -- cgit v1.1 From 1edcb2ea0eb29f1a85cd9ba7bb933c4a260cba44 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 20 Apr 2023 13:02:52 +0200 Subject: tree: Add 3+ argument fndecl_built_in_p On Wed, Feb 22, 2023 at 09:52:06AM +0000, Richard Biener wrote: > > The following testcase ICEs because we still have some spots that > > treat BUILT_IN_UNREACHABLE specially but not BUILT_IN_UNREACHABLE_TRAP > > the same. This patch uses (fndecl_built_in_p (node, BUILT_IN_UNREACHABLE) || fndecl_built_in_p (node, BUILT_IN_UNREACHABLE_TRAP)) a lot and from grepping around, we do something like that in lots of other places, or in some spots instead as (fndecl_built_in_p (node, BUILT_IN_NORMAL) && (DECL_FUNCTION_CODE (node) == BUILT_IN_WHATEVER1 || DECL_FUNCTION_CODE (node) == BUILT_IN_WHATEVER2)) The following patch adds an overload for this case, so we can write it in a shorter way, using C++11 argument packs so that it supports as many codes as one needs. 2023-04-20 Jakub Jelinek Jonathan Wakely * tree.h (built_in_function_equal_p): New helper function. (fndecl_built_in_p): Turn into variadic template to support 1 or more built_in_function arguments. * builtins.cc (fold_builtin_expect): Use 3 argument fndecl_built_in_p. * gimplify.cc (goa_stabilize_expr): Likewise. * cgraphclones.cc (cgraph_node::create_clone): Likewise. * ipa-fnsummary.cc (compute_fn_summary): Likewise. * omp-low.cc (setjmp_or_longjmp_p): Likewise. * cgraph.cc (cgraph_edge::redirect_call_stmt_to_callee, cgraph_update_edges_for_call_stmt_node, cgraph_edge::verify_corresponds_to_fndecl, cgraph_node::verify_node): Likewise. * tree-stdarg.cc (optimize_va_list_gpr_fpr_size): Likewise. * gimple-ssa-warn-access.cc (matching_alloc_calls_p): Likewise. * ipa-prop.cc (try_make_edge_direct_virtual_call): Likewise. 
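To make the mechanism concrete, here is a self-contained sketch (simplified, not the exact tree.h code; the toy decl type is invented for illustration) of folding a C++11 argument pack into a chain of code comparisons via recursion, since C++17 fold expressions are not available:

/* variadic-sketch.cc -- pack recursion in the style described above.  */
enum built_in_function { BUILT_IN_UNREACHABLE, BUILT_IN_UNREACHABLE_TRAP,
			 BUILT_IN_FREE, BUILT_IN_REALLOC };

/* Stand-in for a FUNCTION_DECL tree node.  */
struct toy_decl { bool normal_builtin; built_in_function code; };

/* Base case: compare against the last candidate code.  */
inline bool
built_in_function_equal_p (built_in_function name0, built_in_function name1)
{
  return name0 == name1;
}

/* Recursive case: peel one candidate off the pack.  */
template <typename... F>
inline bool
built_in_function_equal_p (built_in_function name0, built_in_function name1,
			   built_in_function name2, F... names)
{
  return name0 == name1 || built_in_function_equal_p (name0, name2, names...);
}

/* Accepts one or more candidate codes, like the new overload.  */
template <typename... F>
inline bool
fndecl_built_in_p (const toy_decl *node, built_in_function name1, F... names)
{
  return node->normal_builtin
	 && built_in_function_equal_p (node->code, name1, names...);
}

int
main ()
{
  toy_decl d = { true, BUILT_IN_UNREACHABLE_TRAP };
  return fndecl_built_in_p (&d, BUILT_IN_UNREACHABLE,
			    BUILT_IN_UNREACHABLE_TRAP) ? 0 : 1;
}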
--- gcc/builtins.cc | 4 ++-- gcc/cgraph.cc | 19 ++++++++----------- gcc/cgraphclones.cc | 5 ++--- gcc/gimple-ssa-warn-access.cc | 7 +++---- gcc/gimplify.cc | 4 ++-- gcc/ipa-fnsummary.cc | 4 ++-- gcc/ipa-prop.cc | 4 ++-- gcc/omp-low.cc | 3 +-- gcc/tree-stdarg.cc | 4 ++-- gcc/tree.h | 27 ++++++++++++++++++++++++--- 10 files changed, 48 insertions(+), 33 deletions(-) (limited to 'gcc') diff --git a/gcc/builtins.cc b/gcc/builtins.cc index 80b8b89..0e06fa5 100644 --- a/gcc/builtins.cc +++ b/gcc/builtins.cc @@ -8645,8 +8645,8 @@ fold_builtin_expect (location_t loc, tree arg0, tree arg1, tree arg2, if (TREE_CODE (inner) == CALL_EXPR && (fndecl = get_callee_fndecl (inner)) - && (fndecl_built_in_p (fndecl, BUILT_IN_EXPECT) - || fndecl_built_in_p (fndecl, BUILT_IN_EXPECT_WITH_PROBABILITY))) + && fndecl_built_in_p (fndecl, BUILT_IN_EXPECT, + BUILT_IN_EXPECT_WITH_PROBABILITY)) return arg0; inner = inner_arg0; diff --git a/gcc/cgraph.cc b/gcc/cgraph.cc index ec663d2..e8f9bec 100644 --- a/gcc/cgraph.cc +++ b/gcc/cgraph.cc @@ -1548,8 +1548,8 @@ cgraph_edge::redirect_call_stmt_to_callee (cgraph_edge *e) else { if (flag_checking - && !fndecl_built_in_p (e->callee->decl, BUILT_IN_UNREACHABLE) - && !fndecl_built_in_p (e->callee->decl, BUILT_IN_UNREACHABLE_TRAP)) + && !fndecl_built_in_p (e->callee->decl, BUILT_IN_UNREACHABLE, + BUILT_IN_UNREACHABLE_TRAP)) ipa_verify_edge_has_no_modifications (e); new_stmt = e->call_stmt; gimple_call_set_fndecl (new_stmt, e->callee->decl); @@ -1635,9 +1635,8 @@ cgraph_update_edges_for_call_stmt_node (cgraph_node *node, { /* Keep calls marked as dead dead. */ if (new_stmt && is_gimple_call (new_stmt) && e->callee - && (fndecl_built_in_p (e->callee->decl, BUILT_IN_UNREACHABLE) - || fndecl_built_in_p (e->callee->decl, - BUILT_IN_UNREACHABLE_TRAP))) + && fndecl_built_in_p (e->callee->decl, BUILT_IN_UNREACHABLE, + BUILT_IN_UNREACHABLE_TRAP)) { cgraph_edge::set_call_stmt (node->get_edge (old_stmt), as_a <gcall *> (new_stmt)); @@ -3259,9 +3258,8 @@ cgraph_edge::verify_corresponds_to_fndecl (tree decl) /* Optimizers can redirect unreachable calls or calls triggering undefined behavior to __builtin_unreachable or __builtin_unreachable trap. */ - if (fndecl_built_in_p (callee->decl, BUILT_IN_NORMAL) - && (DECL_FUNCTION_CODE (callee->decl) == BUILT_IN_UNREACHABLE - || DECL_FUNCTION_CODE (callee->decl) == BUILT_IN_UNREACHABLE_TRAP)) + if (fndecl_built_in_p (callee->decl, BUILT_IN_UNREACHABLE, + BUILT_IN_UNREACHABLE_TRAP)) return false; if (callee->former_clone_of != node->decl @@ -3601,9 +3599,8 @@ cgraph_node::verify_node (void) /* Optimized out calls are redirected to __builtin_unreachable. */ && (e->count.nonzero_p () || ! e->callee->decl - || !(fndecl_built_in_p (e->callee->decl, BUILT_IN_UNREACHABLE) - || fndecl_built_in_p (e->callee->decl, - BUILT_IN_UNREACHABLE_TRAP))) + || !fndecl_built_in_p (e->callee->decl, BUILT_IN_UNREACHABLE, + BUILT_IN_UNREACHABLE_TRAP)) && count == ENTRY_BLOCK_PTR_FOR_FN (DECL_STRUCT_FUNCTION (decl))->count && (!e->count.ipa_p () diff --git a/gcc/cgraphclones.cc b/gcc/cgraphclones.cc index 7c5d3b2..29d28ef 100644 --- a/gcc/cgraphclones.cc +++ b/gcc/cgraphclones.cc @@ -435,9 +435,8 @@ cgraph_node::create_clone (tree new_decl, profile_count prof_count, version. The only exception is when the edge was proved to be unreachable during the cloning procedure.
*/ if (!e->callee - || !(fndecl_built_in_p (e->callee->decl, BUILT_IN_UNREACHABLE) - || fndecl_built_in_p (e->callee->decl, - BUILT_IN_UNREACHABLE_TRAP))) + || !fndecl_built_in_p (e->callee->decl, BUILT_IN_UNREACHABLE, + BUILT_IN_UNREACHABLE_TRAP)) e->redirect_callee_duplicating_thunks (new_node); } new_node->expand_all_artificial_thunks (); diff --git a/gcc/gimple-ssa-warn-access.cc b/gcc/gimple-ssa-warn-access.cc index 48e85e9..33bc4a8 100644 --- a/gcc/gimple-ssa-warn-access.cc +++ b/gcc/gimple-ssa-warn-access.cc @@ -1787,8 +1787,7 @@ matching_alloc_calls_p (tree alloc_decl, tree dealloc_decl) /* Return false for deallocation functions that are known not to match. */ - if (fndecl_built_in_p (dealloc_decl, BUILT_IN_FREE) - || fndecl_built_in_p (dealloc_decl, BUILT_IN_REALLOC)) + if (fndecl_built_in_p (dealloc_decl, BUILT_IN_FREE, BUILT_IN_REALLOC)) return false; /* Otherwise proceed below to check the deallocation function's "*dealloc" attributes to look for one that mentions this operator @@ -1812,8 +1811,8 @@ matching_alloc_calls_p (tree alloc_decl, tree dealloc_decl) if (DECL_IS_OPERATOR_DELETE_P (dealloc_decl)) return false; - if (fndecl_built_in_p (dealloc_decl, BUILT_IN_FREE) - || fndecl_built_in_p (dealloc_decl, BUILT_IN_REALLOC)) + if (fndecl_built_in_p (dealloc_decl, BUILT_IN_FREE, + BUILT_IN_REALLOC)) return true; alloc_dealloc_kind = alloc_kind_t::builtin; diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc index ade6e33..5a83405 100644 --- a/gcc/gimplify.cc +++ b/gcc/gimplify.cc @@ -15830,8 +15830,8 @@ goa_stabilize_expr (tree *expr_p, gimple_seq *pre_p, tree lhs_addr, if (TREE_CODE (expr) == CALL_EXPR) { if (tree fndecl = get_callee_fndecl (expr)) - if (fndecl_built_in_p (fndecl, BUILT_IN_CLEAR_PADDING) - || fndecl_built_in_p (fndecl, BUILT_IN_MEMCMP)) + if (fndecl_built_in_p (fndecl, BUILT_IN_CLEAR_PADDING, + BUILT_IN_MEMCMP)) { int nargs = call_expr_nargs (expr); for (int i = 0; i < nargs; i++) diff --git a/gcc/ipa-fnsummary.cc b/gcc/ipa-fnsummary.cc index 63bd525..8217039 100644 --- a/gcc/ipa-fnsummary.cc +++ b/gcc/ipa-fnsummary.cc @@ -3180,8 +3180,8 @@ compute_fn_summary (struct cgraph_node *node, bool early) for (e = node->callees; e; e = e->next_callee) { tree cdecl = e->callee->decl; - if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS) - || fndecl_built_in_p (cdecl, BUILT_IN_VA_START)) + if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS, + BUILT_IN_VA_START)) break; } node->can_change_signature = !e; diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc index 0d81674..c0143e9 100644 --- a/gcc/ipa-prop.cc +++ b/gcc/ipa-prop.cc @@ -3865,8 +3865,8 @@ try_make_edge_direct_virtual_call (struct cgraph_edge *ie, if (can_refer) { if (!t - || fndecl_built_in_p (t, BUILT_IN_UNREACHABLE) - || fndecl_built_in_p (t, BUILT_IN_UNREACHABLE_TRAP) + || fndecl_built_in_p (t, BUILT_IN_UNREACHABLE, + BUILT_IN_UNREACHABLE_TRAP) || !possible_polymorphic_call_target_p (ie, cgraph_node::get (t))) { diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc index 1818132..dddf5b5 100644 --- a/gcc/omp-low.cc +++ b/gcc/omp-low.cc @@ -3992,8 +3992,7 @@ scan_omp_1_op (tree *tp, int *walk_subtrees, void *data) static bool setjmp_or_longjmp_p (const_tree fndecl) { - if (fndecl_built_in_p (fndecl, BUILT_IN_SETJMP) - || fndecl_built_in_p (fndecl, BUILT_IN_LONGJMP)) + if (fndecl_built_in_p (fndecl, BUILT_IN_SETJMP, BUILT_IN_LONGJMP)) return true; tree declname = DECL_NAME (fndecl); diff --git a/gcc/tree-stdarg.cc b/gcc/tree-stdarg.cc index 37c3981..f522181 100644 --- a/gcc/tree-stdarg.cc +++ b/gcc/tree-stdarg.cc @@ -867,8 +867,8 
@@ optimize_va_list_gpr_fpr_size (function *fun) tree callee = gimple_call_fndecl (stmt); if (callee - && (fndecl_built_in_p (callee, BUILT_IN_VA_START) - || fndecl_built_in_p (callee, BUILT_IN_VA_END))) + && fndecl_built_in_p (callee, BUILT_IN_VA_START, + BUILT_IN_VA_END)) continue; } diff --git a/gcc/tree.h b/gcc/tree.h index abcdb56..8e67e70 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -6585,6 +6585,24 @@ type_has_mode_precision_p (const_tree t) return known_eq (TYPE_PRECISION (t), GET_MODE_PRECISION (TYPE_MODE (t))); } +/* Helper functions for fndecl_built_in_p. */ + +inline bool +built_in_function_equal_p (built_in_function name0, built_in_function name1) +{ + return name0 == name1; +} + +/* Recursive case for two or more names. */ + +template <typename ...F> +inline bool +built_in_function_equal_p (built_in_function name0, built_in_function name1, + built_in_function name2, F... names) +{ + return name0 == name1 || built_in_function_equal_p (name0, name2, names...); +} + /* Return true if a FUNCTION_DECL NODE is a GCC built-in function. Note that it is different from the DECL_IS_UNDECLARED_BUILTIN @@ -6616,13 +6634,16 @@ fndecl_built_in_p (const_tree node, unsigned int name, built_in_class klass) } /* Return true if a FUNCTION_DECL NODE is a GCC built-in function - of BUILT_IN_NORMAL class with name equal to NAME. */ + of BUILT_IN_NORMAL class with name equal to NAME1 (or other mentioned + NAMES). */ +template <typename ...F> inline bool -fndecl_built_in_p (const_tree node, built_in_function name) +fndecl_built_in_p (const_tree node, built_in_function name1, F... names) { return (fndecl_built_in_p (node, BUILT_IN_NORMAL) - && DECL_FUNCTION_CODE (node) == name); + && built_in_function_equal_p (DECL_FUNCTION_CODE (node), + name1, names...)); } /* A struct for encapsulating location information about an operator -- cgit v1.1 From 09751f52bfa6757405c85faede627129fdd0884f Mon Sep 17 00:00:00 2001 From: Andrew Stubbs Date: Tue, 18 Apr 2023 12:03:43 +0100 Subject: amdgcn: update target-supports.exp The backend can now vectorize more things. gcc/testsuite/ChangeLog: * lib/target-supports.exp (check_effective_target_vect_call_copysignf): Add amdgcn. (check_effective_target_vect_call_sqrtf): Add amdgcn. (check_effective_target_vect_call_ceilf): Add amdgcn. (check_effective_target_vect_call_floor): Add amdgcn. (check_effective_target_vect_logical_reduc): Add amdgcn. --- gcc/testsuite/lib/target-supports.exp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index ad68af5..868e2c4 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -8555,7 +8555,8 @@ proc check_effective_target_vect_call_copysignf { } { return [check_cached_effective_target_indexed vect_call_copysignf { expr { [istarget i?86-*-*] || [istarget x86_64-*-*] || [istarget powerpc*-*-*] - || [istarget aarch64*-*-*] }}] + || [istarget aarch64*-*-*] + || [istarget amdgcn-*-*] }}] } # Return 1 if the target supports hardware square root instructions. @@ -8591,7 +8592,8 @@ proc check_effective_target_vect_call_sqrtf { } { || [istarget i?86-*-*] || [istarget x86_64-*-*] || ([istarget powerpc*-*-*] && [check_vsx_hw_available]) || ([istarget s390*-*-*] - && [check_effective_target_s390_vx]) }}] + && [check_effective_target_s390_vx]) + || [istarget amdgcn-*-*] }}] } # Return 1 if the target supports vector lrint calls.
@@ -8636,14 +8638,16 @@ proc check_effective_target_vect_call_ceil { } { proc check_effective_target_vect_call_ceilf { } { return [check_cached_effective_target_indexed vect_call_ceilf { - expr { [istarget aarch64*-*-*] }}] + expr { [istarget aarch64*-*-*] + || [istarget amdgcn-*-*] }}] } # Return 1 if the target supports vector floor calls. proc check_effective_target_vect_call_floor { } { return [check_cached_effective_target_indexed vect_call_floor { - expr { [istarget aarch64*-*-*] }}] + expr { [istarget aarch64*-*-*] + || [istarget amdgcn-*-*] }}] } # Return 1 if the target supports vector floorf calls. @@ -8699,7 +8703,8 @@ proc check_effective_target_vect_call_roundf { } { # Return 1 if the target supports AND, OR and XOR reduction. proc check_effective_target_vect_logical_reduc { } { - return [check_effective_target_aarch64_sve] + return [expr { [check_effective_target_aarch64_sve] + || [istarget amdgcn-*-*] }] } # Return 1 if the target supports the fold_extract_last optab. -- cgit v1.1 From 0be4fbeaa6a7a2db466a6fd2efad2afdb642bac0 Mon Sep 17 00:00:00 2001 From: Andrew Stubbs Date: Thu, 20 Apr 2023 11:11:13 +0100 Subject: amdgcn: bug fix ldexp insn The vop3 instructions don't support B constraint immediates. Also, use the SV_FP iterator to delete a redundant pattern. gcc/ChangeLog: * config/gcn/gcn-valu.md (vnsi, VnSI): Add scalar modes. (ldexp<mode>3): Delete. (ldexp<mode>3): Change "B" to "A". --- gcc/config/gcn/gcn-valu.md | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) (limited to 'gcc') diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index 4a40a9d..44c4846 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -208,7 +208,9 @@ (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")]) (define_mode_attr vnsi - [(V2QI "v2si") (V2HI "v2si") (V2HF "v2si") (V2SI "v2si") + [(QI "si") (HI "si") (SI "si") + (HF "si") (SF "si") (DI "si") (DF "si") + (V2QI "v2si") (V2HI "v2si") (V2HF "v2si") (V2SI "v2si") (V2SF "v2si") (V2DI "v2si") (V2DF "v2si") (V4QI "v4si") (V4HI "v4si") (V4HF "v4si") (V4SI "v4si") (V4SF "v4si") (V4DI "v4si") (V4DF "v4si") @@ -222,7 +224,9 @@ (V64SF "v64si") (V64DI "v64si") (V64DF "v64si")]) (define_mode_attr VnSI - [(V2QI "V2SI") (V2HI "V2SI") (V2HF "V2SI") (V2SI "V2SI") + [(QI "SI") (HI "SI") (SI "SI") + (HF "SI") (SF "SI") (DI "SI") (DF "SI") + (V2QI "V2SI") (V2HI "V2SI") (V2HF "V2SI") (V2SI "V2SI") (V2SF "V2SI") (V2DI "V2SI") (V2DF "V2SI") (V4QI "V4SI") (V4HI "V4SI") (V4HF "V4SI") (V4SI "V4SI") (V4SF "V4SI") (V4DI "V4SI") (V4DF "V4SI") @@ -3043,21 +3047,10 @@ ; Implement ldexp pattern -(define_insn "ldexp<mode>3" - [(set (match_operand:FP 0 "register_operand" "=v") - (unspec:FP - [(match_operand:FP 1 "gcn_alu_operand" "vB") - (match_operand:SI 2 "gcn_alu_operand" "vSvA")] - UNSPEC_LDEXP))] - "" - "v_ldexp%i0\t%0, %1, %2" - [(set_attr "type" "vop3a") - (set_attr "length" "8")]) - (define_insn "ldexp<mode>3" - [(set (match_operand:V_FP 0 "register_operand" "= v") - (unspec:V_FP - [(match_operand:V_FP 1 "gcn_alu_operand" " vB") + [(set (match_operand:SV_FP 0 "register_operand" "= v") + (unspec:SV_FP + [(match_operand:SV_FP 1 "gcn_alu_operand" " vA") (match_operand:<VnSI> 2 "gcn_alu_operand" "vSvA")] UNSPEC_LDEXP))] "" -- cgit v1.1 From 98ebdda3fd81c2c87ef0e73de9c94135fb49210f Mon Sep 17 00:00:00 2001 From: Kito Cheng Date: Thu, 20 Apr 2023 20:03:24 +0800 Subject: RISC-V: Fix simplify_ior_optimization.c on rv32 GCC will complain if the target ABI doesn't have a corresponding multilib on a glibc toolchain; use stdint-gcc.h to
suppress that. gcc/testsuite/ChangeLog: * gcc.target/riscv/simplify_ior_optimization.c: Use stdint-gcc.h rather than stdint.h --- gcc/testsuite/gcc.target/riscv/simplify_ior_optimization.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/riscv/simplify_ior_optimization.c b/gcc/testsuite/gcc.target/riscv/simplify_ior_optimization.c index ec3bd0b..b94e1ee 100644 --- a/gcc/testsuite/gcc.target/riscv/simplify_ior_optimization.c +++ b/gcc/testsuite/gcc.target/riscv/simplify_ior_optimization.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-march=rv64gc -mabi=lp64 -O2" } */ -#include <stdint.h> +#include "stdint-gcc.h" uint8_t test_simplify_ior_scalar_case_0 (uint8_t a) { -- cgit v1.1 From a2d12abedc89a9439fd6aadc38730fdadca0684f Mon Sep 17 00:00:00 2001 From: Ju-Zhe Zhong Date: Wed, 19 Apr 2023 18:41:51 +0800 Subject: RISC-V: Fix wrong check of register occurrences [PR109535] count_occurrences will only count the same RTX (same code and same mode), but what we want to track is the occurrence of a register; a register might appear in the insn with a different mode or be contained in a SUBREG. Testcase coming from Kito. gcc/ChangeLog: PR target/109535 * config/riscv/riscv-vsetvl.cc (count_regno_occurrences): New function. (pass_vsetvl::cleanup_insns): Fix bug. gcc/testsuite/ChangeLog: PR target/109535 * g++.target/riscv/rvv/base/pr109535.C: New test. * gcc.target/riscv/rvv/base/pr109535.c: New test. Signed-off-by: Ju-Zhe Zhong Co-authored-by: kito-cheng --- gcc/config/riscv/riscv-vsetvl.cc | 14 +- gcc/testsuite/g++.target/riscv/rvv/base/pr109535.C | 144 +++++++++++++++++++++ gcc/testsuite/gcc.target/riscv/rvv/base/pr109535.c | 11 ++ 3 files changed, 168 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/pr109535.C create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr109535.c (limited to 'gcc') diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index 1b66e3b..9c356ce 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -1592,6 +1592,18 @@ backward_propagate_worthwhile_p (const basic_block cfg_bb, return true; } +/* Count the number of REGNO in RINSN. */ +static int +count_regno_occurrences (rtx_insn *rinsn, unsigned int regno) +{ + int count = 0; + extract_insn (rinsn); + for (int i = 0; i < recog_data.n_operands; i++) + if (refers_to_regno_p (regno, recog_data.operand[i])) + count++; + return count; +} + avl_info::avl_info (const avl_info &other) { m_value = other.get_value (); @@ -3924,7 +3936,7 @@ pass_vsetvl::cleanup_insns (void) const if (!has_vl_op (rinsn) || !REG_P (get_vl (rinsn))) continue; rtx avl = get_vl (rinsn); - if (count_occurrences (PATTERN (rinsn), avl, 0) == 1) + if (count_regno_occurrences (rinsn, REGNO (avl)) == 1) { /* Get the list of uses for the new instruction.
*/ auto attempt = crtl->ssa->new_change_attempt (); diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/pr109535.C b/gcc/testsuite/g++.target/riscv/rvv/base/pr109535.C new file mode 100644 index 0000000..7013cfc --- /dev/null +++ b/gcc/testsuite/g++.target/riscv/rvv/base/pr109535.C @@ -0,0 +1,144 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ + +typedef long size_t; +typedef signed char int8_t; +typedef char uint8_t + +; +template < typename > struct Relations{ using Unsigned = uint8_t; }; +template < typename T > using MakeUnsigned = typename Relations< T >::Unsigned; +#pragma riscv intrinsic "vector" +size_t ScaleByPower() { return 0;} +template < typename Lane, size_t , int > struct Simd { +using T = Lane; + +template < typename NewT > using Rebind = Simd< NewT, 1, 0 >; +}; +template < typename T > struct ClampNAndPow2 { +using type = Simd< T, 65536, 0 > +; +}; +struct CappedTagChecker { +using type = ClampNAndPow2< signed char >::type; +}; +template < typename , size_t , int > +using CappedTag = CappedTagChecker::type; +template < class D > using TFromD = typename D::T; +template < class T, class D > using Rebind = typename D::Rebind< T >; +template < class D > +using RebindToUnsigned = Rebind< MakeUnsigned< D >, D >; +template < size_t N > +size_t +Lanes(Simd< uint8_t, N, 0 > ) { +size_t kFull = 0; +size_t kCap ; +size_t actual = + __riscv_vsetvl_e8m1(kCap); +return actual; +} +template < size_t N > +size_t +Lanes(Simd< int8_t, N, 0 > ) { +size_t kFull ; +size_t kCap ; +size_t actual = + __riscv_vsetvl_e8m1(kCap); +return actual; +} +template < size_t N > +vuint8m1_t +Set(Simd< uint8_t, N, 0 > d, uint8_t arg) { +size_t __trans_tmp_1 = Lanes(d); +return __riscv_vmv_v_x_u8m1(arg, __trans_tmp_1); +} +template < size_t N > +vint8m1_t Set(Simd< int8_t, N, 0 > , int8_t ); +template < class D > using VFromD = decltype(Set(D(), TFromD< D >())); +template < class D > +VFromD< D > +Zero(D ) +; + +template < size_t N > +vint8m1_t +BitCastFromByte(Simd< int8_t, N, 0 >, vuint8m1_t v) { +return __riscv_vreinterpret_v_u8m1_i8m1(v); +} +template < class D, class FromV > +VFromD< D > +BitCast(D d, FromV v) { +return BitCastFromByte(d, v) + +; +} +template < size_t N > +void +Store(vint8m1_t v, Simd< int8_t, N, 0 > d) { +int8_t *p ; +__riscv_vse8_v_i8m1(p, v, Lanes(d)); +} +template < class V, class D > +void +StoreU(V v, D d) { +Store(v, d) +; +} +template < class D > using Vec = decltype(Zero(D())); +size_t Generate_count; +template < class D, class Func> +void Generate(D d, Func func) { +RebindToUnsigned< D > du +; +size_t N = Lanes(d); +Vec< decltype(du) > vidx ; +for (; ; ) { + StoreU(func(d, vidx), d); + vidx = (Set(du, N)); +} +} +template < typename T, int , int kMinArg, class Test, int kPow2 > +struct ForeachCappedR { +static void Do(size_t , size_t ) { + CappedTag< T, kMinArg, kPow2 > d; + Test()(T(), d); +} +}; +template < class > struct ForeachCountAndMisalign; +struct TestGenerate; +template < int kPow2 = 1 > class ForExtendableVectors { +public: + +template < typename T > void operator()(T) { + size_t max_lanes ; + ForeachCappedR< T, 0, size_t{} , + ForeachCountAndMisalign< int >, kPow2 >::Do(1, max_lanes); +} +}; +class ForPartialVectors { +public: +template < typename T > void operator()(T t) { + ForExtendableVectors()(t); +} +}; +void ForSignedTypes(ForPartialVectors func) { func(int8_t()); } +template < class > struct ForeachCountAndMisalign { +template < typename T, class D > +void operator()(T, D d) { + int rng + ; + size_t misalignments[1] ; + 
for ( size_t ma : misalignments) + for (size_t mb : misalignments) + TestGenerate()(d, 0, ma, mb, rng); +} +}; +struct TestGenerate { +template < class D > +void operator()(D d, size_t , size_t , size_t, int ) { + auto gen2 = [](auto d, auto vidx) { + return BitCast(d, (vidx)); + }; + Generate(d, gen2); +} +}; diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr109535.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr109535.c new file mode 100644 index 0000000..7582fe9 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr109535.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=rv32gcv -mabi=ilp32d" } */ + +#include "riscv_vector.h" + +void foo(void *in1, void *in2, void *in3, void *out, size_t vl) { + vint8m1_t a = __riscv_vle8_v_i8m1(in1, vl); + vint8m1_t b = __riscv_vadd_vx_i8m1 (a, vl, vl); + __riscv_vse8_v_i8m1(out, b, vl); +} + -- cgit v1.1 From 9fde76a3be8e1717d9d38492c40675e742611e45 Mon Sep 17 00:00:00 2001 From: Kito Cheng Date: Thu, 20 Apr 2023 21:15:37 +0800 Subject: RISC-V: Fix riscv/arch-19.c with different ISA spec version In the newer ISA spec, F implies zicsr; add that to the -march option to prevent different test results with different default -misa-spec versions. gcc/testsuite/ * gcc.target/riscv/arch-19.c: Add -misa-spec. --- gcc/testsuite/gcc.target/riscv/arch-19.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.target/riscv/arch-19.c b/gcc/testsuite/gcc.target/riscv/arch-19.c index b042e1a..95204ed 100644 --- a/gcc/testsuite/gcc.target/riscv/arch-19.c +++ b/gcc/testsuite/gcc.target/riscv/arch-19.c @@ -1,4 +1,4 @@ /* { dg-do compile } */ -/* { dg-options "-march=rv64if_zfinx -mabi=lp64" } */ +/* { dg-options "-march=rv64if_zicsr_zfinx -mabi=lp64" } */ int foo() {} -/* { dg-error "'-march=rv64if_zfinx': z\\*inx conflicts with floating-point extensions" "" { target *-*-* } 0 } */ +/* { dg-error "'-march=rv64if_zicsr_zfinx': z\\*inx conflicts with floating-point extensions" "" { target *-*-* } 0 } */ -- cgit v1.1 From 7b206ae7f17455b69349767ec48b074db260a2a7 Mon Sep 17 00:00:00 2001 From: Juzhe-Zhong Date: Fri, 24 Mar 2023 14:57:25 +0800 Subject: RISC-V: Fix RVV register order This patch fixes the issue of incorrect register order of RVV. The new register order comes from Kito's original RVV GCC implementation.
Consider this case: void f (void *base,void *base2,void *out,size_t vl, int n) { vuint64m8_t bindex = __riscv_vle64_v_u64m8 (base + 100, vl); for (int i = 0; i < n; i++){ vbool8_t m = __riscv_vlm_v_b8 (base + i, vl); vuint64m8_t v = __riscv_vluxei64_v_u64m8_m(m,base,bindex,vl); vuint64m8_t v2 = __riscv_vle64_v_u64m8_tu (v, base2 + i, vl); vint8m1_t v3 = __riscv_vluxei64_v_i8m1_m(m,base,v,vl); vint8m1_t v4 = __riscv_vluxei64_v_i8m1_m(m,base,v2,vl); __riscv_vse8_v_i8m1 (out + 100*i,v3,vl); __riscv_vse8_v_i8m1 (out + 222*i,v4,vl); } } Before this patch: f: csrr t0,vlenb slli t1,t0,3 sub sp,sp,t1 addi a5,a0,100 vsetvli zero,a3,e64,m8,ta,ma vle64.v v24,0(a5) vs8r.v v24,0(sp) ble a4,zero,.L1 mv a6,a0 add a4,a4,a0 mv a5,a2 .L3: vsetvli zero,zero,e64,m8,ta,ma vl8re64.v v24,0(sp) vlm.v v0,0(a6) vluxei64.v v24,(a0),v24,v0.t addi a6,a6,1 vsetvli zero,zero,e8,m1,tu,ma vmv8r.v v16,v24 vluxei64.v v8,(a0),v24,v0.t vle64.v v16,0(a1) vluxei64.v v24,(a0),v16,v0.t vse8.v v8,0(a2) vse8.v v24,0(a5) addi a1,a1,1 addi a2,a2,100 addi a5,a5,222 bne a4,a6,.L3 .L1: csrr t0,vlenb slli t1,t0,3 add sp,sp,t1 jr ra After this patch: f: addi a5,a0,100 vsetvli zero,a3,e64,m8,ta,ma vle64.v v24,0(a5) ble a4,zero,.L1 mv a6,a0 add a4,a4,a0 mv a5,a2 .L3: vsetvli zero,zero,e64,m8,ta,ma vlm.v v0,0(a6) addi a6,a6,1 vluxei64.v v8,(a0),v24,v0.t vsetvli zero,zero,e8,m1,tu,ma vmv8r.v v16,v8 vluxei64.v v2,(a0),v8,v0.t vle64.v v16,0(a1) vluxei64.v v1,(a0),v16,v0.t vse8.v v2,0(a2) vse8.v v1,0(a5) addi a1,a1,1 addi a2,a2,100 addi a5,a5,222 bne a4,a6,.L3 .L1: ret The redundant register spills are eliminated. However, there is one more issue that needs to be addressed, which is the redundant move instruction "vmv8r.v". This is another story, and it will be fixed by another patch (Fine tune RVV machine description RA constraint). gcc/ChangeLog: * config/riscv/riscv.h (enum reg_class): Fix RVV register order. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/spill-4.c: Adapt testcase. * gcc.target/riscv/rvv/base/spill-6.c: Adapt testcase. * gcc.target/riscv/rvv/base/reg_order-1.c: New test. Signed-off-by: Ju-Zhe Zhong Co-authored-by: kito-cheng --- gcc/config/riscv/riscv.h | 13 ++++----- .../gcc.target/riscv/rvv/base/reg_order-1.c | 20 ++++++++++++++ gcc/testsuite/gcc.target/riscv/rvv/base/spill-4.c | 32 +++++++++++----------- gcc/testsuite/gcc.target/riscv/rvv/base/spill-6.c | 16 +++++------ 4 files changed, 50 insertions(+), 31 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/reg_order-1.c (limited to 'gcc') diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index 66fb07d..13038a3 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -553,13 +553,12 @@ enum reg_class 60, 61, 62, 63, \ /* Call-saved FPRs. */ \ 40, 41, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, \ - /* V24 ~ V31. */ \ - 120, 121, 122, 123, 124, 125, 126, 127, \ - /* V8 ~ V23. */ \ - 104, 105, 106, 107, 108, 109, 110, 111, \ - 112, 113, 114, 115, 116, 117, 118, 119, \ - /* V0 ~ V7. */ \ - 96, 97, 98, 99, 100, 101, 102, 103, \ + /* v1 ~ v31 vector registers. */ \ + 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, \ + 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, \ + 124, 125, 126, 127, \ + /* The vector mask register. */ \ + 96, \ /* None of the remaining classes have defined call-saved \ registers.
*/ \ 64, 65, 66, 67 \ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/reg_order-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/reg_order-1.c new file mode 100644 index 0000000..b33f914 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/reg_order-1.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */ + +#include "riscv_vector.h" + +void f (void *base,void *base2,void *out,size_t vl, int n) +{ + vuint64m8_t bindex = __riscv_vle64_v_u64m8 (base + 100, vl); + for (int i = 0; i < n; i++){ + vbool8_t m = __riscv_vlm_v_b8 (base + i, vl); + vuint64m8_t v = __riscv_vluxei64_v_u64m8_m(m,base,bindex,vl); + vuint64m8_t v2 = __riscv_vle64_v_u64m8_tu (v, base2 + i, vl); + vint8m1_t v3 = __riscv_vluxei64_v_i8m1_m(m,base,v,vl); + vint8m1_t v4 = __riscv_vluxei64_v_i8m1_m(m,base,v2,vl); + __riscv_vse8_v_i8m1 (out + 100*i,v3,vl); + __riscv_vse8_v_i8m1 (out + 222*i,v4,vl); + } +} + +/* { dg-final { scan-assembler-not {csrr} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-4.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-4.c index 83c80b0..ad7592f 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-4.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-4.c @@ -10,7 +10,7 @@ ** csrr\tt0,vlenb ** sub\tsp,sp,t0 ** ... -** vs1r.v\tv24,0\(sp\) +** vs1r.v\tv[0-9]+,0\(sp\) ** ... ** vl1re64.v\tv2,0\(sp\) ** vs1r.v\tv2,0\(a1\) @@ -34,7 +34,7 @@ spill_4 (int64_t *in, int64_t *out) ** slli\tt1,t0,1 ** sub\tsp,sp,t1 ** ... -** vs2r.v\tv24,0\(sp\) +** vs2r.v\tv[0-9]+,0\(sp\) ** ... ** vl2re64.v\tv4,0\(sp\) ** vs2r.v\tv4,0\(a1\) @@ -58,10 +58,10 @@ spill_5 (int64_t *in, int64_t *out) ** slli\tt1,t0,2 ** sub\tsp,sp,t1 ** ... -** vs4r.v\tv24,0\(sp\) +** vs4r.v\tv[0-9]+,0\(sp\) ** ... -** vl4re64.v\tv8,0\(sp\) -** vs4r.v\tv8,0\(a1\) +** vl4re64.v\tv[0-9]+,0\(sp\) +** vs4r.v\tv[0-9]+,0\(a1\) ** ... ** jr\tra */ @@ -82,10 +82,10 @@ spill_6 (int64_t *in, int64_t *out) ** slli\tt1,t0,3 ** sub\tsp,sp,t1 ** ... -** vs8r.v\tv24,0\(sp\) +** vs8r.v\tv[0-9]+,0\(sp\) ** ... -** vl8re64.v\tv16,0\(sp\) -** vs8r.v\tv16,0\(a1\) +** vl8re64.v\tv[0-9]+,0\(sp\) +** vs8r.v\tv[0-9]+,0\(a1\) ** ... ** jr\tra */ @@ -105,7 +105,7 @@ spill_7 (int64_t *in, int64_t *out) ** csrr\tt0,vlenb ** sub\tsp,sp,t0 ** ... -** vs1r.v\tv24,0\(sp\) +** vs1r.v\tv[0-9]+,0\(sp\) ** ... ** vl1re64.v\tv2,0\(sp\) ** vs1r.v\tv2,0\(a1\) @@ -129,7 +129,7 @@ spill_11 (uint64_t *in, uint64_t *out) ** slli\tt1,t0,1 ** sub\tsp,sp,t1 ** ... -** vs2r.v\tv24,0\(sp\) +** vs2r.v\tv[0-9]+,0\(sp\) ** ... ** vl2re64.v\tv4,0\(sp\) ** vs2r.v\tv4,0\(a1\) @@ -153,10 +153,10 @@ spill_12 (uint64_t *in, uint64_t *out) ** slli\tt1,t0,2 ** sub\tsp,sp,t1 ** ... -** vs4r.v\tv24,0\(sp\) +** vs4r.v\tv[0-9]+,0\(sp\) ** ... -** vl4re64.v\tv8,0\(sp\) -** vs4r.v\tv8,0\(a1\) +** vl4re64.v\tv[0-9]+,0\(sp\) +** vs4r.v\tv[0-9]+,0\(a1\) ** ... ** jr\tra */ @@ -177,10 +177,10 @@ spill_13 (uint64_t *in, uint64_t *out) ** slli\tt1,t0,3 ** sub\tsp,sp,t1 ** ... -** vs8r.v\tv24,0\(sp\) +** vs8r.v\tv[0-9]+,0\(sp\) ** ... -** vl8re64.v\tv16,0\(sp\) -** vs8r.v\tv16,0\(a1\) +** vl8re64.v\tv[0-9]+,0\(sp\) +** vs8r.v\tv[0-9]+,0\(a1\) ** ... ** jr\tra */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-6.c b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-6.c index 340029d..07eee61 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/spill-6.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/spill-6.c @@ -10,10 +10,10 @@ ** csrr\tt0,vlenb ** sub\tsp,sp,t0 ** ... -** vs1r.v\tv24,0\(sp\) +** vs1r.v\tv[0-9]+,0\(sp\) ** ... 
-** vl1re64.v\tv2,0\(sp\) +** vl1re64.v\tv[0-9]+,0\(sp\) +** vs1r.v\tv[0-9]+,0\(a1\) ** ... ** jr\tra */ @@ -34,7 +34,7 @@ spill_4 (double *in, double *out) ** slli\tt1,t0,1 ** sub\tsp,sp,t1 ** ... -** vs2r.v\tv24,0\(sp\) +** vs2r.v\tv[0-9]+,0\(sp\) ** ... ** vl2re64.v\tv4,0\(sp\) ** vs2r.v\tv4,0\(a1\) @@ -58,7 +58,7 @@ spill_5 (double *in, double *out) ** slli\tt1,t0,2 ** sub\tsp,sp,t1 ** ... -** vs4r.v\tv24,0\(sp\) +** vs4r.v\tv[0-9]+,0\(sp\) ** ... ** vl4re64.v\tv8,0\(sp\) ** vs4r.v\tv8,0\(a1\) @@ -82,10 +82,10 @@ spill_6 (double *in, double *out) ** slli\tt1,t0,3 ** sub\tsp,sp,t1 ** ... -** vs8r.v\tv24,0\(sp\) +** vs8r.v\tv[0-9]+,0\(sp\) ** ... -** vl8re64.v\tv16,0\(sp\) -** vs8r.v\tv16,0\(a1\) +** vl8re64.v\tv[0-9]+,0\(sp\) +** vs8r.v\tv[0-9]+,0\(a1\) ** ... ** jr\tra */ -- cgit v1.1 From 07e2576d6f344acab338deeb051845c90c1cf6a3 Mon Sep 17 00:00:00 2001 From: Raphael Zinsly Date: Thu, 20 Apr 2023 08:48:08 -0600 Subject: [PR target/108248] [RISC-V] Break down some bitmanip insn types This is primarily Raphael's work. All I did was adjust it to apply to the trunk and add the new types to generic.md's scheduling model. The basic idea here is to make sure we have the ability to schedule the bitmanip instructions with a finer degree of control. Some of the bitmanip instructions are likely to have differing scheduler characteristics across different implementations. So rather than assign these instructions a generic "bitmanip" type, this patch assigns them a type based on their RTL code by using the <bitmanip_insn> iterator for the type. Naturally we have to add a few new types. It affects clz, ctz, cpop, min, max. We didn't do this for things like shNadd, single bit manipulation, etc. We certainly could if the need presents itself. I threw all the new types into the generic_alu bucket in the generic scheduling model. Seems as good a place as any. Someone who knows the sifive uarch should probably add these types (and bitmanip) to the sifive scheduling model. We also noticed that the recently added orc.b didn't have a type at all. So we added it as a generic bitmanip type. This has been bootstrapped in a gcc-12 base and I've built and run the testsuite without regressions on the trunk. Given it was primarily Raphael's work I could probably approve & commit it. But I'd like to give the other RISC-V folks a chance to chime in. PR target/108248 gcc/ * config/riscv/bitmanip.md (clz, ctz, pcnt, min, max patterns): Use <bitmanip_insn> as the type to allow for fine grained control of scheduling these insns. * config/riscv/generic.md (generic_alu): Add bitmanip, clz, ctz, pcnt, min, max. * config/riscv/riscv.md (type attribute): Add types for clz, ctz, pcnt, signed and unsigned min/max.
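For illustration, a sketch of the finer-grained scheduling this enables; the reservation names and latencies below are invented for the example and are not part of the patch:

  ;; A hypothetical tuning model could now give cpop a longer latency
  ;; than the single-cycle bit operations, a distinction the old
  ;; catch-all "bitmanip" type could not express.
  (define_insn_reservation "example_bit_alu" 1
    (and (eq_attr "tune" "generic")
         (eq_attr "type" "bitmanip,clz,ctz"))
    "alu")

  (define_insn_reservation "example_cpop" 3
    (and (eq_attr "tune" "generic")
         (eq_attr "type" "cpop"))
    "alu")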
--- gcc/config/riscv/bitmanip.md | 11 ++++++----- gcc/config/riscv/generic.md | 2 +- gcc/config/riscv/riscv.md | 1 + 3 files changed, 8 insertions(+), 6 deletions(-) (limited to 'gcc') diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index 388ef66..44ad350 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -241,7 +241,7 @@ (clz_ctz_pcnt:SI (match_operand:SI 1 "register_operand" "r")))] "TARGET_ZBB" "<bitmanip_insn>%~\t%0,%1" - [(set_attr "type" "bitmanip") + [(set_attr "type" "<bitmanip_insn>") (set_attr "mode" "SI")]) (define_insn "*<bitmanip_optab>disi2" [(set (match_operand:DI 0 "register_operand" "=r") (sign_extend:DI (clz_ctz_pcnt:SI (match_operand:SI 1 "register_operand" "r"))))] "TARGET_64BIT && TARGET_ZBB" "<bitmanip_insn>w\t%0,%1" - [(set_attr "type" "bitmanip") + [(set_attr "type" "<bitmanip_insn>") (set_attr "mode" "SI")]) (define_insn "*<bitmanip_optab>di2" [(set (match_operand:DI 0 "register_operand" "=r") (clz_ctz_pcnt:DI (match_operand:DI 1 "register_operand" "r")))] "TARGET_64BIT && TARGET_ZBB" "<bitmanip_insn>\t%0,%1" - [(set_attr "type" "bitmanip") + [(set_attr "type" "<bitmanip_insn>") (set_attr "mode" "DI")]) (define_insn "*zero_extendhi<GPR:mode>2_bitmanip" @@ -357,7 +357,8 @@ [(set (match_operand:X 0 "register_operand" "=r") (unspec:X [(match_operand:X 1 "register_operand" "r")] UNSPEC_ORC_B))] "TARGET_ZBB" - "orc.b\t%0,%1") + "orc.b\t%0,%1" + [(set_attr "type" "bitmanip")]) (define_expand "bswapdi2" [(set (match_operand:DI 0 "register_operand") @@ -406,7 +407,7 @@ (match_operand:X 2 "reg_or_0_operand" "rJ")))] "TARGET_ZBB" "<bitmanip_insn>\t%0,%1,%z2" - [(set_attr "type" "bitmanip")]) + [(set_attr "type" "<bitmanip_insn>")]) ;; Optimize the common case of a SImode min/max against a constant ;; that is safe both for sign- and zero-extension. diff --git a/gcc/config/riscv/generic.md b/gcc/config/riscv/generic.md index b98d0ae..db4fabb 100644 --- a/gcc/config/riscv/generic.md +++ b/gcc/config/riscv/generic.md @@ -27,7 +27,7 @@ (define_insn_reservation "generic_alu" 1 (and (eq_attr "tune" "generic") - (eq_attr "type" "unknown,const,arith,shift,slt,multi,auipc,nop,logical,move")) + (eq_attr "type" "unknown,const,arith,shift,slt,multi,auipc,nop,logical,move,bitmanip,smin,smax,umin,umax,clz,ctz,cpop")) "alu") (define_insn_reservation "generic_load" 3 diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 1fb29da..0c69407 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -344,6 +344,7 @@ "unknown,branch,jump,call,load,fpload,store,fpstore, mtc,mfc,const,arith,logical,shift,slt,imul,idiv,move,fmove,fadd,fmul, fmadd,fdiv,fcmp,fcvt,fsqrt,multi,auipc,sfb_alu,nop,ghost,bitmanip,rotate, + min,max,minu,maxu,clz,ctz,cpop, atomic,condmove,crypto,rdvlenb,rdvl,vsetvl,vlde,vste,vldm,vstm,vlds,vsts, vldux,vldox,vstux,vstox,vldff,vldr,vstr, vialu,viwalu,vext,vicalu,vshift,vnshift,vicmp,viminmax, -- cgit v1.1 From 272484dae6b5264baa0f41eba80a9521e9b7ecf5 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 20 Apr 2023 16:51:56 +0200 Subject: i386: Handle sign-extract for QImode operations with high registers [PR78952] Introduce extract_operator predicate to handle both zero-extract and sign-extract operations with expressions like: (subreg:QI (zero_extract:SWI248 (match_operand 1 "int248_register_operand" "0") (const_int 8) (const_int 8)) 0) As shown in the testcase, this will enable generation of QImode instructions with high registers when signed arguments are used. gcc/ChangeLog: PR target/78952 * config/i386/predicates.md (extract_operator): New predicate. * config/i386/i386.md (any_extract): Remove code iterator. (*cmpqi_ext<mode>_1_mem_rex64): Use extract_operator predicate. (*cmpqi_ext<mode>_1): Ditto. (*cmpqi_ext<mode>_2): Ditto.
(*cmpqi_ext<mode>_3_mem_rex64): Ditto. (*cmpqi_ext<mode>_3): Ditto. (*cmpqi_ext<mode>_4): Ditto. (*extzvqi_mem_rex64): Ditto. (*extzvqi): Ditto. (*insvqi_2): Ditto. (*extendqi<SWI24:mode>_ext_1): Ditto. (*addqi_ext<mode>_0): Ditto. (*addqi_ext<mode>_1): Ditto. (*addqi_ext<mode>_2): Ditto. (*subqi_ext<mode>_0): Ditto. (*subqi_ext<mode>_2): Ditto. (*testqi_ext<mode>_1): Ditto. (*testqi_ext<mode>_2): Ditto. (*andqi_ext<mode>_0): Ditto. (*andqi_ext<mode>_1): Ditto. (*andqi_ext<mode>_1_cc): Ditto. (*andqi_ext<mode>_2): Ditto. (*<any_or:code>qi_ext<mode>_0): Ditto. (*<any_or:code>qi_ext<mode>_1): Ditto. (*<any_or:code>qi_ext<mode>_2): Ditto. (*xorqi_ext<mode>_1_cc): Ditto. (*negqi_ext<mode>_2): Ditto. (*ashlqi_ext<mode>_2): Ditto. (*<any_shiftrt:code>qi_ext<mode>_2): Ditto. gcc/testsuite/ChangeLog: PR target/78952 * gcc.target/i386/pr78952-4.c: New test. --- gcc/config/i386/i386.md | 372 +++++++++++++++--------------- gcc/config/i386/predicates.md | 3 + gcc/testsuite/gcc.target/i386/pr78952-4.c | 48 ++++ 3 files changed, 237 insertions(+), 186 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr78952-4.c (limited to 'gcc') diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index f8698ea..d49f1cd 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1008,9 +1008,6 @@ ;; Mapping of extend operators (define_code_iterator any_extend [sign_extend zero_extend]) -;; Mapping of extract operators -(define_code_iterator any_extract [sign_extract zero_extract]) - ;; Mapping of highpart multiply operators (define_code_iterator any_mul_highpart [smul_highpart umul_highpart]) @@ -1465,10 +1462,10 @@ (compare (match_operand:QI 0 "norex_memory_operand" "Bn") (subreg:QI - (any_extract:SWI248 - (match_operand 1 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)) 0)))] + (match_operator:SWI248 2 "extract_operator" + [(match_operand 1 "int248_register_operand" "Q") + (const_int 8) + (const_int 8)]) 0)))] "TARGET_64BIT && reload_completed && ix86_match_ccmode (insn, CCmode)" "cmp{b}\t{%h1, %0|%0, %h1}" @@ -1480,10 +1477,10 @@ (compare (match_operand:QI 0 "nonimmediate_operand" "QBc,m") (subreg:QI - (any_extract:SWI248 - (match_operand 1 "int248_register_operand" "Q,Q") - (const_int 8) - (const_int 8)) 0)))] + (match_operator:SWI248 2 "extract_operator" + [(match_operand 1 "int248_register_operand" "Q,Q") + (const_int 8) + (const_int 8)]) 0)))] "ix86_match_ccmode (insn, CCmode)" "cmp{b}\t{%h1, %0|%0, %h1}" [(set_attr "isa" "*,nox64") @@ -1497,29 +1494,29 @@ (match_operator 4 "compare_operator" [(match_dup 0) (subreg:QI - (any_extract:SWI248 - (match_operand 2 "int248_register_operand") - (const_int 8) - (const_int 8)) 0)]))] + (match_operator:SWI248 5 "extract_operator" + [(match_operand 2 "int248_register_operand") + (const_int 8) + (const_int 8)]) 0)]))] "TARGET_64BIT && peep2_reg_dead_p (2, operands[0])" [(set (match_dup 3) (match_op_dup 4 [(match_dup 1) (subreg:QI - (any_extract:SWI248 - (match_dup 2) - (const_int 8) - (const_int 8)) 0)]))]) + (match_op_dup 5 + [(match_dup 2) + (const_int 8) + (const_int 8)]) 0)]))]) (define_insn "*cmpqi_ext<mode>_2" [(set (reg FLAGS_REG) (compare (subreg:QI - (any_extract:SWI248 - (match_operand 0 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)) 0) + (match_operator:SWI248 2 "extract_operator" + [(match_operand 0 "int248_register_operand" "Q") + (const_int 8) + (const_int 8)]) 0) (match_operand:QI 1 "const0_operand")))] "ix86_match_ccmode (insn, CCNOmode)" "test{b}\t%h0, %h0" @@ -1541,10 +1538,10 @@ [(set (reg FLAGS_REG) (compare (subreg:QI - (any_extract:SWI248 - (match_operand 0 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)) 0) + (match_operator:SWI248 2 "extract_operator" + [(match_operand
0 "int248_register_operand" "Q") + (const_int 8) + (const_int 8)]) 0) (match_operand:QI 1 "norex_memory_operand" "Bn")))] "TARGET_64BIT && reload_completed && ix86_match_ccmode (insn, CCmode)" @@ -1556,10 +1553,10 @@ [(set (reg FLAGS_REG) (compare (subreg:QI - (any_extract:SWI248 - (match_operand 0 "int248_register_operand" "Q,Q") - (const_int 8) - (const_int 8)) 0) + (match_operator:SWI248 2 "extract_operator" + [(match_operand 0 "int248_register_operand" "Q,Q") + (const_int 8) + (const_int 8)]) 0) (match_operand:QI 1 "general_operand" "QnBc,m")))] "ix86_match_ccmode (insn, CCmode)" "cmp{b}\t{%1, %h0|%h0, %1}" @@ -1573,35 +1570,35 @@ (set (match_operand 3 "flags_reg_operand") (match_operator 4 "compare_operator" [(subreg:QI - (any_extract:SWI248 - (match_operand 2 "int248_register_operand") - (const_int 8) - (const_int 8)) 0) + (match_operator:SWI248 5 "extract_operator" + [(match_operand 2 "int248_register_operand") + (const_int 8) + (const_int 8)]) 0) (match_dup 0)]))] "TARGET_64BIT && peep2_reg_dead_p (2, operands[0])" [(set (match_dup 3) (match_op_dup 4 [(subreg:QI - (any_extract:SWI248 - (match_dup 2) - (const_int 8) - (const_int 8)) 0) + (match_op_dup 5 + [(match_dup 2) + (const_int 8) + (const_int 8)]) 0) (match_dup 1)]))]) (define_insn "*cmpqi_ext_4" [(set (reg FLAGS_REG) (compare (subreg:QI - (any_extract:SWI248 - (match_operand 0 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)) 0) + (match_operator:SWI248 2 "extract_operator" + [(match_operand 0 "int248_register_operand" "Q") + (const_int 8) + (const_int 8)]) 0) (subreg:QI - (any_extract:SWI248 - (match_operand 1 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)) 0)))] + (match_operator:SWI248 3 "extract_operator" + [(match_operand 1 "int248_register_operand" "Q") + (const_int 8) + (const_int 8)]) 0)))] "ix86_match_ccmode (insn, CCmode)" "cmp{b}\t{%h1, %h0|%h0, %h1}" [(set_attr "type" "icmp") @@ -3269,18 +3266,6 @@ operands[1] = copy_to_reg (operands[1]); }) -(define_insn "*extzvqi_mem_rex64" - [(set (match_operand:QI 0 "norex_memory_operand" "=Bn") - (subreg:QI - (zero_extract:SWI248 - (match_operand 1 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)) 0))] - "TARGET_64BIT && reload_completed" - "mov{b}\t{%h1, %0|%0, %h1}" - [(set_attr "type" "imov") - (set_attr "mode" "QI")]) - (define_insn "*extzv" [(set (match_operand:SWI248 0 "register_operand" "=R") (zero_extract:SWI248 (match_operand 1 "int248_register_operand" "Q") @@ -3291,13 +3276,25 @@ [(set_attr "type" "imovx") (set_attr "mode" "SI")]) +(define_insn "*extzvqi_mem_rex64" + [(set (match_operand:QI 0 "norex_memory_operand" "=Bn") + (subreg:QI + (match_operator:SWI248 2 "extract_operator" + [(match_operand 1 "int248_register_operand" "Q") + (const_int 8) + (const_int 8)]) 0))] + "TARGET_64BIT && reload_completed" + "mov{b}\t{%h1, %0|%0, %h1}" + [(set_attr "type" "imov") + (set_attr "mode" "QI")]) + (define_insn "*extzvqi" [(set (match_operand:QI 0 "nonimmediate_operand" "=QBc,?R,m") (subreg:QI - (zero_extract:SWI248 - (match_operand 1 "int248_register_operand" "Q,Q,Q") - (const_int 8) - (const_int 8)) 0))] + (match_operator:SWI248 2 "extract_operator" + [(match_operand 1 "int248_register_operand" "Q,Q,Q") + (const_int 8) + (const_int 8)]) 0))] "" { switch (get_attr_type (insn)) @@ -3323,17 +3320,19 @@ (define_peephole2 [(set (match_operand:QI 0 "register_operand") (subreg:QI - (zero_extract:SWI248 (match_operand 1 "int248_register_operand") - (const_int 8) - (const_int 8)) 0)) + (match_operator:SWI248 3 "extract_operator" + 
[(match_operand 1 "int248_register_operand") + (const_int 8) + (const_int 8)]) 0)) (set (match_operand:QI 2 "norex_memory_operand") (match_dup 0))] "TARGET_64BIT && peep2_reg_dead_p (2, operands[0])" [(set (match_dup 2) (subreg:QI - (zero_extract:SWI248 (match_dup 1) - (const_int 8) - (const_int 8)) 0))]) + (match_op_dup 3 + [(match_dup 1) + (const_int 8) + (const_int 8)]) 0))]) (define_expand "insv<mode>" [(set (zero_extract:SWI248 (match_operand:SWI248 0 "register_operand") @@ -3459,10 +3458,10 @@ (match_operand 0 "int248_register_operand" "+Q") (const_int 8) (const_int 8)) - (any_extract:SWI248 - (match_operand 1 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)))] + (match_operator:SWI248 2 "extract_operator" + [(match_operand 1 "int248_register_operand" "Q") + (const_int 8) + (const_int 8)]))] "" "mov{b}\t{%h1, %h0|%h0, %h1}" [(set_attr "type" "imov") @@ -4860,10 +4859,10 @@ [(set (match_operand:SWI24 0 "register_operand" "=R") (sign_extend:SWI24 (subreg:QI - (zero_extract:SWI248 - (match_operand 1 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)) 0)))] + (match_operator:SWI248 2 "extract_operator" + [(match_operand 1 "int248_register_operand" "Q") + (const_int 8) + (const_int 8)]) 0)))] "" "movs{b|x}\t{%h1, %0|%0, %h1}" [(set_attr "type" "imovx") @@ -6723,10 +6722,10 @@ [(set (match_operand:QI 0 "nonimm_x64constmem_operand" "=QBc,m") (plus:QI (subreg:QI - (zero_extract:SWI248 - (match_operand 2 "int248_register_operand" "Q,Q") - (const_int 8) - (const_int 8)) 0) + (match_operator:SWI248 3 "extract_operator" + [(match_operand 2 "int248_register_operand" "Q,Q") + (const_int 8) + (const_int 8)]) 0) (match_operand:QI 1 "nonimm_x64constmem_operand" "0,0"))) (clobber (reg:CC FLAGS_REG))] "" @@ -6757,10 +6756,10 @@ (subreg:SWI248 (plus:QI (subreg:QI - (zero_extract:SWI248 - (match_operand 1 "int248_register_operand" "0,0") - (const_int 8) - (const_int 8)) 0) + (match_operator:SWI248 3 "extract_operator" + [(match_operand 1 "int248_register_operand" "0,0") + (const_int 8) + (const_int 8)]) 0) (match_operand:QI 2 "general_x64constmem_operand" "QnBc,m")) 0)) (clobber (reg:CC FLAGS_REG))] "/* FIXME: without this LRA can't reload this pattern, see PR82524. */ @@ -6796,15 +6795,15 @@ (subreg:SWI248 (plus:QI (subreg:QI - (zero_extract:SWI248 - (match_operand 1 "int248_register_operand" "%0") - (const_int 8) - (const_int 8)) 0) + (match_operator:SWI248 3 "extract_operator" + [(match_operand 1 "int248_register_operand" "%0") + (const_int 8) + (const_int 8)]) 0) (subreg:QI - (zero_extract:SWI248 - (match_operand 2 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)) 0)) 0)) + (match_operator:SWI248 4 "extract_operator" + [(match_operand 2 "int248_register_operand" "Q") + (const_int 8) + (const_int 8)]) 0)) 0)) (clobber (reg:CC FLAGS_REG))] "/* FIXME: without this LRA can't reload this pattern, see PR82524.
*/ rtx_equal_p (operands[0], operands[1]) @@ -7364,10 +7363,10 @@ (minus:QI (match_operand:QI 1 "nonimm_x64constmem_operand" "0,0") (subreg:QI - (zero_extract:SWI248 - (match_operand 2 "int248_register_operand" "Q,Q") - (const_int 8) - (const_int 8)) 0))) + (match_operator:SWI248 3 "extract_operator" + [(match_operand 2 "int248_register_operand" "Q,Q") + (const_int 8) + (const_int 8)]) 0))) (clobber (reg:CC FLAGS_REG))] "" "sub{b}\t{%h2, %0|%0, %h2}" @@ -7383,15 +7382,15 @@ (subreg:SWI248 (minus:QI (subreg:QI - (zero_extract:SWI248 - (match_operand 1 "int248_register_operand" "0") - (const_int 8) - (const_int 8)) 0) + (match_operator:SWI248 3 "extract_operator" + [(match_operand 1 "int248_register_operand" "0") + (const_int 8) + (const_int 8)]) 0) (subreg:QI - (zero_extract:SWI248 - (match_operand 2 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)) 0)) 0)) + (match_operator:SWI248 4 "extract_operator" + [(match_operand 2 "int248_register_operand" "Q") + (const_int 8) + (const_int 8)]) 0)) 0)) (clobber (reg:CC FLAGS_REG))] "/* FIXME: without this LRA can't reload this pattern, see PR82524. */ rtx_equal_p (operands[0], operands[1])" @@ -9975,10 +9974,10 @@ (compare (and:QI (subreg:QI - (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "Q,Q") - (const_int 8) - (const_int 8)) 0) + (match_operator:SWI248 2 "extract_operator" + [(match_operand 0 "int248_register_operand" "Q,Q") + (const_int 8) + (const_int 8)]) 0) (match_operand:QI 1 "general_x64constmem_operand" "QnBc,m")) (const_int 0)))] "ix86_match_ccmode (insn, CCNOmode)" @@ -9992,15 +9991,15 @@ (compare (and:QI (subreg:QI - (zero_extract:SWI248 - (match_operand 0 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)) 0) + (match_operator:SWI248 2 "extract_operator" + [(match_operand 0 "int248_register_operand" "Q") + (const_int 8) + (const_int 8)]) 0) (subreg:QI - (zero_extract:SWI248 - (match_operand 1 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)) 0)) + (match_operator:SWI248 3 "extract_operator" + [(match_operand 1 "int248_register_operand" "Q") + (const_int 8) + (const_int 8)]) 0)) (const_int 0)))] "ix86_match_ccmode (insn, CCNOmode)" "test{b}\t{%h1, %h0|%h0, %h1}" @@ -10642,10 +10641,10 @@ [(set (match_operand:QI 0 "nonimm_x64constmem_operand" "=QBc,m") (and:QI (subreg:QI - (zero_extract:SWI248 - (match_operand 2 "int248_register_operand" "Q,Q") - (const_int 8) - (const_int 8)) 0) + (match_operator:SWI248 3 "extract_operator" + [(match_operand 2 "int248_register_operand" "Q,Q") + (const_int 8) + (const_int 8)]) 0) (match_operand:QI 1 "nonimm_x64constmem_operand" "0,0"))) (clobber (reg:CC FLAGS_REG))] "" @@ -10676,10 +10675,10 @@ (subreg:SWI248 (and:QI (subreg:QI - (zero_extract:SWI248 - (match_operand 1 "int248_register_operand" "0,0") - (const_int 8) - (const_int 8)) 0) + (match_operator:SWI248 3 "extract_operator" + [(match_operand 1 "int248_register_operand" "0,0") + (const_int 8) + (const_int 8)]) 0) (match_operand:QI 2 "general_x64constmem_operand" "QnBc,m")) 0)) (clobber (reg:CC FLAGS_REG))] "/* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ @@ -10696,10 +10695,10 @@ (compare (and:QI (subreg:QI - (zero_extract:SWI248 - (match_operand 1 "int248_register_operand" "0,0") - (const_int 8) - (const_int 8)) 0) + (match_operator:SWI248 3 "extract_operator" + [(match_operand 1 "int248_register_operand" "0,0") + (const_int 8) + (const_int 8)]) 0) (match_operand:QI 2 "general_x64constmem_operand" "QnBc,m")) (const_int 0))) (set (zero_extract:SWI248 @@ -10709,10 +10708,10 @@ (subreg:SWI248 (and:QI (subreg:QI - (zero_extract:SWI248 - (match_dup 1) - (const_int 8) - (const_int 8)) 0) + (match_op_dup 3 + [(match_dup 1) + (const_int 8) + (const_int 8)]) 0) (match_dup 2)) 0))] "ix86_match_ccmode (insn, CCNOmode) /* FIXME: without this LRA can't reload this pattern, see PR82524. */ @@ -10730,15 +10729,15 @@ (subreg:SWI248 (and:QI (subreg:QI - (zero_extract:SWI248 - (match_operand 1 "int248_register_operand" "%0") - (const_int 8) - (const_int 8)) 0) + (match_operator:SWI248 3 "extract_operator" + [(match_operand 1 "int248_register_operand" "%0") + (const_int 8) + (const_int 8)]) 0) (subreg:QI - (zero_extract:SWI248 - (match_operand 2 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)) 0)) 0)) + (match_operator:SWI248 4 "extract_operator" + [(match_operand 2 "int248_register_operand" "Q") + (const_int 8) + (const_int 8)]) 0)) 0)) (clobber (reg:CC FLAGS_REG))] "/* FIXME: without this LRA can't reload this pattern, see PR82524. */ rtx_equal_p (operands[0], operands[1]) @@ -11399,10 +11398,10 @@ [(set (match_operand:QI 0 "nonimm_x64constmem_operand" "=QBc,m") (any_or:QI (subreg:QI - (zero_extract:SWI248 - (match_operand 2 "int248_register_operand" "Q,Q") - (const_int 8) - (const_int 8)) 0) + (match_operator:SWI248 3 "extract_operator" + [(match_operand 2 "int248_register_operand" "Q,Q") + (const_int 8) + (const_int 8)]) 0) (match_operand:QI 1 "nonimm_x64constmem_operand" "0,0"))) (clobber (reg:CC FLAGS_REG))] "" @@ -11419,10 +11418,10 @@ (subreg:SWI248 (any_or:QI (subreg:QI - (zero_extract:SWI248 - (match_operand 1 "int248_register_operand" "0,0") - (const_int 8) - (const_int 8)) 0) + (match_operator:SWI248 3 "extract_operator" + [(match_operand 1 "int248_register_operand" "0,0") + (const_int 8) + (const_int 8)]) 0) (match_operand:QI 2 "general_x64constmem_operand" "QnBc,m")) 0)) (clobber (reg:CC FLAGS_REG))] "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) @@ -11441,15 +11440,15 @@ (subreg:SWI248 (any_or:QI (subreg:QI - (zero_extract:SWI248 - (match_operand 1 "int248_register_operand" "%0") - (const_int 8) - (const_int 8)) 0) + (match_operator:SWI248 3 "extract_operator" + [(match_operand 1 "int248_register_operand" "%0") + (const_int 8) + (const_int 8)]) 0) (subreg:QI - (zero_extract:SWI248 - (match_operand 2 "int248_register_operand" "Q") - (const_int 8) - (const_int 8)) 0)) 0)) + (match_operator:SWI248 4 "extract_operator" + [(match_operand 2 "int248_register_operand" "Q") + (const_int 8) + (const_int 8)]) 0)) 0)) (clobber (reg:CC FLAGS_REG))] "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)) /* FIXME: without this LRA can't reload this pattern, see PR82524. 
*/ @@ -11547,10 +11546,10 @@ (compare (xor:QI (subreg:QI - (zero_extract:SWI248 - (match_operand 1 "int248_register_operand" "0,0") - (const_int 8) - (const_int 8)) 0) + (match_operator:SWI248 3 "extract_operator" + [(match_operand 1 "int248_register_operand" "0,0") + (const_int 8) + (const_int 8)]) 0) (match_operand:QI 2 "general_x64constmem_operand" "QnBc,m")) (const_int 0))) (set (zero_extract:SWI248 @@ -11560,10 +11559,10 @@ (subreg:SWI248 (xor:QI (subreg:QI - (zero_extract:SWI248 - (match_dup 1) - (const_int 8) - (const_int 8)) 0) + (match_op_dup 3 + [(match_dup 1) + (const_int 8) + (const_int 8)]) 0) (match_dup 2)) 0))] "ix86_match_ccmode (insn, CCNOmode) /* FIXME: without this LRA can't reload this pattern, see PR82524. */ @@ -11957,10 +11956,10 @@ (subreg:SWI248 (neg:QI (subreg:QI - (zero_extract:SWI248 - (match_operand 1 "int248_register_operand" "0") - (const_int 8) - (const_int 8)) 0)) 0)) + (match_operator:SWI248 2 "extract_operator" + [(match_operand 1 "int248_register_operand" "0") + (const_int 8) + (const_int 8)]) 0)) 0)) (clobber (reg:CC FLAGS_REG))] "/* FIXME: without this LRA can't reload this pattern, see PR82524. */ rtx_equal_p (operands[0], operands[1])" @@ -13518,10 +13517,10 @@ (subreg:SWI248 (ashift:QI (subreg:QI - (zero_extract:SWI248 - (match_operand 1 "int248_register_operand" "0") - (const_int 8) - (const_int 8)) 0) + (match_operator:SWI248 3 "extract_operator" + [(match_operand 1 "int248_register_operand" "0") + (const_int 8) + (const_int 8)]) 0) (match_operand:QI 2 "nonmemory_operand" "cI")) 0)) (clobber (reg:CC FLAGS_REG))] "/* FIXME: without this LRA can't reload this pattern, see PR82524. */ @@ -14421,10 +14420,10 @@ (subreg:SWI248 (any_shiftrt:QI (subreg:QI - (zero_extract:SWI248 - (match_operand 1 "int248_register_operand" "0") - (const_int 8) - (const_int 8)) 0) + (match_operator:SWI248 3 "extract_operator" + [(match_operand 1 "int248_register_operand" "0") + (const_int 8) + (const_int 8)]) 0) (match_operand:QI 2 "nonmemory_operand" "cI")) 0)) (clobber (reg:CC FLAGS_REG))] "/* FIXME: without this LRA can't reload this pattern, see PR82524. */ @@ -23291,9 +23290,10 @@ (match_operator 1 "compare_operator" [(and:QI (subreg:QI - (zero_extract:SWI248 (match_operand 2 "int248_register_operand") - (const_int 8) - (const_int 8)) 0) + (match_operator:SWI248 4 "extract_operator" + [(match_operand 2 "int248_register_operand") + (const_int 8) + (const_int 8)]) 0) (match_operand 3 "const_int_operand")) (const_int 0)]))] "! TARGET_PARTIAL_REG_STALL @@ -23305,9 +23305,9 @@ (match_op_dup 1 [(and:QI (subreg:QI - (zero_extract:SWI248 (match_dup 2) - (const_int 8) - (const_int 8)) 0) + (match_op_dup 4 [(match_dup 2) + (const_int 8) + (const_int 8)]) 0) (match_dup 3)) (const_int 0)])) (set (zero_extract:SWI248 (match_dup 2) @@ -23316,9 +23316,9 @@ (subreg:SWI248 (and:QI (subreg:QI - (zero_extract:SWI248 (match_dup 2) - (const_int 8) - (const_int 8)) 0) + (match_op_dup 4 [(match_dup 2) + (const_int 8) + (const_int 8)]) 0) (match_dup 3)) 0))])]) ;; Don't do logical operations with memory inputs. diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index b4d9ab4..3f93427 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -1684,6 +1684,9 @@ (define_predicate "compare_operator" (match_code "compare")) +(define_predicate "extract_operator" + (match_code "zero_extract,sign_extract")) + ;; Return true if OP is a memory operand, aligned to ;; less than its natural alignment. 
(define_predicate "misaligned_operand" diff --git a/gcc/testsuite/gcc.target/i386/pr78952-4.c b/gcc/testsuite/gcc.target/i386/pr78952-4.c new file mode 100644 index 0000000..c7bd63c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr78952-4.c @@ -0,0 +1,48 @@ +/* PR target/78952 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -masm=att" } */ +/* { dg-final { scan-assembler-not "mov\[sz\]bl" } } */ +/* { dg-final { scan-assembler-not "movb" } } */ + +struct S1 +{ + signed char pad1; + signed char val; + signed short pad2; +}; + +struct S1 test_and (struct S1 a, struct S1 b) +{ + a.val &= b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]andb" } } */ + +struct S1 test_or (struct S1 a, struct S1 b) +{ + a.val |= b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]orb" } } */ + +struct S1 test_xor (struct S1 a, struct S1 b) +{ + a.val ^= b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]xorb" } } */ + +struct S1 test_add (struct S1 a, struct S1 b) +{ + a.val += b.val; + + return a; +} + +/* { dg-final { scan-assembler "\[ \t\]addb" } } */ -- cgit v1.1 From cae48a9d80790dcde6ae86122546c89a00d8bb4f Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Thu, 20 Apr 2023 17:00:24 +0200 Subject: arch: Use VIRTUAL_REGISTER_P predicate. gcc/ChangeLog: * config/arm/arm.cc (thumb1_legitimate_address_p): Use VIRTUAL_REGISTER_P predicate. (arm_eliminable_register): Ditto. * config/avr/avr.md (push_1): Ditto. * config/bfin/predicates.md (register_no_elim_operand): Ditto. * config/h8300/predicates.md (register_no_sp_elim_operand): Ditto. * config/i386/predicates.md (register_no_elim_operand): Ditto. * config/iq2000/predicates.md (call_insn_operand): Ditto. * config/microblaze/microblaze.h (CALL_INSN_OP): Ditto. --- gcc/config/arm/arm.cc | 7 ++----- gcc/config/avr/avr.md | 3 +-- gcc/config/bfin/predicates.md | 5 ++--- gcc/config/h8300/predicates.md | 3 +-- gcc/config/i386/predicates.md | 5 ++--- gcc/config/iq2000/predicates.md | 3 +-- gcc/config/microblaze/microblaze.h | 5 ++--- 7 files changed, 11 insertions(+), 20 deletions(-) (limited to 'gcc') diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index bf7ff9a..1164119 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -9105,9 +9105,7 @@ thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p) else if (REG_P (XEXP (x, 0)) && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM - || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER - && REGNO (XEXP (x, 0)) - <= LAST_VIRTUAL_POINTER_REGISTER)) + || VIRTUAL_REGISTER_P (XEXP (x, 0))) && GET_MODE_SIZE (mode) >= 4 && CONST_INT_P (XEXP (x, 1)) && (INTVAL (XEXP (x, 1)) & 3) == 0) @@ -13905,8 +13903,7 @@ arm_eliminable_register (rtx x) { return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM || REGNO (x) == ARG_POINTER_REGNUM - || (REGNO (x) >= FIRST_VIRTUAL_REGISTER - && REGNO (x) <= LAST_VIRTUAL_REGISTER)); + || VIRTUAL_REGISTER_P (x)); } /* Return GENERAL_REGS if a scratch register required to reload x to/from diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md index e581e95..43b7504 100644 --- a/gcc/config/avr/avr.md +++ b/gcc/config/avr/avr.md @@ -417,8 +417,7 @@ operands[0] = copy_to_mode_reg (mode, operands[0]); } else if (REG_P (operands[0]) - && IN_RANGE (REGNO (operands[0]), FIRST_VIRTUAL_REGISTER, - LAST_VIRTUAL_REGISTER)) + && VIRTUAL_REGISTER_P (operands[0])) { // Byte-wise pushing of virtual regs might result in something like // diff --git a/gcc/config/bfin/predicates.md 
b/gcc/config/bfin/predicates.md index 09ec5a4..632634e 100644 --- a/gcc/config/bfin/predicates.md +++ b/gcc/config/bfin/predicates.md @@ -175,7 +175,7 @@ (define_predicate "symbol_ref_operand" (match_code "symbol_ref")) -;; True for any non-virtual or eliminable register. Used in places where +;; True for any non-virtual and non-eliminable register. Used in places where ;; instantiation of such a register may cause the pattern to not be recognized. (define_predicate "register_no_elim_operand" (match_operand 0 "register_operand") @@ -184,8 +184,7 @@ op = SUBREG_REG (op); return !(op == arg_pointer_rtx || op == frame_pointer_rtx - || (REGNO (op) >= FIRST_PSEUDO_REGISTER - && REGNO (op) <= LAST_VIRTUAL_REGISTER)); + || VIRTUAL_REGISTER_P (op)); }) ;; Test for an operator valid in a BImode conditional branch diff --git a/gcc/config/h8300/predicates.md b/gcc/config/h8300/predicates.md index 02da8aa..486c4d7 100644 --- a/gcc/config/h8300/predicates.md +++ b/gcc/config/h8300/predicates.md @@ -378,8 +378,7 @@ return !(op == stack_pointer_rtx || op == arg_pointer_rtx || op == frame_pointer_rtx - || IN_RANGE (REGNO (op), - FIRST_PSEUDO_REGISTER, LAST_VIRTUAL_REGISTER)); + || VIRTUAL_REGISTER_P (op)); }) ;; Return nonzero if X is a constant whose absolute value is greater diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 3f93427..e752e20 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -702,7 +702,7 @@ return register_no_elim_operand (op, mode); }) -;; True for any non-virtual or eliminable register. Used in places where +;; True for any non-virtual and non-eliminable register. Used in places where ;; instantiation of such a register may cause the pattern to not be recognized. (define_predicate "register_no_elim_operand" (match_operand 0 "register_operand") @@ -717,8 +717,7 @@ return !(op == arg_pointer_rtx || op == frame_pointer_rtx - || IN_RANGE (REGNO (op), - FIRST_PSEUDO_REGISTER, LAST_VIRTUAL_REGISTER)); + || VIRTUAL_REGISTER_P (op)); }) ;; Similarly, but include the stack pointer. This is used to prevent esp diff --git a/gcc/config/iq2000/predicates.md b/gcc/config/iq2000/predicates.md index 4adc108..1330f7d6 100644 --- a/gcc/config/iq2000/predicates.md +++ b/gcc/config/iq2000/predicates.md @@ -206,8 +206,7 @@ { return (CONSTANT_ADDRESS_P (op) || (GET_CODE (op) == REG && op != arg_pointer_rtx - && ! (REGNO (op) >= FIRST_PSEUDO_REGISTER - && REGNO (op) <= LAST_VIRTUAL_REGISTER))); + && ! VIRTUAL_REGISTER_P (op))); }) ;; Return nonzero if OP is valid as a source operand for a move diff --git a/gcc/config/microblaze/microblaze.h b/gcc/config/microblaze/microblaze.h index 0398902..8a0e1a76 100644 --- a/gcc/config/microblaze/microblaze.h +++ b/gcc/config/microblaze/microblaze.h @@ -372,9 +372,8 @@ extern enum reg_class microblaze_regno_to_class[]; since they may change into reg + const, which the patterns can't handle yet. */ #define CALL_INSN_OP(X) (CONSTANT_ADDRESS_P (X) \ - || (GET_CODE (X) == REG && X != arg_pointer_rtx\ - && ! (REGNO (X) >= FIRST_PSEUDO_REGISTER \ - && REGNO (X) <= LAST_VIRTUAL_REGISTER))) + || (GET_CODE (X) == REG && X != arg_pointer_rtx \ + && ! VIRTUAL_REGISTER_P (X))) /* True if VALUE is a signed 16-bit number. */ #define SMALL_OPERAND(VALUE) \ -- cgit v1.1 From 1d2aa9a8cb8fd078af930347392cfdfc14e1551c Mon Sep 17 00:00:00 2001 From: "Vladimir N. 
Makarov" Date: Thu, 20 Apr 2023 10:02:13 -0400 Subject: [LRA]: Exclude some hard regs for multi-reg inout reload pseudos used in asm in different mode See gcc.c-torture/execute/20030222-1.c. Consider the code for 32-bit (e.g. BE) target: int i, v; long x; x = v; asm ("" : "=r" (i) : "0" (x)); We generate the following RTL with reload insns: 1. subreg:si(x:di, 0) = 0; 2. subreg:si(x:di, 4) = v:si; 3. t:di = x:di, dead x; 4. asm ("" : "=r" (subreg:si(t:di,4)) : "0" (t:di)) 5. i:si = subreg:si(t:di,4); If we assign hard reg of x to t, dead code elimination will remove insn #2 and we will use unitialized hard reg. So exclude the hard reg of x for t. We could ignore this problem for non-empty asm using all x value but it is hard to check that the asm are expanded into insn realy using x and setting r. The old reload pass used the same approach. gcc/ChangeLog * lra-constraints.cc (match_reload): Exclude some hard regs for multi-reg inout reload pseudos used in asm in different mode. --- gcc/lra-constraints.cc | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'gcc') diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc index b231cb6..4dc2d70 100644 --- a/gcc/lra-constraints.cc +++ b/gcc/lra-constraints.cc @@ -1022,6 +1022,34 @@ match_reload (signed char out, signed char *ins, signed char *outs, are ordered. */ if (partial_subreg_p (outmode, inmode)) { + bool asm_p = asm_noperands (PATTERN (curr_insn)) >= 0; + int hr; + HARD_REG_SET temp_hard_reg_set; + + if (asm_p && (hr = get_hard_regno (out_rtx)) >= 0 + && hard_regno_nregs (hr, inmode) > 1) + { + /* See gcc.c-torture/execute/20030222-1.c. + Consider the code for 32-bit (e.g. BE) target: + int i, v; long x; x = v; asm ("" : "=r" (i) : "0" (x)); + We generate the following RTL with reload insns: + 1. subreg:si(x:di, 0) = 0; + 2. subreg:si(x:di, 4) = v:si; + 3. t:di = x:di, dead x; + 4. asm ("" : "=r" (subreg:si(t:di,4)) : "0" (t:di)) + 5. i:si = subreg:si(t:di,4); + If we assign hard reg of x to t, dead code elimination + will remove insn #2 and we will use unitialized hard reg. + So exclude the hard reg of x for t. We could ignore this + problem for non-empty asm using all x value but it is hard to + check that the asm are expanded into insn realy using x + and setting r. */ + CLEAR_HARD_REG_SET (temp_hard_reg_set); + if (exclude_start_hard_regs != NULL) + temp_hard_reg_set = *exclude_start_hard_regs; + SET_HARD_REG_BIT (temp_hard_reg_set, hr); + exclude_start_hard_regs = &temp_hard_reg_set; + } reg = new_in_reg = lra_create_new_reg_with_unique_value (inmode, in_rtx, goal_class, exclude_start_hard_regs, -- cgit v1.1 From 3d7ab53d6c59499624aa41c8dea0664976820b3b Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 20 Apr 2023 19:26:17 +0200 Subject: c: Avoid -Wenum-int-mismatch warning for redeclaration of builtin acc_on_device [PR107041] The new -Wenum-int-mismatch warning triggers with -Wsystem-headers in , for obvious reasons the builtin acc_on_device uses int type argument rather than enum which isn't defined yet when the builtin is created, while the OpenACC spec requires it to have acc_device_t enum argument. The header makes sure it has int underlying type by using negative and __INT_MAX__ enumerators. I've tried to make the builtin typegeneric or just varargs, but that changes behavior e.g. when one calls it with some C++ class which has cast operator to acc_device_t, so the following patch instead disables the warning for this builtin. 
2023-04-20 Jakub Jelinek PR c/107041 * c-decl.cc (diagnose_mismatched_decls): Avoid -Wenum-int-mismatch warning on acc_on_device declaration. * gcc.dg/goacc/pr107041.c: New test. --- gcc/c/c-decl.cc | 9 ++++++++- gcc/testsuite/gcc.dg/goacc/pr107041.c | 23 +++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/goacc/pr107041.c (limited to 'gcc') diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc index e537d33..1b53f2d 100644 --- a/gcc/c/c-decl.cc +++ b/gcc/c/c-decl.cc @@ -2219,7 +2219,14 @@ diagnose_mismatched_decls (tree newdecl, tree olddecl, } /* Warn about enum/integer type mismatches. They are compatible types (C2X 6.7.2.2/5), but may pose portability problems. */ - else if (enum_and_int_p && TREE_CODE (newdecl) != TYPE_DECL) + else if (enum_and_int_p + && TREE_CODE (newdecl) != TYPE_DECL + /* Don't warn about acc_on_device built-in redeclaration; + the built-in is declared with int rather than enum because + the enum isn't intrinsic. */ + && !(TREE_CODE (olddecl) == FUNCTION_DECL + && fndecl_built_in_p (olddecl, BUILT_IN_ACC_ON_DEVICE) + && !C_DECL_DECLARED_BUILTIN (olddecl))) warned = warning_at (DECL_SOURCE_LOCATION (newdecl), OPT_Wenum_int_mismatch, "conflicting types for %q+D due to enum/integer " diff --git a/gcc/testsuite/gcc.dg/goacc/pr107041.c b/gcc/testsuite/gcc.dg/goacc/pr107041.c new file mode 100644 index 0000000..ed7fb5a --- /dev/null +++ b/gcc/testsuite/gcc.dg/goacc/pr107041.c @@ -0,0 +1,23 @@ +/* PR c/107041 */ +/* { dg-do compile } */ +/* { dg-additional-options "-Wenum-int-mismatch" } */ + +typedef enum acc_device_t { + acc_device_current = -1, + acc_device_none = 0, + acc_device_default = 1, + acc_device_host = 2, + acc_device_not_host = 4, + acc_device_nvidia = 5, + acc_device_radeon = 8, + _ACC_highest = __INT_MAX__ +} acc_device_t; + +int acc_on_device (acc_device_t); /* { dg-bogus "conflicting types for 'acc_on_device' due to enum/integer mismatch; have 'int\\\(acc_device_t\\\)'" } */ +int acc_on_device (acc_device_t); + +int +foo (void) +{ + return acc_on_device (acc_device_host); +} -- cgit v1.1 From 87c9bae4e32b54829dce0a93ff735412d5f684f8 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Thu, 20 Apr 2023 19:44:27 +0200 Subject: tree-vect-patterns: One small vect_recog_ctz_ffs_pattern tweak [PR109011] I've noticed I made a typo: this late in the function, ifn is always only IFN_CTZ or IFN_FFS, never IFN_CLZ. Due to this typo, we weren't using the originally intended .CTZ (X) = .POPCOUNT ((X - 1) & ~X) but .CTZ (X) = PREC - .POPCOUNT (X | -X) instead when we want to emit __builtin_ctz*/.CTZ using .POPCOUNT. Both compute the same value, both are defined at 0 with the same value (PREC), and both take the same number of GIMPLE statements, but I think the former ought to be preferred, because lots of targets have andn as a single operation rather than two, and putting a -1 constant into a vector register is often cheaper than broadcasting the power-of-two value PREC. 2023-04-20 Jakub Jelinek PR tree-optimization/109011 * tree-vect-patterns.cc (vect_recog_ctz_ffs_pattern): Use .CTZ (X) = .POPCOUNT ((X - 1) & ~X) in preference to .CTZ (X) = PREC - .POPCOUNT (X | -X).
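The equivalence of the two expansions is easy to check exhaustively; below is a standalone C sketch (not part of the patch, assuming a 32-bit unsigned type):

    #include <assert.h>

    static int ctz_andn (unsigned x)   /* .POPCOUNT ((X - 1) & ~X) */
    {
      return __builtin_popcount ((x - 1) & ~x);
    }

    static int ctz_orneg (unsigned x)  /* PREC - .POPCOUNT (X | -X) */
    {
      return 32 - __builtin_popcount (x | -x);
    }

    int main (void)
    {
      /* Both variants are defined at 0 and yield PREC (32 here).  */
      assert (ctz_andn (0) == 32 && ctz_orneg (0) == 32);
      for (unsigned x = 1; x < (1u << 20); x++)
        assert (ctz_andn (x) == __builtin_ctz (x)
                && ctz_orneg (x) == __builtin_ctz (x));
      return 0;
    }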
--- gcc/tree-vect-patterns.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index d1b86e8..a49b095 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -1630,7 +1630,7 @@ vect_recog_ctz_ffs_pattern (vec_info *vinfo, stmt_vec_info stmt_vinfo, && defined_at_zero_new && val == prec && val_new == prec) - || (ifnnew == IFN_POPCOUNT && ifn == IFN_CLZ)) + || (ifnnew == IFN_POPCOUNT && ifn == IFN_CTZ)) { /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X) .CTZ (X) = .POPCOUNT ((X - 1) & ~X). */ -- cgit v1.1 From 17aa9ddb34581855dd013745c8be27dda024de4a Mon Sep 17 00:00:00 2001 From: Andrew MacLeod Date: Thu, 20 Apr 2023 13:10:40 -0400 Subject: Do not ignore UNDEFINED ranges when determining PHI equivalences. Do not ignore UNDEFINED name arguments when registering two-way equivalences from PHIs. PR tree-optimization/109564 gcc/ * gimple-range-fold.cc (fold_using_range::range_of_phi): Do not ignore UNDEFINED range names when deciding if all PHI arguments are the same. gcc/testsuite/ * gcc.dg/torture/pr109564-1.c: New testcase. * gcc.dg/torture/pr109564-2.c: Likewise. * gcc.dg/tree-ssa/evrp-ignore.c: XFAIL. * gcc.dg/tree-ssa/vrp06.c: Likewise. --- gcc/gimple-range-fold.cc | 16 +++---- gcc/testsuite/gcc.dg/torture/pr109564-1.c | 74 +++++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/torture/pr109564-2.c | 33 +++++++++++++ gcc/testsuite/gcc.dg/tree-ssa/evrp-ignore.c | 2 +- gcc/testsuite/gcc.dg/tree-ssa/vrp06.c | 2 +- 5 files changed, 117 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/torture/pr109564-1.c create mode 100644 gcc/testsuite/gcc.dg/torture/pr109564-2.c (limited to 'gcc') diff --git a/gcc/gimple-range-fold.cc b/gcc/gimple-range-fold.cc index 429734f..180f349 100644 --- a/gcc/gimple-range-fold.cc +++ b/gcc/gimple-range-fold.cc @@ -771,16 +771,16 @@ fold_using_range::range_of_phi (vrange &r, gphi *phi, fur_source &src) if (gimple_range_ssa_p (arg) && src.gori ()) src.gori ()->register_dependency (phi_def, arg); + } - // Track if all arguments are the same. - if (!seen_arg) - { - seen_arg = true; - single_arg = arg; - } - else if (single_arg != arg) - single_arg = NULL_TREE; + // Track if all arguments are the same. + if (!seen_arg) + { + seen_arg = true; + single_arg = arg; } + else if (single_arg != arg) + single_arg = NULL_TREE; // Once the value reaches varying, stop looking.
if (r.varying_p () && single_arg == NULL_TREE) diff --git a/gcc/testsuite/gcc.dg/torture/pr109564-1.c b/gcc/testsuite/gcc.dg/torture/pr109564-1.c new file mode 100644 index 0000000..e7c855f --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr109564-1.c @@ -0,0 +1,74 @@ +/* { dg-do run } */ + +struct libkeccak_spec { + long int bitrate; +}; + +struct libkeccak_generalised_spec { + long int bitrate; + long int state_size; + long int word_size; +}; + +int __attribute__((noipa)) +libkeccak_degeneralise_spec(struct libkeccak_generalised_spec *restrict spec, + struct libkeccak_spec *restrict output_spec) +{ + long int state_size, word_size, bitrate, output; + const int have_state_size = spec->state_size != (-65536L); + const int have_word_size = spec->word_size != (-65536L); + const int have_bitrate = spec->bitrate != (-65536L); + + if (have_state_size) + { + state_size = spec->state_size; + if (state_size <= 0) + return 1; + if (state_size > 1600) + return 2; + } + + if (have_word_size) + { + word_size = spec->word_size; + if (word_size <= 0) + return 4; + if (word_size > 64) + return 5; + if (have_state_size && state_size != word_size * 25) + return 6; + else if (!have_state_size) { + spec->state_size = 1; + state_size = word_size * 25; + } + } + + if (have_bitrate) + bitrate = spec->bitrate; + + if (!have_bitrate) + { + state_size = (have_state_size ? state_size : (1600L)); + output = ((state_size << 5) / 100L + 7L) & ~0x07L; + bitrate = output << 1; + } + + output_spec->bitrate = bitrate; + + return 0; +} + +int main () +{ + struct libkeccak_generalised_spec gspec; + struct libkeccak_spec spec; + spec.bitrate = -1; + gspec.bitrate = -65536; + gspec.state_size = -65536; + gspec.word_size = -65536; + if (libkeccak_degeneralise_spec(&gspec, &spec)) + __builtin_abort (); + if (spec.bitrate != 1024) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/torture/pr109564-2.c b/gcc/testsuite/gcc.dg/torture/pr109564-2.c new file mode 100644 index 0000000..eeab437c --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr109564-2.c @@ -0,0 +1,33 @@ +/* { dg-do run } */ + +struct libkeccak_generalised_spec { + int state_size; + int word_size; +} main_gspec; + +long gvar; + +int libkeccak_degeneralise_spec(struct libkeccak_generalised_spec *spec) +{ + int state_size; + int have_state_size = spec->state_size != -1; + int have_word_size = spec->word_size; + + if (have_state_size) + state_size = spec->state_size; + if (have_word_size) + gvar = 12345; + if (have_state_size && state_size != spec->word_size) + return 1; + if (spec) + gvar++; + return 0; +} + +int main() +{ + main_gspec.state_size = -1; + if (libkeccak_degeneralise_spec(&main_gspec)) + __builtin_abort(); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/tree-ssa/evrp-ignore.c b/gcc/testsuite/gcc.dg/tree-ssa/evrp-ignore.c index 9bfaed6..ee93e5a 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/evrp-ignore.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/evrp-ignore.c @@ -25,4 +25,4 @@ void foo (int x, int y, int z) kill(); } -/* { dg-final { scan-tree-dump-not "kill" "evrp" } } */ +/* { dg-final { scan-tree-dump-not "kill" "evrp" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp06.c b/gcc/testsuite/gcc.dg/tree-ssa/vrp06.c index 898477e..8f5f860 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/vrp06.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp06.c @@ -30,4 +30,4 @@ foo (int i, int j, int a) /* { dg-final { scan-tree-dump-times "Folding predicate \[i|j\]_\[0-9\]+.*0 to 0" 1 "vrp1" } } */ /* { dg-final { scan-tree-dump-times "Folding predicate 
\[i|j\]_\[0-9\]+.*0 to 1" 1 "vrp1" } } */ -/* { dg-final { scan-tree-dump-times "Folding predicate i_\[0-9]+.*j_\[0-9\]+.* to 0" 1 "vrp1" } } */ +/* { dg-final { scan-tree-dump-times "Folding predicate i_\[0-9]+.*j_\[0-9\]+.* to 0" 1 "vrp1" { xfail *-*-* } } } */ -- cgit v1.1 From d4e8523bf3bfb5f7c23822c23bd2b230030c3d2a Mon Sep 17 00:00:00 2001 From: Alejandro Colomar Date: Thu, 20 Apr 2023 19:18:55 +0200 Subject: doc: Remove repeated word (typo) gcc/ChangeLog: * doc/extend.texi (Common Function Attributes): Remove duplicate word. Signed-off-by: Alejandro Colomar --- gcc/doc/extend.texi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 84b44cb..ac47680 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -3764,7 +3764,7 @@ take function pointer arguments. The @code{optimize} attribute is used to specify that a function is to be compiled with different optimization options than specified on the command line. The optimize attribute arguments of a function behave -behave as if appended to the command-line. +as if appended to the command-line. Valid arguments are constant non-negative integers and strings. Each numeric argument specifies an optimization @var{level}. -- cgit v1.1 From d180a5524ccdab8ef839ee55efecf60ce5b0240b Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Thu, 20 Apr 2023 15:00:04 -0400 Subject: c++: make strip_typedefs generalize strip_typedefs_expr Currently if we have a TREE_VEC of types that we want to strip of typedefs, we unintuitively need to call strip_typedefs_expr instead of strip_typedefs since only strip_typedefs_expr handles TREE_VEC, and it also dispatches to strip_typedefs when given a type. But this seems backwards: arguably strip_typedefs_expr should be the more specialized function, which strip_typedefs dispatches to (and thus generalizes). So this patch makes strip_typedefs subsume strip_typedefs_expr rather than vice versa, which allows for some simplifications. gcc/cp/ChangeLog: * tree.cc (strip_typedefs): Move TREE_LIST handling to strip_typedefs_expr. Dispatch to strip_typedefs_expr for non-type 't'. : Remove manual dispatching to strip_typedefs_expr. : Likewise. (strip_typedefs_expr): Replaces calls to strip_typedefs_expr with strip_typedefs throughout. Don't dispatch to strip_typedefs for type 't'. : Replace this with the better version from strip_typedefs. --- gcc/cp/tree.cc | 84 +++++++++++++++++----------------------------------------- 1 file changed, 25 insertions(+), 59 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/tree.cc b/gcc/cp/tree.cc index 2c22fac..6985253 100644 --- a/gcc/cp/tree.cc +++ b/gcc/cp/tree.cc @@ -1562,7 +1562,8 @@ apply_identity_attributes (tree result, tree attribs, bool *remove_attributes) /* Builds a qualified variant of T that is either not a typedef variant (the default behavior) or not a typedef variant of a user-facing type - (if FLAGS contains STF_USER_FACING). + (if FLAGS contains STF_USER_FACING). If T is not a type, then this + just dispatches to strip_typedefs_expr. E.g. 
consider the following declarations: typedef const int ConstInt; @@ -1596,25 +1597,8 @@ strip_typedefs (tree t, bool *remove_attributes /* = NULL */, if (!t || t == error_mark_node) return t; - if (TREE_CODE (t) == TREE_LIST) - { - bool changed = false; - releasing_vec vec; - tree r = t; - for (; t; t = TREE_CHAIN (t)) - { - gcc_assert (!TREE_PURPOSE (t)); - tree elt = strip_typedefs (TREE_VALUE (t), remove_attributes, flags); - if (elt != TREE_VALUE (t)) - changed = true; - vec_safe_push (vec, elt); - } - if (changed) - r = build_tree_list_vec (vec); - return r; - } - - gcc_assert (TYPE_P (t)); + if (!TYPE_P (t)) + return strip_typedefs_expr (t, remove_attributes, flags); if (t == TYPE_CANONICAL (t)) return t; @@ -1747,12 +1731,7 @@ strip_typedefs (tree t, bool *remove_attributes /* = NULL */, for (int i = 0; i < TREE_VEC_LENGTH (args); ++i) { tree arg = TREE_VEC_ELT (args, i); - tree strip_arg; - if (TYPE_P (arg)) - strip_arg = strip_typedefs (arg, remove_attributes, flags); - else - strip_arg = strip_typedefs_expr (arg, remove_attributes, - flags); + tree strip_arg = strip_typedefs (arg, remove_attributes, flags); TREE_VEC_ELT (new_args, i) = strip_arg; if (strip_arg != arg) changed = true; @@ -1792,11 +1771,8 @@ strip_typedefs (tree t, bool *remove_attributes /* = NULL */, break; case TRAIT_TYPE: { - tree type1 = TRAIT_TYPE_TYPE1 (t); - if (TYPE_P (type1)) - type1 = strip_typedefs (type1, remove_attributes, flags); - else - type1 = strip_typedefs_expr (type1, remove_attributes, flags); + tree type1 = strip_typedefs (TRAIT_TYPE_TYPE1 (t), + remove_attributes, flags); tree type2 = strip_typedefs (TRAIT_TYPE_TYPE2 (t), remove_attributes, flags); if (type1 == TRAIT_TYPE_TYPE1 (t) && type2 == TRAIT_TYPE_TYPE2 (t)) @@ -1883,7 +1859,8 @@ strip_typedefs (tree t, bool *remove_attributes /* = NULL */, return cp_build_qualified_type (result, cp_type_quals (t)); } -/* Like strip_typedefs above, but works on expressions, so that in +/* Like strip_typedefs above, but works on expressions (and other + non-types such as TREE_VEC), so that in template struct A { @@ -1908,11 +1885,6 @@ strip_typedefs_expr (tree t, bool *remove_attributes, unsigned int flags) if (DECL_P (t) || CONSTANT_CLASS_P (t)) return t; - /* Some expressions have type operands, so let's handle types here rather - than check TYPE_P in multiple places below. 
*/ - if (TYPE_P (t)) - return strip_typedefs (t, remove_attributes, flags); - code = TREE_CODE (t); switch (code) { @@ -1940,26 +1912,20 @@ strip_typedefs_expr (tree t, bool *remove_attributes, unsigned int flags) case TREE_LIST: { - releasing_vec vec; bool changed = false; - tree it; - for (it = t; it; it = TREE_CHAIN (it)) + releasing_vec vec; + r = t; + for (; t; t = TREE_CHAIN (t)) { - tree val = strip_typedefs_expr (TREE_VALUE (it), - remove_attributes, flags); - vec_safe_push (vec, val); - if (val != TREE_VALUE (it)) + gcc_assert (!TREE_PURPOSE (t)); + tree elt = strip_typedefs (TREE_VALUE (t), + remove_attributes, flags); + if (elt != TREE_VALUE (t)) changed = true; - gcc_assert (TREE_PURPOSE (it) == NULL_TREE); + vec_safe_push (vec, elt); } if (changed) - { - r = NULL_TREE; - FOR_EACH_VEC_ELT_REVERSE (*vec, i, it) - r = tree_cons (NULL_TREE, it, r); - } - else - r = t; + r = build_tree_list_vec (vec); return r; } @@ -1971,8 +1937,8 @@ strip_typedefs_expr (tree t, bool *remove_attributes, unsigned int flags) vec_safe_reserve (vec, n); for (i = 0; i < n; ++i) { - tree op = strip_typedefs_expr (TREE_VEC_ELT (t, i), - remove_attributes, flags); + tree op = strip_typedefs (TREE_VEC_ELT (t, i), + remove_attributes, flags); vec->quick_push (op); if (op != TREE_VEC_ELT (t, i)) changed = true; @@ -2000,15 +1966,15 @@ strip_typedefs_expr (tree t, bool *remove_attributes, unsigned int flags) for (i = 0; i < n; ++i) { constructor_elt *e = &(*vec)[i]; - tree op = strip_typedefs_expr (e->value, remove_attributes, flags); + tree op = strip_typedefs (e->value, remove_attributes, flags); if (op != e->value) { changed = true; e->value = op; } gcc_checking_assert - (e->index == strip_typedefs_expr (e->index, remove_attributes, - flags)); + (e->index == strip_typedefs (e->index, remove_attributes, + flags)); } if (!changed && type == TREE_TYPE (t)) @@ -2057,8 +2023,8 @@ strip_typedefs_expr (tree t, bool *remove_attributes, unsigned int flags) default: for (i = 0; i < n; ++i) - ops[i] = strip_typedefs_expr (TREE_OPERAND (t, i), - remove_attributes, flags); + ops[i] = strip_typedefs (TREE_OPERAND (t, i), + remove_attributes, flags); break; } -- cgit v1.1 From 76fa66ea397cb255ab1d68a90ff6b878236e9620 Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Thu, 20 Apr 2023 15:00:06 -0400 Subject: c++: use TREE_VEC for trailing args of variadic built-in traits This patch makes us use TREE_VEC instead of TREE_LIST to represent the trailing arguments of a variadic built-in trait. These built-ins are typically passed a simple pack expansion as the second argument, e.g. __is_constructible(T, Ts...) and the main benefit of this representation change is that substituting into this argument list is now basically free since tsubst_template_args makes sure we reuse the TREE_VEC of the corresponding ARGUMENT_PACK when expanding such a pack expansion. In the previous TREE_LIST representation we would need need to convert the expanded pack expansion into a TREE_LIST (via tsubst_tree_list). Note that an empty set of trailing arguments is now represented as an empty TREE_VEC instead of NULL_TREE, so now TRAIT_TYPE/EXPR_TYPE2 will be empty only for unary traits. gcc/cp/ChangeLog: * constraint.cc (diagnose_trait_expr): Convert a TREE_VEC of arguments into a TREE_LIST for sake of pretty printing. * cxx-pretty-print.cc (pp_cxx_trait): Handle TREE_VEC instead of TREE_LIST of trailing variadic trait arguments. * method.cc (constructible_expr): Likewise. (is_xible_helper): Likewise. 
* parser.cc (cp_parser_trait): Represent trailing variadic trait arguments as a TREE_VEC instead of TREE_LIST. * pt.cc (value_dependent_expression_p): Handle TREE_VEC instead of TREE_LIST of trailing variadic trait arguments. * semantics.cc (finish_type_pack_element): Likewise. (check_trait_type): Likewise. --- gcc/cp/constraint.cc | 10 ++++++++++ gcc/cp/cxx-pretty-print.cc | 6 +++--- gcc/cp/method.cc | 17 +++++++++-------- gcc/cp/parser.cc | 14 ++++++++------ gcc/cp/pt.cc | 9 ++++----- gcc/cp/semantics.cc | 15 +++++++++------ 6 files changed, 43 insertions(+), 28 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc index 273d15a..675299a 100644 --- a/gcc/cp/constraint.cc +++ b/gcc/cp/constraint.cc @@ -3675,6 +3675,16 @@ diagnose_trait_expr (tree expr, tree args) tree t1 = TRAIT_EXPR_TYPE1 (expr); tree t2 = TRAIT_EXPR_TYPE2 (expr); + if (t2 && TREE_CODE (t2) == TREE_VEC) + { + /* Convert the TREE_VEC of arguments into a TREE_LIST, since we can't + directly print a TREE_VEC but we can a TREE_LIST via the E format + specifier. */ + tree list = NULL_TREE; + for (tree t : tree_vec_range (t2)) + list = tree_cons (NULL_TREE, t, list); + t2 = nreverse (list); + } switch (TRAIT_EXPR_KIND (expr)) { case CPTK_HAS_NOTHROW_ASSIGN: diff --git a/gcc/cp/cxx-pretty-print.cc b/gcc/cp/cxx-pretty-print.cc index c339198..4cda27f 100644 --- a/gcc/cp/cxx-pretty-print.cc +++ b/gcc/cp/cxx-pretty-print.cc @@ -2640,16 +2640,16 @@ pp_cxx_trait (cxx_pretty_printer *pp, tree t) } if (type2) { - if (TREE_CODE (type2) != TREE_LIST) + if (TREE_CODE (type2) != TREE_VEC) { pp_cxx_separate_with (pp, ','); pp->type_id (type2); } else - for (tree arg = type2; arg; arg = TREE_CHAIN (arg)) + for (tree arg : tree_vec_range (type2)) { pp_cxx_separate_with (pp, ','); - pp->type_id (TREE_VALUE (arg)); + pp->type_id (arg); } } if (kind == CPTK_TYPE_PACK_ELEMENT) diff --git a/gcc/cp/method.cc b/gcc/cp/method.cc index 225ec45..00eae56 100644 --- a/gcc/cp/method.cc +++ b/gcc/cp/method.cc @@ -2075,8 +2075,9 @@ constructible_expr (tree to, tree from) if (!TYPE_REF_P (to)) to = cp_build_reference_type (to, /*rval*/false); tree ob = build_stub_object (to); - for (; from; from = TREE_CHAIN (from)) - vec_safe_push (args, build_stub_object (TREE_VALUE (from))); + vec_alloc (args, TREE_VEC_LENGTH (from)); + for (tree arg : tree_vec_range (from)) + args->quick_push (build_stub_object (arg)); expr = build_special_member_call (ob, complete_ctor_identifier, &args, ctype, LOOKUP_NORMAL, tf_none); if (expr == error_mark_node) @@ -2096,9 +2097,9 @@ constructible_expr (tree to, tree from) } else { - if (from == NULL_TREE) + const int len = TREE_VEC_LENGTH (from); + if (len == 0) return build_value_init (strip_array_types (to), tf_none); - const int len = list_length (from); if (len > 1) { if (cxx_dialect < cxx20) @@ -2112,9 +2113,9 @@ constructible_expr (tree to, tree from) should be true. 
*/ vec<constructor_elt, va_gc> *v; vec_alloc (v, len); - for (tree t = from; t; t = TREE_CHAIN (t)) + for (tree arg : tree_vec_range (from)) { - tree stub = build_stub_object (TREE_VALUE (t)); + tree stub = build_stub_object (arg); constructor_elt elt = { NULL_TREE, stub }; v->quick_push (elt); } @@ -2123,7 +2124,7 @@ CONSTRUCTOR_IS_PAREN_INIT (from) = true; } else - from = build_stub_object (TREE_VALUE (from)); + from = build_stub_object (TREE_VEC_ELT (from, 0)); expr = perform_direct_initialization_if_possible (to, from, /*cast*/false, tf_none); @@ -2160,7 +2161,7 @@ is_xible_helper (enum tree_code code, tree to, tree from, bool trivial) tree expr; if (code == MODIFY_EXPR) expr = assignable_expr (to, from); - else if (trivial && from && TREE_CHAIN (from) + else if (trivial && TREE_VEC_LENGTH (from) > 1 && cxx_dialect < cxx20) return error_mark_node; // only 0- and 1-argument ctors can be trivial // before C++20 aggregate paren init diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc index ee1497b..e5f032f 100644 --- a/gcc/cp/parser.cc +++ b/gcc/cp/parser.cc @@ -10993,8 +10993,8 @@ cp_parser_trait (cp_parser* parser, enum rid keyword) if (kind == CPTK_TYPE_PACK_ELEMENT) { cp_parser_require (parser, CPP_COMMA, RT_COMMA); - tree rest = cp_parser_enclosed_template_argument_list (parser); - for (tree elt : tree_vec_range (rest)) + tree trailing = cp_parser_enclosed_template_argument_list (parser); + for (tree elt : tree_vec_range (trailing)) { if (!TYPE_P (elt)) { @@ -11003,9 +11003,8 @@ cp_parser_trait (cp_parser* parser, enum rid keyword) "is not a type"); return error_mark_node; } - type2 = tree_cons (NULL_TREE, elt, type2); } - type2 = nreverse (type2); + type2 = trailing; } else if (binary) { @@ -11021,6 +11020,7 @@ cp_parser_trait (cp_parser* parser, enum rid keyword) } else if (variadic) { + auto_vec<tree> trailing; while (cp_lexer_next_token_is (parser->lexer, CPP_COMMA)) { cp_lexer_consume_token (parser->lexer); @@ -11032,9 +11032,11 @@ cp_parser_trait (cp_parser* parser, enum rid keyword) } if (elt == error_mark_node) return error_mark_node; - type2 = tree_cons (NULL_TREE, elt, type2); + trailing.safe_push (elt); } - type2 = nreverse (type2); + type2 = make_tree_vec (trailing.length ()); + for (int i = 0; i < TREE_VEC_LENGTH (type2); ++i) + TREE_VEC_ELT (type2, i) = trailing[i]; } location_t finish_loc = cp_lexer_peek_token (parser->lexer)->location; diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index f65f2d5..d393c99 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -28065,19 +28065,18 @@ value_dependent_expression_p (tree expression) case TRAIT_EXPR: { - tree type2 = TRAIT_EXPR_TYPE2 (expression); if (dependent_type_p (TRAIT_EXPR_TYPE1 (expression))) return true; + tree type2 = TRAIT_EXPR_TYPE2 (expression); if (!type2) return false; - if (TREE_CODE (type2) != TREE_LIST) + if (TREE_CODE (type2) != TREE_VEC) return dependent_type_p (type2); - for (; type2; type2 = TREE_CHAIN (type2)) - if (dependent_type_p (TREE_VALUE (type2))) + for (tree arg : tree_vec_range (type2)) + if (dependent_type_p (arg)) return true; return false; diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc index a4f30fd..9ba316a 100644 --- a/gcc/cp/semantics.cc +++ b/gcc/cp/semantics.cc @@ -4490,14 +4490,13 @@ finish_type_pack_element (tree idx, tree types, tsubst_flags_t complain) error ("%<__type_pack_element%> index is negative"); return error_mark_node; } - tree result = chain_index (val, types); - if (!result) + if (val >= TREE_VEC_LENGTH (types)) { if (complain & tf_error) error
("%<__type_pack_element%> index is out of range"); return error_mark_node; } - return TREE_VALUE (result); + return TREE_VEC_ELT (types, val); } /* Implement the __direct_bases keyword: Return the direct base classes @@ -12121,9 +12120,13 @@ check_trait_type (tree type, int kind = 1) if (type == NULL_TREE) return true; - if (TREE_CODE (type) == TREE_LIST) - return (check_trait_type (TREE_VALUE (type)) - && check_trait_type (TREE_CHAIN (type))); + if (TREE_CODE (type) == TREE_VEC) + { + for (tree arg : tree_vec_range (type)) + if (!check_trait_type (arg, kind)) + return false; + return true; + } if (kind == 1 && TREE_CODE (type) == ARRAY_TYPE && !TYPE_DOMAIN (type)) return true; // Array of unknown bound. Don't care about completeness. -- cgit v1.1 From afc7e20e793ce4071a7fe593ccebb2e6b2b070fa Mon Sep 17 00:00:00 2001 From: Patrick Palka Date: Thu, 20 Apr 2023 15:16:59 -0400 Subject: c++: simplify TEMPLATE_TYPE_PARM level lowering 1. Don't bother recursing when level lowering a cv-qualified type template parameter. 2. Get rid of the recursive loop breaker when level lowering a constrained auto, and enable the TEMPLATE_PARM_DESCENDANTS cache in this case too. This should be safe to do so now that we no longer substitute constraints on an auto. gcc/cp/ChangeLog: * pt.cc (tsubst) : Don't recurse when level lowering a cv-qualified type template parameter. Remove recursive loop breaker in the level lowering case for constrained autos. Use the TEMPLATE_PARM_DESCENDANTS cache in this case as well. --- gcc/cp/pt.cc | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) (limited to 'gcc') diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index d393c99..3e5f010 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -16228,33 +16228,24 @@ tsubst (tree t, tree args, tsubst_flags_t complain, tree in_decl) /* If we get here, we must have been looking at a parm for a more deeply nested template. Make a new version of this template parameter, but with a lower level. */ + int quals; switch (code) { case TEMPLATE_TYPE_PARM: case TEMPLATE_TEMPLATE_PARM: - if (cp_type_quals (t)) + quals = cp_type_quals (t); + if (quals) { - r = tsubst (TYPE_MAIN_VARIANT (t), args, complain, in_decl); - r = cp_build_qualified_type - (r, cp_type_quals (t), - complain | (code == TEMPLATE_TYPE_PARM - ? tf_ignore_bad_quals : 0)); + gcc_checking_assert (code == TEMPLATE_TYPE_PARM); + t = TYPE_MAIN_VARIANT (t); } - else if (TREE_CODE (t) == TEMPLATE_TYPE_PARM - && PLACEHOLDER_TYPE_CONSTRAINTS_INFO (t) - && (r = (TEMPLATE_PARM_DESCENDANTS - (TEMPLATE_TYPE_PARM_INDEX (t)))) - && (r = TREE_TYPE (r)) - && !PLACEHOLDER_TYPE_CONSTRAINTS_INFO (r)) - /* Break infinite recursion when substituting the constraints - of a constrained placeholder. */; - else if (TREE_CODE (t) == TEMPLATE_TYPE_PARM - && !PLACEHOLDER_TYPE_CONSTRAINTS_INFO (t) - && (arg = TEMPLATE_TYPE_PARM_INDEX (t), - r = TEMPLATE_PARM_DESCENDANTS (arg)) - && (TEMPLATE_PARM_LEVEL (r) - == TEMPLATE_PARM_LEVEL (arg) - levels)) - /* Cache the simple case of lowering a type parameter. */ + + if (TREE_CODE (t) == TEMPLATE_TYPE_PARM + && (arg = TEMPLATE_TYPE_PARM_INDEX (t), + r = TEMPLATE_PARM_DESCENDANTS (arg)) + && (TEMPLATE_PARM_LEVEL (r) + == TEMPLATE_PARM_LEVEL (arg) - levels)) + /* Cache the simple case of lowering a type parameter. 
*/ r = TREE_TYPE (r); else { @@ -16278,6 +16269,10 @@ tsubst (tree t, tree args, tsubst_flags_t complain, tree in_decl) else TYPE_CANONICAL (r) = canonical_type_parameter (r); } + + if (quals) + r = cp_build_qualified_type (r, quals, + complain | tf_ignore_bad_quals); break; case BOUND_TEMPLATE_TEMPLATE_PARM: -- cgit v1.1 From ce4e4f37fd12f4335c25f508f9fa40492a408556 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Fri, 21 Apr 2023 00:17:31 +0000 Subject: Daily bump. --- gcc/ChangeLog | 323 ++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/c/ChangeLog | 6 + gcc/cp/ChangeLog | 37 ++++++ gcc/testsuite/ChangeLog | 168 +++++++++++++++++++++++++ 5 files changed, 535 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 58c9a91..5845915 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,326 @@ +2023-04-20 Alejandro Colomar + + * doc/extend.texi (Common Function Attributes): Remove duplicate + word. + +2023-04-20 Andrew MacLeod + + PR tree-optimization/109564 + * gimple-range-fold.cc (fold_using_range::range_of_phi): Do no ignore + UNDEFINED range names when deciding if all PHI arguments are the same, + +2023-04-20 Jakub Jelinek + + PR tree-optimization/109011 + * tree-vect-patterns.cc (vect_recog_ctz_ffs_pattern): Use + .CTZ (X) = .POPCOUNT ((X - 1) & ~X) in preference to + .CTZ (X) = PREC - .POPCOUNT (X | -X). + +2023-04-20 Vladimir N. Makarov + + * lra-constraints.cc (match_reload): Exclude some hard regs for + multi-reg inout reload pseudos used in asm in different mode. + +2023-04-20 Uros Bizjak + + * config/arm/arm.cc (thumb1_legitimate_address_p): + Use VIRTUAL_REGISTER_P predicate. + (arm_eliminable_register): Ditto. + * config/avr/avr.md (push_1): Ditto. + * config/bfin/predicates.md (register_no_elim_operand): Ditto. + * config/h8300/predicates.md (register_no_sp_elim_operand): Ditto. + * config/i386/predicates.md (register_no_elim_operand): Ditto. + * config/iq2000/predicates.md (call_insn_operand): Ditto. + * config/microblaze/microblaze.h (CALL_INSN_OP): Ditto. + +2023-04-20 Uros Bizjak + + PR target/78952 + * config/i386/predicates.md (extract_operator): New predicate. + * config/i386/i386.md (any_extract): Remove code iterator. + (*cmpqi_ext_1_mem_rex64): Use extract_operator predicate. + (*cmpqi_ext_1): Ditto. + (*cmpqi_ext_2): Ditto. + (*cmpqi_ext_3_mem_rex64): Ditto. + (*cmpqi_ext_3): Ditto. + (*cmpqi_ext_4): Ditto. + (*extzvqi_mem_rex64): Ditto. + (*extzvqi): Ditto. + (*insvqi_2): Ditto. + (*extendqi_ext_1): Ditto. + (*addqi_ext_0): Ditto. + (*addqi_ext_1): Ditto. + (*addqi_ext_2): Ditto. + (*subqi_ext_0): Ditto. + (*subqi_ext_2): Ditto. + (*testqi_ext_1): Ditto. + (*testqi_ext_2): Ditto. + (*andqi_ext_0): Ditto. + (*andqi_ext_1): Ditto. + (*andqi_ext_1_cc): Ditto. + (*andqi_ext_2): Ditto. + (*qi_ext_0): Ditto. + (*qi_ext_1): Ditto. + (*qi_ext_2): Ditto. + (*xorqi_ext_1_cc): Ditto. + (*negqi_ext_2): Ditto. + (*ashlqi_ext_2): Ditto. + (*qi_ext_2): Ditto. + +2023-04-20 Raphael Zinsly + + PR target/108248 + * config/riscv/bitmanip.md (clz, ctz, pcnt, min, max patterns): Use + as the type to allow for fine grained control of + scheduling these insns. + * config/riscv/generic.md (generic_alu): Add bitmanip, clz, ctz, pcnt, + min, max. + * config/riscv/riscv.md (type attribute): Add types for clz, ctz, + pcnt, signed and unsigned min/max. + +2023-04-20 Juzhe-Zhong + kito-cheng + + * config/riscv/riscv.h (enum reg_class): Fix RVV register order. 
+ +2023-04-20 Ju-Zhe Zhong + kito-cheng + + PR target/109535 + * config/riscv/riscv-vsetvl.cc (count_regno_occurrences): New function. + (pass_vsetvl::cleanup_insns): Fix bug. + +2023-04-20 Andrew Stubbs + + * config/gcn/gcn-valu.md (vnsi, VnSI): Add scalar modes. + (ldexp3): Delete. + (ldexp3): Change "B" to "A". + +2023-04-20 Jakub Jelinek + Jonathan Wakely + + * tree.h (built_in_function_equal_p): New helper function. + (fndecl_built_in_p): Turn into variadic template to support + 1 or more built_in_function arguments. + * builtins.cc (fold_builtin_expect): Use 3 argument fndecl_built_in_p. + * gimplify.cc (goa_stabilize_expr): Likewise. + * cgraphclones.cc (cgraph_node::create_clone): Likewise. + * ipa-fnsummary.cc (compute_fn_summary): Likewise. + * omp-low.cc (setjmp_or_longjmp_p): Likewise. + * cgraph.cc (cgraph_edge::redirect_call_stmt_to_callee, + cgraph_update_edges_for_call_stmt_node, + cgraph_edge::verify_corresponds_to_fndecl, + cgraph_node::verify_node): Likewise. + * tree-stdarg.cc (optimize_va_list_gpr_fpr_size): Likewise. + * gimple-ssa-warn-access.cc (matching_alloc_calls_p): Likewise. + * ipa-prop.cc (try_make_edge_direct_virtual_call): Likewise. + +2023-04-20 Jakub Jelinek + + PR tree-optimization/109011 + * tree-vect-patterns.cc (vect_recog_ctz_ffs_pattern): New function. + (vect_recog_popcount_clz_ctz_ffs_pattern): Move vect_pattern_detected + call later. Don't punt for IFN_CTZ or IFN_FFS if it doesn't have + direct optab support, but has instead IFN_CLZ, IFN_POPCOUNT or + for IFN_FFS IFN_CTZ support, use vect_recog_ctz_ffs_pattern for that + case. + (vect_vect_recog_func_ptrs): Add ctz_ffs entry. + +2023-04-20 Richard Biener + + * df-core.cc (rest_of_handle_df_initialize): Remove + computation of df->postorder, df->postorder_inverted and + df->n_blocks. + +2023-04-20 Haochen Jiang + + * common/config/i386/i386-common.cc + (OPTION_MASK_ISA2_AVX_UNSET): Add OPTION_MASK_ISA2_VAES_UNSET. + (ix86_handle_option): Set AVX flag for VAES. + * config/i386/i386-builtins.cc (ix86_init_mmx_sse_builtins): + Add OPTION_MASK_ISA2_VAES_UNSET. + (def_builtin): Share builtin between AES and VAES. + * config/i386/i386-expand.cc (ix86_check_builtin_isa_match): + Ditto. + * config/i386/i386.md (aes): New isa attribute. + * config/i386/sse.md (aesenc): Add pattern for VAES with xmm. + (aesenclast): Ditto. + (aesdec): Ditto. + (aesdeclast): Ditto. + * config/i386/vaesintrin.h: Remove redundant avx target push. + * config/i386/wmmintrin.h (_mm_aesdec_si128): Change to macro. + (_mm_aesdeclast_si128): Ditto. + (_mm_aesenc_si128): Ditto. + (_mm_aesenclast_si128): Ditto. + +2023-04-20 Hu, Lin1 + + * config/i386/avx2intrin.h + (_MM_REDUCE_OPERATOR_BASIC_EPI16): New macro. + (_MM_REDUCE_OPERATOR_MAX_MIN_EP16): Ditto. + (_MM256_REDUCE_OPERATOR_BASIC_EPI16): Ditto. + (_MM256_REDUCE_OPERATOR_MAX_MIN_EP16): Ditto. + (_MM_REDUCE_OPERATOR_BASIC_EPI8): Ditto. + (_MM_REDUCE_OPERATOR_MAX_MIN_EP8): Ditto. + (_MM256_REDUCE_OPERATOR_BASIC_EPI8): Ditto. + (_MM256_REDUCE_OPERATOR_MAX_MIN_EP8): Ditto. + (_mm_reduce_add_epi16): New instrinsics. + (_mm_reduce_mul_epi16): Ditto. + (_mm_reduce_and_epi16): Ditto. + (_mm_reduce_or_epi16): Ditto. + (_mm_reduce_max_epi16): Ditto. + (_mm_reduce_max_epu16): Ditto. + (_mm_reduce_min_epi16): Ditto. + (_mm_reduce_min_epu16): Ditto. + (_mm256_reduce_add_epi16): Ditto. + (_mm256_reduce_mul_epi16): Ditto. + (_mm256_reduce_and_epi16): Ditto. + (_mm256_reduce_or_epi16): Ditto. + (_mm256_reduce_max_epi16): Ditto. + (_mm256_reduce_max_epu16): Ditto. 
+ (_mm256_reduce_min_epi16): Ditto. + (_mm256_reduce_min_epu16): Ditto. + (_mm_reduce_add_epi8): Ditto. + (_mm_reduce_mul_epi8): Ditto. + (_mm_reduce_and_epi8): Ditto. + (_mm_reduce_or_epi8): Ditto. + (_mm_reduce_max_epi8): Ditto. + (_mm_reduce_max_epu8): Ditto. + (_mm_reduce_min_epi8): Ditto. + (_mm_reduce_min_epu8): Ditto. + (_mm256_reduce_add_epi8): Ditto. + (_mm256_reduce_mul_epi8): Ditto. + (_mm256_reduce_and_epi8): Ditto. + (_mm256_reduce_or_epi8): Ditto. + (_mm256_reduce_max_epi8): Ditto. + (_mm256_reduce_max_epu8): Ditto. + (_mm256_reduce_min_epi8): Ditto. + (_mm256_reduce_min_epu8): Ditto. + * config/i386/avx512vlbwintrin.h: + (_mm_mask_reduce_add_epi16): Ditto. + (_mm_mask_reduce_mul_epi16): Ditto. + (_mm_mask_reduce_and_epi16): Ditto. + (_mm_mask_reduce_or_epi16): Ditto. + (_mm_mask_reduce_max_epi16): Ditto. + (_mm_mask_reduce_max_epu16): Ditto. + (_mm_mask_reduce_min_epi16): Ditto. + (_mm_mask_reduce_min_epu16): Ditto. + (_mm256_mask_reduce_add_epi16): Ditto. + (_mm256_mask_reduce_mul_epi16): Ditto. + (_mm256_mask_reduce_and_epi16): Ditto. + (_mm256_mask_reduce_or_epi16): Ditto. + (_mm256_mask_reduce_max_epi16): Ditto. + (_mm256_mask_reduce_max_epu16): Ditto. + (_mm256_mask_reduce_min_epi16): Ditto. + (_mm256_mask_reduce_min_epu16): Ditto. + (_mm_mask_reduce_add_epi8): Ditto. + (_mm_mask_reduce_mul_epi8): Ditto. + (_mm_mask_reduce_and_epi8): Ditto. + (_mm_mask_reduce_or_epi8): Ditto. + (_mm_mask_reduce_max_epi8): Ditto. + (_mm_mask_reduce_max_epu8): Ditto. + (_mm_mask_reduce_min_epi8): Ditto. + (_mm_mask_reduce_min_epu8): Ditto. + (_mm256_mask_reduce_add_epi8): Ditto. + (_mm256_mask_reduce_mul_epi8): Ditto. + (_mm256_mask_reduce_and_epi8): Ditto. + (_mm256_mask_reduce_or_epi8): Ditto. + (_mm256_mask_reduce_max_epi8): Ditto. + (_mm256_mask_reduce_max_epu8): Ditto. + (_mm256_mask_reduce_min_epi8): Ditto. + (_mm256_mask_reduce_min_epu8): Ditto. + +2023-04-20 Haochen Jiang + + * common/config/i386/i386-common.cc + (OPTION_MASK_ISA_VPCLMULQDQ_SET): + Add OPTION_MASK_ISA_PCLMUL_SET and OPTION_MASK_ISA_AVX_SET. + (OPTION_MASK_ISA_AVX_UNSET): + Add OPTION_MASK_ISA_VPCLMULQDQ_UNSET. + (OPTION_MASK_ISA_PCLMUL_UNSET): Ditto. + * config/i386/i386.md (vpclmulqdqvl): New. + * config/i386/sse.md (pclmulqdq): Add evex encoding. + * config/i386/vpclmulqdqintrin.h: Remove redudant avx target + push. + +2023-04-20 Haochen Jiang + + * config/i386/avx512vlbwintrin.h + (_mm_mask_blend_epi16): Remove __OPTIMIZE__ wrapper. + (_mm_mask_blend_epi8): Ditto. + (_mm256_mask_blend_epi16): Ditto. + (_mm256_mask_blend_epi8): Ditto. + * config/i386/avx512vlintrin.h + (_mm256_mask_blend_pd): Ditto. + (_mm256_mask_blend_ps): Ditto. + (_mm256_mask_blend_epi64): Ditto. + (_mm256_mask_blend_epi32): Ditto. + (_mm_mask_blend_pd): Ditto. + (_mm_mask_blend_ps): Ditto. + (_mm_mask_blend_epi64): Ditto. + (_mm_mask_blend_epi32): Ditto. + * config/i386/sse.md (VF_AVX512BWHFBF16): Removed. + (VF_AVX512HFBFVL): Move it before the first usage. + (_blendm): Change iterator from VF_AVX512BWHFBF16 + to VF_AVX512HFBFVL. + +2023-04-20 Haochen Jiang + + * common/config/i386/i386-common.cc + (OPTION_MASK_ISA_AVX512VBMI2_SET): Change OPTION_MASK_ISA_AVX512F_SET + to OPTION_MASK_ISA_AVX512BW_SET. + (OPTION_MASK_ISA_AVX512F_UNSET): + Remove OPTION_MASK_ISA_AVX512VBMI2_UNSET. + (OPTION_MASK_ISA_AVX512BW_UNSET): + Add OPTION_MASK_ISA_AVX512VBMI2_UNSET. + * config/i386/avx512vbmi2intrin.h: Do not push avx512bw. + * config/i386/avx512vbmi2vlintrin.h: Ditto. + * config/i386/i386-builtin.def: Remove OPTION_MASK_ISA_AVX512BW. 
+ * config/i386/sse.md (VI12_AVX512VLBW): Removed. + (VI12_VI48F_AVX512VLBW): Rename to VI12_VI48F_AVX512VL. + (compress_mask): Change iterator from VI12_AVX512VLBW to + VI12_AVX512VL. + (compressstore_mask): Ditto. + (expand_mask): Ditto. + (expand_maskz): Ditto. + (*expand_mask): Change iterator from VI12_VI48F_AVX512VLBW to + VI12_VI48F_AVX512VL. + +2023-04-20 Haochen Jiang + + * common/config/i386/i386-common.cc + (OPTION_MASK_ISA_AVX512BITALG_SET): + Change OPTION_MASK_ISA_AVX512F_SET + to OPTION_MASK_ISA_AVX512BW_SET. + (OPTION_MASK_ISA_AVX512F_UNSET): + Remove OPTION_MASK_ISA_AVX512BITALG_SET. + (OPTION_MASK_ISA_AVX512BW_UNSET): + Add OPTION_MASK_ISA_AVX512BITALG_SET. + * config/i386/avx512bitalgintrin.h: Do not push avx512bw. + * config/i386/i386-builtin.def: + Remove redundant OPTION_MASK_ISA_AVX512BW. + * config/i386/sse.md (VI1_AVX512VLBW): Removed. + (avx512vl_vpshufbitqmb): + Change the iterator from VI1_AVX512VLBW to VI1_AVX512VL. + +2023-04-20 Haochen Jiang + + * config/i386/i386-expand.cc + (ix86_check_builtin_isa_match): Correct wrong comments. + Add a new macro SHARE_BUILTIN and refactor the current if + clauses to macro. + +2023-04-20 Mo, Zewei + + * config/i386/cpuid.h: Open a new section for Extended Features + Leaf (%eax == 7, %ecx == 0) and Extended Features Sub-leaf (%eax == 7, + %ecx == 1). + +2023-04-20 Hu, Lin1 + + * config/i386/sse.md: Modify insn vperm{i,f} + and vshuf{i,f}. + 2023-04-19 Max Filippov * config/xtensa/xtensa-opts.h: New header. diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 758629b..1bf412b 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20230420 +20230421 diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog index f23b3ba..fc9b66d 100644 --- a/gcc/c/ChangeLog +++ b/gcc/c/ChangeLog @@ -1,3 +1,9 @@ +2023-04-20 Jakub Jelinek + + PR c/107041 + * c-decl.cc (diagnose_mismatched_decls): Avoid -Wenum-int-mismatch + warning on acc_on_device declaration. + 2023-03-28 David Malcolm PR c/107002 diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index ee08b14..d8da7f0 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,40 @@ +2023-04-20 Patrick Palka + + * pt.cc (tsubst) : Don't recurse when + level lowering a cv-qualified type template parameter. Remove + recursive loop breaker in the level lowering case for constrained + autos. Use the TEMPLATE_PARM_DESCENDANTS cache in this case as + well. + +2023-04-20 Patrick Palka + + * constraint.cc (diagnose_trait_expr): Convert a TREE_VEC + of arguments into a TREE_LIST for sake of pretty printing. + * cxx-pretty-print.cc (pp_cxx_trait): Handle TREE_VEC + instead of TREE_LIST of trailing variadic trait arguments. + * method.cc (constructible_expr): Likewise. + (is_xible_helper): Likewise. + * parser.cc (cp_parser_trait): Represent trailing variadic trait + arguments as a TREE_VEC instead of TREE_LIST. + * pt.cc (value_dependent_expression_p): Handle TREE_VEC + instead of TREE_LIST of trailing variadic trait arguments. + * semantics.cc (finish_type_pack_element): Likewise. + (check_trait_type): Likewise. + +2023-04-20 Patrick Palka + + * tree.cc (strip_typedefs): Move TREE_LIST handling to + strip_typedefs_expr. Dispatch to strip_typedefs_expr for + non-type 't'. + : Remove manual dispatching to + strip_typedefs_expr. + : Likewise. + (strip_typedefs_expr): Replaces calls to strip_typedefs_expr + with strip_typedefs throughout. Don't dispatch to strip_typedefs + for type 't'. + : Replace this with the better version from + strip_typedefs. 
+ 2023-04-19 Patrick Palka Jonathan Wakely diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 4fa4c51..353747f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,171 @@ +2023-04-20 Andrew MacLeod + + PR tree-optimization/109564 + * gcc.dg/torture/pr109564-1.c: New testcase. + * gcc.dg/torture/pr109564-2.c: Likewise. + * gcc.dg/tree-ssa/evrp-ignore.c: XFAIL. + * gcc.dg/tree-ssa/vrp06.c: Likewise. + +2023-04-20 Jakub Jelinek + + PR c/107041 + * gcc.dg/goacc/pr107041.c: New test. + +2023-04-20 Uros Bizjak + + PR target/78952 + * gcc.target/i386/pr78952-4.c: New test. + +2023-04-20 Juzhe-Zhong + kito-cheng + + * gcc.target/riscv/rvv/base/spill-4.c: Adapt testcase. + * gcc.target/riscv/rvv/base/spill-6.c: Adapt testcase. + * gcc.target/riscv/rvv/base/reg_order-1.c: New test. + +2023-04-20 Kito Cheng + + * gcc.target/riscv/arch-19.c: Add -misa-spec. + +2023-04-20 Ju-Zhe Zhong + kito-cheng + + PR target/109535 + * g++.target/riscv/rvv/base/pr109535.C: New test. + * gcc.target/riscv/rvv/base/pr109535.c: New test. + +2023-04-20 Kito Cheng + + * gcc.target/riscv/simplify_ior_optimization.c: Use stdint-gcc.h + rather than stdint.h + +2023-04-20 Andrew Stubbs + + * lib/target-supports.exp + (check_effective_target_vect_call_copysignf): Add amdgcn. + (check_effective_target_vect_call_sqrtf): Add amdgcn. + (check_effective_target_vect_call_ceilf): Add amdgcn. + (check_effective_target_vect_call_floor): Add amdgcn. + (check_effective_target_vect_logical_reduc): Add amdgcn. + +2023-04-20 Jakub Jelinek + + PR tree-optimization/109011 + * gcc.dg/vect/pr109011-1.c: Remove -mpower9-vector from + dg-additional-options. + (baz, qux): Remove functions and corresponding dg-final. + * gcc.dg/vect/pr109011-2.c: New test. + * gcc.dg/vect/pr109011-3.c: New test. + * gcc.dg/vect/pr109011-4.c: New test. + * gcc.dg/vect/pr109011-5.c: New test. + +2023-04-20 Jakub Jelinek + + PR c++/108099 + PR testsuite/109560 + * g++.dg/ext/int128-8.C: Require int128 effective target. + +2023-04-20 Jiufu Guo + + PR testsuite/106879 + * gcc.dg/vect/bb-slp-layout-19.c: Modify to guard the check with + vect_hw_misalign on POWERs. + +2023-04-20 Haochen Jiang + + * gcc.target/i386/avx512fvl-vaes-1.c: Add VAES xmm test. + * gcc.target/i386/pr109117-1.c: Modify error message. + +2023-04-20 Hu, Lin1 + + * gcc.target/i386/avx512vlbw-reduce-op-1.c: New test. + +2023-04-20 Haochen Jiang + + * gcc.target/i386/vpclmulqdq.c: Add compile test for xmm. + +2023-04-20 Haochen Jiang + + * gcc.target/i386/avx512bw-pr100267-1.c: Remove avx512f and avx512bw. + * gcc.target/i386/avx512bw-pr100267-b-2.c: Ditto. + * gcc.target/i386/avx512bw-pr100267-d-2.c: Ditto. + * gcc.target/i386/avx512bw-pr100267-q-2.c: Ditto. + * gcc.target/i386/avx512bw-pr100267-w-2.c: Ditto. + * gcc.target/i386/avx512f-vpcompressb-1.c: Ditto. + * gcc.target/i386/avx512f-vpcompressb-2.c: Ditto. + * gcc.target/i386/avx512f-vpcompressw-1.c: Ditto. + * gcc.target/i386/avx512f-vpcompressw-2.c: Ditto. + * gcc.target/i386/avx512f-vpexpandb-1.c: Ditto. + * gcc.target/i386/avx512f-vpexpandb-2.c: Ditto. + * gcc.target/i386/avx512f-vpexpandw-1.c: Ditto. + * gcc.target/i386/avx512f-vpexpandw-2.c: Ditto. + * gcc.target/i386/avx512f-vpshld-1.c: Ditto. + * gcc.target/i386/avx512f-vpshldd-2.c: Ditto. + * gcc.target/i386/avx512f-vpshldq-2.c: Ditto. + * gcc.target/i386/avx512f-vpshldv-1.c: Ditto. + * gcc.target/i386/avx512f-vpshldvd-2.c: Ditto. + * gcc.target/i386/avx512f-vpshldvq-2.c: Ditto. + * gcc.target/i386/avx512f-vpshldvw-2.c: Ditto. 
+ * gcc.target/i386/avx512f-vpshrdd-2.c: Ditto. + * gcc.target/i386/avx512f-vpshrdq-2.c: Ditto. + * gcc.target/i386/avx512f-vpshrdv-1.c: Ditto. + * gcc.target/i386/avx512f-vpshrdvd-2.c: Ditto. + * gcc.target/i386/avx512f-vpshrdvq-2.c: Ditto. + * gcc.target/i386/avx512f-vpshrdvw-2.c: Ditto. + * gcc.target/i386/avx512f-vpshrdw-2.c: Ditto. + * gcc.target/i386/avx512vbmi2-vpshld-1.c: Ditto. + * gcc.target/i386/avx512vbmi2-vpshrd-1.c: Ditto. + * gcc.target/i386/avx512vl-vpcompressb-1.c: Ditto. + * gcc.target/i386/avx512vl-vpcompressb-2.c: Ditto. + * gcc.target/i386/avx512vl-vpcompressw-2.c: Ditto. + * gcc.target/i386/avx512vl-vpexpandb-1.c: Ditto. + * gcc.target/i386/avx512vl-vpexpandb-2.c: Ditto. + * gcc.target/i386/avx512vl-vpexpandw-1.c: Ditto. + * gcc.target/i386/avx512vl-vpexpandw-2.c: Ditto. + * gcc.target/i386/avx512vl-vpshldd-2.c: Ditto. + * gcc.target/i386/avx512vl-vpshldq-2.c: Ditto. + * gcc.target/i386/avx512vl-vpshldv-1.c: Ditto. + * gcc.target/i386/avx512vl-vpshldvd-2.c: Ditto. + * gcc.target/i386/avx512vl-vpshldvq-2.c: Ditto. + * gcc.target/i386/avx512vl-vpshldvw-2.c: Ditto. + * gcc.target/i386/avx512vl-vpshrdd-2.c: Ditto. + * gcc.target/i386/avx512vl-vpshrdq-2.c: Ditto. + * gcc.target/i386/avx512vl-vpshrdv-1.c: Ditto. + * gcc.target/i386/avx512vl-vpshrdvd-2.c: Ditto. + * gcc.target/i386/avx512vl-vpshrdvq-2.c: Ditto. + * gcc.target/i386/avx512vl-vpshrdvw-2.c: Ditto. + * gcc.target/i386/avx512vl-vpshrdw-2.c: Ditto. + * gcc.target/i386/avx512vlbw-pr100267-1.c: Ditto. + * gcc.target/i386/avx512vlbw-pr100267-b-2.c: Ditto. + * gcc.target/i386/avx512vlbw-pr100267-w-2.c: Ditto. + +2023-04-20 Haochen Jiang + + * gcc.target/i386/avx512bitalg-vpopcntb-1.c: + Remove avx512bw. + * gcc.target/i386/avx512bitalg-vpopcntb.c: Ditto. + * gcc.target/i386/avx512bitalg-vpopcntbvl.c: Ditto. + * gcc.target/i386/avx512bitalg-vpopcntw-1.c: Ditto. + * gcc.target/i386/avx512bitalg-vpopcntw.c: Ditto. + * gcc.target/i386/avx512bitalg-vpopcntwvl.c: Ditto. + * gcc.target/i386/avx512bitalg-vpshufbitqmb-1.c: Ditto. + * gcc.target/i386/avx512bitalg-vpshufbitqmb.c: Ditto. + * gcc.target/i386/avx512bitalgvl-vpopcntb-1.c: Ditto. + * gcc.target/i386/avx512bitalgvl-vpopcntw-1.c: Ditto. + * gcc.target/i386/avx512bitalgvl-vpshufbitqmb-1.c: Ditto. + * gcc.target/i386/pr93696-1.c: Ditto. + * gcc.target/i386/pr93696-2.c: Ditto. + +2023-04-20 Hu, Lin1 + + * gcc.target/i386/avx512vl-vshuff32x4-1.c: Modify test. + * gcc.target/i386/avx512vl-vshuff64x2-1.c: Ditto. + * gcc.target/i386/avx512vl-vshufi32x4-1.c: Ditto. + * gcc.target/i386/avx512vl-vshufi64x2-1.c: Ditto. + * gcc.target/i386/opt-vperm-vshuf-1.c: New test. + * gcc.target/i386/opt-vperm-vshuf-2.c: Ditto. + * gcc.target/i386/opt-vperm-vshuf-3.c: Ditto. + 2023-04-19 Patrick Palka Jonathan Wakely -- cgit v1.1 From d51f2456ee51bd59a79b4725ca0e488c25260bbf Mon Sep 17 00:00:00 2001 From: Juzhe-Zhong Date: Fri, 7 Apr 2023 09:34:13 +0800 Subject: RISC-V: Add local user vsetvl instruction elimination [PR109547] This patch is to enhance optimization for auto-vectorization. Before this patch: Loop: vsetvl a5,a2... vsetvl zero,a5... vle After this patch: Loop: vsetvl a5,a2 vle gcc/ChangeLog: PR target/109547 * config/riscv/riscv-vsetvl.cc (local_eliminate_vsetvl_insn): New function. (vector_insn_info::skip_avl_compatible_p): Ditto. (vector_insn_info::merge): Remove default value. (pass_vsetvl::compute_local_backward_infos): Ditto. (pass_vsetvl::cleanup_insns): Add local vsetvl elimination. * config/riscv/riscv-vsetvl.h: Ditto. 
gcc/testsuite/ChangeLog: PR target/109547 * gcc.target/riscv/rvv/vsetvl/pr109547.c: New. * gcc.target/riscv/rvv/vsetvl/vsetvl-17.c: Update scan condition. --- gcc/config/riscv/riscv-vsetvl.cc | 71 +++++++++++++++++++++- gcc/config/riscv/riscv-vsetvl.h | 1 + .../gcc.target/riscv/rvv/vsetvl/pr109547.c | 14 +++++ .../gcc.target/riscv/rvv/vsetvl/vsetvl-17.c | 2 +- 4 files changed, 85 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109547.c (limited to 'gcc') diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc index 9c356ce..2406931 100644 --- a/gcc/config/riscv/riscv-vsetvl.cc +++ b/gcc/config/riscv/riscv-vsetvl.cc @@ -1054,6 +1054,51 @@ change_vsetvl_insn (const insn_info *insn, const vector_insn_info &info) change_insn (rinsn, new_pat); } +static void +local_eliminate_vsetvl_insn (const vector_insn_info &dem) +{ + const insn_info *insn = dem.get_insn (); + if (!insn || insn->is_artificial ()) + return; + rtx_insn *rinsn = insn->rtl (); + const bb_info *bb = insn->bb (); + if (vsetvl_insn_p (rinsn)) + { + rtx vl = get_vl (rinsn); + for (insn_info *i = insn->next_nondebug_insn (); + real_insn_and_same_bb_p (i, bb); i = i->next_nondebug_insn ()) + { + if (i->is_call () || i->is_asm () + || find_access (i->defs (), VL_REGNUM) + || find_access (i->defs (), VTYPE_REGNUM)) + return; + + if (has_vtype_op (i->rtl ())) + { + if (!vsetvl_discard_result_insn_p (PREV_INSN (i->rtl ()))) + return; + rtx avl = get_avl (i->rtl ()); + if (avl != vl) + return; + set_info *def = find_access (i->uses (), REGNO (avl))->def (); + if (def->insn () != insn) + return; + + vector_insn_info new_info; + new_info.parse_insn (i); + if (!new_info.skip_avl_compatible_p (dem)) + return; + + new_info.set_avl_info (dem.get_avl_info ()); + new_info = dem.merge (new_info, LOCAL_MERGE); + change_vsetvl_insn (insn, new_info); + eliminate_insn (PREV_INSN (i->rtl ())); + return; + } + } + } +} + static bool source_equal_p (insn_info *insn1, insn_info *insn2) { @@ -1997,6 +2042,19 @@ vector_insn_info::compatible_p (const vector_insn_info &other) const } bool +vector_insn_info::skip_avl_compatible_p (const vector_insn_info &other) const +{ + gcc_assert (valid_or_dirty_p () && other.valid_or_dirty_p () + && "Can't compare invalid demanded infos"); + unsigned array_size = sizeof (incompatible_conds) / sizeof (demands_cond); + /* Bypass AVL incompatible cases. */ + for (unsigned i = 1; i < array_size; i++) + if (incompatible_conds[i].dual_incompatible_p (*this, other)) + return false; + return true; +} + +bool vector_insn_info::compatible_avl_p (const vl_vtype_info &other) const { gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info"); @@ -2190,7 +2248,7 @@ vector_insn_info::fuse_mask_policy (const vector_insn_info &info1, vector_insn_info vector_insn_info::merge (const vector_insn_info &merge_info, - enum merge_type type = LOCAL_MERGE) const + enum merge_type type) const { if (!vsetvl_insn_p (get_insn ()->rtl ())) gcc_assert (this->compatible_p (merge_info) @@ -2696,7 +2754,7 @@ pass_vsetvl::compute_local_backward_infos (const bb_info *bb) && !reg_available_p (insn, change)) && change.compatible_p (info)) { - info = change.merge (info); + info = change.merge (info, LOCAL_MERGE); /* Fix PR109399, we should update user vsetvl instruction if there is a change in demand fusion. 
*/
	if (vsetvl_insn_p (insn->rtl ()))
@@ -3925,6 +3983,15 @@ pass_vsetvl::cleanup_insns (void) const
     for (insn_info *insn : bb->real_nondebug_insns ())
       {
 	rtx_insn *rinsn = insn->rtl ();
+	const auto &dem = m_vector_manager->vector_insn_infos[insn->uid ()];
+	/* Eliminate local vsetvl:
+	     bb 0:
+	     vsetvl a5,a6,...
+	     vsetvl zero,a5.
+
+	   Eliminate vsetvl in bb2 when a5 is only coming from
+	   bb 0.  */
+	local_eliminate_vsetvl_insn (dem);
 
 	if (vlmax_avl_insn_p (rinsn))
 	  {
diff --git a/gcc/config/riscv/riscv-vsetvl.h b/gcc/config/riscv/riscv-vsetvl.h
index 237381f..4fe08cf 100644
--- a/gcc/config/riscv/riscv-vsetvl.h
+++ b/gcc/config/riscv/riscv-vsetvl.h
@@ -380,6 +380,7 @@ public:
   void fuse_mask_policy (const vector_insn_info &, const vector_insn_info &);
   bool compatible_p (const vector_insn_info &) const;
+  bool skip_avl_compatible_p (const vector_insn_info &) const;
   bool compatible_avl_p (const vl_vtype_info &) const;
   bool compatible_avl_p (const avl_info &) const;
   bool compatible_vtype_p (const vl_vtype_info &) const;
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109547.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109547.c
new file mode 100644
index 0000000..88dd877
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr109547.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -fno-schedule-insns -fno-schedule-insns2" } */
+
+#include "riscv_vector.h"
+
+void func(unsigned char *out, unsigned char *in, unsigned long len) {
+  unsigned long i = 0;
+  while (i < len) {
+    unsigned long vl = __riscv_vsetvl_e8m1(len - i);
+    vuint8m1_t r = __riscv_vle8_v_u8m1(in + i, vl);
+    __riscv_vse8_v_u8m1(out + i, r, vl);
+    i += vl;
+  }
+/* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-17.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-17.c
index ee58f9b..8a1bbb4 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-17.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/vsetvl-17.c
@@ -11,4 +11,4 @@ void foo(int32_t *in1, int32_t *in2, int32_t *in3, int32_t *out, size_t n, int c
   __riscv_vse32_v_i32m1(out, c, __riscv_vsetvl_e8mf2 (vl));
 }
 
-/* { dg-final { scan-assembler-times {vsetvli} 8 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
\ No newline at end of file
+/* { dg-final { scan-assembler-times {vsetvli} 7 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */
-- 
cgit v1.1

From a80c68a08604b0ac625ac7fc59eae40b551b1176 Mon Sep 17 00:00:00 2001
From: Peng Fan
Date: Wed, 19 Apr 2023 16:23:42 +0800
Subject: LoongArch: Fix MUSL_DYNAMIC_LINKER

Musl-based systems have no '/lib64', so adjust the dynamic linker
path.  The "Multilib/multi-arch" section of
https://wiki.musl-libc.org/guidelines-for-distributions.html describes
the expected layout.  With this change the requested loader becomes
e.g. /lib/ld-musl-loongarch64-lp64d.so.1 for -mabi=lp64d.

gcc/

	* config/loongarch/gnu-user.h (MUSL_DYNAMIC_LINKER): Redefine.

Signed-off-by: Peng Fan
Suggested-by: Xi Ruoyao
---
 gcc/config/loongarch/gnu-user.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'gcc')

diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h
index aecaa02..fa1a521 100644
--- a/gcc/config/loongarch/gnu-user.h
+++ b/gcc/config/loongarch/gnu-user.h
@@ -33,9 +33,14 @@ along with GCC; see the file COPYING3.
If not see #define GLIBC_DYNAMIC_LINKER \ "/lib" ABI_GRLEN_SPEC "/ld-linux-loongarch-" ABI_SPEC ".so.1" +#define MUSL_ABI_SPEC \ + "%{mabi=lp64d:-lp64d}" \ + "%{mabi=lp64f:-lp64f}" \ + "%{mabi=lp64s:-lp64s}" + #undef MUSL_DYNAMIC_LINKER #define MUSL_DYNAMIC_LINKER \ - "/lib" ABI_GRLEN_SPEC "/ld-musl-loongarch-" ABI_SPEC ".so.1" + "/lib/ld-musl-loongarch" ABI_GRLEN_SPEC MUSL_ABI_SPEC ".so.1" #undef GNU_USER_TARGET_LINK_SPEC #define GNU_USER_TARGET_LINK_SPEC \ -- cgit v1.1 From a322f37a57bc164d9ab8445079655afc533ddae9 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Thu, 20 Apr 2023 13:56:21 +0200 Subject: Fix LCM dataflow CFG order The following fixes the initial order the LCM dataflow routines process BBs. For a forward problem you want reverse postorder, for a backward problem you want reverse postorder on the inverted graph. The LCM iteration has very many other issues but this allows to turn inverted_post_order_compute into computing a reverse postorder more easily. * lcm.cc (compute_antinout_edge): Use RPO on the inverted graph. (compute_laterin): Use RPO. (compute_available): Likewise. --- gcc/lcm.cc | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-) (limited to 'gcc') diff --git a/gcc/lcm.cc b/gcc/lcm.cc index d7a86c7..5adb4eb 100644 --- a/gcc/lcm.cc +++ b/gcc/lcm.cc @@ -99,16 +99,20 @@ compute_antinout_edge (sbitmap *antloc, sbitmap *transp, sbitmap *antin, bitmap_vector_ones (antin, last_basic_block_for_fn (cfun)); /* Put every block on the worklist; this is necessary because of the - optimistic initialization of ANTIN above. */ - int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); - int postorder_num = post_order_compute (postorder, false, false); - for (int i = 0; i < postorder_num; ++i) + optimistic initialization of ANTIN above. Use reverse postorder + on the inverted graph to make the backward dataflow problem require + less iterations. */ + auto_vec postorder; + inverted_post_order_compute (&postorder); + for (int i = postorder.length () - 1; i >= 0; --i) { bb = BASIC_BLOCK_FOR_FN (cfun, postorder[i]); + if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun) + || bb == ENTRY_BLOCK_PTR_FOR_FN (cfun)) + continue; *qin++ = bb; bb->aux = bb; } - free (postorder); qin = worklist; qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS]; @@ -270,17 +274,15 @@ compute_laterin (struct edge_list *edge_list, sbitmap *earliest, /* Add all the blocks to the worklist. This prevents an early exit from the loop given our optimistic initialization of LATER above. */ - auto_vec postorder; - inverted_post_order_compute (&postorder); - for (unsigned int i = 0; i < postorder.length (); ++i) + int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS); + int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false); + for (int i = 0; i < n; ++i) { - bb = BASIC_BLOCK_FOR_FN (cfun, postorder[i]); - if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun) - || bb == ENTRY_BLOCK_PTR_FOR_FN (cfun)) - continue; + bb = BASIC_BLOCK_FOR_FN (cfun, rpo[i]); *qin++ = bb; bb->aux = bb; } + free (rpo); /* Note that we do not use the last allocated element for our queue, as EXIT_BLOCK is never inserted into it. */ @@ -298,13 +300,14 @@ compute_laterin (struct edge_list *edge_list, sbitmap *earliest, if (qout >= qend) qout = worklist; - /* Compute the intersection of LATERIN for each incoming edge to B. */ + /* Compute LATERIN as the intersection of LATER for each incoming + edge to BB. 
*/ bitmap_ones (laterin[bb->index]); FOR_EACH_EDGE (e, ei, bb->preds) bitmap_and (laterin[bb->index], laterin[bb->index], later[(size_t)e->aux]); - /* Calculate LATER for all outgoing edges. */ + /* Calculate LATER for all outgoing edges of BB. */ FOR_EACH_EDGE (e, ei, bb->succs) if (bitmap_ior_and_compl (later[(size_t) e->aux], earliest[(size_t) e->aux], @@ -509,19 +512,17 @@ compute_available (sbitmap *avloc, sbitmap *kill, sbitmap *avout, bitmap_vector_ones (avout, last_basic_block_for_fn (cfun)); /* Put every block on the worklist; this is necessary because of the - optimistic initialization of AVOUT above. Use inverted postorder - to make the dataflow problem require less iterations. */ - auto_vec postorder; - inverted_post_order_compute (&postorder); - for (unsigned int i = 0; i < postorder.length (); ++i) + optimistic initialization of AVOUT above. Use reverse postorder + to make the forward dataflow problem require less iterations. */ + int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS); + int n = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, false); + for (int i = 0; i < n; ++i) { - bb = BASIC_BLOCK_FOR_FN (cfun, postorder[i]); - if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun) - || bb == ENTRY_BLOCK_PTR_FOR_FN (cfun)) - continue; + bb = BASIC_BLOCK_FOR_FN (cfun, rpo[i]); *qin++ = bb; bb->aux = bb; } + free (rpo); qin = worklist; qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS]; -- cgit v1.1 From 4dca6024fb8254117bc1b0ea005a92ee6a7b84be Mon Sep 17 00:00:00 2001 From: Haochen Gui Date: Fri, 21 Apr 2023 16:35:07 +0800 Subject: testsuite: make ppc_cpu_supports_hw as effective target keyword [PR108728] gcc/testsuite/ PR target/108728 * lib/target-supports.exp (is-effective-target-keyword): Add ppc_cpu_supports_hw. --- gcc/testsuite/lib/target-supports.exp | 1 + 1 file changed, 1 insertion(+) (limited to 'gcc') diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 868e2c4..95cbb1a 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -9175,6 +9175,7 @@ proc is-effective-target-keyword { arg } { "named_sections" { return 1 } "gc_sections" { return 1 } "cxa_atexit" { return 1 } + "ppc_cpu_supports_hw" { return 1 } default { return 0 } } } -- cgit v1.1 From 6afa7d31a0e8865e17b317ada5cc5014b5d07da3 Mon Sep 17 00:00:00 2001 From: Haochen Gui Date: Fri, 21 Apr 2023 16:42:31 +0800 Subject: rs6000: xfail float128 comparison test case that fails on powerpc64. This patch xfails a float128 comparison test case on powerpc64 that fails due to a longstanding issue with floating-point compares. See PR58684 for more information. When float128 hardware is enabled (-mfloat128-hardware), xscmpuqp is generated for comparison which is unexpected. When float128 software emulation is enabled (-mno-float128-hardware), we still have to xfail the hardware version (__lekf2_hw) which finally generates xscmpuqp. gcc/testsuite/ PR target/108728 * gcc.dg/torture/float128-cmp-invalid.c: Add xfail. --- gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c | 1 + 1 file changed, 1 insertion(+) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c b/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c index 1f675ef..a86592b 100644 --- a/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c +++ b/gcc/testsuite/gcc.dg/torture/float128-cmp-invalid.c @@ -1,5 +1,6 @@ /* Test for "invalid" exceptions from __float128 comparisons. 
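   On powerpc64 the comparison may be carried out by xscmpuqp (directly
   with -mfloat128-hardware, or via the __lekf2_hw path under software
   emulation), which runs into the longstanding compare issue of
   PR58684; hence the xfail directive added below.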
*/
 /* { dg-do run } */
+/* { dg-xfail-run-if "xfail for ppc float128_hw" { ppc_float128_hw || { ppc_cpu_supports_hw && p9vector_hw } } } */
 /* { dg-options "" } */
 /* { dg-require-effective-target __float128 } */
 /* { dg-require-effective-target base_quadfloat_support } */
-- 
cgit v1.1

From 98d66b204932e343bbf940990914b949e8fccbd5 Mon Sep 17 00:00:00 2001
From: Robin Dapp
Date: Fri, 21 Apr 2023 09:38:06 +0200
Subject: riscv: Fix fallout.

PR109582: Since r14-116 generic.md uses standard pattern names instead
of the insn-type names defined in the iterator (which match the
instruction names).  Change the reservation back to the insn names.

gcc/ChangeLog:

	PR target/109582
	* config/riscv/generic.md: Change standard names to insn names.
---
 gcc/config/riscv/generic.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'gcc')

diff --git a/gcc/config/riscv/generic.md b/gcc/config/riscv/generic.md
index db4fabb..2c33766 100644
--- a/gcc/config/riscv/generic.md
+++ b/gcc/config/riscv/generic.md
@@ -27,7 +27,7 @@
 
 (define_insn_reservation "generic_alu" 1
   (and (eq_attr "tune" "generic")
-       (eq_attr "type" "unknown,const,arith,shift,slt,multi,auipc,nop,logical,move,bitmanip,smin,smax,umin,umax,clz,ctz,cpop"))
+       (eq_attr "type" "unknown,const,arith,shift,slt,multi,auipc,nop,logical,move,bitmanip,min,max,minu,maxu,clz,ctz,cpop"))
   "alu")
 
 (define_insn_reservation "generic_load" 3
-- 
cgit v1.1

From d06e9264b0192c2c77e07d7fb0fe090efcb510c0 Mon Sep 17 00:00:00 2001
From: Juzhe-Zhong
Date: Fri, 21 Apr 2023 17:19:12 +0800
Subject: RISC-V: Defer vsetvli insertion to later if possible [PR108270]

Fix issue: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108270.

Consider the following testcase:

void f (void * restrict in, void * restrict out, int l, int n, int m)
{
  for (int i = 0; i < l; i++){
    for (int j = 0; j < m; j++){
      for (int k = 0; k < n; k++)
        {
          vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j, 17);
          __riscv_vse8_v_i8mf8 (out + i + j, v, 17);
        }
    }
  }
}

Compile option: -O3

Before this patch:

	mv a7,a2
	mv a6,a0
	mv t1,a1
	mv a2,a3
	vsetivli zero,17,e8,mf8,ta,ma
	ble a7,zero,.L1
	ble a4,zero,.L1
	ble a3,zero,.L1
	...

After this patch:

	mv a7,a2
	mv a6,a0
	mv t1,a1
	mv a2,a3
	ble a7,zero,.L1
	ble a4,zero,.L1
	ble a3,zero,.L1
	add a1,a0,a4
	li a0,0
	vsetivli zero,17,e8,mf8,ta,ma
	...

This issue is a missed optimization caused by Phase 3 global backward
demand fusion, not by LCM.  This patch fixes the poor placement of the
vsetvl.  The insertion point is selected not by LCM but by Phase 3
(VL/VTYPE demand info backward fusion and propagation), which I
introduced into the VSETVL PASS to enhance LCM and improve vsetvl
instruction performance.

This patch suppresses Phase 3's overly aggressive backward fusion and
propagation to the top of the function when there is no defining
instruction of the AVL (the AVL is a 0 ~ 31 immediate, since the
vsetivli instruction allows an immediate value instead of a register).

You may want to ask why we need Phase 3 to do this job at all.  Well,
there are many situations that pure LCM fails to optimize; here is a
simple case to demonstrate it:

void f (void * restrict in, void * restrict out, int n, int m, int cond)
{
  size_t vl = 101;
  for (size_t j = 0; j < m; j++){
    if (cond) {
      for (size_t i = 0; i < n; i++)
        {
          vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j, vl);
          __riscv_vse8_v_i8mf8 (out + i, v, vl);
        }
    } else {
      for (size_t i = 0; i < n; i++)
        {
          vint32mf2_t v = __riscv_vle32_v_i32mf2 (in + i + j, vl);
          v = __riscv_vadd_vv_i32mf2 (v,v,vl);
          __riscv_vse32_v_i32mf2 (out + i, v, vl);
        }
    }
  }
}

You can see: The first inner loop needs vsetvli e8 mf8 for vle+vse.
The second inner loop needs vsetvli e32 mf2 for vle+vadd+vse.

If we don't have Phase 3 (i.e. everything is handled by LCM (Phase 4)
alone), we end up with:

outerloop:
  ...
  vsetvli e8mf8
  inner loop 1:
  ....
  vsetvli e32mf2
  inner loop 2:
  ....

However, if we have Phase 3, Phase 3 fuses the vsetvli e32 mf2 of
inner loop 2 into the vsetvli e8 mf8, so we end up with this result
after Phase 3:

outerloop:
  ...
  inner loop 1:
  vsetvli e32mf2
  ....
  inner loop 2:
  vsetvli e32mf2
  ....

Then this demand information from Phase 3 is optimized well by Phase 4
(LCM); the result after Phase 4 is:

vsetvli e32mf2
outerloop:
  ...
  inner loop 1:
  ....
  inner loop 2:
  ....

You can see this is the optimal codegen with the current VSETVL PASS
(Phase 3: demand backward fusion and propagation + Phase 4: LCM).
This has been a known issue since I started to implement the VSETVL
PASS.

gcc/ChangeLog:

	PR target/108270
	* config/riscv/riscv-vsetvl.cc
	(vector_infos_manager::all_empty_predecessor_p): New function.
	(pass_vsetvl::backward_demand_fusion): Ditto.
	* config/riscv/riscv-vsetvl.h: Ditto.

gcc/testsuite/ChangeLog:

	PR target/108270
	* gcc.target/riscv/rvv/vsetvl/imm_bb_prop-1.c: Adapt testcase.
	* gcc.target/riscv/rvv/vsetvl/imm_conflict-3.c: Ditto.
	* gcc.target/riscv/rvv/vsetvl/pr108270.c: New test.
---
 gcc/config/riscv/riscv-vsetvl.cc                 | 23 ++++++++++++++++++++
 gcc/config/riscv/riscv-vsetvl.h                  |  2 ++
 .../gcc.target/riscv/rvv/vsetvl/imm_bb_prop-1.c  |  2 +-
 .../gcc.target/riscv/rvv/vsetvl/imm_conflict-3.c |  4 ++--
 .../gcc.target/riscv/rvv/vsetvl/pr108270.c       | 19 ++++++++++++++++++
 5 files changed, 47 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr108270.c

(limited to 'gcc')

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 2406931..ac99028 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -2412,6 +2412,21 @@ vector_infos_manager::get_all_available_exprs (
 }
 
 bool
+vector_infos_manager::all_empty_predecessor_p (const basic_block cfg_bb) const
+{
+  hash_set<basic_block> pred_cfg_bbs = get_all_predecessors (cfg_bb);
+  for (const basic_block pred_cfg_bb : pred_cfg_bbs)
+    {
+      const auto &pred_block_info = vector_block_infos[pred_cfg_bb->index];
+      if (!pred_block_info.local_dem.valid_or_dirty_p ()
+	  && !pred_block_info.reaching_out.valid_or_dirty_p ())
+	continue;
+      return false;
+    }
+  return true;
+}
+
+bool
 vector_infos_manager::all_same_ratio_p (sbitmap bitdata) const
 {
   if (bitmap_empty_p (bitdata))
@@ -3194,6 +3209,14 @@ pass_vsetvl::backward_demand_fusion (void)
       if (!backward_propagate_worthwhile_p (cfg_bb, curr_block_info))
	continue;
 
+      /* Fix PR108270:
+
+	   bb 0 -> bb 1
+	 We don't need to backward fuse VL/VTYPE info from bb 1 to bb 0
+	 if bb 1 is not inside a loop and all predecessors of bb 0 are empty.  */
+      if (m_vector_manager->all_empty_predecessor_p (cfg_bb))
+	continue;
+
       edge e;
       edge_iterator ei;
       /* Backward propagate to each predecessor.  */
diff --git a/gcc/config/riscv/riscv-vsetvl.h b/gcc/config/riscv/riscv-vsetvl.h
index 4fe08cf..9041eee 100644
--- a/gcc/config/riscv/riscv-vsetvl.h
+++ b/gcc/config/riscv/riscv-vsetvl.h
@@ -451,6 +451,8 @@ public:
 
   /* Return true if all expression set in bitmap are same ratio.
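     (The ratio is the SEW/LMUL ratio a vsetvl encodes; as long as every
     demanded expression agrees on one ratio, the VL they share keeps the
     same meaning.)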
*/ bool all_same_ratio_p (sbitmap) const; + bool all_empty_predecessor_p (const basic_block) const; + void release (void); void create_bitmap_vectors (void); void free_bitmap_vectors (void); diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/imm_bb_prop-1.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/imm_bb_prop-1.c index cd4ee7d..ed32a40 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/imm_bb_prop-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/imm_bb_prop-1.c @@ -29,4 +29,4 @@ void f (int8_t * restrict in, int8_t * restrict out, int n, int cond) } } -/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*5,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */ +/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*5,\s*e8,\s*mf8,\s*tu,\s*m[au]} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/imm_conflict-3.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/imm_conflict-3.c index 1f7c0f0..2fa29c0 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/imm_conflict-3.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/imm_conflict-3.c @@ -20,7 +20,7 @@ void f (int8_t * restrict in, int8_t * restrict out, int n, int cond) } } -/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*5,\s*e8,\s*mf8,\s*tu,\s*m[au]} 1 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */ +/* { dg-final { scan-assembler-times {vsetivli\s+zero,\s*5,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 2 { target { no-opts "-O0" no-opts "-g" no-opts "-funroll-loops" } } } } */ /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e8,\s*mf8,\s*t[au],\s*m[au]} 1 { target { no-opts "-O0" no-opts "-funroll-loops" no-opts "-g" } } } } */ -/* { dg-final { scan-assembler-times {vsetivli} 1 { target { no-opts "-O0" no-opts "-funroll-loops" no-opts "-g" } } } } */ +/* { dg-final { scan-assembler-times {vsetivli} 2 { target { no-opts "-O0" no-opts "-funroll-loops" no-opts "-g" } } } } */ /* { dg-final { scan-assembler-times {vsetvli} 1 { target { no-opts "-O0" no-opts "-funroll-loops" no-opts "-g" } } } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr108270.c b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr108270.c new file mode 100644 index 0000000..d2ae43b --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/vsetvl/pr108270.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gcv -mabi=ilp32 -fno-tree-vectorize -fno-schedule-insns -fno-schedule-insns2" } */ + +#include "riscv_vector.h" + +void f (void * restrict in, void * restrict out, int l, int n, int m) +{ + for (int i = 0; i < l; i++){ + for (int j = 0; j < m; j++){ + for (int k = 0; k < n; k++) + { + vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + j, 17); + __riscv_vse8_v_i8mf8 (out + i + j, v, 17); + } + } + } +} + +/* { dg-final { scan-assembler-not {mv\s+[a-x0-9]+,[a-x0-9]+\s+mv\s+[a-x0-9]+,[a-x0-9]+\s+mv\s+[a-x0-9]+,[a-x0-9]+\s+mv\s+[a-x0-9]+,[a-x0-9]+\s+mv\s+[a-x0-9]+,[a-x0-9]+\s+vsetivli} } } */ -- cgit v1.1 From 94a04c24c33580179e51d3218f2edd2cf88cadcd Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 21 Apr 2023 11:40:23 +0200 Subject: change DF to use the proper CFG order for DF_FORWARD problems This changes DF to use RPO on the forward graph for DF_FORWARD problems. 
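(As a minimal illustration: for a diamond CFG A -> {B, C} -> D, a
reverse postorder visits A, B, C, D, i.e. each block only after all of
its predecessors, which is exactly the order a forward dataflow
problem wants.)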
While that naturally maps to pre_and_rev_postorder_compute we use the existing (wrong) CFG order for DF_BACKWARD problems computed by post_order_compute since that provides the required side-effect of deleting unreachable blocks. The change requires turning the inconsistent vec vs int * back to consistent int *. A followup patch will change the inverted_post_order_compute API and change the DF_BACKWARD problem to use the correct RPO on the backward graph together with statistics I produced last year for the combined effect. * df.h (df_d::postorder_inverted): Change back to int *, clarify comments. * df-core.cc (rest_of_handle_df_finish): Adjust. (df_analyze_1): Likewise. (df_analyze): For DF_FORWARD problems use RPO on the forward graph. Adjust. (loop_inverted_post_order_compute): Adjust API. (df_analyze_loop): Adjust. (df_get_n_blocks): Likewise. (df_get_postorder): Likewise. --- gcc/df-core.cc | 58 ++++++++++++++++++++++++++++++---------------------------- gcc/df.h | 8 ++++---- 2 files changed, 34 insertions(+), 32 deletions(-) (limited to 'gcc') diff --git a/gcc/df-core.cc b/gcc/df-core.cc index de5cbd0..2712364 100644 --- a/gcc/df-core.cc +++ b/gcc/df-core.cc @@ -810,7 +810,7 @@ rest_of_handle_df_finish (void) } free (df->postorder); - df->postorder_inverted.release (); + free (df->postorder_inverted); free (df->hard_regs_live_count); free (df); df = NULL; @@ -1207,9 +1207,6 @@ df_analyze_1 (void) { int i; - /* These should be the same. */ - gcc_assert ((unsigned) df->n_blocks == df->postorder_inverted.length ()); - /* We need to do this before the df_verify_all because this is not kept incrementally up to date. */ df_compute_regs_ever_live (false); @@ -1232,8 +1229,8 @@ df_analyze_1 (void) if (dflow->problem->dir == DF_FORWARD) df_analyze_problem (dflow, df->blocks_to_analyze, - df->postorder_inverted.address (), - df->postorder_inverted.length ()); + df->postorder_inverted, + df->n_blocks); else df_analyze_problem (dflow, df->blocks_to_analyze, @@ -1261,10 +1258,15 @@ df_analyze (void) bitmap current_all_blocks = BITMAP_ALLOC (&df_bitmap_obstack); free (df->postorder); + free (df->postorder_inverted); df->postorder = XNEWVEC (int, last_basic_block_for_fn (cfun)); df->n_blocks = post_order_compute (df->postorder, true, true); - df->postorder_inverted.truncate (0); - inverted_post_order_compute (&df->postorder_inverted); + /* For DF_FORWARD use a RPO on the forward graph. Since we want to + have unreachable blocks deleted use post_order_compute and reverse + the order. */ + df->postorder_inverted = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); + for (int i = 0; i < df->n_blocks; ++i) + df->postorder_inverted[i] = df->postorder[df->n_blocks - 1 - i]; for (int i = 0; i < df->n_blocks; i++) bitmap_set_bit (current_all_blocks, df->postorder[i]); @@ -1273,7 +1275,7 @@ df_analyze (void) { /* Verify that POSTORDER_INVERTED only contains blocks reachable from the ENTRY block. 
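	 (post_order_compute above was asked to delete unreachable blocks,
	 so every index recorded here must name a live block.)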
*/ - for (unsigned int i = 0; i < df->postorder_inverted.length (); i++) + for (int i = 0; i < df->n_blocks; i++) gcc_assert (bitmap_bit_p (current_all_blocks, df->postorder_inverted[i])); } @@ -1283,12 +1285,11 @@ df_analyze (void) if (df->analyze_subset) { bitmap_and_into (df->blocks_to_analyze, current_all_blocks); - df->n_blocks = df_prune_to_subcfg (df->postorder, - df->n_blocks, df->blocks_to_analyze); - unsigned int newlen = df_prune_to_subcfg (df->postorder_inverted.address (), - df->postorder_inverted.length (), - df->blocks_to_analyze); - df->postorder_inverted.truncate (newlen); + unsigned int newlen = df_prune_to_subcfg (df->postorder, df->n_blocks, + df->blocks_to_analyze); + df_prune_to_subcfg (df->postorder_inverted, df->n_blocks, + df->blocks_to_analyze); + df->n_blocks = newlen; BITMAP_FREE (current_all_blocks); } else @@ -1364,14 +1365,13 @@ loop_post_order_compute (int *post_order, class loop *loop) /* Compute the reverse top sort order of the inverted sub-CFG specified by LOOP. Returns the number of blocks which is always loop->num_nodes. */ -static void -loop_inverted_post_order_compute (vec *post_order, class loop *loop) +static int +loop_inverted_post_order_compute (int *post_order, class loop *loop) { basic_block bb; edge_iterator *stack; int sp; - - post_order->reserve_exact (loop->num_nodes); + int post_order_num = 0; /* Allocate stack for back-tracking up CFG. */ stack = XNEWVEC (edge_iterator, loop->num_nodes + 1); @@ -1408,13 +1408,13 @@ loop_inverted_post_order_compute (vec *post_order, class loop *loop) time, check its predecessors. */ stack[sp++] = ei_start (pred->preds); else - post_order->quick_push (pred->index); + post_order[post_order_num++] = pred->index; } else { if (flow_bb_inside_loop_p (loop, bb) && ei_one_before_end_p (ei)) - post_order->quick_push (bb->index); + post_order[post_order_num++] = bb->index; if (!ei_one_before_end_p (ei)) ei_next (&stack[sp - 1]); @@ -1424,6 +1424,7 @@ loop_inverted_post_order_compute (vec *post_order, class loop *loop) } free (stack); + return post_order_num; } @@ -1433,13 +1434,14 @@ void df_analyze_loop (class loop *loop) { free (df->postorder); + free (df->postorder_inverted); df->postorder = XNEWVEC (int, loop->num_nodes); - df->postorder_inverted.truncate (0); + df->postorder_inverted = XNEWVEC (int, loop->num_nodes); df->n_blocks = loop_post_order_compute (df->postorder, loop); - loop_inverted_post_order_compute (&df->postorder_inverted, loop); + int n = loop_inverted_post_order_compute (df->postorder_inverted, loop); gcc_assert ((unsigned) df->n_blocks == loop->num_nodes); - gcc_assert (df->postorder_inverted.length () == loop->num_nodes); + gcc_assert ((unsigned) n == loop->num_nodes); bitmap blocks = BITMAP_ALLOC (&df_bitmap_obstack); for (int i = 0; i < df->n_blocks; ++i) @@ -1460,8 +1462,8 @@ df_get_n_blocks (enum df_flow_dir dir) if (dir == DF_FORWARD) { - gcc_assert (df->postorder_inverted.length ()); - return df->postorder_inverted.length (); + gcc_assert (df->postorder_inverted); + return df->n_blocks; } gcc_assert (df->postorder); @@ -1480,8 +1482,8 @@ df_get_postorder (enum df_flow_dir dir) if (dir == DF_FORWARD) { - gcc_assert (df->postorder_inverted.length ()); - return df->postorder_inverted.address (); + gcc_assert (df->postorder_inverted); + return df->postorder_inverted; } gcc_assert (df->postorder); return df->postorder; diff --git a/gcc/df.h b/gcc/df.h index aec2223..402657a 100644 --- a/gcc/df.h +++ b/gcc/df.h @@ -581,10 +581,10 @@ public: bitmap_head insns_to_delete; bitmap_head 
insns_to_rescan;
   bitmap_head insns_to_notes_rescan;
-  int *postorder;             /* The current set of basic blocks
-                                 in reverse postorder.  */
-  vec<int> postorder_inverted; /* The current set of basic blocks
-                                 in reverse postorder of inverted CFG.  */
+  int *postorder;             /* The current set of basic blocks in reverse
+                                 postorder for DF_BACKWARD problems.  */
+  int *postorder_inverted;    /* The current set of basic blocks in reverse
+                                 postorder for DF_FORWARD problems.  */
   int n_blocks;               /* The number of blocks in reverse postorder.  */
 
   /* An array [FIRST_PSEUDO_REGISTER], indexed by regno, of the number
-- 
cgit v1.1

From 773cc925e84b248afa4ed01bf444be0935d33861 Mon Sep 17 00:00:00 2001
From: Richard Biener
Date: Fri, 21 Apr 2023 09:40:01 +0200
Subject: change inverted_post_order_compute to inverted_rev_post_order_compute

The following changes the inverted_post_order_compute API back to a
plain C array interface and makes it compute a reverse post order,
since that is what all users actually require.  It will make massaging
DF to use the correct iteration orders easier.  Elsewhere it requires
replacing backward iteration over the computed order with forward
iteration.

	* cfganal.h (inverted_rev_post_order_compute): Rename from ...
	(inverted_post_order_compute): ... this.  Add struct function
	argument, change allocation to a C array.
	* cfganal.cc (inverted_rev_post_order_compute): Likewise.
	* lcm.cc (compute_antinout_edge): Adjust.
	* lra-lives.cc (lra_create_live_ranges_1): Likewise.
	* tree-ssa-dce.cc (remove_dead_stmt): Likewise.
	* tree-ssa-pre.cc (compute_antic): Likewise.
---
 gcc/cfganal.cc      | 41 ++++++++++++++++++++++-------------------
 gcc/cfganal.h       |  3 ++-
 gcc/lcm.cc          |  9 +++++----
 gcc/lra-lives.cc    | 11 ++++++-----
 gcc/tree-ssa-dce.cc | 15 ++++++++-------
 gcc/tree-ssa-pre.cc | 18 ++++++++++--------
 6 files changed, 53 insertions(+), 44 deletions(-)

(limited to 'gcc')

diff --git a/gcc/cfganal.cc b/gcc/cfganal.cc
index ef24c5e..cc858b9 100644
--- a/gcc/cfganal.cc
+++ b/gcc/cfganal.cc
@@ -740,7 +740,7 @@ post_order_compute (int *post_order, bool include_entry_exit,
 }
 
 
-/* Helper routine for inverted_post_order_compute
+/* Helper routine for inverted_rev_post_order_compute
    flow_dfs_compute_reverse_execute, and the reverse-CFG
    deapth first search in dominance.cc.
 
   BB has to belong to a region of CFG
@@ -820,12 +820,14 @@ dfs_find_deadend (basic_block bb)
    and start looking for a "dead end" from that block
    and do another inverted traversal from that block.  */
 
-void
-inverted_post_order_compute (vec<int> *post_order,
-			     sbitmap *start_points)
+int
+inverted_rev_post_order_compute (struct function *fn,
+				 int *rev_post_order,
+				 sbitmap *start_points)
 {
   basic_block bb;
-  post_order->reserve_exact (n_basic_blocks_for_fn (cfun));
+
+  int rev_post_order_num = n_basic_blocks_for_fn (fn) - 1;
 
   if (flag_checking)
     verify_no_unreachable_blocks ();
@@ -855,17 +857,17 @@ inverted_post_order_compute (vec<int> *post_order,
         }
     }
   else
-  /* Put all blocks that have no successor into the initial work list.  */
-  FOR_ALL_BB_FN (bb, cfun)
-    if (EDGE_COUNT (bb->succs) == 0)
-      {
-        /* Push the initial edge on to the stack.  */
-        if (EDGE_COUNT (bb->preds) > 0)
-          {
-            stack.quick_push (ei_start (bb->preds));
-            bitmap_set_bit (visited, bb->index);
-          }
-      }
+    /* Put all blocks that have no successor into the initial work list.  */
+    FOR_ALL_BB_FN (bb, cfun)
+      if (EDGE_COUNT (bb->succs) == 0)
+	{
+	  /* Push the initial edge on to the stack.
*/ + if (EDGE_COUNT (bb->preds) > 0) + { + stack.quick_push (ei_start (bb->preds)); + bitmap_set_bit (visited, bb->index); + } + } do { @@ -893,13 +895,13 @@ inverted_post_order_compute (vec *post_order, time, check its predecessors. */ stack.quick_push (ei_start (pred->preds)); else - post_order->quick_push (pred->index); + rev_post_order[rev_post_order_num--] = pred->index; } else { if (bb != EXIT_BLOCK_PTR_FOR_FN (cfun) && ei_one_before_end_p (ei)) - post_order->quick_push (bb->index); + rev_post_order[rev_post_order_num--] = bb->index; if (!ei_one_before_end_p (ei)) ei_next (&stack.last ()); @@ -957,7 +959,8 @@ inverted_post_order_compute (vec *post_order, while (!stack.is_empty ()); /* EXIT_BLOCK is always included. */ - post_order->quick_push (EXIT_BLOCK); + rev_post_order[rev_post_order_num--] = EXIT_BLOCK; + return n_basic_blocks_for_fn (fn); } /* Compute the depth first search order of FN and store in the array diff --git a/gcc/cfganal.h b/gcc/cfganal.h index 0b6c67d..5af917c 100644 --- a/gcc/cfganal.h +++ b/gcc/cfganal.h @@ -66,7 +66,8 @@ extern void add_noreturn_fake_exit_edges (void); extern void connect_infinite_loops_to_exit (void); extern int post_order_compute (int *, bool, bool); extern basic_block dfs_find_deadend (basic_block); -extern void inverted_post_order_compute (vec *postorder, sbitmap *start_points = 0); +extern int inverted_rev_post_order_compute (struct function *, + int *, sbitmap *start_points = 0); extern int pre_and_rev_post_order_compute_fn (struct function *, int *, int *, bool); extern int pre_and_rev_post_order_compute (int *, int *, bool); diff --git a/gcc/lcm.cc b/gcc/lcm.cc index 5adb4eb..94a3ed4 100644 --- a/gcc/lcm.cc +++ b/gcc/lcm.cc @@ -102,17 +102,18 @@ compute_antinout_edge (sbitmap *antloc, sbitmap *transp, sbitmap *antin, optimistic initialization of ANTIN above. Use reverse postorder on the inverted graph to make the backward dataflow problem require less iterations. 
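     (In that order most blocks are visited after all of their
     successors, so the backward ANTIN propagation typically converges
     in few sweeps.)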
*/ - auto_vec postorder; - inverted_post_order_compute (&postorder); - for (int i = postorder.length () - 1; i >= 0; --i) + int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); + int n = inverted_rev_post_order_compute (cfun, rpo); + for (int i = 0; i < n; ++i) { - bb = BASIC_BLOCK_FOR_FN (cfun, postorder[i]); + bb = BASIC_BLOCK_FOR_FN (cfun, rpo[i]); if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun) || bb == ENTRY_BLOCK_PTR_FOR_FN (cfun)) continue; *qin++ = bb; bb->aux = bb; } + free (rpo); qin = worklist; qend = &worklist[n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS]; diff --git a/gcc/lra-lives.cc b/gcc/lra-lives.cc index f7a7066..f7a3ba8 100644 --- a/gcc/lra-lives.cc +++ b/gcc/lra-lives.cc @@ -1405,19 +1405,20 @@ lra_create_live_ranges_1 (bool all_p, bool dead_insn_p) point_freq_vec.truncate (0); point_freq_vec.reserve_exact (new_length); lra_point_freq = point_freq_vec.address (); - auto_vec post_order_rev_cfg; - inverted_post_order_compute (&post_order_rev_cfg); - lra_assert (post_order_rev_cfg.length () == (unsigned) n_basic_blocks_for_fn (cfun)); + int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); + int n = inverted_rev_post_order_compute (cfun, rpo); + lra_assert (n == n_basic_blocks_for_fn (cfun)); bb_live_change_p = false; - for (i = post_order_rev_cfg.length () - 1; i >= 0; --i) + for (i = 0; i < n; ++i) { - bb = BASIC_BLOCK_FOR_FN (cfun, post_order_rev_cfg[i]); + bb = BASIC_BLOCK_FOR_FN (cfun, rpo[i]); if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun) || bb == ENTRY_BLOCK_PTR_FOR_FN (cfun)) continue; if (process_bb_lives (bb, curr_point, dead_insn_p)) bb_live_change_p = true; } + free (rpo); if (bb_live_change_p) { /* We need to clear pseudo live info as some pseudos can diff --git a/gcc/tree-ssa-dce.cc b/gcc/tree-ssa-dce.cc index bda7808..08876bf 100644 --- a/gcc/tree-ssa-dce.cc +++ b/gcc/tree-ssa-dce.cc @@ -1095,7 +1095,7 @@ remove_dead_stmt (gimple_stmt_iterator *i, basic_block bb, nothing to the program, then we not only remove it, but we need to update the CFG. We can chose any of edges out of BB as long as we are sure to not close infinite loops. This is done by always choosing the edge closer to - exit in inverted_post_order_compute order. */ + exit in inverted_rev_post_order_compute order. */ if (is_ctrl_stmt (stmt)) { edge_iterator ei; @@ -1111,17 +1111,18 @@ remove_dead_stmt (gimple_stmt_iterator *i, basic_block bb, { if (!bb_postorder) { - auto_vec postorder; - inverted_post_order_compute (&postorder, - &bb_contains_live_stmts); + int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); + int n = inverted_rev_post_order_compute (cfun, rpo, + &bb_contains_live_stmts); bb_postorder = XNEWVEC (int, last_basic_block_for_fn (cfun)); - for (unsigned int i = 0; i < postorder.length (); ++i) - bb_postorder[postorder[i]] = i; + for (int i = 0; i < n; ++i) + bb_postorder[rpo[i]] = i; + free (rpo); } FOR_EACH_EDGE (e2, ei, bb->succs) if (!e || e2->dest == EXIT_BLOCK_PTR_FOR_FN (cfun) || bb_postorder [e->dest->index] - < bb_postorder [e2->dest->index]) + >= bb_postorder [e2->dest->index]) e = e2; } gcc_assert (e); diff --git a/gcc/tree-ssa-pre.cc b/gcc/tree-ssa-pre.cc index 37cad36..943936d 100644 --- a/gcc/tree-ssa-pre.cc +++ b/gcc/tree-ssa-pre.cc @@ -2464,8 +2464,8 @@ compute_antic (void) /* For ANTIC computation we need a postorder that also guarantees that a block with a single successor is visited after its successor. RPO on the inverted CFG has this property. 
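   (For a block with a single successor, ANTIC_OUT is just the
   successor's ANTIC_IN, so the successor has to be processed first for
   a single sweep to make progress.)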
*/
-  auto_vec<int> postorder;
-  inverted_post_order_compute (&postorder);
+  int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
+  int n = inverted_rev_post_order_compute (cfun, rpo);
 
   auto_sbitmap worklist (last_basic_block_for_fn (cfun) + 1);
   bitmap_clear (worklist);
@@ -2481,11 +2481,11 @@ compute_antic (void)
	 for PA ANTIC computation.  */
       num_iterations++;
       changed = false;
-      for (i = postorder.length () - 1; i >= 0; i--)
+      for (i = 0; i < n; ++i)
	{
-	  if (bitmap_bit_p (worklist, postorder[i]))
+	  if (bitmap_bit_p (worklist, rpo[i]))
	    {
-	      basic_block block = BASIC_BLOCK_FOR_FN (cfun, postorder[i]);
+	      basic_block block = BASIC_BLOCK_FOR_FN (cfun, rpo[i]);
	      bitmap_clear_bit (worklist, block->index);
	      if (compute_antic_aux (block,
				     bitmap_bit_p (has_abnormal_preds,
@@ -2513,15 +2513,17 @@ compute_antic (void)
   if (do_partial_partial)
     {
       /* For partial antic we ignore backedges and thus we do not need
-	 to perform any iteration when we process blocks in postorder.  */
-      for (i = postorder.length () - 1; i >= 0; i--)
+	 to perform any iteration when we process blocks in rpo.  */
+      for (i = 0; i < n; ++i)
	{
-	  basic_block block = BASIC_BLOCK_FOR_FN (cfun, postorder[i]);
+	  basic_block block = BASIC_BLOCK_FOR_FN (cfun, rpo[i]);
	  compute_partial_antic_aux (block,
				     bitmap_bit_p (has_abnormal_preds,
						   block->index));
	}
     }
+
+  free (rpo);
 }
-- 
cgit v1.1

From 53dddbfeb213ac4ec39f550aa81eaa4264375d2c Mon Sep 17 00:00:00 2001
From: Richard Biener
Date: Fri, 21 Apr 2023 12:02:28 +0200
Subject: Use correct CFG orders for DF worklist processing

This adjusts the remaining three RPO computes in DF.  The DF_FORWARD
problems should use a RPO on the forward graph, the DF_BACKWARD
problems should use a RPO on the inverted graph.  Conveniently,
inverted_rev_post_order_compute now computes a RPO.  We still use
post_order_compute and reverse its order for its side effect of
deleting unreachable blocks.

This results in an overall 5.2% reduction in visited blocks on cc1
files.  Because most regions are irreducible on the reverse CFG, there
are a few cases where the number of visited blocks increases.  For the
set of cc1 files I have, this happens for et-forest.i, graph.i,
hwint.i, tree-ssa-dom.i, tree-ssa-loop-ch.i and tree-ssa-threadedge.i.
For tree-ssa-dse.i the change is above the noise level; I investigated
more closely and found it is really bad luck due to the
irreducibility.

	* df-core.cc (df_analyze): Compute RPO on the reverse graph
	for DF_BACKWARD problems.
	(loop_post_order_compute): Rename to ...
	(loop_rev_post_order_compute): ... this, compute a RPO.
	(loop_inverted_post_order_compute): Rename to ...
	(loop_inverted_rev_post_order_compute): ... this, compute a RPO.
	(df_analyze_loop): Use RPO on the forward graph for DF_FORWARD
	problems, RPO on the inverted graph for DF_BACKWARD.
---
 gcc/df-core.cc | 36 ++++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 16 deletions(-)

(limited to 'gcc')

diff --git a/gcc/df-core.cc b/gcc/df-core.cc
index 2712364..d4812b0 100644
--- a/gcc/df-core.cc
+++ b/gcc/df-core.cc
@@ -1259,14 +1259,18 @@ df_analyze (void)
 
   free (df->postorder);
   free (df->postorder_inverted);
-  df->postorder = XNEWVEC (int, last_basic_block_for_fn (cfun));
-  df->n_blocks = post_order_compute (df->postorder, true, true);
   /* For DF_FORWARD use a RPO on the forward graph.  Since we want to
      have unreachable blocks deleted use post_order_compute and reverse
     the order.
*/ df->postorder_inverted = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); - for (int i = 0; i < df->n_blocks; ++i) - df->postorder_inverted[i] = df->postorder[df->n_blocks - 1 - i]; + df->n_blocks = post_order_compute (df->postorder_inverted, true, true); + for (int i = 0; i < df->n_blocks / 2; ++i) + std::swap (df->postorder_inverted[i], + df->postorder_inverted[df->n_blocks - 1 - i]); + /* For DF_BACKWARD use a RPO on the reverse graph. */ + df->postorder = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); + int n = inverted_rev_post_order_compute (cfun, df->postorder); + gcc_assert (n == df->n_blocks); for (int i = 0; i < df->n_blocks; i++) bitmap_set_bit (current_all_blocks, df->postorder[i]); @@ -1305,11 +1309,11 @@ df_analyze (void) Returns the number of blocks which is always loop->num_nodes. */ static int -loop_post_order_compute (int *post_order, class loop *loop) +loop_rev_post_order_compute (int *post_order, class loop *loop) { edge_iterator *stack; int sp; - int post_order_num = 0; + int post_order_num = loop->num_nodes - 1; /* Allocate stack for back-tracking up CFG. */ stack = XNEWVEC (edge_iterator, loop->num_nodes + 1); @@ -1342,13 +1346,13 @@ loop_post_order_compute (int *post_order, class loop *loop) time, check its successors. */ stack[sp++] = ei_start (dest->succs); else - post_order[post_order_num++] = dest->index; + post_order[post_order_num--] = dest->index; } else { if (ei_one_before_end_p (ei) && src != loop_preheader_edge (loop)->src) - post_order[post_order_num++] = src->index; + post_order[post_order_num--] = src->index; if (!ei_one_before_end_p (ei)) ei_next (&stack[sp - 1]); @@ -1359,19 +1363,19 @@ loop_post_order_compute (int *post_order, class loop *loop) free (stack); - return post_order_num; + return loop->num_nodes; } /* Compute the reverse top sort order of the inverted sub-CFG specified by LOOP. Returns the number of blocks which is always loop->num_nodes. */ static int -loop_inverted_post_order_compute (int *post_order, class loop *loop) +loop_inverted_rev_post_order_compute (int *post_order, class loop *loop) { basic_block bb; edge_iterator *stack; int sp; - int post_order_num = 0; + int post_order_num = loop->num_nodes - 1; /* Allocate stack for back-tracking up CFG. */ stack = XNEWVEC (edge_iterator, loop->num_nodes + 1); @@ -1408,13 +1412,13 @@ loop_inverted_post_order_compute (int *post_order, class loop *loop) time, check its predecessors. 
*/
	    stack[sp++] = ei_start (pred->preds);
	  else
-	    post_order[post_order_num++] = pred->index;
+	    post_order[post_order_num--] = pred->index;
	}
      else
	{
	  if (flow_bb_inside_loop_p (loop, bb)
	      && ei_one_before_end_p (ei))
-	    post_order[post_order_num++] = bb->index;
+	    post_order[post_order_num--] = bb->index;
 
	  if (!ei_one_before_end_p (ei))
	    ei_next (&stack[sp - 1]);
@@ -1424,7 +1428,7 @@ loop_inverted_post_order_compute (int *post_order, class loop *loop)
     }
 
   free (stack);
-  return post_order_num;
+  return loop->num_nodes;
 }
 
 
@@ -1438,8 +1442,8 @@ df_analyze_loop (class loop *loop)
   df->postorder = XNEWVEC (int, loop->num_nodes);
   df->postorder_inverted = XNEWVEC (int, loop->num_nodes);
 
-  df->n_blocks = loop_post_order_compute (df->postorder, loop);
-  int n = loop_inverted_post_order_compute (df->postorder_inverted, loop);
+  df->n_blocks = loop_rev_post_order_compute (df->postorder_inverted, loop);
+  int n = loop_inverted_rev_post_order_compute (df->postorder, loop);
   gcc_assert ((unsigned) df->n_blocks == loop->num_nodes);
   gcc_assert ((unsigned) n == loop->num_nodes);
-- 
cgit v1.1

From cddfe6bc40b3dc0806e260bbfb4cac82d609a258 Mon Sep 17 00:00:00 2001
From: Richard Biener
Date: Fri, 21 Apr 2023 12:57:17 +0200
Subject: tree-optimization/109573 - avoid ICEing on unexpected live def

The following relaxes the assert in vectorizable_live_operation where
we catch currently unhandled cases so that it also allows an
intermediate SSA copy, as happens here, and it demotes the assert to
checking only.

	PR tree-optimization/109573
	* tree-vect-loop.cc (vectorizable_live_operation): Allow
	unhandled SSA copy as well.  Demote assert to checking only.

	* g++.dg/vect/pr109573.cc: New testcase.
---
 gcc/testsuite/g++.dg/vect/pr109573.cc | 91 +++++++++++++++++++++++++++++++++++
 gcc/tree-vect-loop.cc                 |  7 +--
 2 files changed, 95 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/vect/pr109573.cc

(limited to 'gcc')

diff --git a/gcc/testsuite/g++.dg/vect/pr109573.cc b/gcc/testsuite/g++.dg/vect/pr109573.cc
new file mode 100644
index 0000000..d96f86f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/pr109573.cc
@@ -0,0 +1,91 @@
+// { dg-do compile }
+// { dg-require-effective-target c++20 }
+
+void *operator new(__SIZE_TYPE__, void *__p) { return __p; }
+template <typename _Head> struct _Head_base {
+  _Head _M_head_impl;
+};
+template <unsigned long _Idx, typename... _Elements> struct _Tuple_impl;
+template <unsigned long _Idx, typename _Head, typename... _Tail>
+struct _Tuple_impl<_Idx, _Head, _Tail...> : _Tuple_impl<_Idx + 1, _Tail...>,
+                                            _Head_base<_Head> {
+  template <typename _UHead, typename... _UTail>
+  _Tuple_impl(_UHead __head, _UTail... __tail)
+      : _Tuple_impl<_Idx + 1, _Tail...>(__tail...), _Head_base<_Head>(__head) {}
+};
+template <unsigned long _Idx, typename _Head> struct _Tuple_impl<_Idx, _Head> {
+  template <typename _UHead> _Tuple_impl(_UHead);
+};
+template <typename... _Elements> struct tuple : _Tuple_impl<0, _Elements...> {
+  template <typename... _UElements>
+  tuple(_UElements... __elements)
+      : _Tuple_impl<0, _Elements...>(__elements...) {}
+};
+unsigned long position_;
+struct Zone {
+  template <typename T, typename... Args> T *New(Args... args) {
+    return new (reinterpret_cast<void *>(position_)) T(args...);
+  }
+};
+struct Label {
+  int pos_;
+  int near_link_pos_;
+};
+enum Condition { below_equal };
+void bind(Label *);
+Zone *zone();
+unsigned long deopt_info_address();
+int MakeDeferredCode___trans_tmp_2, MakeDeferredCode___trans_tmp_3,
+    Prologue___trans_tmp_6, MakeDeferredCode___trans_tmp_1;
+struct MaglevAssembler {
+  template <typename Function, typename... Args>
+  void MakeDeferredCode(Function &&, Args &&...);
+  template <typename Function, typename... Args>
+  void JumpToDeferredIf(Condition, Function, Args... args) {
+    MakeDeferredCode(Function(), args...);
+  }
+  void Prologue();
+};
+struct ZoneLabelRef {
+  ZoneLabelRef(Zone *zone) : label_(zone->New