author     Martin Liska <mliska@suse.cz>  2021-08-24 16:42:47 +0200
committer  Martin Liska <mliska@suse.cz>  2021-08-24 16:42:47 +0200
commit     7572f9cd10edd3bc1889a8f513dbf77b7f4e470d (patch)
tree       2f77059468d7b182b29483f6fa674fe5d6550652 /gcc
parent     eb2de151c582a38efc53ce57416f7bd7a3a9c0eb (diff)
parent     8ce18a29ef717f5920ebf5dc1d9e84570a1827d4 (diff)
Merge branch 'master' into devel/sphinx
Diffstat (limited to 'gcc')
 gcc/config/arm/arm-cpus.in                                          |   9
 gcc/config/arm/arm.c                                                |   9
 gcc/config/arm/arm.md                                               |  11
 gcc/config/arm/arm.opt                                              |   4
 gcc/config/arm/vfp.md                                               |  29
 gcc/config/i386/i386-expand.c                                       |  13
 gcc/config/i386/i386.c                                              |   5
 gcc/config/i386/i386.h                                              |   2
 gcc/config/i386/predicates.md                                       |   7
 gcc/config/i386/sse.md                                              | 238
 gcc/config/rs6000/rs6000-builtin-new.def                            | 477
 gcc/doc/invoke.texi                                                 |  16
 gcc/params.opt                                                      |   4
 gcc/testsuite/gcc.dg/predict-1.c                                    |   2
 gcc/testsuite/gcc.dg/tree-ssa/evrp-trans.c                          | 144
 gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-13a.c     |  31
 gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-7a.c      |  28
 gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-8a.c      |  30
 gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp-sp/cmse-7a.c |  27
 gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp-sp/cmse-8a.c |  29
 gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-13a.c   |  30
 gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-7a.c    |  27
 gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-8a.c    |  29
 gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-1.c       |   4
 gcc/testsuite/gcc.target/i386/pr100865-10b.c                        |   1
 gcc/testsuite/gcc.target/i386/pr100865-4b.c                         |   3
 gcc/testsuite/gcc.target/i386/pr100865-6b.c                         |   6
 gcc/testsuite/gcc.target/i386/pr100865-7b.c                         |   6
 gcc/testsuite/gcc.target/i386/pr101989-1.c                          |  51
 gcc/testsuite/gcc.target/i386/pr101989-2.c                          | 102
 gcc/testsuite/gcc.target/i386/pr101989-broadcast-1.c                |  31
 gcc/testsuite/gcc.target/i386/pr102021.c                            |  15
 gcc/testsuite/lib/target-supports.exp                               |  15
 gcc/tree-vect-loop.c                                                |   7
 gcc/tree-vect-slp.c                                                 |  70
 gcc/tree-vectorizer.c                                               |  20
 gcc/tree-vectorizer.h                                               |   2
 gcc/value-relation.cc                                               | 287
 gcc/value-relation.h                                                |   9
 39 files changed, 1720 insertions(+), 110 deletions(-)
diff --git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in
index 249995a..bcc9ebe 100644
--- a/gcc/config/arm/arm-cpus.in
+++ b/gcc/config/arm/arm-cpus.in
@@ -186,6 +186,9 @@ define feature quirk_armv6kz
# Cortex-M3 LDRD quirk.
define feature quirk_cm3_ldrd
+# v8-m/v8.1-m VLLDM errata.
+define feature quirk_vlldm
+
# Don't use .cpu assembly directive
define feature quirk_no_asmcpu
@@ -322,7 +325,7 @@ define implied vfp_base MVE MVE_FP ALL_FP
# architectures.
# xscale isn't really a 'quirk', but it isn't an architecture either and we
# need to ignore it for matching purposes.
-define fgroup ALL_QUIRKS quirk_no_volatile_ce quirk_armv6kz quirk_cm3_ldrd xscale quirk_no_asmcpu
+define fgroup ALL_QUIRKS quirk_no_volatile_ce quirk_armv6kz quirk_cm3_ldrd quirk_vlldm xscale quirk_no_asmcpu
define fgroup IGNORE_FOR_MULTILIB cdecp0 cdecp1 cdecp2 cdecp3 cdecp4 cdecp5 cdecp6 cdecp7
@@ -1571,6 +1574,7 @@ begin cpu cortex-m33
architecture armv8-m.main+dsp+fp
option nofp remove ALL_FP
option nodsp remove armv7em
+ isa quirk_vlldm
costs v7m
end cpu cortex-m33
@@ -1580,6 +1584,7 @@ begin cpu cortex-m35p
architecture armv8-m.main+dsp+fp
option nofp remove ALL_FP
option nodsp remove armv7em
+ isa quirk_vlldm
costs v7m
end cpu cortex-m35p
@@ -1591,7 +1596,7 @@ begin cpu cortex-m55
option nomve remove mve mve_float
option nofp remove ALL_FP mve_float
option nodsp remove MVE mve_float
- isa quirk_no_asmcpu
+ isa quirk_no_asmcpu quirk_vlldm
costs v7m
vendor 41
end cpu cortex-m55
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 11dafc7..5c92941 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -3616,6 +3616,15 @@ arm_option_override (void)
fix_cm3_ldrd = 0;
}
+ /* Enable fix_vlldm by default if required. */
+ if (fix_vlldm == 2)
+ {
+ if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
+ fix_vlldm = 1;
+ else
+ fix_vlldm = 0;
+ }
+
/* Hot/Cold partitioning is not currently supported, since we can't
handle literal pool placement in that case. */
if (flag_reorder_blocks_and_partition)
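
The arm.c hunk above uses a common GCC idiom for target-dependent option defaults: the option variable is initialised to 2 (see Init(2) in the arm.opt hunk below), meaning "not set on the command line", and the override hook then resolves it from the active CPU's quirk bit. A minimal standalone sketch of the idiom (illustrative C, not GCC source; the helper name is hypothetical):

    #include <stdbool.h>

    static int fix_vlldm = 2;  /* 2 = unset, 1 = enabled, 0 = disabled */

    /* Hypothetical stand-in for the resolution step in arm_option_override:
       an explicit -mfix-cmse-cve-2021-35465 or -mno-... wins; otherwise the
       default follows the CPU's quirk_vlldm bit.  */
    static void
    resolve_fix_vlldm (bool cpu_has_quirk_vlldm)
    {
      if (fix_vlldm == 2)
        fix_vlldm = cpu_has_quirk_vlldm ? 1 : 0;
    }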
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 0646048..5d3f21b 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -132,9 +132,12 @@
; TARGET_32BIT, "t1" or "t2" to specify a specific Thumb mode. "v6"
; for ARM or Thumb-2 with arm_arch6, and nov6 for ARM without
; arm_arch6. "v6t2" for Thumb-2 with arm_arch6 and "v8mb" for ARMv8-M
-; Baseline. This attribute is used to compute attribute "enabled",
+; Baseline. "fix_vlldm" is for fixing the v8-m/v8.1-m VLLDM erratum.
+; This attribute is used to compute attribute "enabled",
; use type "any" to enable an alternative in all cases.
-(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,v6t2,v8mb,iwmmxt,iwmmxt2,armv6_or_vfpv3,neon,mve"
+(define_attr "arch" "any, a, t, 32, t1, t2, v6, nov6, v6t2, \
+ v8mb, fix_vlldm, iwmmxt, iwmmxt2, armv6_or_vfpv3, \
+ neon, mve"
(const_string "any"))
(define_attr "arch_enabled" "no,yes"
@@ -177,6 +180,10 @@
(match_test "TARGET_THUMB1 && arm_arch8"))
(const_string "yes")
+ (and (eq_attr "arch" "fix_vlldm")
+ (match_test "fix_vlldm"))
+ (const_string "yes")
+
(and (eq_attr "arch" "iwmmxt2")
(match_test "TARGET_REALLY_IWMMXT2"))
(const_string "yes")
diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt
index 7417b55..a7677ee 100644
--- a/gcc/config/arm/arm.opt
+++ b/gcc/config/arm/arm.opt
@@ -268,6 +268,10 @@ Target Var(fix_cm3_ldrd) Init(2)
Avoid overlapping destination and address registers on LDRD instructions
that may trigger Cortex-M3 errata.
+mfix-cmse-cve-2021-35465
+Target Var(fix_vlldm) Init(2)
+Mitigate issues with VLLDM on some M-profile devices (CVE-2021-35465).
+
munaligned-access
Target Var(unaligned_access) Init(2) Save
Enable unaligned word and halfword accesses to packed data.
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index 93e96369..f0030a8 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -1703,12 +1703,15 @@
(set_attr "type" "mov_reg")]
)
+;; Both this and the next instruction are treated by GCC in the same
+;; way as a blockage pattern. That's perhaps stronger than it needs
+;; to be, but we do not want accesses to the VFP register bank to be
+;; moved across either instruction.
+
(define_insn "lazy_store_multiple_insn"
- [(set (match_operand:SI 0 "s_register_operand" "+&rk")
- (post_dec:SI (match_dup 0)))
- (unspec_volatile [(const_int 0)
- (mem:SI (post_dec:SI (match_dup 0)))]
- VUNSPEC_VLSTM)]
+ [(unspec_volatile
+ [(mem:BLK (match_operand:SI 0 "s_register_operand" "rk"))]
+ VUNSPEC_VLSTM)]
"use_cmse && reload_completed"
"vlstm%?\\t%0"
[(set_attr "predicable" "yes")
@@ -1716,14 +1719,16 @@
)
(define_insn "lazy_load_multiple_insn"
- [(set (match_operand:SI 0 "s_register_operand" "+&rk")
- (post_inc:SI (match_dup 0)))
- (unspec_volatile:SI [(const_int 0)
- (mem:SI (match_dup 0))]
- VUNSPEC_VLLDM)]
+ [(unspec_volatile
+ [(mem:BLK (match_operand:SI 0 "s_register_operand" "rk,rk"))]
+ VUNSPEC_VLLDM)]
"use_cmse && reload_completed"
- "vlldm%?\\t%0"
- [(set_attr "predicable" "yes")
+ "@
+ vscclrm\\t{vpr}\;vlldm\\t%0
+ vlldm\\t%0"
+ [(set_attr "arch" "fix_vlldm,*")
+ (set_attr "predicable" "no")
+ (set_attr "length" "8,4")
(set_attr "type" "load_4")]
)
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 9bf13db..2500dbf 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -579,19 +579,10 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
{
/* Broadcast to XMM/YMM/ZMM register from an integer
constant or scalar mem. */
- /* Hard registers are used for 2 purposes:
- 1. Prevent stack realignment when the original code
- doesn't use vector registers, which is the same for
- memcpy and memset.
- 2. Prevent combine to convert constant broadcast to
- load from constant pool. */
- op1 = ix86_gen_scratch_sse_rtx (mode);
+ op1 = gen_reg_rtx (mode);
if (FLOAT_MODE_P (mode)
|| (!TARGET_64BIT && GET_MODE_INNER (mode) == DImode))
- {
- first = force_const_mem (GET_MODE_INNER (mode), first);
- op1 = gen_reg_rtx (mode);
- }
+ first = force_const_mem (GET_MODE_INNER (mode), first);
bool ok = ix86_expand_vector_init_duplicate (false, mode,
op1, first);
gcc_assert (ok);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 5bff131..ebec866 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -20542,6 +20542,11 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
case UNSPEC:
if (XINT (x, 1) == UNSPEC_TP)
*total = 0;
+ else if (XINT (x, 1) == UNSPEC_VTERNLOG)
+ {
+ *total = cost->sse_op;
+ return true;
+ }
return false;
case VEC_SELECT:
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 11ac8d0..6511422 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1716,6 +1716,8 @@ typedef struct ix86_args {
#define LEGITIMATE_PIC_OPERAND_P(X) legitimate_pic_operand_p (X)
+#define STRIP_UNARY(X) (UNARY_P (X) ? XEXP (X, 0) : X)
+
#define SYMBOLIC_CONST(X) \
(GET_CODE (X) == SYMBOL_REF \
|| GET_CODE (X) == LABEL_REF \
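
The new STRIP_UNARY macro peels one unary wrapper, such as (not (reg ...)), off an RTX so the vpternlog splitters in sse.md below can match operands modulo negation. A standalone analogue (illustrative only; the struct and function names are hypothetical):

    struct expr
    {
      int is_unary;        /* nonzero for a unary node such as NOT */
      struct expr *op0;    /* its single operand */
    };

    /* Mirrors (UNARY_P (X) ? XEXP (X, 0) : X): return the operand of a
       unary node, or the node itself otherwise.  */
    static struct expr *
    strip_unary (struct expr *e)
    {
      return e->is_unary ? e->op0 : e;
    }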
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 9321f33..df5acb4 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1044,6 +1044,13 @@
(ior (match_test "op == const1_rtx")
(match_test "op == constm1_rtx")))))
+;; True for registers, or (not: registers). Used to optimize 3-operand
+;; bitwise operations.
+(define_predicate "reg_or_notreg_operand"
+ (ior (match_operand 0 "register_operand")
+ (and (match_code "not")
+ (match_test "register_operand (XEXP (op, 0), mode)"))))
+
;; True if OP is acceptable as operand of DImode shift expander.
(define_predicate "shiftdi_operand"
(if_then_else (match_test "TARGET_64BIT")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 95f9582..03fc2df 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -933,7 +933,9 @@
;; Mapping of vector modes to VPTERNLOG suffix
(define_mode_attr ternlogsuffix
[(V8DI "q") (V4DI "q") (V2DI "q")
+ (V8DF "q") (V4DF "q") (V2DF "q")
(V16SI "d") (V8SI "d") (V4SI "d")
+ (V16SF "d") (V8SF "d") (V4SF "d")
(V32HI "d") (V16HI "d") (V8HI "d")
(V64QI "d") (V32QI "d") (V16QI "d")])
@@ -10032,7 +10034,7 @@
(unspec:VI48_AVX512VL
[(match_operand:VI48_AVX512VL 1 "register_operand" "0")
(match_operand:VI48_AVX512VL 2 "register_operand" "v")
- (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
+ (match_operand:VI48_AVX512VL 3 "bcst_vector_operand" "vmBr")
(match_operand:SI 4 "const_0_to_255_operand")]
UNSPEC_VTERNLOG))]
"TARGET_AVX512F"
@@ -10041,13 +10043,245 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn "*<avx512>_vternlog<mode>_all"
+ [(set (match_operand:V 0 "register_operand" "=v")
+ (unspec:V
+ [(match_operand:V 1 "register_operand" "0")
+ (match_operand:V 2 "register_operand" "v")
+ (match_operand:V 3 "bcst_vector_operand" "vmBr")
+ (match_operand:SI 4 "const_0_to_255_operand")]
+ UNSPEC_VTERNLOG))]
+ "TARGET_AVX512F"
+ "vpternlog<ternlogsuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
+
+;; There must be lots of other combinations like
+;;
+;; (any_logic:V
+;; (any_logic:V op1 op2)
+;; (any_logic:V op1 op3))
+;;
+;; (any_logic:V
+;; (any_logic:V
+;; (any_logic:V op1, op2)
+;; op3)
+;; op1)
+;;
+;; and so on.
+
+(define_code_iterator any_logic1 [and ior xor])
+(define_code_iterator any_logic2 [and ior xor])
+(define_code_attr logic_op [(and "&") (ior "|") (xor "^")])
+
+(define_insn_and_split "*<avx512>_vpternlog<mode>_1"
+ [(set (match_operand:V 0 "register_operand")
+ (any_logic:V
+ (any_logic1:V
+ (match_operand:V 1 "reg_or_notreg_operand")
+ (match_operand:V 2 "reg_or_notreg_operand"))
+ (any_logic2:V
+ (match_operand:V 3 "reg_or_notreg_operand")
+ (match_operand:V 4 "reg_or_notreg_operand"))))]
+ "(<MODE_SIZE> == 64 || TARGET_AVX512VL)
+ && ix86_pre_reload_split ()
+ && (rtx_equal_p (STRIP_UNARY (operands[1]),
+ STRIP_UNARY (operands[4]))
+ || rtx_equal_p (STRIP_UNARY (operands[2]),
+ STRIP_UNARY (operands[4]))
+ || rtx_equal_p (STRIP_UNARY (operands[1]),
+ STRIP_UNARY (operands[3]))
+ || rtx_equal_p (STRIP_UNARY (operands[2]),
+ STRIP_UNARY (operands[3])))"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:V
+ [(match_dup 6)
+ (match_dup 2)
+ (match_dup 1)
+ (match_dup 5)]
+ UNSPEC_VTERNLOG))]
+{
+ /* VPTERNLOGD reg6, reg2, reg1, imm8. */
+ int reg6 = 0xF0;
+ int reg2 = 0xCC;
+ int reg1 = 0xAA;
+ int reg3 = 0;
+ int reg4 = 0;
+ int reg_mask, tmp1, tmp2;
+ if (rtx_equal_p (STRIP_UNARY (operands[1]),
+ STRIP_UNARY (operands[4])))
+ {
+ reg4 = reg1;
+ reg3 = reg6;
+ operands[6] = operands[3];
+ }
+ else if (rtx_equal_p (STRIP_UNARY (operands[2]),
+ STRIP_UNARY (operands[4])))
+ {
+ reg4 = reg2;
+ reg3 = reg6;
+ operands[6] = operands[3];
+ }
+ else if (rtx_equal_p (STRIP_UNARY (operands[1]),
+ STRIP_UNARY (operands[3])))
+ {
+ reg4 = reg6;
+ reg3 = reg1;
+ operands[6] = operands[4];
+ }
+ else
+ {
+ reg4 = reg6;
+ reg3 = reg2;
+ operands[6] = operands[4];
+ }
+
+ reg1 = UNARY_P (operands[1]) ? ~reg1 : reg1;
+ reg2 = UNARY_P (operands[2]) ? ~reg2 : reg2;
+ reg3 = UNARY_P (operands[3]) ? ~reg3 : reg3;
+ reg4 = UNARY_P (operands[4]) ? ~reg4 : reg4;
+
+ tmp1 = reg1 <any_logic1:logic_op> reg2;
+ tmp2 = reg3 <any_logic2:logic_op> reg4;
+ reg_mask = tmp1 <any_logic:logic_op> tmp2;
+ reg_mask &= 0xFF;
+
+ operands[1] = STRIP_UNARY (operands[1]);
+ operands[2] = STRIP_UNARY (operands[2]);
+ operands[6] = STRIP_UNARY (operands[6]);
+ operands[5] = GEN_INT (reg_mask);
+})
+
+(define_insn_and_split "*<avx512>_vpternlog<mode>_2"
+ [(set (match_operand:V 0 "register_operand")
+ (any_logic:V
+ (any_logic1:V
+ (any_logic2:V
+ (match_operand:V 1 "reg_or_notreg_operand")
+ (match_operand:V 2 "reg_or_notreg_operand"))
+ (match_operand:V 3 "reg_or_notreg_operand"))
+ (match_operand:V 4 "reg_or_notreg_operand")))]
+ "(<MODE_SIZE> == 64 || TARGET_AVX512VL)
+ && ix86_pre_reload_split ()
+ && (rtx_equal_p (STRIP_UNARY (operands[1]),
+ STRIP_UNARY (operands[4]))
+ || rtx_equal_p (STRIP_UNARY (operands[2]),
+ STRIP_UNARY (operands[4]))
+ || rtx_equal_p (STRIP_UNARY (operands[1]),
+ STRIP_UNARY (operands[3]))
+ || rtx_equal_p (STRIP_UNARY (operands[2]),
+ STRIP_UNARY (operands[3])))"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:V
+ [(match_dup 6)
+ (match_dup 2)
+ (match_dup 1)
+ (match_dup 5)]
+ UNSPEC_VTERNLOG))]
+{
+ /* VPTERNLOGD reg6, reg2, reg1, imm8. */
+ int reg6 = 0xF0;
+ int reg2 = 0xCC;
+ int reg1 = 0xAA;
+ int reg3 = 0;
+ int reg4 = 0;
+ int reg_mask, tmp1, tmp2;
+ if (rtx_equal_p (STRIP_UNARY (operands[1]),
+ STRIP_UNARY (operands[4])))
+ {
+ reg4 = reg1;
+ reg3 = reg6;
+ operands[6] = operands[3];
+ }
+ else if (rtx_equal_p (STRIP_UNARY (operands[2]),
+ STRIP_UNARY (operands[4])))
+ {
+ reg4 = reg2;
+ reg3 = reg6;
+ operands[6] = operands[3];
+ }
+ else if (rtx_equal_p (STRIP_UNARY (operands[1]),
+ STRIP_UNARY (operands[3])))
+ {
+ reg4 = reg6;
+ reg3 = reg1;
+ operands[6] = operands[4];
+ }
+ else
+ {
+ reg4 = reg6;
+ reg3 = reg2;
+ operands[6] = operands[4];
+ }
+
+ reg1 = UNARY_P (operands[1]) ? ~reg1 : reg1;
+ reg2 = UNARY_P (operands[2]) ? ~reg2 : reg2;
+ reg3 = UNARY_P (operands[3]) ? ~reg3 : reg3;
+ reg4 = UNARY_P (operands[4]) ? ~reg4 : reg4;
+
+ tmp1 = reg1 <any_logic2:logic_op> reg2;
+ tmp2 = tmp1 <any_logic1:logic_op> reg3;
+ reg_mask = tmp2 <any_logic:logic_op> reg4;
+ reg_mask &= 0xFF;
+
+ operands[1] = STRIP_UNARY (operands[1]);
+ operands[2] = STRIP_UNARY (operands[2]);
+ operands[6] = STRIP_UNARY (operands[6]);
+ operands[5] = GEN_INT (reg_mask);
+})
+
+(define_insn_and_split "*<avx512>_vpternlog<mode>_3"
+ [(set (match_operand:V 0 "register_operand")
+ (any_logic:V
+ (any_logic1:V
+ (match_operand:V 1 "reg_or_notreg_operand")
+ (match_operand:V 2 "reg_or_notreg_operand"))
+ (match_operand:V 3 "reg_or_notreg_operand")))]
+ "(<MODE_SIZE> == 64 || TARGET_AVX512VL)
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (unspec:V
+ [(match_dup 3)
+ (match_dup 2)
+ (match_dup 1)
+ (match_dup 4)]
+ UNSPEC_VTERNLOG))]
+{
+ /* VPTERNLOGD reg3, reg2, reg1, imm8. */
+ int reg3 = 0xF0;
+ int reg2 = 0xCC;
+ int reg1 = 0xAA;
+ int reg_mask, tmp1;
+
+ reg1 = UNARY_P (operands[1]) ? ~reg1 : reg1;
+ reg2 = UNARY_P (operands[2]) ? ~reg2 : reg2;
+ reg3 = UNARY_P (operands[3]) ? ~reg3 : reg3;
+
+ tmp1 = reg1 <any_logic1:logic_op> reg2;
+ reg_mask = tmp1 <any_logic:logic_op> reg3;
+ reg_mask &= 0xFF;
+
+ operands[1] = STRIP_UNARY (operands[1]);
+ operands[2] = STRIP_UNARY (operands[2]);
+ operands[3] = STRIP_UNARY (operands[3]);
+ operands[4] = GEN_INT (reg_mask);
+})
+
+
(define_insn "<avx512>_vternlog<mode>_mask"
[(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
(vec_merge:VI48_AVX512VL
(unspec:VI48_AVX512VL
[(match_operand:VI48_AVX512VL 1 "register_operand" "0")
(match_operand:VI48_AVX512VL 2 "register_operand" "v")
- (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
+ (match_operand:VI48_AVX512VL 3 "bcst_vector_operand" "vmBr")
(match_operand:SI 4 "const_0_to_255_operand")]
UNSPEC_VTERNLOG)
(match_dup 1)
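
The immediates in the splitters above use the standard three-input truth-table encoding: operands[1] contributes 0xAA, operands[2] contributes 0xCC, and the remaining input contributes 0xF0, with each constant bitwise-inverted when its operand is wrapped in a NOT; evaluating the source expression on these constants yields the VPTERNLOG imm8. A runnable C check of that computation (a standalone illustration, not GCC source):

    #include <stdio.h>

    int
    main (void)
    {
      int op1 = 0xAA, op2 = 0xCC, op3 = 0xF0;  /* three-input truth tables */
      /* imm8 for (op2 & ~op3) | (op1 & op3), i.e. a bitwise select.  */
      int imm = ((op2 & ~op3) | (op1 & op3)) & 0xFF;
      printf ("imm8 = 0x%02x\n", imm);         /* prints imm8 = 0xac */
      return 0;
    }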
diff --git a/gcc/config/rs6000/rs6000-builtin-new.def b/gcc/config/rs6000/rs6000-builtin-new.def
index 61f5b94..2a2c913 100644
--- a/gcc/config/rs6000/rs6000-builtin-new.def
+++ b/gcc/config/rs6000/rs6000-builtin-new.def
@@ -1961,3 +1961,480 @@
const vsll __builtin_vsx_xxspltd_2di (vsll, const int<1>);
XXSPLTD_V2DI vsx_xxspltd_v2di {}
+
+
+; Power7 builtins (ISA 2.06).
+[power7]
+ const unsigned int __builtin_addg6s (unsigned int, unsigned int);
+ ADDG6S addg6s {}
+
+ const signed long __builtin_bpermd (signed long, signed long);
+ BPERMD bpermd_di {}
+
+ const unsigned int __builtin_cbcdtd (unsigned int);
+ CBCDTD cbcdtd {}
+
+ const unsigned int __builtin_cdtbcd (unsigned int);
+ CDTBCD cdtbcd {}
+
+ const signed int __builtin_divwe (signed int, signed int);
+ DIVWE dive_si {}
+
+ const unsigned int __builtin_divweu (unsigned int, unsigned int);
+ DIVWEU diveu_si {}
+
+ const vsq __builtin_pack_vector_int128 (unsigned long long, unsigned long long);
+ PACK_V1TI packv1ti {}
+
+ void __builtin_ppc_speculation_barrier ();
+ SPECBARR speculation_barrier {}
+
+ const unsigned long __builtin_unpack_vector_int128 (vsq, const int<1>);
+ UNPACK_V1TI unpackv1ti {}
+
+
+; Power7 builtins requiring 64-bit GPRs (even with 32-bit addressing).
+[power7-64]
+ const signed long long __builtin_divde (signed long long, signed long long);
+ DIVDE dive_di {}
+
+ const unsigned long long __builtin_divdeu (unsigned long long, unsigned long long);
+ DIVDEU diveu_di {}
+
+
+; Power8 vector built-ins.
+[power8-vector]
+ const vsll __builtin_altivec_abs_v2di (vsll);
+ ABS_V2DI absv2di2 {}
+
+ const vsc __builtin_altivec_bcddiv10_v16qi (vsc);
+ BCDDIV10_V16QI bcddiv10_v16qi {}
+
+ const vsc __builtin_altivec_bcdmul10_v16qi (vsc);
+ BCDMUL10_V16QI bcdmul10_v16qi {}
+
+ const vsc __builtin_altivec_eqv_v16qi (vsc, vsc);
+ EQV_V16QI eqvv16qi3 {}
+
+ const vuc __builtin_altivec_eqv_v16qi_uns (vuc, vuc);
+ EQV_V16QI_UNS eqvv16qi3 {}
+
+ const vsq __builtin_altivec_eqv_v1ti (vsq, vsq);
+ EQV_V1TI eqvv1ti3 {}
+
+ const vuq __builtin_altivec_eqv_v1ti_uns (vuq, vuq);
+ EQV_V1TI_UNS eqvv1ti3 {}
+
+ const vd __builtin_altivec_eqv_v2df (vd, vd);
+ EQV_V2DF eqvv2df3 {}
+
+ const vsll __builtin_altivec_eqv_v2di (vsll, vsll);
+ EQV_V2DI eqvv2di3 {}
+
+ const vull __builtin_altivec_eqv_v2di_uns (vull, vull);
+ EQV_V2DI_UNS eqvv2di3 {}
+
+ const vf __builtin_altivec_eqv_v4sf (vf, vf);
+ EQV_V4SF eqvv4sf3 {}
+
+ const vsi __builtin_altivec_eqv_v4si (vsi, vsi);
+ EQV_V4SI eqvv4si3 {}
+
+ const vui __builtin_altivec_eqv_v4si_uns (vui, vui);
+ EQV_V4SI_UNS eqvv4si3 {}
+
+ const vss __builtin_altivec_eqv_v8hi (vss, vss);
+ EQV_V8HI eqvv8hi3 {}
+
+ const vus __builtin_altivec_eqv_v8hi_uns (vus, vus);
+ EQV_V8HI_UNS eqvv8hi3 {}
+
+ const vsc __builtin_altivec_nand_v16qi (vsc, vsc);
+ NAND_V16QI nandv16qi3 {}
+
+ const vuc __builtin_altivec_nand_v16qi_uns (vuc, vuc);
+ NAND_V16QI_UNS nandv16qi3 {}
+
+ const vsq __builtin_altivec_nand_v1ti (vsq, vsq);
+ NAND_V1TI nandv1ti3 {}
+
+ const vuq __builtin_altivec_nand_v1ti_uns (vuq, vuq);
+ NAND_V1TI_UNS nandv1ti3 {}
+
+ const vd __builtin_altivec_nand_v2df (vd, vd);
+ NAND_V2DF nandv2df3 {}
+
+ const vsll __builtin_altivec_nand_v2di (vsll, vsll);
+ NAND_V2DI nandv2di3 {}
+
+ const vull __builtin_altivec_nand_v2di_uns (vull, vull);
+ NAND_V2DI_UNS nandv2di3 {}
+
+ const vf __builtin_altivec_nand_v4sf (vf, vf);
+ NAND_V4SF nandv4sf3 {}
+
+ const vsi __builtin_altivec_nand_v4si (vsi, vsi);
+ NAND_V4SI nandv4si3 {}
+
+ const vui __builtin_altivec_nand_v4si_uns (vui, vui);
+ NAND_V4SI_UNS nandv4si3 {}
+
+ const vss __builtin_altivec_nand_v8hi (vss, vss);
+ NAND_V8HI nandv8hi3 {}
+
+ const vus __builtin_altivec_nand_v8hi_uns (vus, vus);
+ NAND_V8HI_UNS nandv8hi3 {}
+
+ const vsc __builtin_altivec_neg_v16qi (vsc);
+ NEG_V16QI negv16qi2 {}
+
+ const vd __builtin_altivec_neg_v2df (vd);
+ NEG_V2DF negv2df2 {}
+
+ const vsll __builtin_altivec_neg_v2di (vsll);
+ NEG_V2DI negv2di2 {}
+
+ const vf __builtin_altivec_neg_v4sf (vf);
+ NEG_V4SF negv4sf2 {}
+
+ const vsi __builtin_altivec_neg_v4si (vsi);
+ NEG_V4SI negv4si2 {}
+
+ const vss __builtin_altivec_neg_v8hi (vss);
+ NEG_V8HI negv8hi2 {}
+
+ const vsc __builtin_altivec_orc_v16qi (vsc, vsc);
+ ORC_V16QI orcv16qi3 {}
+
+ const vuc __builtin_altivec_orc_v16qi_uns (vuc, vuc);
+ ORC_V16QI_UNS orcv16qi3 {}
+
+ const vsq __builtin_altivec_orc_v1ti (vsq, vsq);
+ ORC_V1TI orcv1ti3 {}
+
+ const vuq __builtin_altivec_orc_v1ti_uns (vuq, vuq);
+ ORC_V1TI_UNS orcv1ti3 {}
+
+ const vd __builtin_altivec_orc_v2df (vd, vd);
+ ORC_V2DF orcv2df3 {}
+
+ const vsll __builtin_altivec_orc_v2di (vsll, vsll);
+ ORC_V2DI orcv2di3 {}
+
+ const vull __builtin_altivec_orc_v2di_uns (vull, vull);
+ ORC_V2DI_UNS orcv2di3 {}
+
+ const vf __builtin_altivec_orc_v4sf (vf, vf);
+ ORC_V4SF orcv4sf3 {}
+
+ const vsi __builtin_altivec_orc_v4si (vsi, vsi);
+ ORC_V4SI orcv4si3 {}
+
+ const vui __builtin_altivec_orc_v4si_uns (vui, vui);
+ ORC_V4SI_UNS orcv4si3 {}
+
+ const vss __builtin_altivec_orc_v8hi (vss, vss);
+ ORC_V8HI orcv8hi3 {}
+
+ const vus __builtin_altivec_orc_v8hi_uns (vus, vus);
+ ORC_V8HI_UNS orcv8hi3 {}
+
+ const vsc __builtin_altivec_vclzb (vsc);
+ VCLZB clzv16qi2 {}
+
+ const vsll __builtin_altivec_vclzd (vsll);
+ VCLZD clzv2di2 {}
+
+ const vss __builtin_altivec_vclzh (vss);
+ VCLZH clzv8hi2 {}
+
+ const vsi __builtin_altivec_vclzw (vsi);
+ VCLZW clzv4si2 {}
+
+ const vuc __builtin_altivec_vgbbd (vuc);
+ VGBBD p8v_vgbbd {}
+
+ const vsq __builtin_altivec_vaddcuq (vsq, vsq);
+ VADDCUQ altivec_vaddcuq {}
+
+ const vsq __builtin_altivec_vaddecuq (vsq, vsq, vsq);
+ VADDECUQ altivec_vaddecuq {}
+
+ const vsq __builtin_altivec_vaddeuqm (vsq, vsq, vsq);
+ VADDEUQM altivec_vaddeuqm {}
+
+ const vsll __builtin_altivec_vaddudm (vsll, vsll);
+ VADDUDM addv2di3 {}
+
+ const vsq __builtin_altivec_vadduqm (vsq, vsq);
+ VADDUQM altivec_vadduqm {}
+
+ const vsll __builtin_altivec_vbpermq (vsc, vsc);
+ VBPERMQ altivec_vbpermq {}
+
+ const vsc __builtin_altivec_vbpermq2 (vsc, vsc);
+ VBPERMQ2 altivec_vbpermq2 {}
+
+ const vsll __builtin_altivec_vmaxsd (vsll, vsll);
+ VMAXSD smaxv2di3 {}
+
+ const vull __builtin_altivec_vmaxud (vull, vull);
+ VMAXUD umaxv2di3 {}
+
+ const vsll __builtin_altivec_vminsd (vsll, vsll);
+ VMINSD sminv2di3 {}
+
+ const vull __builtin_altivec_vminud (vull, vull);
+ VMINUD uminv2di3 {}
+
+ const vd __builtin_altivec_vmrgew_v2df (vd, vd);
+ VMRGEW_V2DF p8_vmrgew_v2df {}
+
+ const vsll __builtin_altivec_vmrgew_v2di (vsll, vsll);
+ VMRGEW_V2DI p8_vmrgew_v2di {}
+
+ const vf __builtin_altivec_vmrgew_v4sf (vf, vf);
+ VMRGEW_V4SF p8_vmrgew_v4sf {}
+
+ const vsi __builtin_altivec_vmrgew_v4si (vsi, vsi);
+ VMRGEW_V4SI p8_vmrgew_v4si {}
+
+ const vd __builtin_altivec_vmrgow_v2df (vd, vd);
+ VMRGOW_V2DF p8_vmrgow_v2df {}
+
+ const vsll __builtin_altivec_vmrgow_v2di (vsll, vsll);
+ VMRGOW_V2DI p8_vmrgow_v2di {}
+
+ const vf __builtin_altivec_vmrgow_v4sf (vf, vf);
+ VMRGOW_V4SF p8_vmrgow_v4sf {}
+
+ const vsi __builtin_altivec_vmrgow_v4si (vsi, vsi);
+ VMRGOW_V4SI p8_vmrgow_v4si {}
+
+ const vsc __builtin_altivec_vpermxor (vsc, vsc, vsc);
+ VPERMXOR altivec_vpermxor {}
+
+ const vsi __builtin_altivec_vpksdss (vsll, vsll);
+ VPKSDSS altivec_vpksdss {}
+
+ const vsi __builtin_altivec_vpksdus (vsll, vsll);
+ VPKSDUS altivec_vpksdus {}
+
+ const vsi __builtin_altivec_vpkudum (vsll, vsll);
+ VPKUDUM altivec_vpkudum {}
+
+ const vsi __builtin_altivec_vpkudus (vsll, vsll);
+ VPKUDUS altivec_vpkudus {}
+
+ const vsc __builtin_altivec_vpmsumb (vsc, vsc);
+ VPMSUMB_A crypto_vpmsumb {}
+
+ const vsll __builtin_altivec_vpmsumd (vsll, vsll);
+ VPMSUMD_A crypto_vpmsumd {}
+
+ const vss __builtin_altivec_vpmsumh (vss, vss);
+ VPMSUMH_A crypto_vpmsumh {}
+
+ const vsi __builtin_altivec_vpmsumw (vsi, vsi);
+ VPMSUMW_A crypto_vpmsumw {}
+
+ const vsc __builtin_altivec_vpopcntb (vsc);
+ VPOPCNTB popcountv16qi2 {}
+
+ const vsll __builtin_altivec_vpopcntd (vsll);
+ VPOPCNTD popcountv2di2 {}
+
+ const vss __builtin_altivec_vpopcnth (vss);
+ VPOPCNTH popcountv8hi2 {}
+
+ const vsc __builtin_altivec_vpopcntub (vsc);
+ VPOPCNTUB popcountv16qi2 {}
+
+ const vsll __builtin_altivec_vpopcntud (vsll);
+ VPOPCNTUD popcountv2di2 {}
+
+ const vss __builtin_altivec_vpopcntuh (vss);
+ VPOPCNTUH popcountv8hi2 {}
+
+ const vsi __builtin_altivec_vpopcntuw (vsi);
+ VPOPCNTUW popcountv4si2 {}
+
+ const vsi __builtin_altivec_vpopcntw (vsi);
+ VPOPCNTW popcountv4si2 {}
+
+ const vsll __builtin_altivec_vrld (vsll, vsll);
+ VRLD vrotlv2di3 {}
+
+ const vsll __builtin_altivec_vsld (vsll, vsll);
+ VSLD vashlv2di3 {}
+
+ const vsll __builtin_altivec_vsrad (vsll, vsll);
+ VSRAD vashrv2di3 {}
+
+ const vsll __builtin_altivec_vsrd (vsll, vull);
+ VSRD vlshrv2di3 {}
+
+ const vsq __builtin_altivec_vsubcuq (vsq, vsq);
+ VSUBCUQ altivec_vsubcuq {}
+
+ const vsq __builtin_altivec_vsubecuq (vsq, vsq, vsq);
+ VSUBECUQ altivec_vsubecuq {}
+
+ const vsq __builtin_altivec_vsubeuqm (vsq, vsq, vsq);
+ VSUBEUQM altivec_vsubeuqm {}
+
+ const vsll __builtin_altivec_vsubudm (vsll, vsll);
+ VSUBUDM subv2di3 {}
+
+ const vsq __builtin_altivec_vsubuqm (vsq, vsq);
+ VSUBUQM altivec_vsubuqm {}
+
+ const vsll __builtin_altivec_vupkhsw (vsi);
+ VUPKHSW altivec_vupkhsw {}
+
+ const vsll __builtin_altivec_vupklsw (vsi);
+ VUPKLSW altivec_vupklsw {}
+
+ const vsq __builtin_bcdadd_v1ti (vsq, vsq, const int<1>);
+ BCDADD_V1TI bcdadd_v1ti {}
+
+ const vsc __builtin_bcdadd_v16qi (vsc, vsc, const int<1>);
+ BCDADD_V16QI bcdadd_v16qi {}
+
+ const signed int __builtin_bcdadd_eq_v1ti (vsq, vsq, const int<1>);
+ BCDADD_EQ_V1TI bcdadd_eq_v1ti {}
+
+ const signed int __builtin_bcdadd_eq_v16qi (vsc, vsc, const int<1>);
+ BCDADD_EQ_V16QI bcdadd_eq_v16qi {}
+
+ const signed int __builtin_bcdadd_gt_v1ti (vsq, vsq, const int<1>);
+ BCDADD_GT_V1TI bcdadd_gt_v1ti {}
+
+ const signed int __builtin_bcdadd_gt_v16qi (vsc, vsc, const int<1>);
+ BCDADD_GT_V16QI bcdadd_gt_v16qi {}
+
+ const signed int __builtin_bcdadd_lt_v1ti (vsq, vsq, const int<1>);
+ BCDADD_LT_V1TI bcdadd_lt_v1ti {}
+
+ const signed int __builtin_bcdadd_lt_v16qi (vsc, vsc, const int<1>);
+ BCDADD_LT_V16QI bcdadd_lt_v16qi {}
+
+ const signed int __builtin_bcdadd_ov_v1ti (vsq, vsq, const int<1>);
+ BCDADD_OV_V1TI bcdadd_unordered_v1ti {}
+
+ const signed int __builtin_bcdadd_ov_v16qi (vsc, vsc, const int<1>);
+ BCDADD_OV_V16QI bcdadd_unordered_v16qi {}
+
+ const signed int __builtin_bcdinvalid_v1ti (vsq);
+ BCDINVALID_V1TI bcdinvalid_v1ti {}
+
+ const signed int __builtin_bcdinvalid_v16qi (vsc);
+ BCDINVALID_V16QI bcdinvalid_v16qi {}
+
+ const vsq __builtin_bcdsub_v1ti (vsq, vsq, const int<1>);
+ BCDSUB_V1TI bcdsub_v1ti {}
+
+ const vsc __builtin_bcdsub_v16qi (vsc, vsc, const int<1>);
+ BCDSUB_V16QI bcdsub_v16qi {}
+
+ const signed int __builtin_bcdsub_eq_v1ti (vsq, vsq, const int<1>);
+ BCDSUB_EQ_V1TI bcdsub_eq_v1ti {}
+
+ const signed int __builtin_bcdsub_eq_v16qi (vsc, vsc, const int<1>);
+ BCDSUB_EQ_V16QI bcdsub_eq_v16qi {}
+
+ const signed int __builtin_bcdsub_ge_v1ti (vsq, vsq, const int<1>);
+ BCDSUB_GE_V1TI bcdsub_ge_v1ti {}
+
+ const signed int __builtin_bcdsub_ge_v16qi (vsc, vsc, const int<1>);
+ BCDSUB_GE_V16QI bcdsub_ge_v16qi {}
+
+ const signed int __builtin_bcdsub_gt_v1ti (vsq, vsq, const int<1>);
+ BCDSUB_GT_V1TI bcdsub_gt_v1ti {}
+
+ const signed int __builtin_bcdsub_gt_v16qi (vsc, vsc, const int<1>);
+ BCDSUB_GT_V16QI bcdsub_gt_v16qi {}
+
+ const signed int __builtin_bcdsub_le_v1ti (vsq, vsq, const int<1>);
+ BCDSUB_LE_V1TI bcdsub_le_v1ti {}
+
+ const signed int __builtin_bcdsub_le_v16qi (vsc, vsc, const int<1>);
+ BCDSUB_LE_V16QI bcdsub_le_v16qi {}
+
+ const signed int __builtin_bcdsub_lt_v1ti (vsq, vsq, const int<1>);
+ BCDSUB_LT_V1TI bcdsub_lt_v1ti {}
+
+ const signed int __builtin_bcdsub_lt_v16qi (vsc, vsc, const int<1>);
+ BCDSUB_LT_V16QI bcdsub_lt_v16qi {}
+
+ const signed int __builtin_bcdsub_ov_v1ti (vsq, vsq, const int<1>);
+ BCDSUB_OV_V1TI bcdsub_unordered_v1ti {}
+
+ const signed int __builtin_bcdsub_ov_v16qi (vsc, vsc, const int<1>);
+ BCDSUB_OV_V16QI bcdsub_unordered_v16qi {}
+
+ const vuc __builtin_crypto_vpermxor_v16qi (vuc, vuc, vuc);
+ VPERMXOR_V16QI crypto_vpermxor_v16qi {}
+
+ const vull __builtin_crypto_vpermxor_v2di (vull, vull, vull);
+ VPERMXOR_V2DI crypto_vpermxor_v2di {}
+
+ const vui __builtin_crypto_vpermxor_v4si (vui, vui, vui);
+ VPERMXOR_V4SI crypto_vpermxor_v4si {}
+
+ const vus __builtin_crypto_vpermxor_v8hi (vus, vus, vus);
+ VPERMXOR_V8HI crypto_vpermxor_v8hi {}
+
+ const vuc __builtin_crypto_vpmsumb (vuc, vuc);
+ VPMSUMB crypto_vpmsumb {}
+
+ const vull __builtin_crypto_vpmsumd (vull, vull);
+ VPMSUMD crypto_vpmsumd {}
+
+ const vus __builtin_crypto_vpmsumh (vus, vus);
+ VPMSUMH crypto_vpmsumh {}
+
+ const vui __builtin_crypto_vpmsumw (vui, vui);
+ VPMSUMW crypto_vpmsumw {}
+
+ const vf __builtin_vsx_float2_v2df (vd, vd);
+ FLOAT2_V2DF float2_v2df {}
+
+ const vf __builtin_vsx_float2_v2di (vsll, vsll);
+ FLOAT2_V2DI float2_v2di {}
+
+ const vsc __builtin_vsx_revb_v16qi (vsc);
+ REVB_V16QI revb_v16qi {}
+
+ const vsq __builtin_vsx_revb_v1ti (vsq);
+ REVB_V1TI revb_v1ti {}
+
+ const vd __builtin_vsx_revb_v2df (vd);
+ REVB_V2DF revb_v2df {}
+
+ const vsll __builtin_vsx_revb_v2di (vsll);
+ REVB_V2DI revb_v2di {}
+
+ const vf __builtin_vsx_revb_v4sf (vf);
+ REVB_V4SF revb_v4sf {}
+
+ const vsi __builtin_vsx_revb_v4si (vsi);
+ REVB_V4SI revb_v4si {}
+
+ const vss __builtin_vsx_revb_v8hi (vss);
+ REVB_V8HI revb_v8hi {}
+
+ const vf __builtin_vsx_uns_float2_v2di (vsll, vsll);
+ UNS_FLOAT2_V2DI uns_float2_v2di {}
+
+ const vsi __builtin_vsx_vsigned2_v2df (vd, vd);
+ VEC_VSIGNED2_V2DF vsigned2_v2df {}
+
+ const vsi __builtin_vsx_vunsigned2_v2df (vd, vd);
+ VEC_VUNSIGNED2_V2DF vunsigned2_v2df {}
+
+ const vf __builtin_vsx_xscvdpspn (double);
+ XSCVDPSPN vsx_xscvdpspn {}
+
+ const double __builtin_vsx_xscvspdpn (vf);
+ XSCVSPDPN vsx_xscvspdpn {}
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index a295ff5..164f586 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -771,6 +771,7 @@ Objective-C and Objective-C++ Dialects}.
-mverbose-cost-dump @gol
-mpure-code @gol
-mcmse @gol
+-mfix-cmse-cve-2021-35465 @gol
-mfdpic}
@emph{AVR Options}
@@ -14349,9 +14350,10 @@ The parameter only has an effect on targets that support partial
vector loads and stores.
@item vect-inner-loop-cost-factor
-The factor which the loop vectorizer applies to the cost of statements
-in an inner loop relative to the loop being vectorized. The default
-value is 50.
+The maximum factor which the loop vectorizer applies to the cost of statements
+in an inner loop relative to the loop being vectorized. The factor applied
+is the maximum of the estimated number of iterations of the inner loop and
+this parameter. The default value of this parameter is 50.
@item avoid-fma-max-bits
Maximum number of bits for which we avoid creating FMAs.
@@ -20701,6 +20703,14 @@ Generate secure code as per the "ARMv8-M Security Extensions: Requirements on
Development Tools Engineering Specification", which can be found on
@url{https://developer.arm.com/documentation/ecm0359818/latest/}.
+@item -mfix-cmse-cve-2021-35465
+@opindex mfix-cmse-cve-2021-35465
+Mitigate against a potential security issue with the @code{VLLDM} instruction
+in some M-profile devices when using CMSE (CVE-2021-35465). This option is
+enabled by default when the option @option{-mcpu=} is used with
+@code{cortex-m33}, @code{cortex-m35p} or @code{cortex-m55}. The option
+@option{-mno-fix-cmse-cve-2021-35465} can be used to disable the mitigation.
+
@item -mfdpic
@itemx -mno-fdpic
@opindex mfdpic
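
As a usage sketch (the target triple and file name here are illustrative placeholders): compiling a CMSE entry function with

    arm-none-eabi-gcc -mcpu=cortex-m33 -mcmse -c secure.c

enables the mitigation by default because cortex-m33 carries the quirk, while appending -mno-fix-cmse-cve-2021-35465 turns it back off.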
diff --git a/gcc/params.opt b/gcc/params.opt
index f926488..f414dc1 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -1114,7 +1114,7 @@ Common Joined UInteger Var(param_vect_partial_vector_usage) Init(2) IntegerRange
Controls how loop vectorizer uses partial vectors. 0 means never, 1 means only for loops whose need to iterate can be removed, 2 means for all loops. The default value is 2.
-param=vect-inner-loop-cost-factor=
-Common Joined UInteger Var(param_vect_inner_loop_cost_factor) Init(50) IntegerRange(1, 999999) Param Optimization
-The factor which the loop vectorizer applies to the cost of statements in an inner loop relative to the loop being vectorized.
+Common Joined UInteger Var(param_vect_inner_loop_cost_factor) Init(50) IntegerRange(1, 10000) Param Optimization
+The maximum factor which the loop vectorizer applies to the cost of statements in an inner loop relative to the loop being vectorized.
; This comment is to ensure we retain the blank line above.
diff --git a/gcc/testsuite/gcc.dg/predict-1.c b/gcc/testsuite/gcc.dg/predict-1.c
index 9e5605a..d2e753e 100644
--- a/gcc/testsuite/gcc.dg/predict-1.c
+++ b/gcc/testsuite/gcc.dg/predict-1.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-profile_estimate" } */
+/* { dg-options "-O2 -fdump-tree-profile_estimate -fdisable-tree-evrp" } */
extern int global;
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/evrp-trans.c b/gcc/testsuite/gcc.dg/tree-ssa/evrp-trans.c
new file mode 100644
index 0000000..8ee8e3c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/evrp-trans.c
@@ -0,0 +1,144 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-evrp" } */
+
+/* Simple tests to make sure transitive relations are working. */
+void keep();
+void kill();
+
+void
+f1 (int x, int y, int z)
+{
+ if (x > y)
+ if (y > z)
+ {
+ if (x > z)
+ keep ();
+ else
+ kill ();
+ }
+}
+
+void
+f2 (int w, int x, int y, int z)
+{
+ // Test one equivalence.
+ if (w == z)
+ if (x > y)
+ if (y > z)
+ {
+ if (x > w)
+ keep ();
+ else
+ kill ();
+ }
+}
+
+void
+f3 (int a, int w, int x, int y, int z)
+{
+ // Test two equivalences.
+ if (a == x)
+ if (w == z)
+ if (x > y)
+ if (y > z)
+ {
+ if (a > w)
+ keep ();
+ else
+ kill ();
+ }
+}
+
+void
+f4 (int x, int y, int z)
+{
+ // test X > Y >= Z
+ if (x > y)
+ if (y >= z)
+ {
+ if (x > z)
+ keep ();
+ else
+ kill ();
+ }
+}
+void
+f5 (int x, int y, int z)
+{
+ // test X >= Y > Z
+ if (x >= y)
+ if (y > z)
+ {
+ if (x > z)
+ keep ();
+ else
+ kill ();
+ }
+}
+
+void
+f6 (int x, int y, int z)
+{
+ // test X >= Y >= Z
+ if (x >= y)
+ if (y >= z)
+ {
+ if (x > z)
+ keep ();
+ else if (x == z)
+ keep ();
+ else
+ kill ();
+ }
+}
+
+void
+f7 (int x, int y, int z)
+{
+ // test Y <= X, Z <= Y
+ if (y <= x)
+ if (z <= y)
+ {
+ if (x > z)
+ keep ();
+ else if (x == z)
+ keep ();
+ else
+ kill ();
+ }
+}
+
+void
+f8 (int x, int y, int z)
+{
+ // test X >= Y, Z <= Y
+ if (x >= y)
+ if (z <= y)
+ {
+ if (x > z)
+ keep ();
+ else if (x == z)
+ keep ();
+ else
+ kill ();
+ }
+}
+
+void
+f9 (int x, int y, int z)
+{
+ // test Y <= X, Y >= Z
+ if (y <= x)
+ if (y >= z)
+ {
+ if (x > z)
+ keep ();
+ else if (x == z)
+ keep ();
+ else
+ kill ();
+ }
+}
+
+/* { dg-final { scan-tree-dump-not "kill" "evrp" } } */
+/* { dg-final { scan-tree-dump-times "keep" 13 "evrp"} } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-13a.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-13a.c
new file mode 100644
index 0000000..553cc78
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-13a.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=soft -mfix-cmse-cve-2021-35465" } */
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=*" } { "-mfloat-abi=soft" } } */
+
+#include "../../../cmse-13.x"
+
+/* Checks for saving and clearing prior to function call. */
+/* Shift on the same register as blxns. */
+/* { dg-final { scan-assembler "lsrs\t(r\[1,4-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */
+/* { dg-final { scan-assembler "lsls\t(r\[1,4-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */
+/* { dg-final { scan-assembler-not "mov\tr0, r4" } } */
+/* { dg-final { scan-assembler-not "mov\tr2, r4" } } */
+/* { dg-final { scan-assembler-not "mov\tr3, r4" } } */
+/* { dg-final { scan-assembler "push\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */
+/* { dg-final { scan-assembler "vlstm\tsp" } } */
+/* Check the right registers are cleared and none appears twice. */
+/* { dg-final { scan-assembler "clrm\t\{(r1, )?(r4, )?(r5, )?(r6, )?(r7, )?(r8, )?(r9, )?(r10, )?(fp, )?(ip, )?APSR\}" } } */
+/* Check that the right number of registers is cleared and thus only one
+ register is missing. */
+/* { dg-final { scan-assembler "clrm\t\{((r\[1,4-9\]|r10|fp|ip), ){9}APSR\}" } } */
+/* Check that no cleared register is used for blxns. */
+/* { dg-final { scan-assembler-not "clrm\t\{\[^\}\]\+(r\[1,4-9\]|r10|fp|ip),\[^\}\]\+\}.*blxns\t\\1" } } */
+/* Check for v8.1-m variant of erratum work-around. */
+/* { dg-final { scan-assembler "vscclrm\t\{vpr\}" } } */
+/* { dg-final { scan-assembler "vlldm\tsp" } } */
+/* { dg-final { scan-assembler "pop\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */
+/* { dg-final { scan-assembler-not "vmov" } } */
+/* { dg-final { scan-assembler-not "vmsr" } } */
+
+/* Now we check that we use the correct intrinsic to call. */
+/* { dg-final { scan-assembler "blxns" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-7a.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-7a.c
new file mode 100644
index 0000000..ce02fde
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-7a.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=soft -mfix-cmse-cve-2021-35465" } */
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=*" } { "-mfloat-abi=soft" } } */
+
+#include "../../../cmse-7.x"
+
+/* Checks for saving and clearing prior to function call. */
+/* Shift on the same register as blxns. */
+/* { dg-final { scan-assembler "lsrs\t(r\[0-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */
+/* { dg-final { scan-assembler "lsls\t(r\[0-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */
+/* { dg-final { scan-assembler "push\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */
+/* { dg-final { scan-assembler "vlstm\tsp" } } */
+/* Check the right registers are cleared and none appears twice. */
+/* { dg-final { scan-assembler "clrm\t\{(r0, )?(r1, )?(r2, )?(r3, )?(r4, )?(r5, )?(r6, )?(r7, )?(r8, )?(r9, )?(r10, )?(fp, )?(ip, )?APSR\}" } } */
+/* Check that the right number of registers is cleared and thus only one
+ register is missing. */
+/* { dg-final { scan-assembler "clrm\t\{((r\[0-9\]|r10|fp|ip), ){12}APSR\}" } } */
+/* Check that no cleared register is used for blxns. */
+/* { dg-final { scan-assembler-not "clrm\t\{\[^\}\]\+(r\[0-9\]|r10|fp|ip),\[^\}\]\+\}.*blxns\t\\1" } } */
+/* Check for v8.1-m variant of erratum work-around. */
+/* { dg-final { scan-assembler "vscclrm\t\{vpr\}" } } */
+/* { dg-final { scan-assembler "vlldm\tsp" } } */
+/* { dg-final { scan-assembler "pop\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */
+/* { dg-final { scan-assembler-not "vmov" } } */
+/* { dg-final { scan-assembler-not "vmsr" } } */
+
+/* Now we check that we use the correct intrinsic to call. */
+/* { dg-final { scan-assembler "blxns" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-8a.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-8a.c
new file mode 100644
index 0000000..75e1611
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-8a.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=soft -mfix-cmse-cve-2021-35465" } */
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=*" } { "-mfloat-abi=soft" } } */
+
+#include "../../../cmse-8.x"
+
+/* Checks for saving and clearing prior to function call. */
+/* Shift on the same register as blxns. */
+/* { dg-final { scan-assembler "lsrs\t(r\[2-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */
+/* { dg-final { scan-assembler "lsls\t(r\[2-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */
+/* { dg-final { scan-assembler-not "mov\tr0, r4" } } */
+/* { dg-final { scan-assembler-not "mov\tr1, r4" } } */
+/* { dg-final { scan-assembler "push\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */
+/* { dg-final { scan-assembler "vlstm\tsp" } } */
+/* Check the right registers are cleared and none appears twice. */
+/* { dg-final { scan-assembler "clrm\t\{(r2, )?(r3, )?(r4, )?(r5, )?(r6, )?(r7, )?(r8, )?(r9, )?(r10, )?(fp, )?(ip, )?APSR\}" } } */
+/* Check that the right number of registers is cleared and thus only one
+ register is missing. */
+/* { dg-final { scan-assembler "clrm\t\{((r\[2-9\]|r10|fp|ip), ){10}APSR\}" } } */
+/* Check that no cleared register is used for blxns. */
+/* { dg-final { scan-assembler-not "clrm\t\{\[^\}\]\+(r\[2-9\]|r10|fp|ip),\[^\}\]\+\}.*blxns\t\\1" } } */
+/* Check for v8.1-m variant of erratum work-around. */
+/* { dg-final { scan-assembler "vscclrm\t\{vpr\}" } } */
+/* { dg-final { scan-assembler "vlldm\tsp" } } */
+/* { dg-final { scan-assembler "pop\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */
+/* { dg-final { scan-assembler-not "vmov" } } */
+/* { dg-final { scan-assembler-not "vmsr" } } */
+
+/* Now we check that we use the correct intrinsic to call. */
+/* { dg-final { scan-assembler "blxns" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp-sp/cmse-7a.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp-sp/cmse-7a.c
new file mode 100644
index 0000000..dad7266
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp-sp/cmse-7a.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-sp-d16 -mfix-cmse-cve-2021-35465" } */
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=*" } { "-mfloat-abi=softfp" } } */
+/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */
+
+#include "../../../cmse-7.x"
+
+/* Checks for saving and clearing prior to function call. */
+/* Shift on the same register as blxns. */
+/* { dg-final { scan-assembler "lsrs\t(r\[0-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */
+/* { dg-final { scan-assembler "lsls\t(r\[0-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */
+/* { dg-final { scan-assembler "push\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */
+/* { dg-final { scan-assembler "vlstm\tsp" } } */
+/* Check the right registers are cleared and none appears twice. */
+/* { dg-final { scan-assembler "clrm\t\{(r0, )?(r1, )?(r2, )?(r3, )?(r4, )?(r5, )?(r6, )?(r7, )?(r8, )?(r9, )?(r10, )?(fp, )?(ip, )?APSR\}" } } */
+/* Check that the right number of registers is cleared and thus only one
+ register is missing. */
+/* { dg-final { scan-assembler "clrm\t\{((r\[0-9\]|r10|fp|ip), ){12}APSR\}" } } */
+/* Check that no cleared register is used for blxns. */
+/* { dg-final { scan-assembler-not "clrm\t\{\[^\}\]\+(r\[0-9\]|r10|fp|ip),\[^\}\]\+\}.*blxns\t\\1" } } */
+/* Check for v8.1-m variant of erratum work-around. */
+/* { dg-final { scan-assembler "vscclrm\t\{vpr\}" } } */
+/* { dg-final { scan-assembler "vlldm\tsp" } } */
+/* { dg-final { scan-assembler "pop\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */
+
+/* Now we check that we use the correct intrinsic to call. */
+/* { dg-final { scan-assembler "blxns" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp-sp/cmse-8a.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp-sp/cmse-8a.c
new file mode 100644
index 0000000..faa0448
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp-sp/cmse-8a.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-sp-d16 -mfix-cmse-cve-2021-35465" } */
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=*" } { "-mfloat-abi=softfp" } } */
+/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */
+
+#include "../../../cmse-8.x"
+
+/* Checks for saving and clearing prior to function call. */
+/* Shift on the same register as blxns. */
+/* { dg-final { scan-assembler "lsrs\t(r\[2-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */
+/* { dg-final { scan-assembler "lsls\t(r\[2-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */
+/* { dg-final { scan-assembler-not "mov\tr0, r4" } } */
+/* { dg-final { scan-assembler-not "mov\tr1, r4" } } */
+/* { dg-final { scan-assembler "push\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */
+/* { dg-final { scan-assembler "vlstm\tsp" } } */
+/* Check the right registers are cleared and none appears twice. */
+/* { dg-final { scan-assembler "clrm\t\{(r2, )?(r3, )?(r4, )?(r5, )?(r6, )?(r7, )?(r8, )?(r9, )?(r10, )?(fp, )?(ip, )?APSR\}" } } */
+/* Check that the right number of registers is cleared and thus only one
+ register is missing. */
+/* { dg-final { scan-assembler "clrm\t\{((r\[2-9\]|r10|fp|ip), ){10}APSR\}" } } */
+/* Check that no cleared register is used for blxns. */
+/* { dg-final { scan-assembler-not "clrm\t\{\[^\}\]\+(r\[2-9\]|r10|fp|ip),\[^\}\]\+\}.*blxns\t\\1" } } */
+/* Check for v8.1-m variant of erratum work-around. */
+/* { dg-final { scan-assembler "vscclrm\t\{vpr\}" } } */
+/* { dg-final { scan-assembler "vlldm\tsp" } } */
+/* { dg-final { scan-assembler "pop\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */
+
+/* Now we check that we use the correct intrinsic to call. */
+/* { dg-final { scan-assembler "blxns" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-13a.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-13a.c
new file mode 100644
index 0000000..bceba44
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-13a.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-d16 -mfix-cmse-cve-2021-35465" } */
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=*" } { "-mfloat-abi=softfp" } } */
+/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */
+
+#include "../../../cmse-13.x"
+
+/* Checks for saving and clearing prior to function call. */
+/* Shift on the same register as blxns. */
+/* { dg-final { scan-assembler "lsrs\t(r\[1,4-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */
+/* { dg-final { scan-assembler "lsls\t(r\[1,4-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */
+/* { dg-final { scan-assembler-not "mov\tr0, r4" } } */
+/* { dg-final { scan-assembler-not "mov\tr2, r4" } } */
+/* { dg-final { scan-assembler-not "mov\tr3, r4" } } */
+/* { dg-final { scan-assembler "push\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */
+/* { dg-final { scan-assembler "vlstm\tsp" } } */
+/* Check the right registers are cleared and none appears twice. */
+/* { dg-final { scan-assembler "clrm\t\{(r1, )?(r4, )?(r5, )?(r6, )?(r7, )?(r8, )?(r9, )?(r10, )?(fp, )?(ip, )?APSR\}" } } */
+/* Check that the right number of registers is cleared and thus only one
+ register is missing. */
+/* { dg-final { scan-assembler "clrm\t\{((r\[1,4-9\]|r10|fp|ip), ){9}APSR\}" } } */
+/* Check that no cleared register is used for blxns. */
+/* { dg-final { scan-assembler-not "clrm\t\{\[^\}\]\+(r\[1,4-9\]|r10|fp|ip),\[^\}\]\+\}.*blxns\t\\1" } } */
+/* Check for v8.1-m variant of erratum work-around. */
+/* { dg-final { scan-assembler "vscclrm\t\{vpr\}" } } */
+/* { dg-final { scan-assembler "vlldm\tsp" } } */
+/* { dg-final { scan-assembler "pop\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */
+
+/* Now we check that we use the correct intrinsic to call. */
+/* { dg-final { scan-assembler "blxns" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-7a.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-7a.c
new file mode 100644
index 0000000..c74ebbd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-7a.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-d16 -mfix-cmse-cve-2021-35465" } */
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=*" } { "-mfloat-abi=softfp" } } */
+/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */
+
+#include "../../../cmse-7.x"
+
+/* Checks for saving and clearing prior to function call. */
+/* Shift on the same register as blxns. */
+/* { dg-final { scan-assembler "lsrs\t(r\[0-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */
+/* { dg-final { scan-assembler "lsls\t(r\[0-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */
+/* { dg-final { scan-assembler "push\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */
+/* { dg-final { scan-assembler "vlstm\tsp" } } */
+/* Check the right registers are cleared and none appears twice. */
+/* { dg-final { scan-assembler "clrm\t\{(r0, )?(r1, )?(r2, )?(r3, )?(r4, )?(r5, )?(r6, )?(r7, )?(r8, )?(r9, )?(r10, )?(fp, )?(ip, )?APSR\}" } } */
+/* Check that the right number of registers is cleared and thus only one
+ register is missing. */
+/* { dg-final { scan-assembler "clrm\t\{((r\[0-9\]|r10|fp|ip), ){12}APSR\}" } } */
+/* Check that no cleared register is used for blxns. */
+/* { dg-final { scan-assembler-not "clrm\t\{\[^\}\]\+(r\[0-9\]|r10|fp|ip),\[^\}\]\+\}.*blxns\t\\1" } } */
+/* Check for v8.1-m variant of erratum work-around. */
+/* { dg-final { scan-assembler "vscclrm\t\{vpr\}" } } */
+/* { dg-final { scan-assembler "vlldm\tsp" } } */
+/* { dg-final { scan-assembler "pop\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */
+
+/* Now we check that we use the correct intrinsic to call. */
+/* { dg-final { scan-assembler "blxns" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-8a.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-8a.c
new file mode 100644
index 0000000..ffb67a7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-8a.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-d16 -mfix-cmse-cve-2021-35465" } */
+/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=*" } { "-mfloat-abi=softfp" } } */
+/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */
+
+#include "../../../cmse-8.x"
+
+/* Checks for saving and clearing prior to function call. */
+/* Shift on the same register as blxns. */
+/* { dg-final { scan-assembler "lsrs\t(r\[2-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */
+/* { dg-final { scan-assembler "lsls\t(r\[2-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */
+/* { dg-final { scan-assembler-not "mov\tr0, r4" } } */
+/* { dg-final { scan-assembler-not "mov\tr1, r4" } } */
+/* { dg-final { scan-assembler "push\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */
+/* { dg-final { scan-assembler "vlstm\tsp" } } */
+/* Check the right registers are cleared and none appears twice. */
+/* { dg-final { scan-assembler "clrm\t\{(r2, )?(r3, )?(r4, )?(r5, )?(r6, )?(r7, )?(r8, )?(r9, )?(r10, )?(fp, )?(ip, )?APSR\}" } } */
+/* Check that the right number of registers is cleared and thus only one
+ register is missing. */
+/* { dg-final { scan-assembler "clrm\t\{((r\[2-9\]|r10|fp|ip), ){10}APSR\}" } } */
+/* Check that no cleared register is used for blxns. */
+/* { dg-final { scan-assembler-not "clrm\t\{\[^\}\]\+(r\[2-9\]|r10|fp|ip),\[^\}\]\+\}.*blxns\t\\1" } } */
+/* Check for v8.1-m variant of erratum work-around. */
+/* { dg-final { scan-assembler "vscclrm\t\{vpr\}" } } */
+/* { dg-final { scan-assembler "vlldm\tsp" } } */
+/* { dg-final { scan-assembler "pop\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */
+
+/* Now we check that we use the correct intrinsic to call. */
+/* { dg-final { scan-assembler "blxns" } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-1.c
index 78bf5d3..fbc3de0 100644
--- a/gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-1.c
@@ -1,7 +1,8 @@
/* PR target/95524 */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512bw" } */
-/* { dg-final { scan-assembler-times "vpand\[^\n\]*%zmm" 3 } } */
+/* { dg-final { scan-assembler-times "vpand\[^\n\]*%zmm" 2 } } */
+/* { dg-final { scan-assembler-times "vpternlogd\[^\n\]*%zmm" 1 } } */
typedef char v64qi __attribute__ ((vector_size (64)));
typedef unsigned char v64uqi __attribute__ ((vector_size (64)));
@@ -11,7 +12,6 @@ foo_ashiftrt_512 (v64qi a)
return a >> 2;
}
/* { dg-final { scan-assembler-times "vpsraw\[^\n\]*%zmm" 1 } } */
-/* { dg-final { scan-assembler-times "vpxor\[^\n\]*%zmm" 1 } } */
/* { dg-final { scan-assembler-times "vpsubb\[^\n\]*%zmm" 1 } } */
__attribute__((noipa)) v64qi
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-10b.c b/gcc/testsuite/gcc.target/i386/pr100865-10b.c
index 77ace86..e5616d8 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-10b.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-10b.c
@@ -5,4 +5,3 @@
/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]%ymm\[0-9\]+, " 8 } } */
-/* { dg-final { scan-assembler-not "vzeroupper" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-4b.c b/gcc/testsuite/gcc.target/i386/pr100865-4b.c
index 80e9fdb..6d9cb91 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-4b.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-4b.c
@@ -5,7 +5,6 @@
/* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]%ymm\[0-9\]+, " 2 } } */
-/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
-/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
/* { dg-final { scan-assembler-not "vpbroadcastb\[\\t \]+%xmm\[0-9\]+, %ymm\[0-9\]+" } } */
/* { dg-final { scan-assembler-not "vmovdqa" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-6b.c b/gcc/testsuite/gcc.target/i386/pr100865-6b.c
index 35f2e96..9588249 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-6b.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-6b.c
@@ -4,9 +4,7 @@
#include "pr100865-6a.c"
/* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 8 { target ia32 } } } */
-/* { dg-final { scan-assembler-times "vmovdqu32\[\\t \]%ymm\[0-9\]+, " 8 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
-/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 8 } } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
/* { dg-final { scan-assembler-not "vpbroadcastd\[\\t \]+%xmm\[0-9\]+, %ymm\[0-9\]+" } } */
/* { dg-final { scan-assembler-not "vmovdqa" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr100865-7b.c b/gcc/testsuite/gcc.target/i386/pr100865-7b.c
index ad267c4..3b20c68 100644
--- a/gcc/testsuite/gcc.target/i386/pr100865-7b.c
+++ b/gcc/testsuite/gcc.target/i386/pr100865-7b.c
@@ -5,8 +5,6 @@
/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%r\[^\n\]*, %ymm\[0-9\]+" 1 { target { ! ia32 } } } } */
/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+\[^\n\]*, %ymm\[0-9\]+" 1 { target ia32 } } } */
-/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 16 { target ia32 } } } */
-/* { dg-final { scan-assembler-times "vmovdqu64\[\\t \]%ymm\[0-9\]+, " 16 { target { ! ia32 } } } } */
-/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */
-/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 16 } } */
+/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */
/* { dg-final { scan-assembler-not "vmovdqa" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr101989-1.c b/gcc/testsuite/gcc.target/i386/pr101989-1.c
new file mode 100644
index 0000000..594093e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101989-1.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-times "vpternlog" 6 } } */
+/* { dg-final { scan-assembler-not "vpxor" } } */
+/* { dg-final { scan-assembler-not "vpor" } } */
+/* { dg-final { scan-assembler-not "vpand" } } */
+
+#include<immintrin.h>
+__m256d
+__attribute__((noipa, target("avx512vl")))
+copysign2_pd(__m256d from, __m256d to) {
+ __m256i a = _mm256_castpd_si256(from);
+ __m256d avx_signbit = _mm256_castsi256_pd(_mm256_slli_epi64(_mm256_cmpeq_epi64(a, a), 63));
+ /* (avx_signbit & from) | (~avx_signbit & to) */
+ return _mm256_or_pd(_mm256_and_pd(avx_signbit, from), _mm256_andnot_pd(avx_signbit, to));
+}
+
+__m256i
+__attribute__((noipa, target("avx512vl")))
+foo (__m256i src1, __m256i src2, __m256i src3)
+{
+ return (src2 & ~src1) | (src3 & src1);
+}
+
+__m256i
+__attribute__ ((noipa, target("avx512vl")))
+foo1 (__m256i src1, __m256i src2, __m256i src3)
+{
+ return (src2 & src1) | (src3 & ~src1);
+}
+
+__m256i
+__attribute__ ((noipa, target("avx512vl")))
+foo2 (__m256i src1, __m256i src2, __m256i src3)
+{
+ return (src2 & src1) | (~src3 & src1);
+}
+
+__m256i
+__attribute__ ((noipa, target("avx512vl")))
+foo3 (__m256i src1, __m256i src2, __m256i src3)
+{
+ return (~src2 & src1) | (src3 & src1);
+}
+
+__m256i
+__attribute__ ((noipa, target("avx512vl")))
+foo4 (__m256i src1, __m256i src2, __m256i src3)
+{
+ return src3 & src2 ^ src1;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr101989-2.c b/gcc/testsuite/gcc.target/i386/pr101989-2.c
new file mode 100644
index 0000000..9d9759a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101989-2.c
@@ -0,0 +1,102 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx2 -mno-avx512f" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512VL
+
+#include "avx512f-helper.h"
+
+#include "pr101989-1.c"
+__m256d
+avx2_copysign2_pd (__m256d from, __m256d to) {
+ __m256i a = _mm256_castpd_si256(from);
+ __m256d avx_signbit = _mm256_castsi256_pd(_mm256_slli_epi64(_mm256_cmpeq_epi64(a, a), 63));
+ /* (avx_signbit & from) | (~avx_signbit & to) */
+ return _mm256_or_pd(_mm256_and_pd(avx_signbit, from), _mm256_andnot_pd(avx_signbit, to));
+}
+
+__m256i
+avx2_foo (__m256i src1, __m256i src2, __m256i src3)
+{
+ return (src2 & ~src1) | (src3 & src1);
+}
+
+__m256i
+avx2_foo1 (__m256i src1, __m256i src2, __m256i src3)
+{
+ return (src2 & src1) | (src3 & ~src1);
+}
+
+__m256i
+avx2_foo2 (__m256i src1, __m256i src2, __m256i src3)
+{
+ return (src2 & src1) | (~src3 & src1);
+}
+
+__m256i
+avx2_foo3 (__m256i src1, __m256i src2, __m256i src3)
+{
+ return (~src2 & src1) | (src3 & src1);
+}
+
+__m256i
+avx2_foo4 (__m256i src1, __m256i src2, __m256i src3)
+{
+ return src3 & src2 ^ src1;
+}
+
+
+void
+test_256 (void)
+{
+ union256i_q q1, q2, q3, res2, exp2;
+ union256d d1, d2, res1, exp1;
+ int i, sign = 1;
+
+ for (i = 0; i < 4; i++)
+ {
+ d1.a[i] = 12.34 * (i + 2000) * sign;
+ d2.a[i] = 56.78 * (i - 30) * sign;
+ q1.a[i] = 12 * (i + 2000) * sign;
+ q2.a[i] = 56 * (i - 30) * sign;
+ q3.a[i] = 90 * (i + 40) * sign;
+ res1.a[i] = DEFAULT_VALUE;
+ exp1.a[i] = DEFAULT_VALUE;
+ res2.a[i] = exp2.a[i] = -1;
+ sign = -sign;
+ }
+
+ exp1.x = avx2_copysign2_pd (d1.x, d2.x);
+ res1.x = copysign2_pd (d1.x, d2.x);
+ if (UNION_CHECK (256, d) (res1, exp1.a))
+ abort ();
+
+ exp2.x = avx2_foo1 (q1.x, q2.x, q3.x);
+ res2.x = foo1 (q1.x, q2.x, q3.x);
+ if (UNION_CHECK (256, i_q) (res2, exp2.a))
+ abort ();
+
+ exp2.x = avx2_foo2 (q1.x, q2.x, q3.x);
+ res2.x = foo2 (q1.x, q2.x, q3.x);
+ if (UNION_CHECK (256, i_q) (res2, exp2.a))
+ abort ();
+
+ exp2.x = avx2_foo3 (q1.x, q2.x, q3.x);
+ res2.x = foo3 (q1.x, q2.x, q3.x);
+ if (UNION_CHECK (256, i_q) (res2, exp2.a))
+ abort ();
+
+ exp2.x = avx2_foo4 (q1.x, q2.x, q3.x);
+ res2.x = foo4 (q1.x, q2.x, q3.x);
+ if (UNION_CHECK (256, i_q) (res2, exp2.a))
+ abort ();
+
+ exp2.x = avx2_foo (q1.x, q2.x, q3.x);
+ res2.x = foo (q1.x, q2.x, q3.x);
+ if (UNION_CHECK (256, i_q) (res2, exp2.a))
+ abort ();
+}
+
+static void
+test_128 ()
+{}
diff --git a/gcc/testsuite/gcc.target/i386/pr101989-broadcast-1.c b/gcc/testsuite/gcc.target/i386/pr101989-broadcast-1.c
new file mode 100644
index 0000000..d03d192
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr101989-broadcast-1.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpternlog" 4 } } */
+/* { dg-final { scan-assembler-times "\\\{1to4\\\}" 4 } } */
+#include<immintrin.h>
+extern long long C;
+__m256d
+copysign2_pd(__m256d from, __m256d to) {
+ __m256i a = _mm256_castpd_si256(from);
+ __m256d avx_signbit = _mm256_castsi256_pd(_mm256_slli_epi64(_mm256_cmpeq_epi64(a, a), 63));
+ /* (avx_signbit & from) | (~avx_signbit & to) */
+ return _mm256_or_pd(_mm256_and_pd(avx_signbit, from), _mm256_andnot_pd(avx_signbit, to));
+}
+
+__m256i
+mask_pternlog (__m256i A, __m256i B, __mmask8 U)
+{
+  return _mm256_mask_ternarylogic_epi64 (A, U, B, _mm256_set1_epi64x (C), 202);
+}
+
+__m256i
+maskz_pternlog (__m256i A, __m256i B, __mmask8 U)
+{
+  return _mm256_maskz_ternarylogic_epi64 (U, A, B, _mm256_set1_epi64x (C), 202);
+}
+
+__m256i
+none_pternlog (__m256i A, __m256i B)
+{
+  return _mm256_ternarylogic_epi64 (A, B, _mm256_set1_epi64x (C), 202);
+}
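
[Editor's note: the 202 immediate is not arbitrary — the vpternlog imm8 is the truth table of the desired three-input boolean function, obtained by evaluating it on the operand patterns A=0xF0, B=0xCC, C=0xAA, which enumerate all eight (a,b,c) bit combinations exactly once. A self-contained sketch deriving it for the bitwise select used throughout these tests; the function name is ours:

#include <stdio.h>

/* Per-bit select A ? B : C -- the pattern the tests expect to become
   a single vpternlog.  */
static unsigned
ternlog_imm (unsigned a, unsigned b, unsigned c)
{
  return ((a & b) | (~a & c)) & 0xff;
}

int
main (void)
{
  /* 0xF0/0xCC/0xAA walk all 8 input combinations bitwise, so the
     result is the imm8 truth table itself.  */
  printf ("%u\n", ternlog_imm (0xf0, 0xcc, 0xaa));  /* prints 202 */
  return 0;
}
]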
diff --git a/gcc/testsuite/gcc.target/i386/pr102021.c b/gcc/testsuite/gcc.target/i386/pr102021.c
new file mode 100644
index 0000000..6db3f57
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr102021.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=skylake-avx512" } */
+
+#include<immintrin.h>
+
+__m256i
+foo ()
+{
+ return _mm256_set1_epi16 (12);
+}
+
+/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%r\[^\n\]*, %ymm\[0-9\]+" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+\[^\n\]*, %ymm\[0-9\]+" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-not "vmovdqa" } } */
+/* { dg-final { scan-assembler-not "vzeroupper" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 66ce48d..06f5b1e 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4878,15 +4878,16 @@ proc check_effective_target_arm_cmse_ok {} {
proc check_effective_target_arm_cmse_hw { } {
return [check_runtime arm_cmse_hw_available {
- int __attribute__ ((cmse_nonsecure_entry)) ns_func(void)
- {
- return 0;
- }
int main (void)
{
- return ns_func();
- }
- } "-mcmse -Wl,--section-start,.gnu.sgstubs=0x00400000"]
+ unsigned id_pfr1;
+ asm ("ldr\t%0, =0xe000ed44\n" \
+ "ldr\t%0, [%0]\n" \
+ "sg" : "=l" (id_pfr1));
+ /* Exit with code 0 iff security extension is available. */
+ return !(id_pfr1 & 0xf0);
+ }
+ } "-mcmse"]
}
# Return 1 if the target supports executing MVE instructions, 0
# otherwise.
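
[Editor's note: a rough standalone C model of what the inline asm above probes, assuming an Armv8-M target with the System Control Block at its architected address; only the field test is modelled, not the sg instruction:

#include <stdint.h>

/* ID_PFR1 lives at 0xe000ed44; bits [7:4] are the Security field and
   are non-zero when the security extension (CMSE) is implemented,
   which is why the check returns !(id_pfr1 & 0xf0).  */
static int
security_extension_present (void)
{
  volatile uint32_t *id_pfr1 = (volatile uint32_t *) 0xe000ed44;
  return (*id_pfr1 & 0xf0) != 0;
}
]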
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index c521b43a..0c8d992 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -1519,6 +1519,13 @@ vect_analyze_loop_form (class loop *loop, vec_info_shared *shared)
stmt_vec_info inner_loop_cond_info
= loop_vinfo->lookup_stmt (inner_loop_cond);
STMT_VINFO_TYPE (inner_loop_cond_info) = loop_exit_ctrl_vec_info_type;
+      /* If we have an estimate on the number of iterations of the inner
+	 loop, use that to limit the scale for costing; otherwise use
+	 --param vect-inner-loop-cost-factor literally.  */
+ widest_int nit;
+ if (estimated_stmt_executions (loop->inner, &nit))
+ LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo)
+ = wi::smin (nit, param_vect_inner_loop_cost_factor).to_uhwi ();
}
gcc_assert (!loop->aux);
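
[Editor's note: in effect the new code clamps the costing scale; a minimal model of the computation, with names ours:

/* The inner-loop cost scale becomes the smaller of the iteration
   estimate and the --param vect-inner-loop-cost-factor value.  */
static unsigned
inner_cost_factor (unsigned estimated_nit, unsigned param_factor)
{
  return estimated_nit < param_factor ? estimated_nit : param_factor;
}
]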
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index d2f6a16..edc11c6 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -5233,7 +5233,8 @@ li_cost_vec_cmp (const void *a_, const void *b_)
static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo,
- vec<slp_instance> slp_instances)
+ vec<slp_instance> slp_instances,
+ loop_p orig_loop)
{
slp_instance instance;
int i;
@@ -5270,6 +5271,30 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo,
vector_costs.safe_splice (instance->cost_vec);
instance->cost_vec.release ();
}
+  /* When we're vectorizing an if-converted loop body with the
+     very-cheap cost model, make sure we vectorized all the
+     if-converted code.  */
+ bool force_not_profitable = false;
+ if (orig_loop && flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP)
+ {
+ gcc_assert (bb_vinfo->bbs.length () == 1);
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[0]);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ /* The costing above left us with DCEable vectorized scalar
+ stmts having the visited flag set. */
+ if (gimple_visited_p (gsi_stmt (gsi)))
+ continue;
+
+ if (gassign *ass = dyn_cast <gassign *> (gsi_stmt (gsi)))
+ if (gimple_assign_rhs_code (ass) == COND_EXPR)
+ {
+ force_not_profitable = true;
+ break;
+ }
+ }
+ }
+
/* Unset visited flag. */
stmt_info_for_cost *cost;
FOR_EACH_VEC_ELT (scalar_costs, i, cost)
@@ -5394,9 +5419,14 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo,
return false;
}
+ if (dump_enabled_p () && force_not_profitable)
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "not profitable because of unprofitable if-converted "
+ "scalar code\n");
+
scalar_costs.release ();
vector_costs.release ();
- return true;
+ return !force_not_profitable;
}
/* qsort comparator for lane defs. */
@@ -5810,7 +5840,8 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal,
static bool
vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
- vec<int> *dataref_groups, unsigned int n_stmts)
+ vec<int> *dataref_groups, unsigned int n_stmts,
+ loop_p orig_loop)
{
bb_vec_info bb_vinfo;
auto_vector_modes vector_modes;
@@ -5859,7 +5890,9 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
vect_location = instance->location ();
if (!unlimited_cost_model (NULL)
&& !vect_bb_vectorization_profitable_p
- (bb_vinfo, instance->subgraph_entries))
+ (bb_vinfo,
+ orig_loop ? BB_VINFO_SLP_INSTANCES (bb_vinfo)
+ : instance->subgraph_entries, orig_loop))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5877,7 +5910,9 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
"using SLP\n");
vectorized = true;
- vect_schedule_slp (bb_vinfo, instance->subgraph_entries);
+ vect_schedule_slp (bb_vinfo,
+ orig_loop ? BB_VINFO_SLP_INSTANCES (bb_vinfo)
+ : instance->subgraph_entries);
unsigned HOST_WIDE_INT bytes;
if (dump_enabled_p ())
@@ -5892,6 +5927,11 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
"basic block part vectorized using "
"variable length vectors\n");
}
+
+	  /* When we're called from loop vectorization, we're considering
+	     all subgraphs at once.  */
+ if (orig_loop)
+ break;
}
}
else
@@ -5959,7 +5999,7 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs,
true if anything in the basic-block was vectorized. */
static bool
-vect_slp_bbs (const vec<basic_block> &bbs)
+vect_slp_bbs (const vec<basic_block> &bbs, loop_p orig_loop)
{
vec<data_reference_p> datarefs = vNULL;
auto_vec<int> dataref_groups;
@@ -5989,18 +6029,20 @@ vect_slp_bbs (const vec<basic_block> &bbs)
++current_group;
}
- return vect_slp_region (bbs, datarefs, &dataref_groups, insns);
+ return vect_slp_region (bbs, datarefs, &dataref_groups, insns, orig_loop);
}
-/* Main entry for the BB vectorizer. Analyze and transform BB, returns
- true if anything in the basic-block was vectorized. */
+/* Special entry for the BB vectorizer.  Analyze and transform a single
+   if-converted BB, with ORIG_LOOP's body being the non-if-converted
+   representation.  Returns true if anything in the basic-block was
+ vectorized. */
bool
-vect_slp_bb (basic_block bb)
+vect_slp_if_converted_bb (basic_block bb, loop_p orig_loop)
{
auto_vec<basic_block> bbs;
bbs.safe_push (bb);
- return vect_slp_bbs (bbs);
+ return vect_slp_bbs (bbs, orig_loop);
}
/* Main entry for the BB vectorizer. Analyze and transform BB, returns
@@ -6051,7 +6093,7 @@ vect_slp_function (function *fun)
if (split && !bbs.is_empty ())
{
- r |= vect_slp_bbs (bbs);
+ r |= vect_slp_bbs (bbs, NULL);
bbs.truncate (0);
bbs.quick_push (bb);
}
@@ -6069,13 +6111,13 @@ vect_slp_function (function *fun)
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"splitting region at control altering "
"definition %G", last);
- r |= vect_slp_bbs (bbs);
+ r |= vect_slp_bbs (bbs, NULL);
bbs.truncate (0);
}
}
if (!bbs.is_empty ())
- r |= vect_slp_bbs (bbs);
+ r |= vect_slp_bbs (bbs, NULL);
free (rpo);
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 813f468..3aa3e2a 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -1033,10 +1033,7 @@ try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
only non-if-converted parts took part in BB vectorization. */
if (flag_tree_slp_vectorize != 0
&& loop_vectorized_call
- && ! loop->inner
- /* This would purely be a workaround and should be removed
- once PR100089 is fixed. */
- && flag_vect_cost_model != VECT_COST_MODEL_VERY_CHEAP)
+ && ! loop->inner)
{
basic_block bb = loop->header;
bool require_loop_vectorize = false;
@@ -1062,12 +1059,17 @@ try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
gimple_set_uid (stmt, -1);
gimple_set_visited (stmt, false);
}
- if (!require_loop_vectorize && vect_slp_bb (bb))
+ if (!require_loop_vectorize)
{
- fold_loop_internal_call (loop_vectorized_call,
- boolean_true_node);
- loop_vectorized_call = NULL;
- ret |= TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
+ tree arg = gimple_call_arg (loop_vectorized_call, 1);
+ class loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg));
+ if (vect_slp_if_converted_bb (bb, scalar_loop))
+ {
+ fold_loop_internal_call (loop_vectorized_call,
+ boolean_true_node);
+ loop_vectorized_call = NULL;
+ ret |= TODO_cleanup_cfg | TODO_update_ssa_only_virtuals;
+ }
}
}
/* If outer loop vectorization fails for LOOP_VECTORIZED guarded
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 9c2c29d..72e018e 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2087,7 +2087,7 @@ extern void vect_gather_slp_loads (vec_info *);
extern void vect_get_slp_defs (slp_tree, vec<tree> *);
extern void vect_get_slp_defs (vec_info *, slp_tree, vec<vec<tree> > *,
unsigned n = -1U);
-extern bool vect_slp_bb (basic_block);
+extern bool vect_slp_if_converted_bb (basic_block bb, loop_p orig_loop);
extern bool vect_slp_function (function *);
extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree);
extern stmt_vec_info vect_find_first_scalar_stmt_in_slp (slp_tree);
diff --git a/gcc/value-relation.cc b/gcc/value-relation.cc
index bcfe388..8edd98b 100644
--- a/gcc/value-relation.cc
+++ b/gcc/value-relation.cc
@@ -112,7 +112,7 @@ relation_kind rr_intersect_table[VREL_COUNT][VREL_COUNT] = {
{ NE_EXPR, LT_EXPR, LT_EXPR, GT_EXPR, GT_EXPR, VREL_EMPTY, VREL_EMPTY, NE_EXPR } };
-// Intersect relation R! with relation R2 and return the resulting relation.
+// Intersect relation R1 with relation R2 and return the resulting relation.
relation_kind
relation_intersect (relation_kind r1, relation_kind r2)
@@ -155,6 +155,39 @@ relation_union (relation_kind r1, relation_kind r2)
}
+// This table is used to determine transitivity between 2 relations.
+// (A relation0 B) and (B relation1 C) imply (A result C)
+
+relation_kind rr_transitive_table[VREL_COUNT][VREL_COUNT] = {
+// NONE, LT_EXPR, LE_EXPR, GT_EXPR, GE_EXPR, EMPTY, EQ_EXPR, NE_EXPR
+// VREL_NONE
+ { VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE },
+// LT_EXPR
+ { VREL_NONE, LT_EXPR, LT_EXPR, VREL_NONE, VREL_NONE, VREL_NONE, LT_EXPR, VREL_NONE },
+// LE_EXPR
+ { VREL_NONE, LT_EXPR, LE_EXPR, VREL_NONE, VREL_NONE, VREL_NONE, LE_EXPR, VREL_NONE },
+// GT_EXPR
+ { VREL_NONE, VREL_NONE, VREL_NONE, GT_EXPR, GT_EXPR, VREL_NONE, GT_EXPR, VREL_NONE },
+// GE_EXPR
+ { VREL_NONE, VREL_NONE, VREL_NONE, GT_EXPR, GE_EXPR, VREL_NONE, GE_EXPR, VREL_NONE },
+// VREL_EMPTY
+ { VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE },
+// EQ_EXPR
+ { VREL_NONE, LT_EXPR, LE_EXPR, GT_EXPR, GE_EXPR, VREL_NONE, EQ_EXPR, VREL_NONE },
+// NE_EXPR
+ { VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE } };
+
+// Apply transitive operation between relation R1 and relation R2, and
+// return the resulting relation, if any.
+
+relation_kind
+relation_transitive (relation_kind r1, relation_kind r2)
+{
+ vrel_range_assert (r1);
+ vrel_range_assert (r2);
+ return rr_transitive_table[r1 - VREL_FIRST][r2 - VREL_FIRST];
+}
+
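
[Editor's note: a self-contained model of the ordered quadrant of this table may help; it reproduces the LT/LE/GT/GE entries (the EQ_EXPR row, which passes the other relation through, and the VREL rows are omitted; all names are ours):

#include <assert.h>

enum rel { NONE, LT, LE, GT, GE };

/* (A a B) and (B b C) imply (A result C): two relations chain only
   when they point the same way, and the result is strict if either
   input is strict.  */
static enum rel
transitive (enum rel a, enum rel b)
{
  if ((a == LT || a == LE) && (b == LT || b == LE))
    return (a == LT || b == LT) ? LT : LE;
  if ((a == GT || a == GE) && (b == GT || b == GE))
    return (a == GT || b == GT) ? GT : GE;
  return NONE;  /* e.g. A < B and B > C imply nothing */
}

int
main (void)
{
  assert (transitive (LT, LE) == LT);
  assert (transitive (GE, GE) == GE);
  assert (transitive (LT, GT) == NONE);
  return 0;
}
]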
// -------------------------------------------------------------------------
// This class represents an equivalency set, and contains a link to the next
@@ -472,7 +505,7 @@ public:
bool union_ (value_relation &p);
bool intersect (value_relation &p);
void negate ();
- void swap ();
+ bool apply_transitive (const value_relation &rel);
void dump (FILE *f) const;
private:
@@ -517,14 +550,6 @@ value_relation::negate ()
related = relation_negate (related);
}
-// Modify the relation as if the operands were being swapped.
-
-void
-value_relation::swap ()
-{
- related = relation_swap (related);
-}
-
// Perform an intersection between 2 relations. *this &&= p.
bool
@@ -561,6 +586,73 @@ value_relation::union_ (value_relation &p)
return old != related;
}
+// Identify and apply any transitive relations between REL
+// and THIS. Return true if there was a transformation.
+
+bool
+value_relation::apply_transitive (const value_relation &rel)
+{
+ relation_kind k = VREL_NONE;
+
+  // Identify any common operand, and normalize the relations to
+  // the form: A < B, B < C produces A < C.
+ if (rel.op1 () == name2)
+ {
+ // A < B B < C
+ if (rel.op2 () == name1)
+ return false;
+ k = relation_transitive (kind (), rel.kind ());
+ if (k != VREL_NONE)
+ {
+ related = k;
+ name2 = rel.op2 ();
+ return true;
+ }
+ }
+ else if (rel.op1 () == name1)
+ {
+ // B > A B < C
+ if (rel.op2 () == name2)
+ return false;
+ k = relation_transitive (relation_swap (kind ()), rel.kind ());
+ if (k != VREL_NONE)
+ {
+ related = k;
+ name1 = name2;
+ name2 = rel.op2 ();
+ return true;
+ }
+ }
+ else if (rel.op2 () == name2)
+ {
+ // A < B C > B
+ if (rel.op1 () == name1)
+ return false;
+ k = relation_transitive (kind (), relation_swap (rel.kind ()));
+ if (k != VREL_NONE)
+ {
+ related = k;
+ name2 = rel.op1 ();
+ return true;
+ }
+ }
+ else if (rel.op2 () == name1)
+ {
+ // B > A C > B
+ if (rel.op1 () == name2)
+ return false;
+ k = relation_transitive (relation_swap (kind ()),
+ relation_swap (rel.kind ()));
+ if (k != VREL_NONE)
+ {
+ related = k;
+ name1 = name2;
+ name2 = rel.op1 ();
+ return true;
+ }
+ }
+ return false;
+}
// Dump the relation to file F.
@@ -597,6 +689,7 @@ relation_oracle::relation_oracle ()
m_relations.safe_grow_cleared (last_basic_block_for_fn (cfun) + 1);
m_relation_set = BITMAP_ALLOC (&m_bitmaps);
m_tmp = BITMAP_ALLOC (&m_bitmaps);
+ m_tmp2 = BITMAP_ALLOC (&m_bitmaps);
}
// Destruct a relation oracle.
@@ -669,10 +762,12 @@ relation_oracle::register_relation (edge e, relation_kind k, tree op1,
// Register relation K between OP1 and OP2 in block BB.
// This creates the record and searches for existing records in the dominator
// tree to merge with.
+// TRANSITIVE_P is true if this is being registered as a transitive operation,
+// in which case we should not try to register further transitives.
void
relation_oracle::register_relation (basic_block bb, relation_kind k, tree op1,
- tree op2)
+ tree op2, bool transitive_p)
{
gcc_checking_assert (k != VREL_NONE);
@@ -710,26 +805,160 @@ relation_oracle::register_relation (basic_block bb, relation_kind k, tree op1,
ptr->dump (dump_file);
fprintf (dump_file, "\n");
}
- return;
+ }
+ else
+ {
+ // Check for an existing relation further up the DOM chain.
+      // By including dominating relations, the first one found in any search
+ // will be the aggregate of all the previous ones.
+ curr = find_relation_dom (bb, v1, v2);
+ if (curr != VREL_NONE)
+ k = relation_intersect (curr, k);
+
+ bitmap_set_bit (bm, v1);
+ bitmap_set_bit (bm, v2);
+ bitmap_set_bit (m_relation_set, v1);
+ bitmap_set_bit (m_relation_set, v2);
+
+ ptr = (relation_chain *) obstack_alloc (&m_chain_obstack,
+ sizeof (relation_chain));
+ ptr->set_relation (k, op1, op2);
+ ptr->m_next = m_relations[bbi].m_head;
+      m_relations[bbi].m_head = ptr;
}
- // Check for an existing relation further up the DOM chain.
- // By including dominating relations, The first one found in any search
- // will be the aggregate of all the previous ones.
- curr = find_relation_dom (bb, v1, v2);
- if (curr != VREL_NONE)
- k = relation_intersect (curr, k);
-
- bitmap_set_bit (bm, v1);
- bitmap_set_bit (bm, v2);
- bitmap_set_bit (m_relation_set, v1);
- bitmap_set_bit (m_relation_set, v2);
-
- ptr = (relation_chain *) obstack_alloc (&m_chain_obstack,
- sizeof (relation_chain));
- ptr->set_relation (k, op1, op2);
- ptr->m_next = m_relations[bbi].m_head;
- m_relations[bbi].m_head = ptr;;
+ if (!transitive_p)
+ register_transitives (bb, *ptr);
+}
+
+// Starting at ROOT_BB search the DOM tree looking for relations which
+// may produce transitive relations to RELATION. EQUIV1 and EQUIV2 are
+// bitmaps for op1/op2 and any of their equivalences that should also be
+// considered.
+
+void
+relation_oracle::register_transitives (basic_block root_bb,
+ const value_relation &relation,
+ const_bitmap equiv1,
+ const_bitmap equiv2)
+{
+ basic_block bb;
+ for (bb = root_bb; bb; bb = get_immediate_dominator (CDI_DOMINATORS, bb))
+ {
+ int bbi = bb->index;
+      if (bbi >= (int) m_relations.length ())
+ continue;
+ const_bitmap bm = m_relations[bbi].m_names;
+ if (!bm)
+ continue;
+ if (!bitmap_intersect_p (bm, equiv1) && !bitmap_intersect_p (bm, equiv2))
+ continue;
+ // At least one of the 2 ops has a relation in this block.
+ relation_chain *ptr;
+ for (ptr = m_relations[bbi].m_head; ptr ; ptr = ptr->m_next)
+ {
+	  // In the presence of an equivalence, the 2 operands may not
+	  // naturally match.  I.e. with equivalence a_2 == b_3,
+	  // given c_1 < a_2 && b_3 < d_4,
+	  // convert the second relation (b_3 < d_4) to match any
+	  // equivalences found in the first relation.
+	  // I.e. convert b_3 < d_4 to a_2 < d_4, which then exposes the
+	  // transitive operation: c_1 < a_2 && a_2 < d_4 -> c_1 < d_4.
+
+ tree r1, r2;
+ tree p1 = ptr->op1 ();
+ tree p2 = ptr->op2 ();
+ // Find which equivalence is in the first operand.
+ if (bitmap_bit_p (equiv1, SSA_NAME_VERSION (p1)))
+ r1 = p1;
+ else if (bitmap_bit_p (equiv1, SSA_NAME_VERSION (p2)))
+ r1 = p2;
+ else
+ r1 = NULL_TREE;
+
+ // Find which equivalence is in the second operand.
+ if (bitmap_bit_p (equiv2, SSA_NAME_VERSION (p1)))
+ r2 = p1;
+ else if (bitmap_bit_p (equiv2, SSA_NAME_VERSION (p2)))
+ r2 = p2;
+ else
+ r2 = NULL_TREE;
+
+	  // Ignore if both are NULL (no relevant relation) or the same.
+ if (r1 == r2)
+ continue;
+
+	  // For any operand not in an equivalence, just take the real operand.
+ if (!r1)
+ r1 = relation.op1 ();
+ if (!r2)
+ r2 = relation.op2 ();
+
+ value_relation nr (relation.kind (), r1, r2);
+ if (nr.apply_transitive (*ptr))
+ {
+ register_relation (root_bb, nr.kind (), nr.op1 (), nr.op2 (),
+ true);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, " Registering transitive relation ");
+ nr.dump (dump_file);
+ fputc ('\n', dump_file);
+ }
+ }
+
+ }
+ }
+}
+
+// Find and register any transitive relations implied by RELATION occurring
+// in block BB.
+
+void
+relation_oracle::register_transitives (basic_block bb,
+ const value_relation &relation)
+{
+ // Only apply transitives to certain kinds of operations.
+ switch (relation.kind ())
+ {
+ case LE_EXPR:
+ case LT_EXPR:
+ case GT_EXPR:
+ case GE_EXPR:
+ break;
+ default:
+ return;
+ }
+
+  // Set up the bitmaps for op1 and op2; if there are no equivalences,
+  // set just op1 or op2 in its own bitmap.
+ const_bitmap equiv1 = equiv_set (relation.op1 (), bb);
+ const_bitmap equiv2 = equiv_set (relation.op2 (), bb);
+ if (equiv1)
+ {
+ if (equiv2)
+ register_transitives (bb, relation, equiv1, equiv2);
+ else
+ {
+ bitmap_clear (m_tmp);
+ bitmap_set_bit (m_tmp, SSA_NAME_VERSION (relation.op2 ()));
+ register_transitives (bb, relation, equiv1, m_tmp);
+ }
+ }
+ else if (equiv2)
+ {
+ bitmap_clear (m_tmp);
+ bitmap_set_bit (m_tmp, SSA_NAME_VERSION (relation.op1 ()));
+ register_transitives (bb, relation, m_tmp, equiv2);
+ }
+ else
+ {
+ bitmap_clear (m_tmp);
+ bitmap_clear (m_tmp2);
+ bitmap_set_bit (m_tmp, SSA_NAME_VERSION (relation.op1 ()));
+ bitmap_set_bit (m_tmp2, SSA_NAME_VERSION (relation.op2 ()));
+ register_transitives (bb, relation, m_tmp, m_tmp2);
+ }
}
// Find the relation between any ssa_name in B1 and any name in B2 in block BB.
diff --git a/gcc/value-relation.h b/gcc/value-relation.h
index 1148854..e0e2f82 100644
--- a/gcc/value-relation.h
+++ b/gcc/value-relation.h
@@ -143,7 +143,7 @@ public:
void dump (FILE *f, basic_block bb) const;
void dump (FILE *f) const;
private:
- bitmap m_tmp;
+ bitmap m_tmp, m_tmp2;
bitmap m_relation_set; // Index by ssa-name. True if a relation exists
vec <relation_chain_head> m_relations; // Index by BB, list of relations.
relation_kind find_relation_block (unsigned bb, const_bitmap b1,
@@ -153,7 +153,12 @@ private:
relation_kind find_relation_block (int bb, unsigned v1, unsigned v2,
relation_chain **obj = NULL);
relation_kind find_relation_dom (basic_block bb, unsigned v1, unsigned v2);
- void register_relation (basic_block bb, relation_kind k, tree op1, tree op2);
+ void register_relation (basic_block bb, relation_kind k, tree op1, tree op2,
+ bool transitive_p = false);
+ void register_transitives (basic_block, const class value_relation &);
+ void register_transitives (basic_block, const value_relation &, const_bitmap,
+ const_bitmap);
+
};
#endif /* GCC_VALUE_RELATION_H */