author     Martin Liska <mliska@suse.cz>   2021-08-24 16:42:47 +0200
committer  Martin Liska <mliska@suse.cz>   2021-08-24 16:42:47 +0200
commit     7572f9cd10edd3bc1889a8f513dbf77b7f4e470d (patch)
tree       2f77059468d7b182b29483f6fa674fe5d6550652 /gcc
parent     eb2de151c582a38efc53ce57416f7bd7a3a9c0eb (diff)
parent     8ce18a29ef717f5920ebf5dc1d9e84570a1827d4 (diff)
download   gcc-7572f9cd10edd3bc1889a8f513dbf77b7f4e470d.zip
           gcc-7572f9cd10edd3bc1889a8f513dbf77b7f4e470d.tar.gz
           gcc-7572f9cd10edd3bc1889a8f513dbf77b7f4e470d.tar.bz2
Merge branch 'master' into devel/sphinx
Diffstat (limited to 'gcc')
39 files changed, 1720 insertions, 110 deletions
diff --git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in
index 249995a..bcc9ebe 100644
--- a/gcc/config/arm/arm-cpus.in
+++ b/gcc/config/arm/arm-cpus.in
@@ -186,6 +186,9 @@ define feature quirk_armv6kz
 # Cortex-M3 LDRD quirk.
 define feature quirk_cm3_ldrd
 
+# v8-m/v8.1-m VLLDM errata.
+define feature quirk_vlldm
+
 # Don't use .cpu assembly directive
 define feature quirk_no_asmcpu
 
@@ -322,7 +325,7 @@ define implied vfp_base MVE MVE_FP ALL_FP
 # architectures.
 # xscale isn't really a 'quirk', but it isn't an architecture either and we
 # need to ignore it for matching purposes.
-define fgroup ALL_QUIRKS quirk_no_volatile_ce quirk_armv6kz quirk_cm3_ldrd xscale quirk_no_asmcpu
+define fgroup ALL_QUIRKS quirk_no_volatile_ce quirk_armv6kz quirk_cm3_ldrd quirk_vlldm xscale quirk_no_asmcpu
 
 define fgroup IGNORE_FOR_MULTILIB cdecp0 cdecp1 cdecp2 cdecp3 cdecp4 cdecp5 cdecp6 cdecp7
 
@@ -1571,6 +1574,7 @@ begin cpu cortex-m33
 architecture armv8-m.main+dsp+fp
 option nofp remove ALL_FP
 option nodsp remove armv7em
+ isa quirk_vlldm
 costs v7m
 end cpu cortex-m33
 
@@ -1580,6 +1584,7 @@ begin cpu cortex-m35p
 architecture armv8-m.main+dsp+fp
 option nofp remove ALL_FP
 option nodsp remove armv7em
+ isa quirk_vlldm
 costs v7m
 end cpu cortex-m35p
 
@@ -1591,7 +1596,7 @@ begin cpu cortex-m55
 option nomve remove mve mve_float
 option nofp remove ALL_FP mve_float
 option nodsp remove MVE mve_float
- isa quirk_no_asmcpu
+ isa quirk_no_asmcpu quirk_vlldm
 costs v7m
 vendor 41
 end cpu cortex-m55
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 11dafc7..5c92941 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -3616,6 +3616,15 @@ arm_option_override (void)
 	fix_cm3_ldrd = 0;
     }
 
+  /* Enable fix_vlldm by default if required.  */
+  if (fix_vlldm == 2)
+    {
+      if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_vlldm))
+	fix_vlldm = 1;
+      else
+	fix_vlldm = 0;
+    }
+
   /* Hot/Cold partitioning is not currently supported, since we can't
      handle literal pool placement in that case.  */
   if (flag_reorder_blocks_and_partition)
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 0646048..5d3f21b 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -132,9 +132,12 @@
 ; TARGET_32BIT, "t1" or "t2" to specify a specific Thumb mode.  "v6"
 ; for ARM or Thumb-2 with arm_arch6, and nov6 for ARM without
 ; arm_arch6.  "v6t2" for Thumb-2 with arm_arch6 and "v8mb" for ARMv8-M
-; Baseline.  This attribute is used to compute attribute "enabled",
+; Baseline.  "fix_vlldm" is for fixing the v8-m/v8.1-m VLLDM erratum.
+; This attribute is used to compute attribute "enabled",
 ; use type "any" to enable an alternative in all cases.
-(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,v6t2,v8mb,iwmmxt,iwmmxt2,armv6_or_vfpv3,neon,mve"
+(define_attr "arch" "any, a, t, 32, t1, t2, v6, nov6, v6t2, \
+		v8mb, fix_vlldm, iwmmxt, iwmmxt2, armv6_or_vfpv3, \
+		neon, mve"
   (const_string "any"))
 
 (define_attr "arch_enabled" "no,yes"
@@ -177,6 +180,10 @@
 	 (match_test "TARGET_THUMB1 && arm_arch8"))
     (const_string "yes")
 
+    (and (eq_attr "arch" "fix_vlldm")
+	 (match_test "fix_vlldm"))
+    (const_string "yes")
+
     (and (eq_attr "arch" "iwmmxt2")
 	 (match_test "TARGET_REALLY_IWMMXT2"))
     (const_string "yes")
diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt
index 7417b55..a7677ee 100644
--- a/gcc/config/arm/arm.opt
+++ b/gcc/config/arm/arm.opt
@@ -268,6 +268,10 @@
 Target Var(fix_cm3_ldrd) Init(2)
 Avoid overlapping destination and address registers on LDRD instructions
 that may trigger Cortex-M3 errata.
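The option added just below follows GCC's usual three-state pattern: Init(2) marks -mfix-cmse-cve-2021-35465 as "not set by the user", and the arm_option_override hunk above resolves it to 1 or 0 from the CPU's quirk_vlldm bit. As a rough sketch (not part of this commit; names and flags illustrative), this is the kind of CMSE code whose non-secure calls are bracketed by the vlstm/vlldm pair the fix rewrites:

/* Sketch only.  Built with something like
     arm-none-eabi-gcc -mcmse -mcpu=cortex-m55 -O2
   GCC saves FP state lazily with vlstm before the blxns to the
   non-secure function and restores it with vlldm on return; with the
   fix enabled (the default for cortex-m55), the vlldm is preceded by
   vscclrm {vpr}, as the new vfp.md alternative below shows.  */

int __attribute__((cmse_nonsecure_call)) (*ns_callback) (int);

int
call_nonsecure (int arg)
{
  return ns_callback (arg);	/* vlstm ... blxns ... vscclrm; vlldm */
}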
+mfix-cmse-cve-2021-35465
+Target Var(fix_vlldm) Init(2)
+Mitigate issues with VLLDM on some M-profile devices (CVE-2021-35465).
+
 munaligned-access
 Target Var(unaligned_access) Init(2) Save
 Enable unaligned word and halfword accesses to packed data.
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index 93e96369..f0030a8 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -1703,12 +1703,15 @@
   (set_attr "type" "mov_reg")]
 )
 
+;; Both this and the next instruction are treated by GCC in the same
+;; way as a blockage pattern.  That's perhaps stronger than it needs
+;; to be, but we do not want accesses to the VFP register bank to be
+;; moved across either instruction.
+
 (define_insn "lazy_store_multiple_insn"
-  [(set (match_operand:SI 0 "s_register_operand" "+&rk")
-	(post_dec:SI (match_dup 0)))
-   (unspec_volatile [(const_int 0)
-		     (mem:SI (post_dec:SI (match_dup 0)))]
-		    VUNSPEC_VLSTM)]
+  [(unspec_volatile
+    [(mem:BLK (match_operand:SI 0 "s_register_operand" "rk"))]
+    VUNSPEC_VLSTM)]
   "use_cmse && reload_completed"
   "vlstm%?\\t%0"
   [(set_attr "predicable" "yes")
@@ -1716,14 +1719,16 @@
 )
 
 (define_insn "lazy_load_multiple_insn"
-  [(set (match_operand:SI 0 "s_register_operand" "+&rk")
-	(post_inc:SI (match_dup 0)))
-   (unspec_volatile:SI [(const_int 0)
-			(mem:SI (match_dup 0))]
-		       VUNSPEC_VLLDM)]
+  [(unspec_volatile
+    [(mem:BLK (match_operand:SI 0 "s_register_operand" "rk,rk"))]
+    VUNSPEC_VLLDM)]
   "use_cmse && reload_completed"
-  "vlldm%?\\t%0"
-  [(set_attr "predicable" "yes")
+  "@
+   vscclrm\\t{vpr}\;vlldm\\t%0
+   vlldm\\t%0"
+  [(set_attr "arch" "fix_vlldm,*")
+   (set_attr "predicable" "no")
+   (set_attr "length" "8,4")
   (set_attr "type" "load_4")]
 )
diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 9bf13db..2500dbf 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -579,19 +579,10 @@ ix86_expand_vector_move (machine_mode mode, rtx operands[])
 	{
 	  /* Broadcast to XMM/YMM/ZMM register from an integer
 	     constant or scalar mem.  */
-	  /* Hard registers are used for 2 purposes:
-	     1. Prevent stack realignment when the original code
-		doesn't use vector registers, which is the same for
-		memcpy and memset.
-	     2. Prevent combine to convert constant broadcast to
-		load from constant pool.  */
-	  op1 = ix86_gen_scratch_sse_rtx (mode);
+	  op1 = gen_reg_rtx (mode);
 	  if (FLOAT_MODE_P (mode)
 	      || (!TARGET_64BIT && GET_MODE_INNER (mode) == DImode))
-	    {
-	      first = force_const_mem (GET_MODE_INNER (mode), first);
-	      op1 = gen_reg_rtx (mode);
-	    }
+	    first = force_const_mem (GET_MODE_INNER (mode), first);
 	  bool ok = ix86_expand_vector_init_duplicate (false, mode,
 						       op1, first);
 	  gcc_assert (ok);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 5bff131..ebec866 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -20542,6 +20542,11 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
     case UNSPEC:
       if (XINT (x, 1) == UNSPEC_TP)
 	*total = 0;
+      else if (XINT (x, 1) == UNSPEC_VTERNLOG)
+	{
+	  *total = cost->sse_op;
+	  return true;
+	}
       return false;
 
     case VEC_SELECT:
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 11ac8d0..6511422 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1716,6 +1716,8 @@ typedef struct ix86_args {
 
 #define LEGITIMATE_PIC_OPERAND_P(X) legitimate_pic_operand_p (X)
 
+#define STRIP_UNARY(X) (UNARY_P (X) ?
XEXP (X, 0) : X) + #define SYMBOLIC_CONST(X) \ (GET_CODE (X) == SYMBOL_REF \ || GET_CODE (X) == LABEL_REF \ diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 9321f33..df5acb4 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -1044,6 +1044,13 @@ (ior (match_test "op == const1_rtx") (match_test "op == constm1_rtx"))))) +;; True for registers, or (not: registers). Used to optimize 3-operand +;; bitwise operation. +(define_predicate "reg_or_notreg_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "not") + (match_test "register_operand (XEXP (op, 0), mode)")))) + ;; True if OP is acceptable as operand of DImode shift expander. (define_predicate "shiftdi_operand" (if_then_else (match_test "TARGET_64BIT") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 95f9582..03fc2df 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -933,7 +933,9 @@ ;; Mapping of vector modes to VPTERNLOG suffix (define_mode_attr ternlogsuffix [(V8DI "q") (V4DI "q") (V2DI "q") + (V8DF "q") (V4DF "q") (V2DF "q") (V16SI "d") (V8SI "d") (V4SI "d") + (V16SF "d") (V8SF "d") (V4SF "d") (V32HI "d") (V16HI "d") (V8HI "d") (V64QI "d") (V32QI "d") (V16QI "d")]) @@ -10032,7 +10034,7 @@ (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "0") (match_operand:VI48_AVX512VL 2 "register_operand" "v") - (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm") + (match_operand:VI48_AVX512VL 3 "bcst_vector_operand" "vmBr") (match_operand:SI 4 "const_0_to_255_operand")] UNSPEC_VTERNLOG))] "TARGET_AVX512F" @@ -10041,13 +10043,245 @@ (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) +(define_insn "*<avx512>_vternlog<mode>_all" + [(set (match_operand:V 0 "register_operand" "=v") + (unspec:V + [(match_operand:V 1 "register_operand" "0") + (match_operand:V 2 "register_operand" "v") + (match_operand:V 3 "bcst_vector_operand" "vmBr") + (match_operand:SI 4 "const_0_to_255_operand")] + UNSPEC_VTERNLOG))] + "TARGET_AVX512F" + "vpternlog<ternlogsuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}" + [(set_attr "type" "sselog") + (set_attr "prefix" "evex") + (set_attr "mode" "<sseinsnmode>")]) + +;; There must be lots of other combinations like +;; +;; (any_logic:V +;; (any_logic:V op1 op2) +;; (any_logic:V op1 op3)) +;; +;; (any_logic:V +;; (any_logic:V +;; (any_logic:V op1, op2) +;; op3) +;; op1) +;; +;; and so on. + +(define_code_iterator any_logic1 [and ior xor]) +(define_code_iterator any_logic2 [and ior xor]) +(define_code_attr logic_op [(and "&") (ior "|") (xor "^")]) + +(define_insn_and_split "*<avx512>_vpternlog<mode>_1" + [(set (match_operand:V 0 "register_operand") + (any_logic:V + (any_logic1:V + (match_operand:V 1 "reg_or_notreg_operand") + (match_operand:V 2 "reg_or_notreg_operand")) + (any_logic2:V + (match_operand:V 3 "reg_or_notreg_operand") + (match_operand:V 4 "reg_or_notreg_operand"))))] + "(<MODE_SIZE> == 64 || TARGET_AVX512VL) + && ix86_pre_reload_split () + && (rtx_equal_p (STRIP_UNARY (operands[1]), + STRIP_UNARY (operands[4])) + || rtx_equal_p (STRIP_UNARY (operands[2]), + STRIP_UNARY (operands[4])) + || rtx_equal_p (STRIP_UNARY (operands[1]), + STRIP_UNARY (operands[3])) + || rtx_equal_p (STRIP_UNARY (operands[2]), + STRIP_UNARY (operands[3])))" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:V + [(match_dup 6) + (match_dup 2) + (match_dup 1) + (match_dup 5)] + UNSPEC_VTERNLOG))] +{ + /* VPTERNLOGD reg6, reg2, reg1, imm8. 
*/ + int reg6 = 0xF0; + int reg2 = 0xCC; + int reg1 = 0xAA; + int reg3 = 0; + int reg4 = 0; + int reg_mask, tmp1, tmp2; + if (rtx_equal_p (STRIP_UNARY (operands[1]), + STRIP_UNARY (operands[4]))) + { + reg4 = reg1; + reg3 = reg6; + operands[6] = operands[3]; + } + else if (rtx_equal_p (STRIP_UNARY (operands[2]), + STRIP_UNARY (operands[4]))) + { + reg4 = reg2; + reg3 = reg6; + operands[6] = operands[3]; + } + else if (rtx_equal_p (STRIP_UNARY (operands[1]), + STRIP_UNARY (operands[3]))) + { + reg4 = reg6; + reg3 = reg1; + operands[6] = operands[4]; + } + else + { + reg4 = reg6; + reg3 = reg2; + operands[6] = operands[4]; + } + + reg1 = UNARY_P (operands[1]) ? ~reg1 : reg1; + reg2 = UNARY_P (operands[2]) ? ~reg2 : reg2; + reg3 = UNARY_P (operands[3]) ? ~reg3 : reg3; + reg4 = UNARY_P (operands[4]) ? ~reg4 : reg4; + + tmp1 = reg1 <any_logic1:logic_op> reg2; + tmp2 = reg3 <any_logic2:logic_op> reg4; + reg_mask = tmp1 <any_logic:logic_op> tmp2; + reg_mask &= 0xFF; + + operands[1] = STRIP_UNARY (operands[1]); + operands[2] = STRIP_UNARY (operands[2]); + operands[6] = STRIP_UNARY (operands[6]); + operands[5] = GEN_INT (reg_mask); +}) + +(define_insn_and_split "*<avx512>_vpternlog<mode>_2" + [(set (match_operand:V 0 "register_operand") + (any_logic:V + (any_logic1:V + (any_logic2:V + (match_operand:V 1 "reg_or_notreg_operand") + (match_operand:V 2 "reg_or_notreg_operand")) + (match_operand:V 3 "reg_or_notreg_operand")) + (match_operand:V 4 "reg_or_notreg_operand")))] + "(<MODE_SIZE> == 64 || TARGET_AVX512VL) + && ix86_pre_reload_split () + && (rtx_equal_p (STRIP_UNARY (operands[1]), + STRIP_UNARY (operands[4])) + || rtx_equal_p (STRIP_UNARY (operands[2]), + STRIP_UNARY (operands[4])) + || rtx_equal_p (STRIP_UNARY (operands[1]), + STRIP_UNARY (operands[3])) + || rtx_equal_p (STRIP_UNARY (operands[2]), + STRIP_UNARY (operands[3])))" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:V + [(match_dup 6) + (match_dup 2) + (match_dup 1) + (match_dup 5)] + UNSPEC_VTERNLOG))] +{ + /* VPTERNLOGD reg6, reg2, reg1, imm8. */ + int reg6 = 0xF0; + int reg2 = 0xCC; + int reg1 = 0xAA; + int reg3 = 0; + int reg4 = 0; + int reg_mask, tmp1, tmp2; + if (rtx_equal_p (STRIP_UNARY (operands[1]), + STRIP_UNARY (operands[4]))) + { + reg4 = reg1; + reg3 = reg6; + operands[6] = operands[3]; + } + else if (rtx_equal_p (STRIP_UNARY (operands[2]), + STRIP_UNARY (operands[4]))) + { + reg4 = reg2; + reg3 = reg6; + operands[6] = operands[3]; + } + else if (rtx_equal_p (STRIP_UNARY (operands[1]), + STRIP_UNARY (operands[3]))) + { + reg4 = reg6; + reg3 = reg1; + operands[6] = operands[4]; + } + else + { + reg4 = reg6; + reg3 = reg2; + operands[6] = operands[4]; + } + + reg1 = UNARY_P (operands[1]) ? ~reg1 : reg1; + reg2 = UNARY_P (operands[2]) ? ~reg2 : reg2; + reg3 = UNARY_P (operands[3]) ? ~reg3 : reg3; + reg4 = UNARY_P (operands[4]) ? 
~reg4 : reg4; + + tmp1 = reg1 <any_logic2:logic_op> reg2; + tmp2 = tmp1 <any_logic1:logic_op> reg3; + reg_mask = tmp2 <any_logic:logic_op> reg4; + reg_mask &= 0xFF; + + operands[1] = STRIP_UNARY (operands[1]); + operands[2] = STRIP_UNARY (operands[2]); + operands[6] = STRIP_UNARY (operands[6]); + operands[5] = GEN_INT (reg_mask); +}) + +(define_insn_and_split "*<avx512>_vpternlog<mode>_3" + [(set (match_operand:V 0 "register_operand") + (any_logic:V + (any_logic1:V + (match_operand:V 1 "reg_or_notreg_operand") + (match_operand:V 2 "reg_or_notreg_operand")) + (match_operand:V 3 "reg_or_notreg_operand")))] + "(<MODE_SIZE> == 64 || TARGET_AVX512VL) + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (unspec:V + [(match_dup 3) + (match_dup 2) + (match_dup 1) + (match_dup 4)] + UNSPEC_VTERNLOG))] +{ + /* VPTERNLOGD reg3, reg2, reg1, imm8. */ + int reg3 = 0xF0; + int reg2 = 0xCC; + int reg1 = 0xAA; + int reg_mask, tmp1; + + reg1 = UNARY_P (operands[1]) ? ~reg1 : reg1; + reg2 = UNARY_P (operands[2]) ? ~reg2 : reg2; + reg3 = UNARY_P (operands[3]) ? ~reg3 : reg3; + + tmp1 = reg1 <any_logic1:logic_op> reg2; + reg_mask = tmp1 <any_logic:logic_op> reg3; + reg_mask &= 0xFF; + + operands[1] = STRIP_UNARY (operands[1]); + operands[2] = STRIP_UNARY (operands[2]); + operands[3] = STRIP_UNARY (operands[3]); + operands[4] = GEN_INT (reg_mask); +}) + + (define_insn "<avx512>_vternlog<mode>_mask" [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") (vec_merge:VI48_AVX512VL (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "0") (match_operand:VI48_AVX512VL 2 "register_operand" "v") - (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm") + (match_operand:VI48_AVX512VL 3 "bcst_vector_operand" "vmBr") (match_operand:SI 4 "const_0_to_255_operand")] UNSPEC_VTERNLOG) (match_dup 1) diff --git a/gcc/config/rs6000/rs6000-builtin-new.def b/gcc/config/rs6000/rs6000-builtin-new.def index 61f5b94..2a2c913 100644 --- a/gcc/config/rs6000/rs6000-builtin-new.def +++ b/gcc/config/rs6000/rs6000-builtin-new.def @@ -1961,3 +1961,480 @@ const vsll __builtin_vsx_xxspltd_2di (vsll, const int<1>); XXSPLTD_V2DI vsx_xxspltd_v2di {} + + +; Power7 builtins (ISA 2.06). +[power7] + const unsigned int __builtin_addg6s (unsigned int, unsigned int); + ADDG6S addg6s {} + + const signed long __builtin_bpermd (signed long, signed long); + BPERMD bpermd_di {} + + const unsigned int __builtin_cbcdtd (unsigned int); + CBCDTD cbcdtd {} + + const unsigned int __builtin_cdtbcd (unsigned int); + CDTBCD cdtbcd {} + + const signed int __builtin_divwe (signed int, signed int); + DIVWE dive_si {} + + const unsigned int __builtin_divweu (unsigned int, unsigned int); + DIVWEU diveu_si {} + + const vsq __builtin_pack_vector_int128 (unsigned long long, unsigned long long); + PACK_V1TI packv1ti {} + + void __builtin_ppc_speculation_barrier (); + SPECBARR speculation_barrier {} + + const unsigned long __builtin_unpack_vector_int128 (vsq, const int<1>); + UNPACK_V1TI unpackv1ti {} + + +; Power7 builtins requiring 64-bit GPRs (even with 32-bit addressing). +[power7-64] + const signed long long __builtin_divde (signed long long, signed long long); + DIVDE dive_di {} + + const unsigned long long __builtin_divdeu (unsigned long long, unsigned long long); + DIVDEU diveu_di {} + + +; Power8 vector built-ins. 
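The imm8 arguments computed by the vpternlog splitters above are ordinary truth-table arithmetic: each source operand is assigned a canonical 8-bit constant (0xF0, 0xCC, 0xAA), a (not ...) operand complements its constant, and evaluating the logic expression on those constants yields the immediate. A standalone sketch of the derivation (hypothetical helper program, not GCC code):

#include <stdio.h>

/* Canonical per-operand truth tables, matching reg6/reg2/reg1 above.  */
#define OP1 0xF0u	/* first source  */
#define OP2 0xCCu	/* second source */
#define OP3 0xAAu	/* third source  */

int
main (void)
{
  /* Bit-select (a & b) | (~a & c), the copysign idiom used in the
     pr101989 tests: evaluating it on the canonical constants gives
     the vpternlog immediate.  */
  unsigned imm = ((OP1 & OP2) | (~OP1 & OP3)) & 0xFFu;
  printf ("imm8 = 0x%02X (%u)\n", imm, imm);	/* 0xCA, i.e. 202 */
  return 0;
}

This is the same constant, 202, that the pr101989-broadcast-1.c tests later in the diff pass to _mm256_ternarylogic_epi64.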
+[power8-vector] + const vsll __builtin_altivec_abs_v2di (vsll); + ABS_V2DI absv2di2 {} + + const vsc __builtin_altivec_bcddiv10_v16qi (vsc); + BCDDIV10_V16QI bcddiv10_v16qi {} + + const vsc __builtin_altivec_bcdmul10_v16qi (vsc); + BCDMUL10_V16QI bcdmul10_v16qi {} + + const vsc __builtin_altivec_eqv_v16qi (vsc, vsc); + EQV_V16QI eqvv16qi3 {} + + const vuc __builtin_altivec_eqv_v16qi_uns (vuc, vuc); + EQV_V16QI_UNS eqvv16qi3 {} + + const vsq __builtin_altivec_eqv_v1ti (vsq, vsq); + EQV_V1TI eqvv1ti3 {} + + const vuq __builtin_altivec_eqv_v1ti_uns (vuq, vuq); + EQV_V1TI_UNS eqvv1ti3 {} + + const vd __builtin_altivec_eqv_v2df (vd, vd); + EQV_V2DF eqvv2df3 {} + + const vsll __builtin_altivec_eqv_v2di (vsll, vsll); + EQV_V2DI eqvv2di3 {} + + const vull __builtin_altivec_eqv_v2di_uns (vull, vull); + EQV_V2DI_UNS eqvv2di3 {} + + const vf __builtin_altivec_eqv_v4sf (vf, vf); + EQV_V4SF eqvv4sf3 {} + + const vsi __builtin_altivec_eqv_v4si (vsi, vsi); + EQV_V4SI eqvv4si3 {} + + const vui __builtin_altivec_eqv_v4si_uns (vui, vui); + EQV_V4SI_UNS eqvv4si3 {} + + const vss __builtin_altivec_eqv_v8hi (vss, vss); + EQV_V8HI eqvv8hi3 {} + + const vus __builtin_altivec_eqv_v8hi_uns (vus, vus); + EQV_V8HI_UNS eqvv8hi3 {} + + const vsc __builtin_altivec_nand_v16qi (vsc, vsc); + NAND_V16QI nandv16qi3 {} + + const vuc __builtin_altivec_nand_v16qi_uns (vuc, vuc); + NAND_V16QI_UNS nandv16qi3 {} + + const vsq __builtin_altivec_nand_v1ti (vsq, vsq); + NAND_V1TI nandv1ti3 {} + + const vuq __builtin_altivec_nand_v1ti_uns (vuq, vuq); + NAND_V1TI_UNS nandv1ti3 {} + + const vd __builtin_altivec_nand_v2df (vd, vd); + NAND_V2DF nandv2df3 {} + + const vsll __builtin_altivec_nand_v2di (vsll, vsll); + NAND_V2DI nandv2di3 {} + + const vull __builtin_altivec_nand_v2di_uns (vull, vull); + NAND_V2DI_UNS nandv2di3 {} + + const vf __builtin_altivec_nand_v4sf (vf, vf); + NAND_V4SF nandv4sf3 {} + + const vsi __builtin_altivec_nand_v4si (vsi, vsi); + NAND_V4SI nandv4si3 {} + + const vui __builtin_altivec_nand_v4si_uns (vui, vui); + NAND_V4SI_UNS nandv4si3 {} + + const vss __builtin_altivec_nand_v8hi (vss, vss); + NAND_V8HI nandv8hi3 {} + + const vus __builtin_altivec_nand_v8hi_uns (vus, vus); + NAND_V8HI_UNS nandv8hi3 {} + + const vsc __builtin_altivec_neg_v16qi (vsc); + NEG_V16QI negv16qi2 {} + + const vd __builtin_altivec_neg_v2df (vd); + NEG_V2DF negv2df2 {} + + const vsll __builtin_altivec_neg_v2di (vsll); + NEG_V2DI negv2di2 {} + + const vf __builtin_altivec_neg_v4sf (vf); + NEG_V4SF negv4sf2 {} + + const vsi __builtin_altivec_neg_v4si (vsi); + NEG_V4SI negv4si2 {} + + const vss __builtin_altivec_neg_v8hi (vss); + NEG_V8HI negv8hi2 {} + + const vsc __builtin_altivec_orc_v16qi (vsc, vsc); + ORC_V16QI orcv16qi3 {} + + const vuc __builtin_altivec_orc_v16qi_uns (vuc, vuc); + ORC_V16QI_UNS orcv16qi3 {} + + const vsq __builtin_altivec_orc_v1ti (vsq, vsq); + ORC_V1TI orcv1ti3 {} + + const vuq __builtin_altivec_orc_v1ti_uns (vuq, vuq); + ORC_V1TI_UNS orcv1ti3 {} + + const vd __builtin_altivec_orc_v2df (vd, vd); + ORC_V2DF orcv2df3 {} + + const vsll __builtin_altivec_orc_v2di (vsll, vsll); + ORC_V2DI orcv2di3 {} + + const vull __builtin_altivec_orc_v2di_uns (vull, vull); + ORC_V2DI_UNS orcv2di3 {} + + const vf __builtin_altivec_orc_v4sf (vf, vf); + ORC_V4SF orcv4sf3 {} + + const vsi __builtin_altivec_orc_v4si (vsi, vsi); + ORC_V4SI orcv4si3 {} + + const vui __builtin_altivec_orc_v4si_uns (vui, vui); + ORC_V4SI_UNS orcv4si3 {} + + const vss __builtin_altivec_orc_v8hi (vss, vss); + ORC_V8HI orcv8hi3 {} + + const vus 
__builtin_altivec_orc_v8hi_uns (vus, vus); + ORC_V8HI_UNS orcv8hi3 {} + + const vsc __builtin_altivec_vclzb (vsc); + VCLZB clzv16qi2 {} + + const vsll __builtin_altivec_vclzd (vsll); + VCLZD clzv2di2 {} + + const vss __builtin_altivec_vclzh (vss); + VCLZH clzv8hi2 {} + + const vsi __builtin_altivec_vclzw (vsi); + VCLZW clzv4si2 {} + + const vuc __builtin_altivec_vgbbd (vuc); + VGBBD p8v_vgbbd {} + + const vsq __builtin_altivec_vaddcuq (vsq, vsq); + VADDCUQ altivec_vaddcuq {} + + const vsq __builtin_altivec_vaddecuq (vsq, vsq, vsq); + VADDECUQ altivec_vaddecuq {} + + const vsq __builtin_altivec_vaddeuqm (vsq, vsq, vsq); + VADDEUQM altivec_vaddeuqm {} + + const vsll __builtin_altivec_vaddudm (vsll, vsll); + VADDUDM addv2di3 {} + + const vsq __builtin_altivec_vadduqm (vsq, vsq); + VADDUQM altivec_vadduqm {} + + const vsll __builtin_altivec_vbpermq (vsc, vsc); + VBPERMQ altivec_vbpermq {} + + const vsc __builtin_altivec_vbpermq2 (vsc, vsc); + VBPERMQ2 altivec_vbpermq2 {} + + const vsll __builtin_altivec_vmaxsd (vsll, vsll); + VMAXSD smaxv2di3 {} + + const vull __builtin_altivec_vmaxud (vull, vull); + VMAXUD umaxv2di3 {} + + const vsll __builtin_altivec_vminsd (vsll, vsll); + VMINSD sminv2di3 {} + + const vull __builtin_altivec_vminud (vull, vull); + VMINUD uminv2di3 {} + + const vd __builtin_altivec_vmrgew_v2df (vd, vd); + VMRGEW_V2DF p8_vmrgew_v2df {} + + const vsll __builtin_altivec_vmrgew_v2di (vsll, vsll); + VMRGEW_V2DI p8_vmrgew_v2di {} + + const vf __builtin_altivec_vmrgew_v4sf (vf, vf); + VMRGEW_V4SF p8_vmrgew_v4sf {} + + const vsi __builtin_altivec_vmrgew_v4si (vsi, vsi); + VMRGEW_V4SI p8_vmrgew_v4si {} + + const vd __builtin_altivec_vmrgow_v2df (vd, vd); + VMRGOW_V2DF p8_vmrgow_v2df {} + + const vsll __builtin_altivec_vmrgow_v2di (vsll, vsll); + VMRGOW_V2DI p8_vmrgow_v2di {} + + const vf __builtin_altivec_vmrgow_v4sf (vf, vf); + VMRGOW_V4SF p8_vmrgow_v4sf {} + + const vsi __builtin_altivec_vmrgow_v4si (vsi, vsi); + VMRGOW_V4SI p8_vmrgow_v4si {} + + const vsc __builtin_altivec_vpermxor (vsc, vsc, vsc); + VPERMXOR altivec_vpermxor {} + + const vsi __builtin_altivec_vpksdss (vsll, vsll); + VPKSDSS altivec_vpksdss {} + + const vsi __builtin_altivec_vpksdus (vsll, vsll); + VPKSDUS altivec_vpksdus {} + + const vsi __builtin_altivec_vpkudum (vsll, vsll); + VPKUDUM altivec_vpkudum {} + + const vsi __builtin_altivec_vpkudus (vsll, vsll); + VPKUDUS altivec_vpkudus {} + + const vsc __builtin_altivec_vpmsumb (vsc, vsc); + VPMSUMB_A crypto_vpmsumb {} + + const vsll __builtin_altivec_vpmsumd (vsll, vsll); + VPMSUMD_A crypto_vpmsumd {} + + const vss __builtin_altivec_vpmsumh (vss, vss); + VPMSUMH_A crypto_vpmsumh {} + + const vsi __builtin_altivec_vpmsumw (vsi, vsi); + VPMSUMW_A crypto_vpmsumw {} + + const vsc __builtin_altivec_vpopcntb (vsc); + VPOPCNTB popcountv16qi2 {} + + const vsll __builtin_altivec_vpopcntd (vsll); + VPOPCNTD popcountv2di2 {} + + const vss __builtin_altivec_vpopcnth (vss); + VPOPCNTH popcountv8hi2 {} + + const vsc __builtin_altivec_vpopcntub (vsc); + VPOPCNTUB popcountv16qi2 {} + + const vsll __builtin_altivec_vpopcntud (vsll); + VPOPCNTUD popcountv2di2 {} + + const vss __builtin_altivec_vpopcntuh (vss); + VPOPCNTUH popcountv8hi2 {} + + const vsi __builtin_altivec_vpopcntuw (vsi); + VPOPCNTUW popcountv4si2 {} + + const vsi __builtin_altivec_vpopcntw (vsi); + VPOPCNTW popcountv4si2 {} + + const vsll __builtin_altivec_vrld (vsll, vsll); + VRLD vrotlv2di3 {} + + const vsll __builtin_altivec_vsld (vsll, vsll); + VSLD vashlv2di3 {} + + const vsll __builtin_altivec_vsrad (vsll, 
vsll); + VSRAD vashrv2di3 {} + + const vsll __builtin_altivec_vsrd (vsll, vull); + VSRD vlshrv2di3 {} + + const vsq __builtin_altivec_vsubcuq (vsq, vsq); + VSUBCUQ altivec_vsubcuq {} + + const vsq __builtin_altivec_vsubecuq (vsq, vsq, vsq); + VSUBECUQ altivec_vsubecuq {} + + const vsq __builtin_altivec_vsubeuqm (vsq, vsq, vsq); + VSUBEUQM altivec_vsubeuqm {} + + const vsll __builtin_altivec_vsubudm (vsll, vsll); + VSUBUDM subv2di3 {} + + const vsq __builtin_altivec_vsubuqm (vsq, vsq); + VSUBUQM altivec_vsubuqm {} + + const vsll __builtin_altivec_vupkhsw (vsi); + VUPKHSW altivec_vupkhsw {} + + const vsll __builtin_altivec_vupklsw (vsi); + VUPKLSW altivec_vupklsw {} + + const vsq __builtin_bcdadd_v1ti (vsq, vsq, const int<1>); + BCDADD_V1TI bcdadd_v1ti {} + + const vsc __builtin_bcdadd_v16qi (vsc, vsc, const int<1>); + BCDADD_V16QI bcdadd_v16qi {} + + const signed int __builtin_bcdadd_eq_v1ti (vsq, vsq, const int<1>); + BCDADD_EQ_V1TI bcdadd_eq_v1ti {} + + const signed int __builtin_bcdadd_eq_v16qi (vsc, vsc, const int<1>); + BCDADD_EQ_V16QI bcdadd_eq_v16qi {} + + const signed int __builtin_bcdadd_gt_v1ti (vsq, vsq, const int<1>); + BCDADD_GT_V1TI bcdadd_gt_v1ti {} + + const signed int __builtin_bcdadd_gt_v16qi (vsc, vsc, const int<1>); + BCDADD_GT_V16QI bcdadd_gt_v16qi {} + + const signed int __builtin_bcdadd_lt_v1ti (vsq, vsq, const int<1>); + BCDADD_LT_V1TI bcdadd_lt_v1ti {} + + const signed int __builtin_bcdadd_lt_v16qi (vsc, vsc, const int<1>); + BCDADD_LT_V16QI bcdadd_lt_v16qi {} + + const signed int __builtin_bcdadd_ov_v1ti (vsq, vsq, const int<1>); + BCDADD_OV_V1TI bcdadd_unordered_v1ti {} + + const signed int __builtin_bcdadd_ov_v16qi (vsc, vsc, const int<1>); + BCDADD_OV_V16QI bcdadd_unordered_v16qi {} + + const signed int __builtin_bcdinvalid_v1ti (vsq); + BCDINVALID_V1TI bcdinvalid_v1ti {} + + const signed int __builtin_bcdinvalid_v16qi (vsc); + BCDINVALID_V16QI bcdinvalid_v16qi {} + + const vsq __builtin_bcdsub_v1ti (vsq, vsq, const int<1>); + BCDSUB_V1TI bcdsub_v1ti {} + + const vsc __builtin_bcdsub_v16qi (vsc, vsc, const int<1>); + BCDSUB_V16QI bcdsub_v16qi {} + + const signed int __builtin_bcdsub_eq_v1ti (vsq, vsq, const int<1>); + BCDSUB_EQ_V1TI bcdsub_eq_v1ti {} + + const signed int __builtin_bcdsub_eq_v16qi (vsc, vsc, const int<1>); + BCDSUB_EQ_V16QI bcdsub_eq_v16qi {} + + const signed int __builtin_bcdsub_ge_v1ti (vsq, vsq, const int<1>); + BCDSUB_GE_V1TI bcdsub_ge_v1ti {} + + const signed int __builtin_bcdsub_ge_v16qi (vsc, vsc, const int<1>); + BCDSUB_GE_V16QI bcdsub_ge_v16qi {} + + const signed int __builtin_bcdsub_gt_v1ti (vsq, vsq, const int<1>); + BCDSUB_GT_V1TI bcdsub_gt_v1ti {} + + const signed int __builtin_bcdsub_gt_v16qi (vsc, vsc, const int<1>); + BCDSUB_GT_V16QI bcdsub_gt_v16qi {} + + const signed int __builtin_bcdsub_le_v1ti (vsq, vsq, const int<1>); + BCDSUB_LE_V1TI bcdsub_le_v1ti {} + + const signed int __builtin_bcdsub_le_v16qi (vsc, vsc, const int<1>); + BCDSUB_LE_V16QI bcdsub_le_v16qi {} + + const signed int __builtin_bcdsub_lt_v1ti (vsq, vsq, const int<1>); + BCDSUB_LT_V1TI bcdsub_lt_v1ti {} + + const signed int __builtin_bcdsub_lt_v16qi (vsc, vsc, const int<1>); + BCDSUB_LT_V16QI bcdsub_lt_v16qi {} + + const signed int __builtin_bcdsub_ov_v1ti (vsq, vsq, const int<1>); + BCDSUB_OV_V1TI bcdsub_unordered_v1ti {} + + const signed int __builtin_bcdsub_ov_v16qi (vsc, vsc, const int<1>); + BCDSUB_OV_V16QI bcdsub_unordered_v16qi {} + + const vuc __builtin_crypto_vpermxor_v16qi (vuc, vuc, vuc); + VPERMXOR_V16QI crypto_vpermxor_v16qi {} + + const vull 
__builtin_crypto_vpermxor_v2di (vull, vull, vull);
+    VPERMXOR_V2DI crypto_vpermxor_v2di {}
+
+  const vui __builtin_crypto_vpermxor_v4si (vui, vui, vui);
+    VPERMXOR_V4SI crypto_vpermxor_v4si {}
+
+  const vus __builtin_crypto_vpermxor_v8hi (vus, vus, vus);
+    VPERMXOR_V8HI crypto_vpermxor_v8hi {}
+
+  const vuc __builtin_crypto_vpmsumb (vuc, vuc);
+    VPMSUMB crypto_vpmsumb {}
+
+  const vull __builtin_crypto_vpmsumd (vull, vull);
+    VPMSUMD crypto_vpmsumd {}
+
+  const vus __builtin_crypto_vpmsumh (vus, vus);
+    VPMSUMH crypto_vpmsumh {}
+
+  const vui __builtin_crypto_vpmsumw (vui, vui);
+    VPMSUMW crypto_vpmsumw {}
+
+  const vf __builtin_vsx_float2_v2df (vd, vd);
+    FLOAT2_V2DF float2_v2df {}
+
+  const vf __builtin_vsx_float2_v2di (vsll, vsll);
+    FLOAT2_V2DI float2_v2di {}
+
+  const vsc __builtin_vsx_revb_v16qi (vsc);
+    REVB_V16QI revb_v16qi {}
+
+  const vsq __builtin_vsx_revb_v1ti (vsq);
+    REVB_V1TI revb_v1ti {}
+
+  const vd __builtin_vsx_revb_v2df (vd);
+    REVB_V2DF revb_v2df {}
+
+  const vsll __builtin_vsx_revb_v2di (vsll);
+    REVB_V2DI revb_v2di {}
+
+  const vf __builtin_vsx_revb_v4sf (vf);
+    REVB_V4SF revb_v4sf {}
+
+  const vsi __builtin_vsx_revb_v4si (vsi);
+    REVB_V4SI revb_v4si {}
+
+  const vss __builtin_vsx_revb_v8hi (vss);
+    REVB_V8HI revb_v8hi {}
+
+  const vf __builtin_vsx_uns_float2_v2di (vsll, vsll);
+    UNS_FLOAT2_V2DI uns_float2_v2di {}
+
+  const vsi __builtin_vsx_vsigned2_v2df (vd, vd);
+    VEC_VSIGNED2_V2DF vsigned2_v2df {}
+
+  const vsi __builtin_vsx_vunsigned2_v2df (vd, vd);
+    VEC_VUNSIGNED2_V2DF vunsigned2_v2df {}
+
+  const vf __builtin_vsx_xscvdpspn (double);
+    XSCVDPSPN vsx_xscvdpspn {}
+
+  const double __builtin_vsx_xscvspdpn (vf);
+    XSCVSPDPN vsx_xscvspdpn {}
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index a295ff5..164f586 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -771,6 +771,7 @@ Objective-C and Objective-C++ Dialects}.
 -mverbose-cost-dump @gol
 -mpure-code @gol
 -mcmse @gol
+-mfix-cmse-cve-2021-35465 @gol
 -mfdpic}
 
 @emph{AVR Options}
@@ -14349,9 +14350,10 @@ The parameter only has an effect on targets that support partial
 vector loads and stores.
 
 @item vect-inner-loop-cost-factor
-The factor which the loop vectorizer applies to the cost of statements
-in an inner loop relative to the loop being vectorized.  The default
-value is 50.
+The maximum factor which the loop vectorizer applies to the cost of statements
+in an inner loop relative to the loop being vectorized.  The factor applied
+is the minimum of the estimated number of iterations of the inner loop and
+this parameter.  The default value of this parameter is 50.
 
 @item avoid-fma-max-bits
 Maximum number of bits for which we avoid creating FMAs.
@@ -20701,6 +20703,14 @@ Generate secure code as per the "ARMv8-M Security Extensions: Requirements on
 Development Tools Engineering Specification", which can be found on
 @url{https://developer.arm.com/documentation/ecm0359818/latest/}.
 
+@item -mfix-cmse-cve-2021-35465
+@opindex mfix-cmse-cve-2021-35465
+Mitigate against a potential security issue with the @code{VLLDM} instruction
+in some M-profile devices when using CMSE (CVE-2021-35465).  This option is
+enabled by default when the option @option{-mcpu=} is used with
+@code{cortex-m33}, @code{cortex-m35p} or @code{cortex-m55}.  The option
+@option{-mno-fix-cmse-cve-2021-35465} can be used to disable the mitigation.
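On the vect-inner-loop-cost-factor change documented above: the parameter is now an upper bound rather than a literal scale, and the factor actually applied is the estimated inner-loop iteration count capped by the parameter (the wi::smin call in the tree-vect-loop.c hunk near the end of this diff). A distilled sketch of the rule, with a hypothetical helper name:

/* Scale inner-loop statement costs by the estimated iteration count
   when one is known, but never by more than the parameter (default 50);
   without an estimate, use the parameter literally.  */
static unsigned
inner_loop_scale (bool have_estimate, unsigned estimated_niters,
		  unsigned cost_factor_param)
{
  if (!have_estimate)
    return cost_factor_param;
  return estimated_niters < cost_factor_param
	 ? estimated_niters : cost_factor_param;
}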
+
 @item -mfdpic
 @itemx -mno-fdpic
 @opindex mfdpic
diff --git a/gcc/params.opt b/gcc/params.opt
index f926488..f414dc1 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -1114,7 +1114,7 @@ Common Joined UInteger Var(param_vect_partial_vector_usage) Init(2) IntegerRange
 Controls how loop vectorizer uses partial vectors.  0 means never, 1 means
 only for loops whose need to iterate can be removed, 2 means for all loops.
 The default value is 2.
 
 -param=vect-inner-loop-cost-factor=
-Common Joined UInteger Var(param_vect_inner_loop_cost_factor) Init(50) IntegerRange(1, 999999) Param Optimization
-The factor which the loop vectorizer applies to the cost of statements in an inner loop relative to the loop being vectorized.
+Common Joined UInteger Var(param_vect_inner_loop_cost_factor) Init(50) IntegerRange(1, 10000) Param Optimization
+The maximum factor which the loop vectorizer applies to the cost of statements in an inner loop relative to the loop being vectorized.
 
 ; This comment is to ensure we retain the blank line above.
diff --git a/gcc/testsuite/gcc.dg/predict-1.c b/gcc/testsuite/gcc.dg/predict-1.c
index 9e5605a..d2e753e 100644
--- a/gcc/testsuite/gcc.dg/predict-1.c
+++ b/gcc/testsuite/gcc.dg/predict-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-profile_estimate" } */
+/* { dg-options "-O2 -fdump-tree-profile_estimate -fdisable-tree-evrp" } */
 
 extern int global;
 
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/evrp-trans.c b/gcc/testsuite/gcc.dg/tree-ssa/evrp-trans.c
new file mode 100644
index 0000000..8ee8e3c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/evrp-trans.c
@@ -0,0 +1,144 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-evrp" } */
+
+/* Simple tests to make sure transitives are working.  */
+void keep();
+void kill();
+
+void
+f1 (int x, int y, int z)
+{
+  if (x > y)
+    if (y > z)
+      {
+	if (x > z)
+	  keep ();
+	else
+	  kill ();
+      }
+}
+
+void
+f2 (int w, int x, int y, int z)
+{
+  // Test one equivalence.
+  if (w == z)
+    if (x > y)
+      if (y > z)
+	{
+	  if (x > w)
+	    keep ();
+	  else
+	    kill ();
+	}
+}
+
+void
+f3 (int a, int w, int x, int y, int z)
+{
+  // Test two equivalences.
+ if (a == x) + if (w == z) + if (x > y) + if (y > z) + { + if (a > w) + keep (); + else + kill (); + } +} + +void +f4 (int x, int y, int z) +{ + // test X > Y >= Z + if (x > y) + if (y >= z) + { + if (x > z) + keep (); + else + kill (); + } +} +void +f5 (int x, int y, int z) +{ + // test X >= Y > Z + if (x >= y) + if (y > z) + { + if (x > z) + keep (); + else + kill (); + } +} + +void +f6 (int x, int y, int z) +{ + // test X >= Y >= Z + if (x >= y) + if (y >= z) + { + if (x > z) + keep (); + else if (x == z) + keep (); + else + kill (); + } +} + +void +f7 (int x, int y, int z) +{ + // test Y <= X , Z <= Y + if (y <= x) + if (z <= y) + { + if (x > z) + keep (); + else if (x == z) + keep (); + else + kill (); + } +} + +void +f8 (int x, int y, int z) +{ + // test X >= Y, Z <= Y + if (x >= y) + if (z <= y) + { + if (x > z) + keep (); + else if (x == z) + keep (); + else + kill (); + } +} + +void +f9 (int x, int y, int z) +{ + // test Y <= X Y >= Z + if (y <= x) + if (y >= z) + { + if (x > z) + keep (); + else if (x == z) + keep (); + else + kill (); + } +} + +/* { dg-final { scan-tree-dump-not "kill" "evrp" } } */ +/* { dg-final { scan-tree-dump-times "keep" 13 "evrp"} } */ diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-13a.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-13a.c new file mode 100644 index 0000000..553cc78 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-13a.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-mcmse -mfloat-abi=soft -mfix-cmse-cve-2021-35465" } */ +/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=*" } { "-mfloat-abi=soft" } } */ + +#include "../../../cmse-13.x" + +/* Checks for saving and clearing prior to function call. */ +/* Shift on the same register as blxns. */ +/* { dg-final { scan-assembler "lsrs\t(r\[1,4-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */ +/* { dg-final { scan-assembler "lsls\t(r\[1,4-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */ +/* { dg-final { scan-assembler-not "mov\tr0, r4" } } */ +/* { dg-final { scan-assembler-not "mov\tr2, r4" } } */ +/* { dg-final { scan-assembler-not "mov\tr3, r4" } } */ +/* { dg-final { scan-assembler "push\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ +/* { dg-final { scan-assembler "vlstm\tsp" } } */ +/* Check the right registers are cleared and none appears twice. */ +/* { dg-final { scan-assembler "clrm\t\{(r1, )?(r4, )?(r5, )?(r6, )?(r7, )?(r8, )?(r9, )?(r10, )?(fp, )?(ip, )?APSR\}" } } */ +/* Check that the right number of registers is cleared and thus only one + register is missing. */ +/* { dg-final { scan-assembler "clrm\t\{((r\[1,4-9\]|r10|fp|ip), ){9}APSR\}" } } */ +/* Check that no cleared register is used for blxns. */ +/* { dg-final { scan-assembler-not "clrm\t\{\[^\}\]\+(r\[1,4-9\]|r10|fp|ip),\[^\}\]\+\}.*blxns\t\\1" } } */ +/* Check for v8.1-m variant of erratum work-around. */ +/* { dg-final { scan-assembler "vscclrm\t\{vpr\}" } } */ +/* { dg-final { scan-assembler "vlldm\tsp" } } */ +/* { dg-final { scan-assembler "pop\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ +/* { dg-final { scan-assembler-not "vmov" } } */ +/* { dg-final { scan-assembler-not "vmsr" } } */ + +/* Now we check that we use the correct intrinsic to call. 
*/ +/* { dg-final { scan-assembler "blxns" } } */ diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-7a.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-7a.c new file mode 100644 index 0000000..ce02fde --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-7a.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-options "-mcmse -mfloat-abi=soft -mfix-cmse-cve-2021-35465" } */ +/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=*" } { "-mfloat-abi=soft" } } */ + +#include "../../../cmse-7.x" + +/* Checks for saving and clearing prior to function call. */ +/* Shift on the same register as blxns. */ +/* { dg-final { scan-assembler "lsrs\t(r\[0-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */ +/* { dg-final { scan-assembler "lsls\t(r\[0-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */ +/* { dg-final { scan-assembler "push\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ +/* { dg-final { scan-assembler "vlstm\tsp" } } */ +/* Check the right registers are cleared and none appears twice. */ +/* { dg-final { scan-assembler "clrm\t\{(r0, )?(r1, )?(r2, )?(r3, )?(r4, )?(r5, )?(r6, )?(r7, )?(r8, )?(r9, )?(r10, )?(fp, )?(ip, )?APSR\}" } } */ +/* Check that the right number of registers is cleared and thus only one + register is missing. */ +/* { dg-final { scan-assembler "clrm\t\{((r\[0-9\]|r10|fp|ip), ){12}APSR\}" } } */ +/* Check that no cleared register is used for blxns. */ +/* { dg-final { scan-assembler-not "clrm\t\{\[^\}\]\+(r\[0-9\]|r10|fp|ip),\[^\}\]\+\}.*blxns\t\\1" } } */ +/* Check for v8.1-m variant of erratum work-around. */ +/* { dg-final { scan-assembler "vscclrm\t\{vpr\}" } } */ +/* { dg-final { scan-assembler "vlldm\tsp" } } */ +/* { dg-final { scan-assembler "pop\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ +/* { dg-final { scan-assembler-not "vmov" } } */ +/* { dg-final { scan-assembler-not "vmsr" } } */ + +/* Now we check that we use the correct intrinsic to call. */ +/* { dg-final { scan-assembler "blxns" } } */ diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-8a.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-8a.c new file mode 100644 index 0000000..75e1611 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-8a.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-mcmse -mfloat-abi=soft -mfix-cmse-cve-2021-35465" } */ +/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=*" } { "-mfloat-abi=soft" } } */ + +#include "../../../cmse-8.x" + +/* Checks for saving and clearing prior to function call. */ +/* Shift on the same register as blxns. */ +/* { dg-final { scan-assembler "lsrs\t(r\[2-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */ +/* { dg-final { scan-assembler "lsls\t(r\[2-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */ +/* { dg-final { scan-assembler-not "mov\tr0, r4" } } */ +/* { dg-final { scan-assembler-not "mov\tr1, r4" } } */ +/* { dg-final { scan-assembler "push\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ +/* { dg-final { scan-assembler "vlstm\tsp" } } */ +/* Check the right registers are cleared and none appears twice. */ +/* { dg-final { scan-assembler "clrm\t\{(r2, )?(r3, )?(r4, )?(r5, )?(r6, )?(r7, )?(r8, )?(r9, )?(r10, )?(fp, )?(ip, )?APSR\}" } } */ +/* Check that the right number of registers is cleared and thus only one + register is missing. */ +/* { dg-final { scan-assembler "clrm\t\{((r\[2-9\]|r10|fp|ip), ){10}APSR\}" } } */ +/* Check that no cleared register is used for blxns. 
*/ +/* { dg-final { scan-assembler-not "clrm\t\{\[^\}\]\+(r\[2-9\]|r10|fp|ip),\[^\}\]\+\}.*blxns\t\\1" } } */ +/* Check for v8.1-m variant of erratum work-around. */ +/* { dg-final { scan-assembler "vscclrm\t\{vpr\}" } } */ +/* { dg-final { scan-assembler "vlldm\tsp" } } */ +/* { dg-final { scan-assembler "pop\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ +/* { dg-final { scan-assembler-not "vmov" } } */ +/* { dg-final { scan-assembler-not "vmsr" } } */ + +/* Now we check that we use the correct intrinsic to call. */ +/* { dg-final { scan-assembler "blxns" } } */ diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp-sp/cmse-7a.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp-sp/cmse-7a.c new file mode 100644 index 0000000..dad7266 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp-sp/cmse-7a.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-sp-d16 -mfix-cmse-cve-2021-35465" } */ +/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=*" } { "-mfloat-abi=softfp" } } */ +/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */ + +#include "../../../cmse-7.x" + +/* Checks for saving and clearing prior to function call. */ +/* Shift on the same register as blxns. */ +/* { dg-final { scan-assembler "lsrs\t(r\[0-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */ +/* { dg-final { scan-assembler "lsls\t(r\[0-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */ +/* { dg-final { scan-assembler "push\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ +/* { dg-final { scan-assembler "vlstm\tsp" } } */ +/* Check the right registers are cleared and none appears twice. */ +/* { dg-final { scan-assembler "clrm\t\{(r0, )?(r1, )?(r2, )?(r3, )?(r4, )?(r5, )?(r6, )?(r7, )?(r8, )?(r9, )?(r10, )?(fp, )?(ip, )?APSR\}" } } */ +/* Check that the right number of registers is cleared and thus only one + register is missing. */ +/* { dg-final { scan-assembler "clrm\t\{((r\[0-9\]|r10|fp|ip), ){12}APSR\}" } } */ +/* Check that no cleared register is used for blxns. */ +/* { dg-final { scan-assembler-not "clrm\t\{\[^\}\]\+(r\[0-9\]|r10|fp|ip),\[^\}\]\+\}.*blxns\t\\1" } } */ +/* Check for v8.1-m variant of erratum work-around. */ +/* { dg-final { scan-assembler "vscclrm\t\{vpr\}" } } */ +/* { dg-final { scan-assembler "vlldm\tsp" } } */ +/* { dg-final { scan-assembler "pop\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ + +/* Now we check that we use the correct intrinsic to call. */ +/* { dg-final { scan-assembler "blxns" } } */ diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp-sp/cmse-8a.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp-sp/cmse-8a.c new file mode 100644 index 0000000..faa0448 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp-sp/cmse-8a.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-sp-d16 -mfix-cmse-cve-2021-35465" } */ +/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=*" } { "-mfloat-abi=softfp" } } */ +/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */ + +#include "../../../cmse-8.x" + +/* Checks for saving and clearing prior to function call. */ +/* Shift on the same register as blxns. 
*/ +/* { dg-final { scan-assembler "lsrs\t(r\[2-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */ +/* { dg-final { scan-assembler "lsls\t(r\[2-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */ +/* { dg-final { scan-assembler-not "mov\tr0, r4" } } */ +/* { dg-final { scan-assembler-not "mov\tr1, r4" } } */ +/* { dg-final { scan-assembler "push\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ +/* { dg-final { scan-assembler "vlstm\tsp" } } */ +/* Check the right registers are cleared and none appears twice. */ +/* { dg-final { scan-assembler "clrm\t\{(r2, )?(r3, )?(r4, )?(r5, )?(r6, )?(r7, )?(r8, )?(r9, )?(r10, )?(fp, )?(ip, )?APSR\}" } } */ +/* Check that the right number of registers is cleared and thus only one + register is missing. */ +/* { dg-final { scan-assembler "clrm\t\{((r\[2-9\]|r10|fp|ip), ){10}APSR\}" } } */ +/* Check that no cleared register is used for blxns. */ +/* { dg-final { scan-assembler-not "clrm\t\{\[^\}\]\+(r\[2-9\]|r10|fp|ip),\[^\}\]\+\}.*blxns\t\\1" } } */ +/* Check for v8.1-m variant of erratum work-around. */ +/* { dg-final { scan-assembler "vscclrm\t\{vpr\}" } } */ +/* { dg-final { scan-assembler "vlldm\tsp" } } */ +/* { dg-final { scan-assembler "pop\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ + +/* Now we check that we use the correct intrinsic to call. */ +/* { dg-final { scan-assembler "blxns" } } */ diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-13a.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-13a.c new file mode 100644 index 0000000..bceba44 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-13a.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-d16 -mfix-cmse-cve-2021-35465" } */ +/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=*" } { "-mfloat-abi=softfp" } } */ +/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */ + +#include "../../../cmse-13.x" + +/* Checks for saving and clearing prior to function call. */ +/* Shift on the same register as blxns. */ +/* { dg-final { scan-assembler "lsrs\t(r\[1,4-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */ +/* { dg-final { scan-assembler "lsls\t(r\[1,4-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */ +/* { dg-final { scan-assembler-not "mov\tr0, r4" } } */ +/* { dg-final { scan-assembler-not "mov\tr2, r4" } } */ +/* { dg-final { scan-assembler-not "mov\tr3, r4" } } */ +/* { dg-final { scan-assembler "push\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ +/* { dg-final { scan-assembler "vlstm\tsp" } } */ +/* Check the right registers are cleared and none appears twice. */ +/* { dg-final { scan-assembler "clrm\t\{(r1, )?(r4, )?(r5, )?(r6, )?(r7, )?(r8, )?(r9, )?(r10, )?(fp, )?(ip, )?APSR\}" } } */ +/* Check that the right number of registers is cleared and thus only one + register is missing. */ +/* { dg-final { scan-assembler "clrm\t\{((r\[1,4-9\]|r10|fp|ip), ){9}APSR\}" } } */ +/* Check that no cleared register is used for blxns. */ +/* { dg-final { scan-assembler-not "clrm\t\{\[^\}\]\+(r\[1,4-9\]|r10|fp|ip),\[^\}\]\+\}.*blxns\t\\1" } } */ +/* Check for v8.1-m variant of erratum work-around. */ +/* { dg-final { scan-assembler "vscclrm\t\{vpr\}" } } */ +/* { dg-final { scan-assembler "vlldm\tsp" } } */ +/* { dg-final { scan-assembler "pop\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ + +/* Now we check that we use the correct intrinsic to call. 
*/ +/* { dg-final { scan-assembler "blxns" } } */ diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-7a.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-7a.c new file mode 100644 index 0000000..c74ebbd --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-7a.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-d16 -mfix-cmse-cve-2021-35465" } */ +/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=*" } { "-mfloat-abi=softfp" } } */ +/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */ + +#include "../../../cmse-7.x" + +/* Checks for saving and clearing prior to function call. */ +/* Shift on the same register as blxns. */ +/* { dg-final { scan-assembler "lsrs\t(r\[0-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */ +/* { dg-final { scan-assembler "lsls\t(r\[0-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */ +/* { dg-final { scan-assembler "push\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ +/* { dg-final { scan-assembler "vlstm\tsp" } } */ +/* Check the right registers are cleared and none appears twice. */ +/* { dg-final { scan-assembler "clrm\t\{(r0, )?(r1, )?(r2, )?(r3, )?(r4, )?(r5, )?(r6, )?(r7, )?(r8, )?(r9, )?(r10, )?(fp, )?(ip, )?APSR\}" } } */ +/* Check that the right number of registers is cleared and thus only one + register is missing. */ +/* { dg-final { scan-assembler "clrm\t\{((r\[0-9\]|r10|fp|ip), ){12}APSR\}" } } */ +/* Check that no cleared register is used for blxns. */ +/* { dg-final { scan-assembler-not "clrm\t\{\[^\}\]\+(r\[0-9\]|r10|fp|ip),\[^\}\]\+\}.*blxns\t\\1" } } */ +/* Check for v8.1-m variant of erratum work-around. */ +/* { dg-final { scan-assembler "vscclrm\t\{vpr\}" } } */ +/* { dg-final { scan-assembler "vlldm\tsp" } } */ +/* { dg-final { scan-assembler "pop\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ + +/* Now we check that we use the correct intrinsic to call. */ +/* { dg-final { scan-assembler "blxns" } } */ diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-8a.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-8a.c new file mode 100644 index 0000000..ffb67a7 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-8a.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-d16 -mfix-cmse-cve-2021-35465" } */ +/* { dg-skip-if "Incompatible float ABI" { *-*-* } { "-mfloat-abi=*" } { "-mfloat-abi=softfp" } } */ +/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */ + +#include "../../../cmse-8.x" + +/* Checks for saving and clearing prior to function call. */ +/* Shift on the same register as blxns. */ +/* { dg-final { scan-assembler "lsrs\t(r\[2-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */ +/* { dg-final { scan-assembler "lsls\t(r\[2-9\]|r10|fp|ip), \\1, #1.*blxns\t\\1" } } */ +/* { dg-final { scan-assembler-not "mov\tr0, r4" } } */ +/* { dg-final { scan-assembler-not "mov\tr1, r4" } } */ +/* { dg-final { scan-assembler "push\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ +/* { dg-final { scan-assembler "vlstm\tsp" } } */ +/* Check the right registers are cleared and none appears twice. */ +/* { dg-final { scan-assembler "clrm\t\{(r2, )?(r3, )?(r4, )?(r5, )?(r6, )?(r7, )?(r8, )?(r9, )?(r10, )?(fp, )?(ip, )?APSR\}" } } */ +/* Check that the right number of registers is cleared and thus only one + register is missing. 
*/ +/* { dg-final { scan-assembler "clrm\t\{((r\[2-9\]|r10|fp|ip), ){10}APSR\}" } } */ +/* Check that no cleared register is used for blxns. */ +/* { dg-final { scan-assembler-not "clrm\t\{\[^\}\]\+(r\[2-9\]|r10|fp|ip),\[^\}\]\+\}.*blxns\t\\1" } } */ +/* Check for v8.1-m variant of erratum work-around. */ +/* { dg-final { scan-assembler "vscclrm\t\{vpr\}" } } */ +/* { dg-final { scan-assembler "vlldm\tsp" } } */ +/* { dg-final { scan-assembler "pop\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ + +/* Now we check that we use the correct intrinsic to call. */ +/* { dg-final { scan-assembler "blxns" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-1.c index 78bf5d3..fbc3de0 100644 --- a/gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512bw-shiftqihi-constant-1.c @@ -1,7 +1,8 @@ /* PR target/95524 */ /* { dg-do compile } */ /* { dg-options "-O2 -mavx512bw" } */ -/* { dg-final { scan-assembler-times "vpand\[^\n\]*%zmm" 3 } } */ +/* { dg-final { scan-assembler-times "vpand\[^\n\]*%zmm" 2 } } */ +/* { dg-final { scan-assembler-times "vpternlogd\[^\n\]*%zmm" 1 } } */ typedef char v64qi __attribute__ ((vector_size (64))); typedef unsigned char v64uqi __attribute__ ((vector_size (64))); @@ -11,7 +12,6 @@ foo_ashiftrt_512 (v64qi a) return a >> 2; } /* { dg-final { scan-assembler-times "vpsraw\[^\n\]*%zmm" 1 } } */ -/* { dg-final { scan-assembler-times "vpxor\[^\n\]*%zmm" 1 } } */ /* { dg-final { scan-assembler-times "vpsubb\[^\n\]*%zmm" 1 } } */ __attribute__((noipa)) v64qi diff --git a/gcc/testsuite/gcc.target/i386/pr100865-10b.c b/gcc/testsuite/gcc.target/i386/pr100865-10b.c index 77ace86..e5616d8 100644 --- a/gcc/testsuite/gcc.target/i386/pr100865-10b.c +++ b/gcc/testsuite/gcc.target/i386/pr100865-10b.c @@ -5,4 +5,3 @@ /* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]%ymm\[0-9\]+, " 8 } } */ -/* { dg-final { scan-assembler-not "vzeroupper" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr100865-4b.c b/gcc/testsuite/gcc.target/i386/pr100865-4b.c index 80e9fdb..6d9cb91 100644 --- a/gcc/testsuite/gcc.target/i386/pr100865-4b.c +++ b/gcc/testsuite/gcc.target/i386/pr100865-4b.c @@ -5,7 +5,6 @@ /* { dg-final { scan-assembler-times "vpbroadcastb\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */ /* { dg-final { scan-assembler-times "vmovdqu8\[\\t \]%ymm\[0-9\]+, " 2 } } */ -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */ -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ /* { dg-final { scan-assembler-not "vpbroadcastb\[\\t \]+%xmm\[0-9\]+, %ymm\[0-9\]+" } } */ /* { dg-final { scan-assembler-not "vmovdqa" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr100865-6b.c b/gcc/testsuite/gcc.target/i386/pr100865-6b.c index 35f2e96..9588249 100644 --- a/gcc/testsuite/gcc.target/i386/pr100865-6b.c +++ b/gcc/testsuite/gcc.target/i386/pr100865-6b.c @@ -4,9 +4,7 @@ #include "pr100865-6a.c" /* { dg-final { scan-assembler-times "vpbroadcastd\[\\t \]+%(?:r|e)\[^\n\]*, %ymm\[0-9\]+" 1 } } */ -/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 8 { target ia32 } } } */ -/* { dg-final { scan-assembler-times "vmovdqu32\[\\t \]%ymm\[0-9\]+, " 8 { target { ! 
ia32 } } } } */ -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */ -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 8 } } */ +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ /* { dg-final { scan-assembler-not "vpbroadcastd\[\\t \]+%xmm\[0-9\]+, %ymm\[0-9\]+" } } */ /* { dg-final { scan-assembler-not "vmovdqa" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr100865-7b.c b/gcc/testsuite/gcc.target/i386/pr100865-7b.c index ad267c4..3b20c68 100644 --- a/gcc/testsuite/gcc.target/i386/pr100865-7b.c +++ b/gcc/testsuite/gcc.target/i386/pr100865-7b.c @@ -5,8 +5,6 @@ /* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%r\[^\n\]*, %ymm\[0-9\]+" 1 { target { ! ia32 } } } } */ /* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+\[^\n\]*, %ymm\[0-9\]+" 1 { target ia32 } } } */ -/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 16 { target ia32 } } } */ -/* { dg-final { scan-assembler-times "vmovdqu64\[\\t \]%ymm\[0-9\]+, " 16 { target { ! ia32 } } } } */ -/* { dg-final { scan-assembler-times "vzeroupper" 1 { target ia32 } } } */ -/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vmovdqu\[\\t \]%ymm\[0-9\]+, " 16 } } */ +/* { dg-final { scan-assembler-times "vzeroupper" 1 } } */ /* { dg-final { scan-assembler-not "vmovdqa" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr101989-1.c b/gcc/testsuite/gcc.target/i386/pr101989-1.c new file mode 100644 index 0000000..594093e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101989-1.c @@ -0,0 +1,51 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-times "vpternlog" 6 } } */ +/* { dg-final { scan-assembler-not "vpxor" } } */ +/* { dg-final { scan-assembler-not "vpor" } } */ +/* { dg-final { scan-assembler-not "vpand" } } */ + +#include<immintrin.h> +__m256d +__attribute__((noipa, target("avx512vl"))) +copysign2_pd(__m256d from, __m256d to) { + __m256i a = _mm256_castpd_si256(from); + __m256d avx_signbit = _mm256_castsi256_pd(_mm256_slli_epi64(_mm256_cmpeq_epi64(a, a), 63)); + /* (avx_signbit & from) | (~avx_signbit & to) */ + return _mm256_or_pd(_mm256_and_pd(avx_signbit, from), _mm256_andnot_pd(avx_signbit, to)); +} + +__m256i +__attribute__((noipa, target("avx512vl"))) +foo (__m256i src1, __m256i src2, __m256i src3) +{ + return (src2 & ~src1) | (src3 & src1); +} + +__m256i +__attribute__ ((noipa, target("avx512vl"))) +foo1 (__m256i src1, __m256i src2, __m256i src3) +{ + return (src2 & src1) | (src3 & ~src1); +} + +__m256i +__attribute__ ((noipa, target("avx512vl"))) +foo2 (__m256i src1, __m256i src2, __m256i src3) +{ + return (src2 & src1) | (~src3 & src1); +} + +__m256i +__attribute__ ((noipa, target("avx512vl"))) +foo3 (__m256i src1, __m256i src2, __m256i src3) +{ + return (~src2 & src1) | (src3 & src1); +} + +__m256i +__attribute__ ((noipa, target("avx512vl"))) +foo4 (__m256i src1, __m256i src2, __m256i src3) +{ + return src3 & src2 ^ src1; +} diff --git a/gcc/testsuite/gcc.target/i386/pr101989-2.c b/gcc/testsuite/gcc.target/i386/pr101989-2.c new file mode 100644 index 0000000..9d9759a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101989-2.c @@ -0,0 +1,102 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx2 -mno-avx512f" } */ +/* { dg-require-effective-target avx512vl } */ + +#define AVX512VL + +#include "avx512f-helper.h" + +#include "pr101989-1.c" +__m256d 
+avx2_copysign2_pd (__m256d from, __m256d to) { + __m256i a = _mm256_castpd_si256(from); + __m256d avx_signbit = _mm256_castsi256_pd(_mm256_slli_epi64(_mm256_cmpeq_epi64(a, a), 63)); + /* (avx_signbit & from) | (~avx_signbit & to) */ + return _mm256_or_pd(_mm256_and_pd(avx_signbit, from), _mm256_andnot_pd(avx_signbit, to)); +} + +__m256i +avx2_foo (__m256i src1, __m256i src2, __m256i src3) +{ + return (src2 & ~src1) | (src3 & src1); +} + +__m256i +avx2_foo1 (__m256i src1, __m256i src2, __m256i src3) +{ + return (src2 & src1) | (src3 & ~src1); +} + +__m256i +avx2_foo2 (__m256i src1, __m256i src2, __m256i src3) +{ + return (src2 & src1) | (~src3 & src1); +} + +__m256i +avx2_foo3 (__m256i src1, __m256i src2, __m256i src3) +{ + return (~src2 & src1) | (src3 & src1); +} + +__m256i +avx2_foo4 (__m256i src1, __m256i src2, __m256i src3) +{ + return src3 & src2 ^ src1; +} + + +void +test_256 (void) +{ + union256i_q q1, q2, q3, res2, exp2; + union256d d1, d2, res1, exp1; + int i, sign = 1; + + for (i = 0; i < 4; i++) + { + d1.a[i] = 12.34 * (i + 2000) * sign; + d2.a[i] = 56.78 * (i - 30) * sign; + q1.a[i] = 12 * (i + 2000) * sign; + q2.a[i] = 56 * (i - 30) * sign; + q3.a[i] = 90 * (i + 40) * sign; + res1.a[i] = DEFAULT_VALUE; + exp1.a[i] = DEFAULT_VALUE; + res2.a[i] = exp2.a[i] = -1; + sign = -sign; + } + + exp1.x = avx2_copysign2_pd (d1.x, d2.x); + res1.x = copysign2_pd (d1.x, d2.x); + if (UNION_CHECK (256, d) (res1, exp1.a)) + abort (); + + exp2.x = avx2_foo1 (q1.x, q2.x, q3.x); + res2.x = foo1 (q1.x, q2.x, q3.x); + if (UNION_CHECK (256, i_q) (res2, exp2.a)) + abort (); + + exp2.x = avx2_foo2 (q1.x, q2.x, q3.x); + res2.x = foo2 (q1.x, q2.x, q3.x); + if (UNION_CHECK (256, i_q) (res2, exp2.a)) + abort (); + + exp2.x = avx2_foo3 (q1.x, q2.x, q3.x); + res2.x = foo3 (q1.x, q2.x, q3.x); + if (UNION_CHECK (256, i_q) (res2, exp2.a)) + abort (); + + exp2.x = avx2_foo4 (q1.x, q2.x, q3.x); + res2.x = foo4 (q1.x, q2.x, q3.x); + if (UNION_CHECK (256, i_q) (res2, exp2.a)) + abort (); + + exp2.x = avx2_foo (q1.x, q2.x, q3.x); + res2.x = foo (q1.x, q2.x, q3.x); + if (UNION_CHECK (256, i_q) (res2, exp2.a)) + abort (); +} + +static void +test_128 () +{} diff --git a/gcc/testsuite/gcc.target/i386/pr101989-broadcast-1.c b/gcc/testsuite/gcc.target/i386/pr101989-broadcast-1.c new file mode 100644 index 0000000..d03d192 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr101989-broadcast-1.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512vl" } */ +/* { dg-final { scan-assembler-times "vpternlog" 4 } } */ +/* { dg-final { scan-assembler-times "\\\{1to4\\\}" 4 } } */ +#include<immintrin.h> +extern long long C; +__m256d +copysign2_pd(__m256d from, __m256d to) { + __m256i a = _mm256_castpd_si256(from); + __m256d avx_signbit = _mm256_castsi256_pd(_mm256_slli_epi64(_mm256_cmpeq_epi64(a, a), 63)); + /* (avx_signbit & from) | (~avx_signbit & to) */ + return _mm256_or_pd(_mm256_and_pd(avx_signbit, from), _mm256_andnot_pd(avx_signbit, to)); +} + +__m256i +mask_pternlog (__m256i A, __m256i B, __mmask8 U) +{ + return _mm256_mask_ternarylogic_epi64 (A, U, B, _mm256_set1_epi64x (C) ,202); +} + +__m256i +maskz_pternlog (__m256i A, __m256i B, __mmask8 U) +{ + return _mm256_maskz_ternarylogic_epi64 (U, A, B, _mm256_set1_epi64x (C) ,202); +} + +__m256i +none_pternlog (__m256i A, __m256i B) +{ + return _mm256_ternarylogic_epi64 (A, B, _mm256_set1_epi64x (C) ,202); +} diff --git a/gcc/testsuite/gcc.target/i386/pr102021.c b/gcc/testsuite/gcc.target/i386/pr102021.c new file mode 100644 index 0000000..6db3f57 --- 
/dev/null +++ b/gcc/testsuite/gcc.target/i386/pr102021.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=skylake-avx512" } */ + +#include<immintrin.h> + +__m256i +foo () +{ + return _mm256_set1_epi16 (12); +} + +/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+%r\[^\n\]*, %ymm\[0-9\]+" 1 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-times "vpbroadcastq\[\\t \]+\[^\n\]*, %ymm\[0-9\]+" 1 { target ia32 } } } */ +/* { dg-final { scan-assembler-not "vmovdqa" } } */ +/* { dg-final { scan-assembler-not "vzeroupper" } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 66ce48d..06f5b1e 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -4878,15 +4878,16 @@ proc check_effective_target_arm_cmse_ok {} { proc check_effective_target_arm_cmse_hw { } { return [check_runtime arm_cmse_hw_available { - int __attribute__ ((cmse_nonsecure_entry)) ns_func(void) - { - return 0; - } int main (void) { - return ns_func(); - } - } "-mcmse -Wl,--section-start,.gnu.sgstubs=0x00400000"] + unsigned id_pfr1; + asm ("ldr\t%0, =0xe000ed44\n" \ + "ldr\t%0, [%0]\n" \ + "sg" : "=l" (id_pfr1)); + /* Exit with code 0 iff security extension is available. */ + return !(id_pfr1 & 0xf0); + } + } "-mcmse"] } # Return 1 if the target supports executing MVE instructions, 0 # otherwise. diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index c521b43a..0c8d992 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -1519,6 +1519,13 @@ vect_analyze_loop_form (class loop *loop, vec_info_shared *shared) stmt_vec_info inner_loop_cond_info = loop_vinfo->lookup_stmt (inner_loop_cond); STMT_VINFO_TYPE (inner_loop_cond_info) = loop_exit_ctrl_vec_info_type; + /* If we have an estimate on the number of iterations of the inner + loop use that to limit the scale for costing, otherwise use + --param vect-inner-loop-cost-factor literally. */ + widest_int nit; + if (estimated_stmt_executions (loop->inner, &nit)) + LOOP_VINFO_INNER_LOOP_COST_FACTOR (loop_vinfo) + = wi::smin (nit, param_vect_inner_loop_cost_factor).to_uhwi (); } gcc_assert (!loop->aux); diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index d2f6a16..edc11c6 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -5233,7 +5233,8 @@ li_cost_vec_cmp (const void *a_, const void *b_) static bool vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo, - vec<slp_instance> slp_instances) + vec<slp_instance> slp_instances, + loop_p orig_loop) { slp_instance instance; int i; @@ -5270,6 +5271,30 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo, vector_costs.safe_splice (instance->cost_vec); instance->cost_vec.release (); } + /* When we're vectorizing an if-converted loop body with the + very-cheap cost model make sure we vectorized all if-converted + code. */ + bool force_not_profitable = false; + if (orig_loop && flag_vect_cost_model == VECT_COST_MODEL_VERY_CHEAP) + { + gcc_assert (bb_vinfo->bbs.length () == 1); + for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[0]); + !gsi_end_p (gsi); gsi_next (&gsi)) + { + /* The costing above left us with DCEable vectorized scalar + stmts having the visited flag set. */ + if (gimple_visited_p (gsi_stmt (gsi))) + continue; + + if (gassign *ass = dyn_cast <gassign *> (gsi_stmt (gsi))) + if (gimple_assign_rhs_code (ass) == COND_EXPR) + { + force_not_profitable = true; + break; + } + } + } + /* Unset visited flag. 
*/ stmt_info_for_cost *cost; FOR_EACH_VEC_ELT (scalar_costs, i, cost) @@ -5394,9 +5419,14 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo, return false; } + if (dump_enabled_p () && force_not_profitable) + dump_printf_loc (MSG_NOTE, vect_location, + "not profitable because of unprofitable if-converted " + "scalar code\n"); + scalar_costs.release (); vector_costs.release (); - return true; + return !force_not_profitable; } /* qsort comparator for lane defs. */ @@ -5810,7 +5840,8 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal, static bool vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs, - vec<int> *dataref_groups, unsigned int n_stmts) + vec<int> *dataref_groups, unsigned int n_stmts, + loop_p orig_loop) { bb_vec_info bb_vinfo; auto_vector_modes vector_modes; @@ -5859,7 +5890,9 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs, vect_location = instance->location (); if (!unlimited_cost_model (NULL) && !vect_bb_vectorization_profitable_p - (bb_vinfo, instance->subgraph_entries)) + (bb_vinfo, + orig_loop ? BB_VINFO_SLP_INSTANCES (bb_vinfo) + : instance->subgraph_entries, orig_loop)) { if (dump_enabled_p ()) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, @@ -5877,7 +5910,9 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs, "using SLP\n"); vectorized = true; - vect_schedule_slp (bb_vinfo, instance->subgraph_entries); + vect_schedule_slp (bb_vinfo, + orig_loop ? BB_VINFO_SLP_INSTANCES (bb_vinfo) + : instance->subgraph_entries); unsigned HOST_WIDE_INT bytes; if (dump_enabled_p ()) @@ -5892,6 +5927,11 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs, "basic block part vectorized using " "variable length vectors\n"); } + + /* When we're called from loop vectorization we're considering + all subgraphs at once. */ + if (orig_loop) + break; } } else @@ -5959,7 +5999,7 @@ vect_slp_region (vec<basic_block> bbs, vec<data_reference_p> datarefs, true if anything in the basic-block was vectorized. */ static bool -vect_slp_bbs (const vec<basic_block> &bbs) +vect_slp_bbs (const vec<basic_block> &bbs, loop_p orig_loop) { vec<data_reference_p> datarefs = vNULL; auto_vec<int> dataref_groups; @@ -5989,18 +6029,20 @@ vect_slp_bbs (const vec<basic_block> &bbs) ++current_group; } - return vect_slp_region (bbs, datarefs, &dataref_groups, insns); + return vect_slp_region (bbs, datarefs, &dataref_groups, insns, orig_loop); } -/* Main entry for the BB vectorizer. Analyze and transform BB, returns - true if anything in the basic-block was vectorized. */ +/* Special entry for the BB vectorizer. Analyze and transform a single + if-converted BB with ORIG_LOOP's body being the non-if-converted + representation. Returns true if anything in the basic-block was + vectorized. */ bool -vect_slp_bb (basic_block bb) +vect_slp_if_converted_bb (basic_block bb, loop_p orig_loop) { auto_vec<basic_block> bbs; bbs.safe_push (bb); - return vect_slp_bbs (bbs); + return vect_slp_bbs (bbs, orig_loop); } /* Main entry for the BB vectorizer. 
Analyze and transform BB, returns @@ -6051,7 +6093,7 @@ vect_slp_function (function *fun) if (split && !bbs.is_empty ()) { - r |= vect_slp_bbs (bbs); + r |= vect_slp_bbs (bbs, NULL); bbs.truncate (0); bbs.quick_push (bb); } @@ -6069,13 +6111,13 @@ vect_slp_function (function *fun) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "splitting region at control altering " "definition %G", last); - r |= vect_slp_bbs (bbs); + r |= vect_slp_bbs (bbs, NULL); bbs.truncate (0); } } if (!bbs.is_empty ()) - r |= vect_slp_bbs (bbs); + r |= vect_slp_bbs (bbs, NULL); free (rpo); diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index 813f468..3aa3e2a 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -1033,10 +1033,7 @@ try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab, only non-if-converted parts took part in BB vectorization. */ if (flag_tree_slp_vectorize != 0 && loop_vectorized_call - && ! loop->inner - /* This would purely be a workaround and should be removed - once PR100089 is fixed. */ - && flag_vect_cost_model != VECT_COST_MODEL_VERY_CHEAP) + && ! loop->inner) { basic_block bb = loop->header; bool require_loop_vectorize = false; @@ -1062,12 +1059,17 @@ try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab, gimple_set_uid (stmt, -1); gimple_set_visited (stmt, false); } - if (!require_loop_vectorize && vect_slp_bb (bb)) + if (!require_loop_vectorize) { - fold_loop_internal_call (loop_vectorized_call, - boolean_true_node); - loop_vectorized_call = NULL; - ret |= TODO_cleanup_cfg | TODO_update_ssa_only_virtuals; + tree arg = gimple_call_arg (loop_vectorized_call, 1); + class loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg)); + if (vect_slp_if_converted_bb (bb, scalar_loop)) + { + fold_loop_internal_call (loop_vectorized_call, + boolean_true_node); + loop_vectorized_call = NULL; + ret |= TODO_cleanup_cfg | TODO_update_ssa_only_virtuals; + } } } /* If outer loop vectorization fails for LOOP_VECTORIZED guarded diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 9c2c29d..72e018e 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -2087,7 +2087,7 @@ extern void vect_gather_slp_loads (vec_info *); extern void vect_get_slp_defs (slp_tree, vec<tree> *); extern void vect_get_slp_defs (vec_info *, slp_tree, vec<vec<tree> > *, unsigned n = -1U); -extern bool vect_slp_bb (basic_block); +extern bool vect_slp_if_converted_bb (basic_block bb, loop_p orig_loop); extern bool vect_slp_function (function *); extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree); extern stmt_vec_info vect_find_first_scalar_stmt_in_slp (slp_tree); diff --git a/gcc/value-relation.cc b/gcc/value-relation.cc index bcfe388..8edd98b 100644 --- a/gcc/value-relation.cc +++ b/gcc/value-relation.cc @@ -112,7 +112,7 @@ relation_kind rr_intersect_table[VREL_COUNT][VREL_COUNT] = { { NE_EXPR, LT_EXPR, LT_EXPR, GT_EXPR, GT_EXPR, VREL_EMPTY, VREL_EMPTY, NE_EXPR } }; -// Intersect relation R! with relation R2 and return the resulting relation. +// Intersect relation R1 with relation R2 and return the resulting relation. relation_kind relation_intersect (relation_kind r1, relation_kind r2) @@ -155,6 +155,39 @@ relation_union (relation_kind r1, relation_kind r2) } +// This table is used to determine transitivity between 2 relations. 
+// (A relation0 B) and (B relation1 C) implies (A result C) + +relation_kind rr_transitive_table[VREL_COUNT][VREL_COUNT] = { +// NONE, LT_EXPR, LE_EXPR, GT_EXPR, GE_EXPR, EMPTY, EQ_EXPR, NE_EXPR +// VREL_NONE + { VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE }, +// LT_EXPR + { VREL_NONE, LT_EXPR, LT_EXPR, VREL_NONE, VREL_NONE, VREL_NONE, LT_EXPR, VREL_NONE }, +// LE_EXPR + { VREL_NONE, LT_EXPR, LE_EXPR, VREL_NONE, VREL_NONE, VREL_NONE, LE_EXPR, VREL_NONE }, +// GT_EXPR + { VREL_NONE, VREL_NONE, VREL_NONE, GT_EXPR, GT_EXPR, VREL_NONE, GT_EXPR, VREL_NONE }, +// GE_EXPR + { VREL_NONE, VREL_NONE, VREL_NONE, GT_EXPR, GE_EXPR, VREL_NONE, GE_EXPR, VREL_NONE }, +// VREL_EMPTY + { VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE }, +// EQ_EXPR + { VREL_NONE, LT_EXPR, LE_EXPR, GT_EXPR, GE_EXPR, VREL_NONE, EQ_EXPR, VREL_NONE }, +// NE_EXPR + { VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE, VREL_NONE } }; + +// Apply transitive operation between relation R1 and relation R2, and +// return the resulting relation, if any. + +relation_kind +relation_transitive (relation_kind r1, relation_kind r2) +{ + vrel_range_assert (r1); + vrel_range_assert (r2); + return rr_transitive_table[r1 - VREL_FIRST][r2 - VREL_FIRST]; +} + // ------------------------------------------------------------------------- // This class represents an equivalency set, and contains a link to the next @@ -472,7 +505,7 @@ public: bool union_ (value_relation &p); bool intersect (value_relation &p); void negate (); - void swap (); + bool apply_transitive (const value_relation &rel); void dump (FILE *f) const; private: @@ -517,14 +550,6 @@ value_relation::negate () related = relation_negate (related); } -// Modify the relation as if the operands were being swapped. - -void -value_relation::swap () -{ - related = relation_swap (related); -} - // Perform an intersection between 2 relations. *this &&= p. bool @@ -561,6 +586,73 @@ value_relation::union_ (value_relation &p) return old != related; } +// Identify and apply any transitive relations between REL +// and THIS. Return true if there was a transformation. + +bool +value_relation::apply_transitive (const value_relation &rel) +{ + relation_kind k = VREL_NONE; + + // Identify any common operand, and normalize the relations to + // the form: A < B B < C produces A < C + if (rel.op1 () == name2) + { + // A < B B < C + if (rel.op2 () == name1) + return false; + k = relation_transitive (kind (), rel.kind ()); + if (k != VREL_NONE) + { + related = k; + name2 = rel.op2 (); + return true; + } + } + else if (rel.op1 () == name1) + { + // B > A B < C + if (rel.op2 () == name2) + return false; + k = relation_transitive (relation_swap (kind ()), rel.kind ()); + if (k != VREL_NONE) + { + related = k; + name1 = name2; + name2 = rel.op2 (); + return true; + } + } + else if (rel.op2 () == name2) + { + // A < B C > B + if (rel.op1 () == name1) + return false; + k = relation_transitive (kind (), relation_swap (rel.kind ())); + if (k != VREL_NONE) + { + related = k; + name2 = rel.op1 (); + return true; + } + } + else if (rel.op2 () == name1) + { + // B > A C > B + if (rel.op1 () == name2) + return false; + k = relation_transitive (relation_swap (kind ()), + relation_swap (rel.kind ())); + if (k != VREL_NONE) + { + related = k; + name1 = name2; + name2 = rel.op1 (); + return true; + } + } + return false; +} // Dump the relation to file F. 
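The table above encodes ordinary order transitivity: a strict relation combined with a non-strict one stays strict (A < B and B <= C gives A < C), EQ_EXPR passes the other relation through unchanged, and NE_EXPR never chains. Below is a self-contained sketch of the lookup; it is an illustration only, not GCC code, and the enum and table are a toy subset mirroring the LT/LE/GT/GE rows of rr_transitive_table.

#include <stdio.h>

/* Toy subset of relation_kind; order matches the table rows below.  */
enum rel { R_NONE, R_LT, R_LE, R_GT, R_GE };

/* transitive[r1][r2]: (A r1 B) and (B r2 C) implies (A result C).  */
static const enum rel transitive[5][5] = {
  /*           NONE    LT      LE      GT      GE    */
  /* NONE */ { R_NONE, R_NONE, R_NONE, R_NONE, R_NONE },
  /* LT   */ { R_NONE, R_LT,   R_LT,   R_NONE, R_NONE },
  /* LE   */ { R_NONE, R_LT,   R_LE,   R_NONE, R_NONE },
  /* GT   */ { R_NONE, R_NONE, R_NONE, R_GT,   R_GT   },
  /* GE   */ { R_NONE, R_NONE, R_NONE, R_GT,   R_GE   },
};

static const char *rel_name[] = { "(none)", "<", "<=", ">", ">=" };

int main (void)
{
  /* (A < B) and (B <= C): the strict relation survives, so A < C.  */
  printf ("A %s C\n", rel_name[transitive[R_LT][R_LE]]);
  /* (A < B) and (B > C) do not chain; the table yields no relation,
     which is why apply_transitive normalizes operand order with
     relation_swap before doing the lookup.  */
  printf ("A %s C\n", rel_name[transitive[R_LT][R_GT]]);
  return 0;
}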
@@ -597,6 +689,7 @@ relation_oracle::relation_oracle () m_relations.safe_grow_cleared (last_basic_block_for_fn (cfun) + 1); m_relation_set = BITMAP_ALLOC (&m_bitmaps); m_tmp = BITMAP_ALLOC (&m_bitmaps); + m_tmp2 = BITMAP_ALLOC (&m_bitmaps); } // Destruct a relation oracle. @@ -669,10 +762,12 @@ relation_oracle::register_relation (edge e, relation_kind k, tree op1, // Register relation K between OP1 and OP2 in block BB. // This creates the record and searches for existing records in the dominator // tree to merge with. +// TRANSITIVE_P is true if this is being registered as a transitive operation, +// and should not try to register further transitives. void relation_oracle::register_relation (basic_block bb, relation_kind k, tree op1, - tree op2) + tree op2, bool transitive_p) { gcc_checking_assert (k != VREL_NONE); @@ -710,26 +805,160 @@ relation_oracle::register_relation (basic_block bb, relation_kind k, tree op1, ptr->dump (dump_file); fprintf (dump_file, "\n"); } - return; + } + else + { + // Check for an existing relation further up the DOM chain. + // By including dominating relations, the first one found in any search + // will be the aggregate of all the previous ones. + curr = find_relation_dom (bb, v1, v2); + if (curr != VREL_NONE) + k = relation_intersect (curr, k); + + bitmap_set_bit (bm, v1); + bitmap_set_bit (bm, v2); + bitmap_set_bit (m_relation_set, v1); + bitmap_set_bit (m_relation_set, v2); + + ptr = (relation_chain *) obstack_alloc (&m_chain_obstack, + sizeof (relation_chain)); + ptr->set_relation (k, op1, op2); + ptr->m_next = m_relations[bbi].m_head; + m_relations[bbi].m_head = ptr; } - // Check for an existing relation further up the DOM chain. - // By including dominating relations, The first one found in any search - // will be the aggregate of all the previous ones. - curr = find_relation_dom (bb, v1, v2); - if (curr != VREL_NONE) - k = relation_intersect (curr, k); - - bitmap_set_bit (bm, v1); - bitmap_set_bit (bm, v2); - bitmap_set_bit (m_relation_set, v1); - bitmap_set_bit (m_relation_set, v2); - - ptr = (relation_chain *) obstack_alloc (&m_chain_obstack, - sizeof (relation_chain)); - ptr->set_relation (k, op1, op2); - ptr->m_next = m_relations[bbi].m_head; - m_relations[bbi].m_head = ptr;; + if (!transitive_p) + register_transitives (bb, *ptr); +} + +// Starting at ROOT_BB search the DOM tree looking for relations which +// may produce transitive relations to RELATION. EQUIV1 and EQUIV2 are +// bitmaps for op1/op2 and any of their equivalences that should also be +// considered. + +void +relation_oracle::register_transitives (basic_block root_bb, + const value_relation &relation, + const_bitmap equiv1, + const_bitmap equiv2) +{ + basic_block bb; + for (bb = root_bb; bb; bb = get_immediate_dominator (CDI_DOMINATORS, bb)) + { + int bbi = bb->index; + if (bbi >= (int)m_relations.length()) + continue; + const_bitmap bm = m_relations[bbi].m_names; + if (!bm) + continue; + if (!bitmap_intersect_p (bm, equiv1) && !bitmap_intersect_p (bm, equiv2)) + continue; + // At least one of the 2 ops has a relation in this block. + relation_chain *ptr; + for (ptr = m_relations[bbi].m_head; ptr ; ptr = ptr->m_next) + { + // In the presence of an equivalence, 2 operands may not + // naturally match. ie with equivalence a_2 == b_3 + // given c_1 < a_2 && b_3 < d_4 + // convert the second relation (b_3 < d_4) to match any + // equivalences found in the first relation. 
+ // ie convert b_3 < d_4 to a_2 < d_4, which then exposes the + // transitive operation: c_1 < a_2 && a_2 < d_4 -> c_1 < d_4 + + tree r1, r2; + tree p1 = ptr->op1 (); + tree p2 = ptr->op2 (); + // Find which equivalence is in the first operand. + if (bitmap_bit_p (equiv1, SSA_NAME_VERSION (p1))) + r1 = p1; + else if (bitmap_bit_p (equiv1, SSA_NAME_VERSION (p2))) + r1 = p2; + else + r1 = NULL_TREE; + + // Find which equivalence is in the second operand. + if (bitmap_bit_p (equiv2, SSA_NAME_VERSION (p1))) + r2 = p1; + else if (bitmap_bit_p (equiv2, SSA_NAME_VERSION (p2))) + r2 = p2; + else + r2 = NULL_TREE; + + // Ignore if both NULL (not relevant relation) or the same. + if (r1 == r2) + continue; + + // Any operand not an equivalence, just take the real operand. + if (!r1) + r1 = relation.op1 (); + if (!r2) + r2 = relation.op2 (); + + value_relation nr (relation.kind (), r1, r2); + if (nr.apply_transitive (*ptr)) + { + register_relation (root_bb, nr.kind (), nr.op1 (), nr.op2 (), + true); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, " Registering transitive relation "); + nr.dump (dump_file); + fputc ('\n', dump_file); + } + } + + } + } +} + +// Find and register any transitive relations implied by RELATION occurring +// in block BB. + +void +relation_oracle::register_transitives (basic_block bb, + const value_relation &relation) +{ + // Only apply transitives to certain kinds of operations. + switch (relation.kind ()) + { + case LE_EXPR: + case LT_EXPR: + case GT_EXPR: + case GE_EXPR: + break; + default: + return; + } + + // Set up the bitmaps for op1 and op2, and if there are no equivalencies, + // set just op1 or op2 in their own bitmap. + const_bitmap equiv1 = equiv_set (relation.op1 (), bb); + const_bitmap equiv2 = equiv_set (relation.op2 (), bb); + if (equiv1) + { + if (equiv2) + register_transitives (bb, relation, equiv1, equiv2); + else + { + bitmap_clear (m_tmp); + bitmap_set_bit (m_tmp, SSA_NAME_VERSION (relation.op2 ())); + register_transitives (bb, relation, equiv1, m_tmp); + } + } + else if (equiv2) + { + bitmap_clear (m_tmp); + bitmap_set_bit (m_tmp, SSA_NAME_VERSION (relation.op1 ())); + register_transitives (bb, relation, m_tmp, equiv2); + } + else + { + bitmap_clear (m_tmp); + bitmap_clear (m_tmp2); + bitmap_set_bit (m_tmp, SSA_NAME_VERSION (relation.op1 ())); + bitmap_set_bit (m_tmp2, SSA_NAME_VERSION (relation.op2 ())); + register_transitives (bb, relation, m_tmp, m_tmp2); + } } // Find the relation between any ssa_name in B1 and any name in B2 in block BB. diff --git a/gcc/value-relation.h b/gcc/value-relation.h index 1148854..e0e2f82 100644 --- a/gcc/value-relation.h +++ b/gcc/value-relation.h @@ -143,7 +143,7 @@ public: void dump (FILE *f, basic_block bb) const; void dump (FILE *f) const; private: - bitmap m_tmp; + bitmap m_tmp, m_tmp2; bitmap m_relation_set; // Index by ssa-name. True if a relation exists vec <relation_chain_head> m_relations; // Index by BB, list of relations. 
relation_kind find_relation_block (unsigned bb, const_bitmap b1, @@ -153,7 +153,12 @@ private: relation_kind find_relation_block (int bb, unsigned v1, unsigned v2, relation_chain **obj = NULL); relation_kind find_relation_dom (basic_block bb, unsigned v1, unsigned v2); - void register_relation (basic_block bb, relation_kind k, tree op1, tree op2); + void register_relation (basic_block bb, relation_kind k, tree op1, tree op2, + bool transitive_p = false); + void register_transitives (basic_block, const class value_relation &); + void register_transitives (basic_block, const value_relation &, const_bitmap, + const_bitmap); + }; #endif /* GCC_VALUE_RELATION_H */
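A closing note on the i386 side of the merge: the constant 202 in the pr101989 tests above is a vpternlog truth-table immediate. Bit i of the imm8 holds the desired result for inputs a = (i >> 2) & 1, b = (i >> 1) & 1, c = i & 1, with the first source supplying the high bit of the index, so the bitwise select (a & b) | (~a & c), the combination those tests expect to collapse into a single vpternlog, encodes as 0xCA = 202. The sketch below derives the immediate; it is an illustration rather than part of the patch, and ternlog_imm and select3 are made-up names.

#include <stdio.h>

/* Build a vpternlog imm8 from a 3-input boolean function: bit i of the
   immediate is f(a, b, c) with a, b, c taken from bits 2, 1, 0 of i.  */
static unsigned
ternlog_imm (int (*f) (int, int, int))
{
  unsigned imm = 0;
  for (int i = 0; i < 8; i++)
    if (f ((i >> 2) & 1, (i >> 1) & 1, i & 1))
      imm |= 1u << i;
  return imm;
}

/* Bitwise select: take b where a is set, else take c.  This is the
   (mask & x) | (~mask & y) pattern from copysign2_pd above.  */
static int
select3 (int a, int b, int c)
{
  return (a & b) | (!a & c);
}

int main (void)
{
  unsigned imm = ternlog_imm (select3);
  printf ("0x%02x = %u\n", imm, imm);  /* 0xca = 202 */
  return 0;
}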